Counter Strike : Global Offensive Source Code
/*++

Copyright (c) Microsoft Corporation. All rights reserved.

Module Name:

    xnamathmatrix.inl

Abstract:

    XNA math library for Windows and Xbox 360: Matrix functions

--*/

#if defined(_MSC_VER) && (_MSC_VER > 1000)
#pragma once
#endif

#ifndef __XNAMATHMATRIX_INL__
#define __XNAMATHMATRIX_INL__

/****************************************************************************
*
* Matrix
*
****************************************************************************/

//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// Return TRUE if any entry in the matrix is NaN
XMFINLINE BOOL XMMatrixIsNaN
(
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT i, uTest;
    const UINT *pWork;
    i = 16;
    pWork = (const UINT *)(&M.m[0][0]);
    do {
        // Fetch value into integer unit
        uTest = pWork[0];
        // Remove sign
        uTest &= 0x7FFFFFFFU;
        // NaN is 0x7F800001 through 0x7FFFFFFF inclusive
        uTest -= 0x7F800001U;
        if (uTest < 0x007FFFFFU) {
            break;      // NaN found
        }
        ++pWork;        // Next entry
    } while (--i);
    return (i != 0);    // i == 0 if nothing matched
#elif defined(_XM_SSE_INTRINSICS_)
    // Load in registers
    XMVECTOR vX = M.r[0];
    XMVECTOR vY = M.r[1];
    XMVECTOR vZ = M.r[2];
    XMVECTOR vW = M.r[3];
    // Test themselves to check for NaN
    vX = _mm_cmpneq_ps(vX,vX);
    vY = _mm_cmpneq_ps(vY,vY);
    vZ = _mm_cmpneq_ps(vZ,vZ);
    vW = _mm_cmpneq_ps(vW,vW);
    // Or all the results
    vX = _mm_or_ps(vX,vZ);
    vY = _mm_or_ps(vY,vW);
    vX = _mm_or_ps(vX,vY);
    // If any tested true, return true
    return (_mm_movemask_ps(vX) != 0);
#else
#endif
}
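//------------------------------------------------------------------------------
// Illustrative sketch: the scalar path above detects NaN entirely in the
// integer unit. The same bit trick as a standalone helper (assumes IEEE-754
// 32-bit floats; IsNaNBits is a hypothetical name, not a library function):
//
//   #include <stdint.h>
//   #include <string.h>
//
//   int IsNaNBits(float f)
//   {
//       uint32_t u;
//       memcpy(&u, &f, sizeof(u));   // reinterpret the float's bits
//       u &= 0x7FFFFFFFU;            // drop the sign bit
//       // NaN = all-ones exponent with nonzero mantissa, i.e.
//       // 0x7F800001..0x7FFFFFFF; the unsigned subtract maps that range
//       // onto 0..0x007FFFFE, so a single compare decides it.
//       return (u - 0x7F800001U) < 0x007FFFFFU;
//   }
//
// XMMatrixIsInfinite below uses the same masking idea but tests for exact
// equality with 0x7F800000 (all-ones exponent, zero mantissa).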
//------------------------------------------------------------------------------
// Return TRUE if any entry in the matrix is +/-INF
XMFINLINE BOOL XMMatrixIsInfinite
(
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT i, uTest;
    const UINT *pWork;
    i = 16;
    pWork = (const UINT *)(&M.m[0][0]);
    do {
        // Fetch value into integer unit
        uTest = pWork[0];
        // Remove sign
        uTest &= 0x7FFFFFFFU;
        // INF is 0x7F800000
        if (uTest == 0x7F800000U) {
            break;      // INF found
        }
        ++pWork;        // Next entry
    } while (--i);
    return (i != 0);    // i == 0 if nothing matched
#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the sign bits
    XMVECTOR vTemp1 = _mm_and_ps(M.r[0],g_XMAbsMask);
    XMVECTOR vTemp2 = _mm_and_ps(M.r[1],g_XMAbsMask);
    XMVECTOR vTemp3 = _mm_and_ps(M.r[2],g_XMAbsMask);
    XMVECTOR vTemp4 = _mm_and_ps(M.r[3],g_XMAbsMask);
    // Compare to infinity
    vTemp1 = _mm_cmpeq_ps(vTemp1,g_XMInfinity);
    vTemp2 = _mm_cmpeq_ps(vTemp2,g_XMInfinity);
    vTemp3 = _mm_cmpeq_ps(vTemp3,g_XMInfinity);
    vTemp4 = _mm_cmpeq_ps(vTemp4,g_XMInfinity);
    // Or the answers together
    vTemp1 = _mm_or_ps(vTemp1,vTemp2);
    vTemp3 = _mm_or_ps(vTemp3,vTemp4);
    vTemp1 = _mm_or_ps(vTemp1,vTemp3);
    // If any entry was infinity, the corresponding mask bits are set
    return (_mm_movemask_ps(vTemp1) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Return TRUE if the XMMatrix is equal to identity
XMFINLINE BOOL XMMatrixIsIdentity
(
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    unsigned int uOne, uZero;
    const unsigned int *pWork;
    // Use the integer pipeline to reduce branching to a minimum
    pWork = (const unsigned int*)(&M.m[0][0]);
    // Convert 1.0f to zero and or them together
    uOne = pWork[0] ^ 0x3F800000U;
    // Or all the 0.0f entries together
    uZero = pWork[1];
    uZero |= pWork[2];
    uZero |= pWork[3];
    // 2nd row
    uZero |= pWork[4];
    uOne |= pWork[5] ^ 0x3F800000U;
    uZero |= pWork[6];
    uZero |= pWork[7];
    // 3rd row
    uZero |= pWork[8];
    uZero |= pWork[9];
    uOne |= pWork[10] ^ 0x3F800000U;
    uZero |= pWork[11];
    // 4th row
    uZero |= pWork[12];
    uZero |= pWork[13];
    uZero |= pWork[14];
    uOne |= pWork[15] ^ 0x3F800000U;
    // If all zero entries are zero, then uZero == 0
    uZero &= 0x7FFFFFFF;    // Allow -0.0f
    // If all 1.0f entries are 1.0f, then uOne == 0
    uOne |= uZero;
    return (uOne == 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp1 = _mm_cmpeq_ps(M.r[0],g_XMIdentityR0);
    XMVECTOR vTemp2 = _mm_cmpeq_ps(M.r[1],g_XMIdentityR1);
    XMVECTOR vTemp3 = _mm_cmpeq_ps(M.r[2],g_XMIdentityR2);
    XMVECTOR vTemp4 = _mm_cmpeq_ps(M.r[3],g_XMIdentityR3);
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    vTemp3 = _mm_and_ps(vTemp3,vTemp4);
    vTemp1 = _mm_and_ps(vTemp1,vTemp3);
    return (_mm_movemask_ps(vTemp1) == 0x0f);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
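//------------------------------------------------------------------------------
// Illustrative sketch: the scalar identity test relies on 1.0f having the bit
// pattern 0x3F800000, so XOR with that constant is zero exactly for 1.0f, and
// masking the sign bit makes both +0.0f and -0.0f read as zero. A condensed,
// standalone version of the same idea (IsIdentity4x4 is a hypothetical name):
//
//   #include <stdint.h>
//   #include <string.h>
//
//   int IsIdentity4x4(const float m[16])
//   {
//       uint32_t acc = 0;
//       for (int i = 0; i < 16; ++i) {
//           uint32_t u;
//           memcpy(&u, &m[i], sizeof(u));
//           if (i % 5 == 0)                // diagonal entries 0, 5, 10, 15
//               acc |= u ^ 0x3F800000U;    // 0 iff entry == 1.0f
//           else
//               acc |= u & 0x7FFFFFFFU;    // 0 iff entry == +/-0.0f
//       }
//       return acc == 0;                   // one branch for all 16 entries
//   }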
//------------------------------------------------------------------------------
// Computation operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// Perform a 4x4 matrix multiply by a 4x4 matrix
XMFINLINE XMMATRIX XMMatrixMultiply
(
    CXMMATRIX M1,
    CXMMATRIX M2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX mResult;
    // Cache the invariants in registers
    float x = M1.m[0][0];
    float y = M1.m[0][1];
    float z = M1.m[0][2];
    float w = M1.m[0][3];
    // Perform the operation on the first row
    mResult.m[0][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
    mResult.m[0][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
    mResult.m[0][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
    mResult.m[0][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
    // Repeat for all the other rows
    x = M1.m[1][0];
    y = M1.m[1][1];
    z = M1.m[1][2];
    w = M1.m[1][3];
    mResult.m[1][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
    mResult.m[1][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
    mResult.m[1][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
    mResult.m[1][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
    x = M1.m[2][0];
    y = M1.m[2][1];
    z = M1.m[2][2];
    w = M1.m[2][3];
    mResult.m[2][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
    mResult.m[2][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
    mResult.m[2][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
    mResult.m[2][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
    x = M1.m[3][0];
    y = M1.m[3][1];
    z = M1.m[3][2];
    w = M1.m[3][3];
    mResult.m[3][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
    mResult.m[3][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
    mResult.m[3][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
    mResult.m[3][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
    return mResult;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX mResult;
    // Use vW to hold the original row
    XMVECTOR vW = M1.r[0];
    // Splat the component X,Y,Z then W
    XMVECTOR vX = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(0,0,0,0));
    XMVECTOR vY = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(1,1,1,1));
    XMVECTOR vZ = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(2,2,2,2));
    vW = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(3,3,3,3));
    // Perform the operation on the first row
    vX = _mm_mul_ps(vX,M2.r[0]);
    vY = _mm_mul_ps(vY,M2.r[1]);
    vZ = _mm_mul_ps(vZ,M2.r[2]);
    vW = _mm_mul_ps(vW,M2.r[3]);
    // Perform a binary add to reduce cumulative errors
    vX = _mm_add_ps(vX,vZ);
    vY = _mm_add_ps(vY,vW);
    vX = _mm_add_ps(vX,vY);
    mResult.r[0] = vX;
    // Repeat for the other 3 rows
    vW = M1.r[1];
    vX = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(0,0,0,0));
    vY = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(1,1,1,1));
    vZ = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(2,2,2,2));
    vW = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(3,3,3,3));
    vX = _mm_mul_ps(vX,M2.r[0]);
    vY = _mm_mul_ps(vY,M2.r[1]);
    vZ = _mm_mul_ps(vZ,M2.r[2]);
    vW = _mm_mul_ps(vW,M2.r[3]);
    vX = _mm_add_ps(vX,vZ);
    vY = _mm_add_ps(vY,vW);
    vX = _mm_add_ps(vX,vY);
    mResult.r[1] = vX;
    vW = M1.r[2];
    vX = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(0,0,0,0));
    vY = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(1,1,1,1));
    vZ = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(2,2,2,2));
    vW = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(3,3,3,3));
    vX = _mm_mul_ps(vX,M2.r[0]);
    vY = _mm_mul_ps(vY,M2.r[1]);
    vZ = _mm_mul_ps(vZ,M2.r[2]);
    vW = _mm_mul_ps(vW,M2.r[3]);
    vX = _mm_add_ps(vX,vZ);
    vY = _mm_add_ps(vY,vW);
    vX = _mm_add_ps(vX,vY);
    mResult.r[2] = vX;
    vW = M1.r[3];
    vX = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(0,0,0,0));
    vY = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(1,1,1,1));
    vZ = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(2,2,2,2));
    vW = _mm_shuffle_ps(vW,vW,_MM_SHUFFLE(3,3,3,3));
    vX = _mm_mul_ps(vX,M2.r[0]);
    vY = _mm_mul_ps(vY,M2.r[1]);
    vZ = _mm_mul_ps(vZ,M2.r[2]);
    vW = _mm_mul_ps(vW,M2.r[3]);
    vX = _mm_add_ps(vX,vZ);
    vY = _mm_add_ps(vY,vW);
    vX = _mm_add_ps(vX,vY);
    mResult.r[3] = vX;
    return mResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
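//------------------------------------------------------------------------------
// Usage sketch (illustrative): XNA math treats vectors as rows, so
// XMMatrixMultiply(A, B) yields the matrix that applies A first and B second
// when used with XMVector3Transform. Assuming xnamath.h is included:
//
//   XMMATRIX world = XMMatrixMultiply(
//       XMMatrixRotationY(XM_PIDIV2),            // applied first
//       XMMatrixTranslation(0.0f, 0.0f, 5.0f));  // applied second
//   XMVECTOR p = XMVector3Transform(XMVectorSet(1.0f, 0.0f, 0.0f, 1.0f), world);
//
// In the SSE path, each result row costs four splats (shuffles), four
// multiplies and three adds against whole rows of M2.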
//------------------------------------------------------------------------------
XMFINLINE XMMATRIX XMMatrixMultiplyTranspose
(
    CXMMATRIX M1,
    CXMMATRIX M2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX mResult;
    // Cache the invariants in registers
    float x = M2.m[0][0];
    float y = M2.m[1][0];
    float z = M2.m[2][0];
    float w = M2.m[3][0];
    // Perform the operation on the first row
    mResult.m[0][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
    mResult.m[0][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
    mResult.m[0][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
    mResult.m[0][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
    // Repeat for all the other rows
    x = M2.m[0][1];
    y = M2.m[1][1];
    z = M2.m[2][1];
    w = M2.m[3][1];
    mResult.m[1][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
    mResult.m[1][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
    mResult.m[1][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
    mResult.m[1][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
    x = M2.m[0][2];
    y = M2.m[1][2];
    z = M2.m[2][2];
    w = M2.m[3][2];
    mResult.m[2][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
    mResult.m[2][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
    mResult.m[2][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
    mResult.m[2][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
    x = M2.m[0][3];
    y = M2.m[1][3];
    z = M2.m[2][3];
    w = M2.m[3][3];
    mResult.m[3][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
    mResult.m[3][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
    mResult.m[3][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
    mResult.m[3][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
    return mResult;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX Product;
    XMMATRIX Result;
    Product = XMMatrixMultiply(M1, M2);
    Result = XMMatrixTranspose(Product);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
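//------------------------------------------------------------------------------
// Why this works: (M1 * M2)^T == M2^T * M1^T, so the scalar path above can
// emit the product directly in transposed order (walking M2's columns), while
// the SSE path simply multiplies and then transposes the result.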
//------------------------------------------------------------------------------
XMFINLINE XMMATRIX XMMatrixTranspose
(
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX P;
    XMMATRIX MT;
    // Original matrix:
    //
    //     m00 m01 m02 m03
    //     m10 m11 m12 m13
    //     m20 m21 m22 m23
    //     m30 m31 m32 m33
    P.r[0] = XMVectorMergeXY(M.r[0], M.r[2]);  // m00 m20 m01 m21
    P.r[1] = XMVectorMergeXY(M.r[1], M.r[3]);  // m10 m30 m11 m31
    P.r[2] = XMVectorMergeZW(M.r[0], M.r[2]);  // m02 m22 m03 m23
    P.r[3] = XMVectorMergeZW(M.r[1], M.r[3]);  // m12 m32 m13 m33
    MT.r[0] = XMVectorMergeXY(P.r[0], P.r[1]); // m00 m10 m20 m30
    MT.r[1] = XMVectorMergeZW(P.r[0], P.r[1]); // m01 m11 m21 m31
    MT.r[2] = XMVectorMergeXY(P.r[2], P.r[3]); // m02 m12 m22 m32
    MT.r[3] = XMVectorMergeZW(P.r[2], P.r[3]); // m03 m13 m23 m33
    return MT;
#elif defined(_XM_SSE_INTRINSICS_)
    // x.x,x.y,y.x,y.y
    XMVECTOR vTemp1 = _mm_shuffle_ps(M.r[0],M.r[1],_MM_SHUFFLE(1,0,1,0));
    // x.z,x.w,y.z,y.w
    XMVECTOR vTemp3 = _mm_shuffle_ps(M.r[0],M.r[1],_MM_SHUFFLE(3,2,3,2));
    // z.x,z.y,w.x,w.y
    XMVECTOR vTemp2 = _mm_shuffle_ps(M.r[2],M.r[3],_MM_SHUFFLE(1,0,1,0));
    // z.z,z.w,w.z,w.w
    XMVECTOR vTemp4 = _mm_shuffle_ps(M.r[2],M.r[3],_MM_SHUFFLE(3,2,3,2));
    XMMATRIX mResult;
    // x.x,y.x,z.x,w.x
    mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0));
    // x.y,y.y,z.y,w.y
    mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(3,1,3,1));
    // x.z,y.z,z.z,w.z
    mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0));
    // x.w,y.w,z.w,w.w
    mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1));
    return mResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
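//------------------------------------------------------------------------------
// Illustrative note: the SSE path is the classic eight-shuffle 4x4 transpose;
// it first pairs rows into xy/zw halves, then interleaves across the pairs.
// It matches the effect of the _MM_TRANSPOSE4_PS macro from <xmmintrin.h>:
//
//   __m128 r0 = M.r[0], r1 = M.r[1], r2 = M.r[2], r3 = M.r[3];
//   _MM_TRANSPOSE4_PS(r0, r1, r2, r3);   // r0..r3 now hold the columns of M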
//------------------------------------------------------------------------------
// Return the inverse and the determinant of a 4x4 matrix
XMINLINE XMMATRIX XMMatrixInverse
(
    XMVECTOR* pDeterminant,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX R;
    XMMATRIX MT;
    XMVECTOR D0, D1, D2;
    XMVECTOR C0, C1, C2, C3, C4, C5, C6, C7;
    XMVECTOR V0[4], V1[4];
    XMVECTOR Determinant;
    XMVECTOR Reciprocal;
    XMMATRIX Result;
    static CONST XMVECTORU32 SwizzleXXYY = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_0Y};
    static CONST XMVECTORU32 SwizzleZWZW = {XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_0Z, XM_PERMUTE_0W};
    static CONST XMVECTORU32 SwizzleYZXY = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0X, XM_PERMUTE_0Y};
    static CONST XMVECTORU32 SwizzleZWYZ = {XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_0Y, XM_PERMUTE_0Z};
    static CONST XMVECTORU32 SwizzleWXWX = {XM_PERMUTE_0W, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_0X};
    static CONST XMVECTORU32 SwizzleZXYX = {XM_PERMUTE_0Z, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_0X};
    static CONST XMVECTORU32 SwizzleYWXZ = {XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_0X, XM_PERMUTE_0Z};
    static CONST XMVECTORU32 SwizzleWZWY = {XM_PERMUTE_0W, XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_0Y};
    static CONST XMVECTORU32 Permute0X0Z1X1Z = {XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Z};
    static CONST XMVECTORU32 Permute0Y0W1Y1W = {XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_1W};
    static CONST XMVECTORU32 Permute1Y0Y0W0X = {XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_0X};
    static CONST XMVECTORU32 Permute0W0X0Y1X = {XM_PERMUTE_0W, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_1X};
    static CONST XMVECTORU32 Permute0Z1Y1X0Z = {XM_PERMUTE_0Z, XM_PERMUTE_1Y, XM_PERMUTE_1X, XM_PERMUTE_0Z};
    static CONST XMVECTORU32 Permute0W1Y0Y0Z = {XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_0Z};
    static CONST XMVECTORU32 Permute0Z0Y1X0X = {XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_0X};
    static CONST XMVECTORU32 Permute1Y0X0W1X = {XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1X};
    static CONST XMVECTORU32 Permute1W0Y0W0X = {XM_PERMUTE_1W, XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_0X};
    static CONST XMVECTORU32 Permute0W0X0Y1Z = {XM_PERMUTE_0W, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_1Z};
    static CONST XMVECTORU32 Permute0Z1W1Z0Z = {XM_PERMUTE_0Z, XM_PERMUTE_1W, XM_PERMUTE_1Z, XM_PERMUTE_0Z};
    static CONST XMVECTORU32 Permute0W1W0Y0Z = {XM_PERMUTE_0W, XM_PERMUTE_1W, XM_PERMUTE_0Y, XM_PERMUTE_0Z};
    static CONST XMVECTORU32 Permute0Z0Y1Z0X = {XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1Z, XM_PERMUTE_0X};
    static CONST XMVECTORU32 Permute1W0X0W1Z = {XM_PERMUTE_1W, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1Z};
    XMASSERT(pDeterminant);
    MT = XMMatrixTranspose(M);
    V0[0] = XMVectorPermute(MT.r[2], MT.r[2], SwizzleXXYY.v);
    V1[0] = XMVectorPermute(MT.r[3], MT.r[3], SwizzleZWZW.v);
    V0[1] = XMVectorPermute(MT.r[0], MT.r[0], SwizzleXXYY.v);
    V1[1] = XMVectorPermute(MT.r[1], MT.r[1], SwizzleZWZW.v);
    V0[2] = XMVectorPermute(MT.r[2], MT.r[0], Permute0X0Z1X1Z.v);
    V1[2] = XMVectorPermute(MT.r[3], MT.r[1], Permute0Y0W1Y1W.v);
    D0 = XMVectorMultiply(V0[0], V1[0]);
    D1 = XMVectorMultiply(V0[1], V1[1]);
    D2 = XMVectorMultiply(V0[2], V1[2]);
    V0[0] = XMVectorPermute(MT.r[2], MT.r[2], SwizzleZWZW.v);
    V1[0] = XMVectorPermute(MT.r[3], MT.r[3], SwizzleXXYY.v);
    V0[1] = XMVectorPermute(MT.r[0], MT.r[0], SwizzleZWZW.v);
    V1[1] = XMVectorPermute(MT.r[1], MT.r[1], SwizzleXXYY.v);
    V0[2] = XMVectorPermute(MT.r[2], MT.r[0], Permute0Y0W1Y1W.v);
    V1[2] = XMVectorPermute(MT.r[3], MT.r[1], Permute0X0Z1X1Z.v);
    D0 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], D0);
    D1 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], D1);
    D2 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], D2);
    V0[0] = XMVectorPermute(MT.r[1], MT.r[1], SwizzleYZXY.v);
    V1[0] = XMVectorPermute(D0, D2, Permute1Y0Y0W0X.v);
    V0[1] = XMVectorPermute(MT.r[0], MT.r[0], SwizzleZXYX.v);
    V1[1] = XMVectorPermute(D0, D2, Permute0W1Y0Y0Z.v);
    V0[2] = XMVectorPermute(MT.r[3], MT.r[3], SwizzleYZXY.v);
    V1[2] = XMVectorPermute(D1, D2, Permute1W0Y0W0X.v);
    V0[3] = XMVectorPermute(MT.r[2], MT.r[2], SwizzleZXYX.v);
    V1[3] = XMVectorPermute(D1, D2, Permute0W1W0Y0Z.v);
    C0 = XMVectorMultiply(V0[0], V1[0]);
    C2 = XMVectorMultiply(V0[1], V1[1]);
    C4 = XMVectorMultiply(V0[2], V1[2]);
    C6 = XMVectorMultiply(V0[3], V1[3]);
    V0[0] = XMVectorPermute(MT.r[1], MT.r[1], SwizzleZWYZ.v);
    V1[0] = XMVectorPermute(D0, D2, Permute0W0X0Y1X.v);
    V0[1] = XMVectorPermute(MT.r[0], MT.r[0], SwizzleWZWY.v);
    V1[1] = XMVectorPermute(D0, D2, Permute0Z0Y1X0X.v);
    V0[2] = XMVectorPermute(MT.r[3], MT.r[3], SwizzleZWYZ.v);
    V1[2] = XMVectorPermute(D1, D2, Permute0W0X0Y1Z.v);
    V0[3] = XMVectorPermute(MT.r[2], MT.r[2], SwizzleWZWY.v);
    V1[3] = XMVectorPermute(D1, D2, Permute0Z0Y1Z0X.v);
    C0 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0);
    C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2);
    C4 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4);
    C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6);
    V0[0] = XMVectorPermute(MT.r[1], MT.r[1], SwizzleWXWX.v);
    V1[0] = XMVectorPermute(D0, D2, Permute0Z1Y1X0Z.v);
    V0[1] = XMVectorPermute(MT.r[0], MT.r[0], SwizzleYWXZ.v);
    V1[1] = XMVectorPermute(D0, D2, Permute1Y0X0W1X.v);
    V0[2] = XMVectorPermute(MT.r[3], MT.r[3], SwizzleWXWX.v);
    V1[2] = XMVectorPermute(D1, D2, Permute0Z1W1Z0Z.v);
    V0[3] = XMVectorPermute(MT.r[2], MT.r[2], SwizzleYWXZ.v);
    V1[3] = XMVectorPermute(D1, D2, Permute1W0X0W1Z.v);
    C1 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0);
    C0 = XMVectorMultiplyAdd(V0[0], V1[0], C0);
    C3 = XMVectorMultiplyAdd(V0[1], V1[1], C2);
    C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2);
    C5 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4);
    C4 = XMVectorMultiplyAdd(V0[2], V1[2], C4);
    C7 = XMVectorMultiplyAdd(V0[3], V1[3], C6);
    C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6);
    R.r[0] = XMVectorSelect(C0, C1, g_XMSelect0101.v);
    R.r[1] = XMVectorSelect(C2, C3, g_XMSelect0101.v);
    R.r[2] = XMVectorSelect(C4, C5, g_XMSelect0101.v);
    R.r[3] = XMVectorSelect(C6, C7, g_XMSelect0101.v);
    Determinant = XMVector4Dot(R.r[0], MT.r[0]);
    *pDeterminant = Determinant;
    Reciprocal = XMVectorReciprocal(Determinant);
    Result.r[0] = XMVectorMultiply(R.r[0], Reciprocal);
    Result.r[1] = XMVectorMultiply(R.r[1], Reciprocal);
    Result.r[2] = XMVectorMultiply(R.r[2], Reciprocal);
    Result.r[3] = XMVectorMultiply(R.r[3], Reciprocal);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDeterminant);
    XMMATRIX MT = XMMatrixTranspose(M);
    XMVECTOR V00 = _mm_shuffle_ps(MT.r[2], MT.r[2],_MM_SHUFFLE(1,1,0,0));
    XMVECTOR V10 = _mm_shuffle_ps(MT.r[3], MT.r[3],_MM_SHUFFLE(3,2,3,2));
    XMVECTOR V01 = _mm_shuffle_ps(MT.r[0], MT.r[0],_MM_SHUFFLE(1,1,0,0));
    XMVECTOR V11 = _mm_shuffle_ps(MT.r[1], MT.r[1],_MM_SHUFFLE(3,2,3,2));
    XMVECTOR V02 = _mm_shuffle_ps(MT.r[2], MT.r[0],_MM_SHUFFLE(2,0,2,0));
    XMVECTOR V12 = _mm_shuffle_ps(MT.r[3], MT.r[1],_MM_SHUFFLE(3,1,3,1));
    XMVECTOR D0 = _mm_mul_ps(V00,V10);
    XMVECTOR D1 = _mm_mul_ps(V01,V11);
    XMVECTOR D2 = _mm_mul_ps(V02,V12);
    V00 = _mm_shuffle_ps(MT.r[2],MT.r[2],_MM_SHUFFLE(3,2,3,2));
    V10 = _mm_shuffle_ps(MT.r[3],MT.r[3],_MM_SHUFFLE(1,1,0,0));
    V01 = _mm_shuffle_ps(MT.r[0],MT.r[0],_MM_SHUFFLE(3,2,3,2));
    V11 = _mm_shuffle_ps(MT.r[1],MT.r[1],_MM_SHUFFLE(1,1,0,0));
    V02 = _mm_shuffle_ps(MT.r[2],MT.r[0],_MM_SHUFFLE(3,1,3,1));
    V12 = _mm_shuffle_ps(MT.r[3],MT.r[1],_MM_SHUFFLE(2,0,2,0));
    V00 = _mm_mul_ps(V00,V10);
    V01 = _mm_mul_ps(V01,V11);
    V02 = _mm_mul_ps(V02,V12);
    D0 = _mm_sub_ps(D0,V00);
    D1 = _mm_sub_ps(D1,V01);
    D2 = _mm_sub_ps(D2,V02);
    // V11 = D0Y,D0W,D2Y,D2Y
    V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,1,3,1));
    V00 = _mm_shuffle_ps(MT.r[1], MT.r[1],_MM_SHUFFLE(1,0,2,1));
    V10 = _mm_shuffle_ps(V11,D0,_MM_SHUFFLE(0,3,0,2));
    V01 = _mm_shuffle_ps(MT.r[0], MT.r[0],_MM_SHUFFLE(0,1,0,2));
    V11 = _mm_shuffle_ps(V11,D0,_MM_SHUFFLE(2,1,2,1));
    // V13 = D1Y,D1W,D2W,D2W
    XMVECTOR V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,3,3,1));
    V02 = _mm_shuffle_ps(MT.r[3], MT.r[3],_MM_SHUFFLE(1,0,2,1));
    V12 = _mm_shuffle_ps(V13,D1,_MM_SHUFFLE(0,3,0,2));
    XMVECTOR V03 = _mm_shuffle_ps(MT.r[2], MT.r[2],_MM_SHUFFLE(0,1,0,2));
    V13 = _mm_shuffle_ps(V13,D1,_MM_SHUFFLE(2,1,2,1));
    XMVECTOR C0 = _mm_mul_ps(V00,V10);
    XMVECTOR C2 = _mm_mul_ps(V01,V11);
    XMVECTOR C4 = _mm_mul_ps(V02,V12);
    XMVECTOR C6 = _mm_mul_ps(V03,V13);
    // V11 = D0X,D0Y,D2X,D2X
    V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(0,0,1,0));
    V00 = _mm_shuffle_ps(MT.r[1], MT.r[1],_MM_SHUFFLE(2,1,3,2));
    V10 = _mm_shuffle_ps(D0,V11,_MM_SHUFFLE(2,1,0,3));
    V01 = _mm_shuffle_ps(MT.r[0], MT.r[0],_MM_SHUFFLE(1,3,2,3));
    V11 = _mm_shuffle_ps(D0,V11,_MM_SHUFFLE(0,2,1,2));
    // V13 = D1X,D1Y,D2Z,D2Z
    V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(2,2,1,0));
    V02 = _mm_shuffle_ps(MT.r[3], MT.r[3],_MM_SHUFFLE(2,1,3,2));
    V12 = _mm_shuffle_ps(D1,V13,_MM_SHUFFLE(2,1,0,3));
    V03 = _mm_shuffle_ps(MT.r[2], MT.r[2],_MM_SHUFFLE(1,3,2,3));
    V13 = _mm_shuffle_ps(D1,V13,_MM_SHUFFLE(0,2,1,2));
    V00 = _mm_mul_ps(V00,V10);
    V01 = _mm_mul_ps(V01,V11);
    V02 = _mm_mul_ps(V02,V12);
    V03 = _mm_mul_ps(V03,V13);
    C0 = _mm_sub_ps(C0,V00);
    C2 = _mm_sub_ps(C2,V01);
    C4 = _mm_sub_ps(C4,V02);
    C6 = _mm_sub_ps(C6,V03);
    V00 = _mm_shuffle_ps(MT.r[1],MT.r[1],_MM_SHUFFLE(0,3,0,3));
    // V10 = D0Z,D0Z,D2X,D2Y
    V10 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,0,2,2));
    V10 = _mm_shuffle_ps(V10,V10,_MM_SHUFFLE(0,2,3,0));
    V01 = _mm_shuffle_ps(MT.r[0],MT.r[0],_MM_SHUFFLE(2,0,3,1));
    // V11 = D0X,D0W,D2X,D2Y
    V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,0,3,0));
    V11 = _mm_shuffle_ps(V11,V11,_MM_SHUFFLE(2,1,0,3));
    V02 = _mm_shuffle_ps(MT.r[3],MT.r[3],_MM_SHUFFLE(0,3,0,3));
    // V12 = D1Z,D1Z,D2Z,D2W
    V12 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,2,2,2));
    V12 = _mm_shuffle_ps(V12,V12,_MM_SHUFFLE(0,2,3,0));
    V03 = _mm_shuffle_ps(MT.r[2],MT.r[2],_MM_SHUFFLE(2,0,3,1));
    // V13 = D1X,D1W,D2Z,D2W
    V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,2,3,0));
    V13 = _mm_shuffle_ps(V13,V13,_MM_SHUFFLE(2,1,0,3));
    V00 = _mm_mul_ps(V00,V10);
    V01 = _mm_mul_ps(V01,V11);
    V02 = _mm_mul_ps(V02,V12);
    V03 = _mm_mul_ps(V03,V13);
    XMVECTOR C1 = _mm_sub_ps(C0,V00);
    C0 = _mm_add_ps(C0,V00);
    XMVECTOR C3 = _mm_add_ps(C2,V01);
    C2 = _mm_sub_ps(C2,V01);
    XMVECTOR C5 = _mm_sub_ps(C4,V02);
    C4 = _mm_add_ps(C4,V02);
    XMVECTOR C7 = _mm_add_ps(C6,V03);
    C6 = _mm_sub_ps(C6,V03);
    C0 = _mm_shuffle_ps(C0,C1,_MM_SHUFFLE(3,1,2,0));
    C2 = _mm_shuffle_ps(C2,C3,_MM_SHUFFLE(3,1,2,0));
    C4 = _mm_shuffle_ps(C4,C5,_MM_SHUFFLE(3,1,2,0));
    C6 = _mm_shuffle_ps(C6,C7,_MM_SHUFFLE(3,1,2,0));
    C0 = _mm_shuffle_ps(C0,C0,_MM_SHUFFLE(3,1,2,0));
    C2 = _mm_shuffle_ps(C2,C2,_MM_SHUFFLE(3,1,2,0));
    C4 = _mm_shuffle_ps(C4,C4,_MM_SHUFFLE(3,1,2,0));
    C6 = _mm_shuffle_ps(C6,C6,_MM_SHUFFLE(3,1,2,0));
    // Get the determinant
    XMVECTOR vTemp = XMVector4Dot(C0,MT.r[0]);
    *pDeterminant = vTemp;
    vTemp = _mm_div_ps(g_XMOne,vTemp);
    XMMATRIX mResult;
    mResult.r[0] = _mm_mul_ps(C0,vTemp);
    mResult.r[1] = _mm_mul_ps(C2,vTemp);
    mResult.r[2] = _mm_mul_ps(C4,vTemp);
    mResult.r[3] = _mm_mul_ps(C6,vTemp);
    return mResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
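//------------------------------------------------------------------------------
// Illustrative note: both paths implement the adjugate (Cramer's rule)
// inverse, inverse(M) = adjugate(M) / det(M), reusing 2x2 sub-determinants of
// the transpose (D0..D2) across the sixteen cofactors (C0..C7). Neither path
// guards against a singular matrix: a zero determinant makes the final divide
// produce infinities or NaNs, so callers that may see singular input should
// check the returned determinant themselves, e.g. (assumes <math.h>):
//
//   XMVECTOR det;
//   XMMATRIX inv = XMMatrixInverse(&det, m);
//   if (fabsf(XMVectorGetX(det)) < 1e-6f) {
//       // m is (numerically) singular; inv is not usable
//   }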
//------------------------------------------------------------------------------
XMINLINE XMVECTOR XMMatrixDeterminant
(
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR V0, V1, V2, V3, V4, V5;
    XMVECTOR P0, P1, P2, R, S;
    XMVECTOR Result;
    static CONST XMVECTORU32 SwizzleYXXX = {XM_PERMUTE_0Y, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
    static CONST XMVECTORU32 SwizzleZZYY = {XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_0Y};
    static CONST XMVECTORU32 SwizzleWWWZ = {XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0Z};
    static CONST XMVECTOR Sign = {1.0f, -1.0f, 1.0f, -1.0f};
    V0 = XMVectorPermute(M.r[2], M.r[2], SwizzleYXXX.v);
    V1 = XMVectorPermute(M.r[3], M.r[3], SwizzleZZYY.v);
    V2 = XMVectorPermute(M.r[2], M.r[2], SwizzleYXXX.v);
    V3 = XMVectorPermute(M.r[3], M.r[3], SwizzleWWWZ.v);
    V4 = XMVectorPermute(M.r[2], M.r[2], SwizzleZZYY.v);
    V5 = XMVectorPermute(M.r[3], M.r[3], SwizzleWWWZ.v);
    P0 = XMVectorMultiply(V0, V1);
    P1 = XMVectorMultiply(V2, V3);
    P2 = XMVectorMultiply(V4, V5);
    V0 = XMVectorPermute(M.r[2], M.r[2], SwizzleZZYY.v);
    V1 = XMVectorPermute(M.r[3], M.r[3], SwizzleYXXX.v);
    V2 = XMVectorPermute(M.r[2], M.r[2], SwizzleWWWZ.v);
    V3 = XMVectorPermute(M.r[3], M.r[3], SwizzleYXXX.v);
    V4 = XMVectorPermute(M.r[2], M.r[2], SwizzleWWWZ.v);
    V5 = XMVectorPermute(M.r[3], M.r[3], SwizzleZZYY.v);
    P0 = XMVectorNegativeMultiplySubtract(V0, V1, P0);
    P1 = XMVectorNegativeMultiplySubtract(V2, V3, P1);
    P2 = XMVectorNegativeMultiplySubtract(V4, V5, P2);
    V0 = XMVectorPermute(M.r[1], M.r[1], SwizzleWWWZ.v);
    V1 = XMVectorPermute(M.r[1], M.r[1], SwizzleZZYY.v);
    V2 = XMVectorPermute(M.r[1], M.r[1], SwizzleYXXX.v);
    S = XMVectorMultiply(M.r[0], Sign);
    R = XMVectorMultiply(V0, P0);
    R = XMVectorNegativeMultiplySubtract(V1, P1, R);
    R = XMVectorMultiplyAdd(V2, P2, R);
    Result = XMVector4Dot(S, R);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V0, V1, V2, V3, V4, V5;
    XMVECTOR P0, P1, P2, R, S;
    XMVECTOR Result;
    static CONST XMVECTORU32 SwizzleYXXX = {XM_PERMUTE_0Y, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
    static CONST XMVECTORU32 SwizzleZZYY = {XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_0Y};
    static CONST XMVECTORU32 SwizzleWWWZ = {XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0Z};
    static CONST XMVECTORF32 Sign = {1.0f, -1.0f, 1.0f, -1.0f};
    V0 = XMVectorPermute(M.r[2], M.r[2], SwizzleYXXX);
    V1 = XMVectorPermute(M.r[3], M.r[3], SwizzleZZYY);
    V2 = XMVectorPermute(M.r[2], M.r[2], SwizzleYXXX);
    V3 = XMVectorPermute(M.r[3], M.r[3], SwizzleWWWZ);
    V4 = XMVectorPermute(M.r[2], M.r[2], SwizzleZZYY);
    V5 = XMVectorPermute(M.r[3], M.r[3], SwizzleWWWZ);
    P0 = _mm_mul_ps(V0, V1);
    P1 = _mm_mul_ps(V2, V3);
    P2 = _mm_mul_ps(V4, V5);
    V0 = XMVectorPermute(M.r[2], M.r[2], SwizzleZZYY);
    V1 = XMVectorPermute(M.r[3], M.r[3], SwizzleYXXX);
    V2 = XMVectorPermute(M.r[2], M.r[2], SwizzleWWWZ);
    V3 = XMVectorPermute(M.r[3], M.r[3], SwizzleYXXX);
    V4 = XMVectorPermute(M.r[2], M.r[2], SwizzleWWWZ);
    V5 = XMVectorPermute(M.r[3], M.r[3], SwizzleZZYY);
    P0 = XMVectorNegativeMultiplySubtract(V0, V1, P0);
    P1 = XMVectorNegativeMultiplySubtract(V2, V3, P1);
    P2 = XMVectorNegativeMultiplySubtract(V4, V5, P2);
    V0 = XMVectorPermute(M.r[1], M.r[1], SwizzleWWWZ);
    V1 = XMVectorPermute(M.r[1], M.r[1], SwizzleZZYY);
    V2 = XMVectorPermute(M.r[1], M.r[1], SwizzleYXXX);
    S = _mm_mul_ps(M.r[0], Sign);
    R = _mm_mul_ps(V0, P0);
    R = XMVectorNegativeMultiplySubtract(V1, P1, R);
    R = XMVectorMultiplyAdd(V2, P2, R);
    Result = XMVector4Dot(S, R);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
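//------------------------------------------------------------------------------
// Illustrative note: this is a cofactor expansion along row 0. P0..P2 hold
// 2x2 sub-determinants built from rows 2 and 3, the r[1] permutes weight them
// into the four 3x3 minors, and the final dot with r[0] * (1,-1,1,-1) gives
// the textbook expansion
//
//   det(M) = m00*A00 - m01*A01 + m02*A02 - m03*A03
//
// where Aij is the 3x3 minor obtained by deleting row i and column j.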
#define XMRANKDECOMPOSE(a, b, c, x, y, z)   \
    if((x) < (y))                           \
    {                                       \
        if((y) < (z))                       \
        {                                   \
            (a) = 2;                        \
            (b) = 1;                        \
            (c) = 0;                        \
        }                                   \
        else                                \
        {                                   \
            (a) = 1;                        \
                                            \
            if((x) < (z))                   \
            {                               \
                (b) = 2;                    \
                (c) = 0;                    \
            }                               \
            else                            \
            {                               \
                (b) = 0;                    \
                (c) = 2;                    \
            }                               \
        }                                   \
    }                                       \
    else                                    \
    {                                       \
        if((x) < (z))                       \
        {                                   \
            (a) = 2;                        \
            (b) = 0;                        \
            (c) = 1;                        \
        }                                   \
        else                                \
        {                                   \
            (a) = 0;                        \
                                            \
            if((y) < (z))                   \
            {                               \
                (b) = 2;                    \
                (c) = 1;                    \
            }                               \
            else                            \
            {                               \
                (b) = 1;                    \
                (c) = 2;                    \
            }                               \
        }                                   \
    }

#define XM_DECOMP_EPSILON 0.0001f

XMINLINE BOOL XMMatrixDecompose( XMVECTOR *outScale, XMVECTOR *outRotQuat, XMVECTOR *outTrans, CXMMATRIX M )
{
    FLOAT fDet;
    FLOAT *pfScales;
    XMVECTOR *ppvBasis[3];
    XMMATRIX matTemp;
    UINT a, b, c;
    static const XMVECTOR *pvCanonicalBasis[3] = {
        &g_XMIdentityR0.v,
        &g_XMIdentityR1.v,
        &g_XMIdentityR2.v
    };
    // Get the translation
    outTrans[0] = M.r[3];
    ppvBasis[0] = &matTemp.r[0];
    ppvBasis[1] = &matTemp.r[1];
    ppvBasis[2] = &matTemp.r[2];
    matTemp.r[0] = M.r[0];
    matTemp.r[1] = M.r[1];
    matTemp.r[2] = M.r[2];
    matTemp.r[3] = g_XMIdentityR3.v;
    pfScales = (FLOAT *)outScale;
    XMVectorGetXPtr(&pfScales[0],XMVector3Length(ppvBasis[0][0]));
    XMVectorGetXPtr(&pfScales[1],XMVector3Length(ppvBasis[1][0]));
    XMVectorGetXPtr(&pfScales[2],XMVector3Length(ppvBasis[2][0]));
    XMRANKDECOMPOSE(a, b, c, pfScales[0], pfScales[1], pfScales[2])
    if(pfScales[a] < XM_DECOMP_EPSILON)
    {
        ppvBasis[a][0] = pvCanonicalBasis[a][0];
    }
    ppvBasis[a][0] = XMVector3Normalize(ppvBasis[a][0]);
    if(pfScales[b] < XM_DECOMP_EPSILON)
    {
        UINT aa, bb, cc;
        FLOAT fAbsX, fAbsY, fAbsZ;
        fAbsX = fabsf(XMVectorGetX(ppvBasis[a][0]));
        fAbsY = fabsf(XMVectorGetY(ppvBasis[a][0]));
        fAbsZ = fabsf(XMVectorGetZ(ppvBasis[a][0]));
        XMRANKDECOMPOSE(aa, bb, cc, fAbsX, fAbsY, fAbsZ)
        ppvBasis[b][0] = XMVector3Cross(ppvBasis[a][0],pvCanonicalBasis[cc][0]);
    }
    ppvBasis[b][0] = XMVector3Normalize(ppvBasis[b][0]);
    if(pfScales[c] < XM_DECOMP_EPSILON)
    {
        ppvBasis[c][0] = XMVector3Cross(ppvBasis[a][0],ppvBasis[b][0]);
    }
    ppvBasis[c][0] = XMVector3Normalize(ppvBasis[c][0]);
    fDet = XMVectorGetX(XMMatrixDeterminant(matTemp));
    // use the determinant sign (Cramer's rule) to check the handedness of the coordinate system
    if(fDet < 0.0f)
    {
        // switch coordinate system by negating the scale and inverting the basis vector on the x-axis
        pfScales[a] = -pfScales[a];
        ppvBasis[a][0] = XMVectorNegate(ppvBasis[a][0]);
        fDet = -fDet;
    }
    fDet -= 1.0f;
    fDet *= fDet;
    if(XM_DECOMP_EPSILON < fDet)
    {
        // Non-SRT matrix encountered
        return FALSE;
    }
    // generate the quaternion from the matrix
    outRotQuat[0] = XMQuaternionRotationMatrix(matTemp);
    return TRUE;
}
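//------------------------------------------------------------------------------
// Usage sketch (illustrative): XMMatrixDecompose factors a scale/rotate/
// translate matrix back into its parts and returns FALSE for matrices that
// contain shear or projection. A round-trip looks like:
//
//   XMVECTOR scale, rotQuat, trans;
//   if (XMMatrixDecompose(&scale, &rotQuat, &trans, world))
//   {
//       XMMATRIX rebuilt = XMMatrixMultiply(
//           XMMatrixMultiply(XMMatrixScalingFromVector(scale),
//                            XMMatrixRotationQuaternion(rotQuat)),
//           XMMatrixTranslationFromVector(trans));
//       // rebuilt approximates world up to floating-point error
//   }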
//------------------------------------------------------------------------------
// Transformation operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
XMFINLINE XMMATRIX XMMatrixIdentity()
{
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX M;
    M.r[0] = g_XMIdentityR0.v;
    M.r[1] = g_XMIdentityR1.v;
    M.r[2] = g_XMIdentityR2.v;
    M.r[3] = g_XMIdentityR3.v;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    M.r[0] = g_XMIdentityR0;
    M.r[1] = g_XMIdentityR1;
    M.r[2] = g_XMIdentityR2;
    M.r[3] = g_XMIdentityR3;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
XMFINLINE XMMATRIX XMMatrixSet
(
    FLOAT m00, FLOAT m01, FLOAT m02, FLOAT m03,
    FLOAT m10, FLOAT m11, FLOAT m12, FLOAT m13,
    FLOAT m20, FLOAT m21, FLOAT m22, FLOAT m23,
    FLOAT m30, FLOAT m31, FLOAT m32, FLOAT m33
)
{
    XMMATRIX M;
    M.r[0] = XMVectorSet(m00, m01, m02, m03);
    M.r[1] = XMVectorSet(m10, m11, m12, m13);
    M.r[2] = XMVectorSet(m20, m21, m22, m23);
    M.r[3] = XMVectorSet(m30, m31, m32, m33);
    return M;
}

//------------------------------------------------------------------------------
XMFINLINE XMMATRIX XMMatrixTranslation
(
    FLOAT OffsetX,
    FLOAT OffsetY,
    FLOAT OffsetZ
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX M;
    M.m[0][0] = 1.0f;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;
    M.m[1][0] = 0.0f;
    M.m[1][1] = 1.0f;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;
    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = 1.0f;
    M.m[2][3] = 0.0f;
    M.m[3][0] = OffsetX;
    M.m[3][1] = OffsetY;
    M.m[3][2] = OffsetZ;
    M.m[3][3] = 1.0f;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    M.r[0] = g_XMIdentityR0;
    M.r[1] = g_XMIdentityR1;
    M.r[2] = g_XMIdentityR2;
    M.r[3] = _mm_set_ps(1.0f,OffsetZ,OffsetY,OffsetX);
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
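//------------------------------------------------------------------------------
// Illustrative note: these matrices are row-major and act on row vectors, so
// the translation lives in the fourth row (m30..m32), not the fourth column.
// XMVector3Transform treats its input as having w = 1, which picks up the
// offset:
//
//   XMVECTOR p = XMVector3Transform(XMVectorZero(),
//                                   XMMatrixTranslation(1.0f, 2.0f, 3.0f));
//   // p == (1, 2, 3, 1)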
//------------------------------------------------------------------------------
XMFINLINE XMMATRIX XMMatrixTranslationFromVector
(
    FXMVECTOR Offset
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX M;
    M.m[0][0] = 1.0f;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;
    M.m[1][0] = 0.0f;
    M.m[1][1] = 1.0f;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;
    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = 1.0f;
    M.m[2][3] = 0.0f;
    M.m[3][0] = Offset.vector4_f32[0];
    M.m[3][1] = Offset.vector4_f32[1];
    M.m[3][2] = Offset.vector4_f32[2];
    M.m[3][3] = 1.0f;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_and_ps(Offset,g_XMMask3);
    vTemp = _mm_or_ps(vTemp,g_XMIdentityR3);
    XMMATRIX M;
    M.r[0] = g_XMIdentityR0;
    M.r[1] = g_XMIdentityR1;
    M.r[2] = g_XMIdentityR2;
    M.r[3] = vTemp;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
XMFINLINE XMMATRIX XMMatrixScaling
(
    FLOAT ScaleX,
    FLOAT ScaleY,
    FLOAT ScaleZ
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX M;
    M.r[0] = XMVectorSet(ScaleX, 0.0f, 0.0f, 0.0f);
    M.r[1] = XMVectorSet(0.0f, ScaleY, 0.0f, 0.0f);
    M.r[2] = XMVectorSet(0.0f, 0.0f, ScaleZ, 0.0f);
    M.r[3] = g_XMIdentityR3.v;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    M.r[0] = _mm_set_ps( 0, 0, 0, ScaleX );
    M.r[1] = _mm_set_ps( 0, 0, ScaleY, 0 );
    M.r[2] = _mm_set_ps( 0, ScaleZ, 0, 0 );
    M.r[3] = g_XMIdentityR3;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMMATRIX XMMatrixScalingFromVector
(
    FXMVECTOR Scale
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX M;
    M.m[0][0] = Scale.vector4_f32[0];
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;
    M.m[1][0] = 0.0f;
    M.m[1][1] = Scale.vector4_f32[1];
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;
    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = Scale.vector4_f32[2];
    M.m[2][3] = 0.0f;
    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = 0.0f;
    M.m[3][3] = 1.0f;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    M.r[0] = _mm_and_ps(Scale,g_XMMaskX);
    M.r[1] = _mm_and_ps(Scale,g_XMMaskY);
    M.r[2] = _mm_and_ps(Scale,g_XMMaskZ);
    M.r[3] = g_XMIdentityR3;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
XMINLINE XMMATRIX XMMatrixRotationX
(
    FLOAT Angle
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX M;
    FLOAT fSinAngle = sinf(Angle);
    FLOAT fCosAngle = cosf(Angle);
    M.m[0][0] = 1.0f;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;
    M.m[1][0] = 0.0f;
    M.m[1][1] = fCosAngle;
    M.m[1][2] = fSinAngle;
    M.m[1][3] = 0.0f;
    M.m[2][0] = 0.0f;
    M.m[2][1] = -fSinAngle;
    M.m[2][2] = fCosAngle;
    M.m[2][3] = 0.0f;
    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = 0.0f;
    M.m[3][3] = 1.0f;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    FLOAT SinAngle = sinf(Angle);
    FLOAT CosAngle = cosf(Angle);
    XMVECTOR vSin = _mm_set_ss(SinAngle);
    XMVECTOR vCos = _mm_set_ss(CosAngle);
    // x = 0,y = cos,z = sin, w = 0
    vCos = _mm_shuffle_ps(vCos,vSin,_MM_SHUFFLE(3,0,0,3));
    XMMATRIX M;
    M.r[0] = g_XMIdentityR0;
    M.r[1] = vCos;
    // x = 0,y = sin,z = cos, w = 0
    vCos = _mm_shuffle_ps(vCos,vCos,_MM_SHUFFLE(3,1,2,0));
    // x = 0,y = -sin,z = cos, w = 0
    vCos = _mm_mul_ps(vCos,g_XMNegateY);
    M.r[2] = vCos;
    M.r[3] = g_XMIdentityR3;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMINLINE XMMATRIX XMMatrixRotationY
(
    FLOAT Angle
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX M;
    FLOAT fSinAngle = sinf(Angle);
    FLOAT fCosAngle = cosf(Angle);
    M.m[0][0] = fCosAngle;
    M.m[0][1] = 0.0f;
    M.m[0][2] = -fSinAngle;
    M.m[0][3] = 0.0f;
    M.m[1][0] = 0.0f;
    M.m[1][1] = 1.0f;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;
    M.m[2][0] = fSinAngle;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fCosAngle;
    M.m[2][3] = 0.0f;
    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = 0.0f;
    M.m[3][3] = 1.0f;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    FLOAT SinAngle = sinf(Angle);
    FLOAT CosAngle = cosf(Angle);
    XMVECTOR vSin = _mm_set_ss(SinAngle);
    XMVECTOR vCos = _mm_set_ss(CosAngle);
    // x = sin,y = 0,z = cos, w = 0
    vSin = _mm_shuffle_ps(vSin,vCos,_MM_SHUFFLE(3,0,3,0));
    XMMATRIX M;
    M.r[2] = vSin;
    M.r[1] = g_XMIdentityR1;
    // x = cos,y = 0,z = sin, w = 0
    vSin = _mm_shuffle_ps(vSin,vSin,_MM_SHUFFLE(3,0,1,2));
    // x = cos,y = 0,z = -sin, w = 0
    vSin = _mm_mul_ps(vSin,g_XMNegateZ);
    M.r[0] = vSin;
    M.r[3] = g_XMIdentityR3;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
XMINLINE XMMATRIX XMMatrixRotationZ
(
    FLOAT Angle
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX M;
    FLOAT fSinAngle = sinf(Angle);
    FLOAT fCosAngle = cosf(Angle);
    M.m[0][0] = fCosAngle;
    M.m[0][1] = fSinAngle;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;
    M.m[1][0] = -fSinAngle;
    M.m[1][1] = fCosAngle;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;
    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = 1.0f;
    M.m[2][3] = 0.0f;
    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = 0.0f;
    M.m[3][3] = 1.0f;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    FLOAT SinAngle = sinf(Angle);
    FLOAT CosAngle = cosf(Angle);
    XMVECTOR vSin = _mm_set_ss(SinAngle);
    XMVECTOR vCos = _mm_set_ss(CosAngle);
    // x = cos,y = sin,z = 0, w = 0
    vCos = _mm_unpacklo_ps(vCos,vSin);
    XMMATRIX M;
    M.r[0] = vCos;
    // x = sin,y = cos,z = 0, w = 0
    vCos = _mm_shuffle_ps(vCos,vCos,_MM_SHUFFLE(3,2,0,1));
    // x = -sin,y = cos,z = 0, w = 0
    vCos = _mm_mul_ps(vCos,g_XMNegateX);
    M.r[1] = vCos;
    M.r[2] = g_XMIdentityR2;
    M.r[3] = g_XMIdentityR3;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
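//------------------------------------------------------------------------------
// Illustrative note: each axis rotation above leaves its own axis fixed and
// mixes the other two coordinates with sin/cos; angles are in radians. A
// quick sanity check with the row-vector convention:
//
//   XMVECTOR p = XMVector3Transform(XMVectorSet(1.0f, 0.0f, 0.0f, 1.0f),
//                                   XMMatrixRotationZ(XM_PIDIV2));
//   // p ~= (0, 1, 0, 1): a quarter turn about Z maps +X onto +Y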
//------------------------------------------------------------------------------
XMINLINE XMMATRIX XMMatrixRotationRollPitchYaw
(
    FLOAT Pitch,
    FLOAT Yaw,
    FLOAT Roll
)
{
    XMVECTOR Angles;
    XMMATRIX M;
    Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f);
    M = XMMatrixRotationRollPitchYawFromVector(Angles);
    return M;
}

//------------------------------------------------------------------------------
XMINLINE XMMATRIX XMMatrixRotationRollPitchYawFromVector
(
    FXMVECTOR Angles // <Pitch, Yaw, Roll, undefined>
)
{
    XMVECTOR Q;
    XMMATRIX M;
    Q = XMQuaternionRotationRollPitchYawFromVector(Angles);
    M = XMMatrixRotationQuaternion(Q);
    return M;
}

//------------------------------------------------------------------------------
XMINLINE XMMATRIX XMMatrixRotationNormal
(
    FXMVECTOR NormalAxis,
    FLOAT Angle
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR A;
    XMVECTOR N0, N1;
    XMVECTOR V0, V1, V2;
    XMVECTOR R0, R1, R2;
    XMVECTOR C0, C1, C2;
    XMMATRIX M;
    static CONST XMVECTORU32 SwizzleYZXW = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0X, XM_PERMUTE_0W};
    static CONST XMVECTORU32 SwizzleZXYW = {XM_PERMUTE_0Z, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_0W};
    static CONST XMVECTORU32 Permute0Z1Y1Z0X = {XM_PERMUTE_0Z, XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_0X};
    static CONST XMVECTORU32 Permute0Y1X0Y1X = {XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1X};
    static CONST XMVECTORU32 Permute0X1X1Y0W = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0W};
    static CONST XMVECTORU32 Permute1Z0Y1W0W = {XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1W, XM_PERMUTE_0W};
    static CONST XMVECTORU32 Permute1X1Y0Z0W = {XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_0W};
    FLOAT fSinAngle = sinf(Angle);
    FLOAT fCosAngle = cosf(Angle);
    A = XMVectorSet(fSinAngle, fCosAngle, 1.0f - fCosAngle, 0.0f);
    C2 = XMVectorSplatZ(A);
    C1 = XMVectorSplatY(A);
    C0 = XMVectorSplatX(A);
    N0 = XMVectorPermute(NormalAxis, NormalAxis, SwizzleYZXW.v);
    N1 = XMVectorPermute(NormalAxis, NormalAxis, SwizzleZXYW.v);
    V0 = XMVectorMultiply(C2, N0);
    V0 = XMVectorMultiply(V0, N1);
    R0 = XMVectorMultiply(C2, NormalAxis);
    R0 = XMVectorMultiplyAdd(R0, NormalAxis, C1);
    R1 = XMVectorMultiplyAdd(C0, NormalAxis, V0);
    R2 = XMVectorNegativeMultiplySubtract(C0, NormalAxis, V0);
    V0 = XMVectorSelect(A, R0, g_XMSelect1110.v);
    V1 = XMVectorPermute(R1, R2, Permute0Z1Y1Z0X.v);
    V2 = XMVectorPermute(R1, R2, Permute0Y1X0Y1X.v);
    M.r[0] = XMVectorPermute(V0, V1, Permute0X1X1Y0W.v);
    M.r[1] = XMVectorPermute(V0, V1, Permute1Z0Y1W0W.v);
    M.r[2] = XMVectorPermute(V0, V2, Permute1X1Y0Z0W.v);
    M.r[3] = g_XMIdentityR3.v;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR N0, N1;
    XMVECTOR V0, V1, V2;
    XMVECTOR R0, R1, R2;
    XMVECTOR C0, C1, C2;
    XMMATRIX M;
    FLOAT fSinAngle = sinf(Angle);
    FLOAT fCosAngle = cosf(Angle);
    C2 = _mm_set_ps1(1.0f - fCosAngle);
    C1 = _mm_set_ps1(fCosAngle);
    C0 = _mm_set_ps1(fSinAngle);
    N0 = _mm_shuffle_ps(NormalAxis,NormalAxis,_MM_SHUFFLE(3,0,2,1));
    // N0 = XMVectorPermute(NormalAxis, NormalAxis, SwizzleYZXW);
    N1 = _mm_shuffle_ps(NormalAxis,NormalAxis,_MM_SHUFFLE(3,1,0,2));
    // N1 = XMVectorPermute(NormalAxis, NormalAxis, SwizzleZXYW);
    V0 = _mm_mul_ps(C2, N0);
    V0 = _mm_mul_ps(V0, N1);
    R0 = _mm_mul_ps(C2, NormalAxis);
    R0 = _mm_mul_ps(R0, NormalAxis);
    R0 = _mm_add_ps(R0, C1);
    R1 = _mm_mul_ps(C0, NormalAxis);
    R1 = _mm_add_ps(R1, V0);
    R2 = _mm_mul_ps(C0, NormalAxis);
    R2 = _mm_sub_ps(V0,R2);
    V0 = _mm_and_ps(R0,g_XMMask3);
    // V0 = XMVectorSelect(A, R0, g_XMSelect1110);
    V1 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(2,1,2,0));
    V1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(0,3,2,1));
    // V1 = XMVectorPermute(R1, R2, Permute0Z1Y1Z0X);
    V2 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(0,0,1,1));
    V2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(2,0,2,0));
    // V2 = XMVectorPermute(R1, R2, Permute0Y1X0Y1X);
    R2 = _mm_shuffle_ps(V0,V1,_MM_SHUFFLE(1,0,3,0));
    R2 = _mm_shuffle_ps(R2,R2,_MM_SHUFFLE(1,3,2,0));
    M.r[0] = R2;
    // M.r[0] = XMVectorPermute(V0, V1, Permute0X1X1Y0W);
    R2 = _mm_shuffle_ps(V0,V1,_MM_SHUFFLE(3,2,3,1));
    R2 = _mm_shuffle_ps(R2,R2,_MM_SHUFFLE(1,3,0,2));
    M.r[1] = R2;
    // M.r[1] = XMVectorPermute(V0, V1, Permute1Z0Y1W0W);
    V2 = _mm_shuffle_ps(V2,V0,_MM_SHUFFLE(3,2,1,0));
    // R2 = _mm_shuffle_ps(R2,R2,_MM_SHUFFLE(3,2,1,0));
    M.r[2] = V2;
    // M.r[2] = XMVectorPermute(V0, V2, Permute1X1Y0Z0W);
    M.r[3] = g_XMIdentityR3;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
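//------------------------------------------------------------------------------
// Illustrative note: XMMatrixRotationNormal is the matrix form of the
// Rodrigues rotation formula for a unit axis n and angle t:
//
//   R = cos(t)*I + (1 - cos(t))*(n n^T) + sin(t)*[n]x
//
// where n n^T is the outer product and [n]x the cross-product matrix. In the
// code, C1 = cos(t) feeds the diagonal, C2 = 1 - cos(t) scales the outer
// product terms (R0, V0), and C0 = sin(t) adds/subtracts the cross terms
// (R1, R2). The axis must already be normalized; XMMatrixRotationAxis below
// normalizes first and then calls this function.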
//------------------------------------------------------------------------------
XMINLINE XMMATRIX XMMatrixRotationAxis
(
    FXMVECTOR Axis,
    FLOAT Angle
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Normal;
    XMMATRIX M;
    XMASSERT(!XMVector3Equal(Axis, XMVectorZero()));
    XMASSERT(!XMVector3IsInfinite(Axis));
    Normal = XMVector3Normalize(Axis);
    M = XMMatrixRotationNormal(Normal, Angle);
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(!XMVector3Equal(Axis, XMVectorZero()));
    XMASSERT(!XMVector3IsInfinite(Axis));
    XMVECTOR Normal = XMVector3Normalize(Axis);
    XMMATRIX M = XMMatrixRotationNormal(Normal, Angle);
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
XMFINLINE XMMATRIX XMMatrixRotationQuaternion
(
    FXMVECTOR Quaternion
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX M;
    XMVECTOR Q0, Q1;
    XMVECTOR V0, V1, V2;
    XMVECTOR R0, R1, R2;
    static CONST XMVECTOR Constant1110 = {1.0f, 1.0f, 1.0f, 0.0f};
    static CONST XMVECTORU32 SwizzleXXYW = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_0W};
    static CONST XMVECTORU32 SwizzleZYZW = {XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0W};
    static CONST XMVECTORU32 SwizzleYZXW = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0X, XM_PERMUTE_0W};
    static CONST XMVECTORU32 Permute0Y0X0X1W = {XM_PERMUTE_0Y, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_1W};
    static CONST XMVECTORU32 Permute0Z0Z0Y1W = {XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1W};
    static CONST XMVECTORU32 Permute0Y1X1Y0Z = {XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z};
    static CONST XMVECTORU32 Permute0X1Z0X1Z = {XM_PERMUTE_0X, XM_PERMUTE_1Z, XM_PERMUTE_0X, XM_PERMUTE_1Z};
    static CONST XMVECTORU32 Permute0X1X1Y0W = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0W};
    static CONST XMVECTORU32 Permute1Z0Y1W0W = {XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1W, XM_PERMUTE_0W};
    static CONST XMVECTORU32 Permute1X1Y0Z0W = {XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_0W};
    Q0 = XMVectorAdd(Quaternion, Quaternion);
    Q1 = XMVectorMultiply(Quaternion, Q0);
    V0 = XMVectorPermute(Q1, Constant1110, Permute0Y0X0X1W.v);
    V1 = XMVectorPermute(Q1, Constant1110, Permute0Z0Z0Y1W.v);
    R0 = XMVectorSubtract(Constant1110, V0);
    R0 = XMVectorSubtract(R0, V1);
    V0 = XMVectorPermute(Quaternion, Quaternion, SwizzleXXYW.v);
    V1 = XMVectorPermute(Q0, Q0, SwizzleZYZW.v);
    V0 = XMVectorMultiply(V0, V1);
    V1 = XMVectorSplatW(Quaternion);
    V2 = XMVectorPermute(Q0, Q0, SwizzleYZXW.v);
    V1 = XMVectorMultiply(V1, V2);
    R1 = XMVectorAdd(V0, V1);
    R2 = XMVectorSubtract(V0, V1);
    V0 = XMVectorPermute(R1, R2, Permute0Y1X1Y0Z.v);
    V1 = XMVectorPermute(R1, R2, Permute0X1Z0X1Z.v);
    M.r[0] = XMVectorPermute(R0, V0, Permute0X1X1Y0W.v);
    M.r[1] = XMVectorPermute(R0, V0, Permute1Z0Y1W0W.v);
    M.r[2] = XMVectorPermute(R0, V1, Permute1X1Y0Z0W.v);
    M.r[3] = g_XMIdentityR3.v;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    XMVECTOR Q0, Q1;
    XMVECTOR V0, V1, V2;
    XMVECTOR R0, R1, R2;
    static CONST XMVECTORF32 Constant1110 = {1.0f, 1.0f, 1.0f, 0.0f};
    Q0 = _mm_add_ps(Quaternion,Quaternion);
    Q1 = _mm_mul_ps(Quaternion,Q0);
    V0 = _mm_shuffle_ps(Q1,Q1,_MM_SHUFFLE(3,0,0,1));
    V0 = _mm_and_ps(V0,g_XMMask3);
    // V0 = XMVectorPermute(Q1, Constant1110,Permute0Y0X0X1W);
    V1 = _mm_shuffle_ps(Q1,Q1,_MM_SHUFFLE(3,1,2,2));
    V1 = _mm_and_ps(V1,g_XMMask3);
    // V1 = XMVectorPermute(Q1, Constant1110,Permute0Z0Z0Y1W);
    R0 = _mm_sub_ps(Constant1110,V0);
    R0 = _mm_sub_ps(R0, V1);
    V0 = _mm_shuffle_ps(Quaternion,Quaternion,_MM_SHUFFLE(3,1,0,0));
    // V0 = XMVectorPermute(Quaternion, Quaternion,SwizzleXXYW);
    V1 = _mm_shuffle_ps(Q0,Q0,_MM_SHUFFLE(3,2,1,2));
    // V1 = XMVectorPermute(Q0, Q0,SwizzleZYZW);
    V0 = _mm_mul_ps(V0, V1);
    V1 = _mm_shuffle_ps(Quaternion,Quaternion,_MM_SHUFFLE(3,3,3,3));
    // V1 = XMVectorSplatW(Quaternion);
    V2 = _mm_shuffle_ps(Q0,Q0,_MM_SHUFFLE(3,0,2,1));
    // V2 = XMVectorPermute(Q0, Q0,SwizzleYZXW);
    V1 = _mm_mul_ps(V1, V2);
    R1 = _mm_add_ps(V0, V1);
    R2 = _mm_sub_ps(V0, V1);
    V0 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(1,0,2,1));
    V0 = _mm_shuffle_ps(V0,V0,_MM_SHUFFLE(1,3,2,0));
    // V0 = XMVectorPermute(R1, R2,Permute0Y1X1Y0Z);
    V1 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(2,2,0,0));
    V1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(2,0,2,0));
    // V1 = XMVectorPermute(R1, R2,Permute0X1Z0X1Z);
    Q1 = _mm_shuffle_ps(R0,V0,_MM_SHUFFLE(1,0,3,0));
    Q1 = _mm_shuffle_ps(Q1,Q1,_MM_SHUFFLE(1,3,2,0));
    M.r[0] = Q1;
    // M.r[0] = XMVectorPermute(R0, V0,Permute0X1X1Y0W);
    Q1 = _mm_shuffle_ps(R0,V0,_MM_SHUFFLE(3,2,3,1));
    Q1 = _mm_shuffle_ps(Q1,Q1,_MM_SHUFFLE(1,3,0,2));
    M.r[1] = Q1;
    // M.r[1] = XMVectorPermute(R0, V0,Permute1Z0Y1W0W);
    Q1 = _mm_shuffle_ps(V1,R0,_MM_SHUFFLE(3,2,1,0));
    M.r[2] = Q1;
    // M.r[2] = XMVectorPermute(R0, V1,Permute1X1Y0Z0W);
    M.r[3] = g_XMIdentityR3;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
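//------------------------------------------------------------------------------
// Illustrative note: this is the standard unit-quaternion to matrix
// conversion. With q = (x, y, z, w), Q0 = 2q and Q1 = q*Q0 supply the doubled
// products, and the rows come out as
//
//   | 1-2(y^2+z^2)   2(xy+wz)       2(xz-wy)       0 |
//   | 2(xy-wz)       1-2(x^2+z^2)   2(yz+wx)       0 |
//   | 2(xz+wy)       2(yz-wx)       1-2(x^2+y^2)   0 |
//   | 0              0              0              1 |
//
// The quaternion must be normalized; an unnormalized input scales the basis
// rows and the result is no longer a pure rotation.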
  1337. //------------------------------------------------------------------------------
  1338. XMINLINE XMMATRIX XMMatrixTransformation2D
  1339. (
  1340. FXMVECTOR ScalingOrigin,
  1341. FLOAT ScalingOrientation,
  1342. FXMVECTOR Scaling,
  1343. FXMVECTOR RotationOrigin,
  1344. FLOAT Rotation,
  1345. CXMVECTOR Translation
  1346. )
  1347. {
  1348. #if defined(_XM_NO_INTRINSICS_)
  1349. XMMATRIX M;
  1350. XMVECTOR VScaling;
  1351. XMVECTOR NegScalingOrigin;
  1352. XMVECTOR VScalingOrigin;
  1353. XMMATRIX MScalingOriginI;
  1354. XMMATRIX MScalingOrientation;
  1355. XMMATRIX MScalingOrientationT;
  1356. XMMATRIX MScaling;
  1357. XMVECTOR VRotationOrigin;
  1358. XMMATRIX MRotation;
  1359. XMVECTOR VTranslation;
  1360. // M = Inverse(MScalingOrigin) * Transpose(MScalingOrientation) * MScaling * MScalingOrientation *
  1361. // MScalingOrigin * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
  1362. VScalingOrigin = XMVectorSelect(g_XMSelect1100.v, ScalingOrigin, g_XMSelect1100.v);
  1363. NegScalingOrigin = XMVectorNegate(VScalingOrigin);
  1364. MScalingOriginI = XMMatrixTranslationFromVector(NegScalingOrigin);
  1365. MScalingOrientation = XMMatrixRotationZ(ScalingOrientation);
  1366. MScalingOrientationT = XMMatrixTranspose(MScalingOrientation);
  1367. VScaling = XMVectorSelect(g_XMOne.v, Scaling, g_XMSelect1100.v);
  1368. MScaling = XMMatrixScalingFromVector(VScaling);
  1369. VRotationOrigin = XMVectorSelect(g_XMSelect1100.v, RotationOrigin, g_XMSelect1100.v);
  1370. MRotation = XMMatrixRotationZ(Rotation);
  1371. VTranslation = XMVectorSelect(g_XMSelect1100.v, Translation,g_XMSelect1100.v);
  1372. M = XMMatrixMultiply(MScalingOriginI, MScalingOrientationT);
  1373. M = XMMatrixMultiply(M, MScaling);
  1374. M = XMMatrixMultiply(M, MScalingOrientation);
  1375. M.r[3] = XMVectorAdd(M.r[3], VScalingOrigin);
  1376. M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
  1377. M = XMMatrixMultiply(M, MRotation);
  1378. M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
  1379. M.r[3] = XMVectorAdd(M.r[3], VTranslation);
  1380. return M;
  1381. #elif defined(_XM_SSE_INTRINSICS_)
  1382. XMMATRIX M;
  1383. XMVECTOR VScaling;
  1384. XMVECTOR NegScalingOrigin;
  1385. XMVECTOR VScalingOrigin;
  1386. XMMATRIX MScalingOriginI;
  1387. XMMATRIX MScalingOrientation;
  1388. XMMATRIX MScalingOrientationT;
  1389. XMMATRIX MScaling;
  1390. XMVECTOR VRotationOrigin;
  1391. XMMATRIX MRotation;
  1392. XMVECTOR VTranslation;
  1393. // M = Inverse(MScalingOrigin) * Transpose(MScalingOrientation) * MScaling * MScalingOrientation *
  1394. // MScalingOrigin * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
  1395. static const XMVECTORU32 Mask2 = {0xFFFFFFFF,0xFFFFFFFF,0,0};
  1396. static const XMVECTORF32 ZWOne = {0,0,1.0f,1.0f};
  1397. VScalingOrigin = _mm_and_ps(ScalingOrigin, Mask2);
  1398. NegScalingOrigin = XMVectorNegate(VScalingOrigin);
  1399. MScalingOriginI = XMMatrixTranslationFromVector(NegScalingOrigin);
  1400. MScalingOrientation = XMMatrixRotationZ(ScalingOrientation);
  1401. MScalingOrientationT = XMMatrixTranspose(MScalingOrientation);
  1402. VScaling = _mm_and_ps(Scaling, Mask2);
  1403. VScaling = _mm_or_ps(VScaling,ZWOne);
  1404. MScaling = XMMatrixScalingFromVector(VScaling);
  1405. VRotationOrigin = _mm_and_ps(RotationOrigin, Mask2);
  1406. MRotation = XMMatrixRotationZ(Rotation);
  1407. VTranslation = _mm_and_ps(Translation, Mask2);
  1408. M = XMMatrixMultiply(MScalingOriginI, MScalingOrientationT);
  1409. M = XMMatrixMultiply(M, MScaling);
  1410. M = XMMatrixMultiply(M, MScalingOrientation);
  1411. M.r[3] = XMVectorAdd(M.r[3], VScalingOrigin);
  1412. M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
  1413. M = XMMatrixMultiply(M, MRotation);
  1414. M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
  1415. M.r[3] = XMVectorAdd(M.r[3], VTranslation);
  1416. return M;
  1417. #else // _XM_VMX128_INTRINSICS_
  1418. #endif // _XM_VMX128_INTRINSICS_
  1419. }
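// Usage sketch (illustrative values only, not part of the library): scale by
// 2 about the origin, then rotate a quarter turn about a pivot at (1, 1):
//
//   XMVECTOR Pivot = XMVectorSet(1.0f, 1.0f, 0.0f, 0.0f);
//   XMVECTOR Scale = XMVectorSet(2.0f, 2.0f, 0.0f, 0.0f);
//   XMMATRIX W = XMMatrixTransformation2D(XMVectorZero(), 0.0f, Scale,
//                                         Pivot, XM_PIDIV2, XMVectorZero());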
  1420. //------------------------------------------------------------------------------
  1421. XMINLINE XMMATRIX XMMatrixTransformation
  1422. (
  1423. FXMVECTOR ScalingOrigin,
  1424. FXMVECTOR ScalingOrientationQuaternion,
  1425. FXMVECTOR Scaling,
  1426. CXMVECTOR RotationOrigin,
  1427. CXMVECTOR RotationQuaternion,
  1428. CXMVECTOR Translation
  1429. )
  1430. {
  1431. #if defined(_XM_NO_INTRINSICS_)
  1432. XMMATRIX M;
  1433. XMVECTOR NegScalingOrigin;
  1434. XMVECTOR VScalingOrigin;
  1435. XMMATRIX MScalingOriginI;
  1436. XMMATRIX MScalingOrientation;
  1437. XMMATRIX MScalingOrientationT;
  1438. XMMATRIX MScaling;
  1439. XMVECTOR VRotationOrigin;
  1440. XMMATRIX MRotation;
  1441. XMVECTOR VTranslation;
  1442. // M = Inverse(MScalingOrigin) * Transpose(MScalingOrientation) * MScaling * MScalingOrientation *
  1443. // MScalingOrigin * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
  1444. VScalingOrigin = XMVectorSelect(g_XMSelect1110.v, ScalingOrigin, g_XMSelect1110.v);
  1445. NegScalingOrigin = XMVectorNegate(ScalingOrigin);
  1446. MScalingOriginI = XMMatrixTranslationFromVector(NegScalingOrigin);
  1447. MScalingOrientation = XMMatrixRotationQuaternion(ScalingOrientationQuaternion);
  1448. MScalingOrientationT = XMMatrixTranspose(MScalingOrientation);
  1449. MScaling = XMMatrixScalingFromVector(Scaling);
  1450. VRotationOrigin = XMVectorSelect(g_XMSelect1110.v, RotationOrigin, g_XMSelect1110.v);
  1451. MRotation = XMMatrixRotationQuaternion(RotationQuaternion);
  1452. VTranslation = XMVectorSelect(g_XMSelect1110.v, Translation, g_XMSelect1110.v);
  1453. M = XMMatrixMultiply(MScalingOriginI, MScalingOrientationT);
  1454. M = XMMatrixMultiply(M, MScaling);
  1455. M = XMMatrixMultiply(M, MScalingOrientation);
  1456. M.r[3] = XMVectorAdd(M.r[3], VScalingOrigin);
  1457. M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
  1458. M = XMMatrixMultiply(M, MRotation);
  1459. M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
  1460. M.r[3] = XMVectorAdd(M.r[3], VTranslation);
  1461. return M;
  1462. #elif defined(_XM_SSE_INTRINSICS_)
  1463. XMMATRIX M;
  1464. XMVECTOR NegScalingOrigin;
  1465. XMVECTOR VScalingOrigin;
  1466. XMMATRIX MScalingOriginI;
  1467. XMMATRIX MScalingOrientation;
  1468. XMMATRIX MScalingOrientationT;
  1469. XMMATRIX MScaling;
  1470. XMVECTOR VRotationOrigin;
  1471. XMMATRIX MRotation;
  1472. XMVECTOR VTranslation;
  1473. // M = Inverse(MScalingOrigin) * Transpose(MScalingOrientation) * MScaling * MScalingOrientation *
  1474. // MScalingOrigin * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
  1475. VScalingOrigin = _mm_and_ps(ScalingOrigin,g_XMMask3);
  1476. NegScalingOrigin = XMVectorNegate(ScalingOrigin);
  1477. MScalingOriginI = XMMatrixTranslationFromVector(NegScalingOrigin);
  1478. MScalingOrientation = XMMatrixRotationQuaternion(ScalingOrientationQuaternion);
  1479. MScalingOrientationT = XMMatrixTranspose(MScalingOrientation);
  1480. MScaling = XMMatrixScalingFromVector(Scaling);
  1481. VRotationOrigin = _mm_and_ps(RotationOrigin,g_XMMask3);
  1482. MRotation = XMMatrixRotationQuaternion(RotationQuaternion);
  1483. VTranslation = _mm_and_ps(Translation,g_XMMask3);
  1484. M = XMMatrixMultiply(MScalingOriginI, MScalingOrientationT);
  1485. M = XMMatrixMultiply(M, MScaling);
  1486. M = XMMatrixMultiply(M, MScalingOrientation);
  1487. M.r[3] = XMVectorAdd(M.r[3], VScalingOrigin);
  1488. M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
  1489. M = XMMatrixMultiply(M, MRotation);
  1490. M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
  1491. M.r[3] = XMVectorAdd(M.r[3], VTranslation);
  1492. return M;
  1493. #else // _XM_VMX128_INTRINSICS_
  1494. #endif // _XM_VMX128_INTRINSICS_
  1495. }
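// Note: because the scaling-origin and rotation-origin factors are pure
// translations, both paths above fold them in by adjusting M.r[3] directly
// rather than performing several extra full matrix multiplies.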
  1496. //------------------------------------------------------------------------------
  1497. XMINLINE XMMATRIX XMMatrixAffineTransformation2D
  1498. (
  1499. FXMVECTOR Scaling,
  1500. FXMVECTOR RotationOrigin,
  1501. FLOAT Rotation,
  1502. FXMVECTOR Translation
  1503. )
  1504. {
  1505. #if defined(_XM_NO_INTRINSICS_)
  1506. XMMATRIX M;
  1507. XMVECTOR VScaling;
  1508. XMMATRIX MScaling;
  1509. XMVECTOR VRotationOrigin;
  1510. XMMATRIX MRotation;
  1511. XMVECTOR VTranslation;
  1512. // M = MScaling * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
  1513. VScaling = XMVectorSelect(g_XMOne.v, Scaling, g_XMSelect1100.v);
  1514. MScaling = XMMatrixScalingFromVector(VScaling);
  1515. VRotationOrigin = XMVectorSelect(g_XMSelect1100.v, RotationOrigin, g_XMSelect1100.v);
  1516. MRotation = XMMatrixRotationZ(Rotation);
  1517. VTranslation = XMVectorSelect(g_XMSelect1100.v, Translation,g_XMSelect1100.v);
  1518. M = MScaling;
  1519. M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
  1520. M = XMMatrixMultiply(M, MRotation);
  1521. M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
  1522. M.r[3] = XMVectorAdd(M.r[3], VTranslation);
  1523. return M;
  1524. #elif defined(_XM_SSE_INTRINSICS_)
  1525. XMMATRIX M;
  1526. XMVECTOR VScaling;
  1527. XMMATRIX MScaling;
  1528. XMVECTOR VRotationOrigin;
  1529. XMMATRIX MRotation;
  1530. XMVECTOR VTranslation;
  1531. static const XMVECTORU32 Mask2 = {0xFFFFFFFFU,0xFFFFFFFFU,0,0};
  1532. static const XMVECTORF32 ZW1 = {0,0,1.0f,1.0f};
  1533. // M = MScaling * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
  1534. VScaling = _mm_and_ps(Scaling, Mask2);
  1535. VScaling = _mm_or_ps(VScaling, ZW1);
  1536. MScaling = XMMatrixScalingFromVector(VScaling);
  1537. VRotationOrigin = _mm_and_ps(RotationOrigin, Mask2);
  1538. MRotation = XMMatrixRotationZ(Rotation);
  1539. VTranslation = _mm_and_ps(Translation, Mask2);
  1540. M = MScaling;
  1541. M.r[3] = _mm_sub_ps(M.r[3], VRotationOrigin);
  1542. M = XMMatrixMultiply(M, MRotation);
  1543. M.r[3] = _mm_add_ps(M.r[3], VRotationOrigin);
  1544. M.r[3] = _mm_add_ps(M.r[3], VTranslation);
  1545. return M;
  1546. #else // _XM_VMX128_INTRINSICS_
  1547. #endif // _XM_VMX128_INTRINSICS_
  1548. }
  1549. //------------------------------------------------------------------------------
  1550. XMINLINE XMMATRIX XMMatrixAffineTransformation
  1551. (
  1552. FXMVECTOR Scaling,
  1553. FXMVECTOR RotationOrigin,
  1554. FXMVECTOR RotationQuaternion,
  1555. CXMVECTOR Translation
  1556. )
  1557. {
  1558. #if defined(_XM_NO_INTRINSICS_)
  1559. XMMATRIX M;
  1560. XMMATRIX MScaling;
  1561. XMVECTOR VRotationOrigin;
  1562. XMMATRIX MRotation;
  1563. XMVECTOR VTranslation;
  1564. // M = MScaling * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
  1565. MScaling = XMMatrixScalingFromVector(Scaling);
  1566. VRotationOrigin = XMVectorSelect(g_XMSelect1110.v, RotationOrigin,g_XMSelect1110.v);
  1567. MRotation = XMMatrixRotationQuaternion(RotationQuaternion);
  1568. VTranslation = XMVectorSelect(g_XMSelect1110.v, Translation,g_XMSelect1110.v);
  1569. M = MScaling;
  1570. M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
  1571. M = XMMatrixMultiply(M, MRotation);
  1572. M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
  1573. M.r[3] = XMVectorAdd(M.r[3], VTranslation);
  1574. return M;
  1575. #elif defined(_XM_SSE_INTRINSICS_)
  1576. XMMATRIX M;
  1577. XMMATRIX MScaling;
  1578. XMVECTOR VRotationOrigin;
  1579. XMMATRIX MRotation;
  1580. XMVECTOR VTranslation;
  1581. // M = MScaling * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
  1582. MScaling = XMMatrixScalingFromVector(Scaling);
  1583. VRotationOrigin = _mm_and_ps(RotationOrigin,g_XMMask3);
  1584. MRotation = XMMatrixRotationQuaternion(RotationQuaternion);
  1585. VTranslation = _mm_and_ps(Translation,g_XMMask3);
  1586. M = MScaling;
  1587. M.r[3] = _mm_sub_ps(M.r[3], VRotationOrigin);
  1588. M = XMMatrixMultiply(M, MRotation);
  1589. M.r[3] = _mm_add_ps(M.r[3], VRotationOrigin);
  1590. M.r[3] = _mm_add_ps(M.r[3], VTranslation);
  1591. return M;
  1592. #else // _XM_VMX128_INTRINSICS_
  1593. #endif // _XM_VMX128_INTRINSICS_
  1594. }
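// Usage sketch (illustrative values only): the common scale/rotate/translate
// world matrix, rotating about the object's local origin:
//
//   XMMATRIX World = XMMatrixAffineTransformation(
//       XMVectorReplicate(1.5f),                             // uniform scale
//       XMVectorZero(),                                      // rotation origin
//       XMQuaternionRotationRollPitchYaw(0.0f, XM_PIDIV4, 0.0f),
//       XMVectorSet(10.0f, 0.0f, 5.0f, 0.0f));               // translation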
  1595. //------------------------------------------------------------------------------
  1596. XMFINLINE XMMATRIX XMMatrixReflect
  1597. (
  1598. FXMVECTOR ReflectionPlane
  1599. )
  1600. {
  1601. #if defined(_XM_NO_INTRINSICS_)
  1602. XMVECTOR P;
  1603. XMVECTOR S;
  1604. XMVECTOR A, B, C, D;
  1605. XMMATRIX M;
  1606. static CONST XMVECTOR NegativeTwo = {-2.0f, -2.0f, -2.0f, 0.0f};
  1607. XMASSERT(!XMVector3Equal(ReflectionPlane, XMVectorZero()));
  1608. XMASSERT(!XMPlaneIsInfinite(ReflectionPlane));
  1609. P = XMPlaneNormalize(ReflectionPlane);
  1610. S = XMVectorMultiply(P, NegativeTwo);
  1611. A = XMVectorSplatX(P);
  1612. B = XMVectorSplatY(P);
  1613. C = XMVectorSplatZ(P);
  1614. D = XMVectorSplatW(P);
  1615. M.r[0] = XMVectorMultiplyAdd(A, S, g_XMIdentityR0.v);
  1616. M.r[1] = XMVectorMultiplyAdd(B, S, g_XMIdentityR1.v);
  1617. M.r[2] = XMVectorMultiplyAdd(C, S, g_XMIdentityR2.v);
  1618. M.r[3] = XMVectorMultiplyAdd(D, S, g_XMIdentityR3.v);
  1619. return M;
  1620. #elif defined(_XM_SSE_INTRINSICS_)
  1621. XMMATRIX M;
  1622. static CONST XMVECTORF32 NegativeTwo = {-2.0f, -2.0f, -2.0f, 0.0f};
  1623. XMASSERT(!XMVector3Equal(ReflectionPlane, XMVectorZero()));
  1624. XMASSERT(!XMPlaneIsInfinite(ReflectionPlane));
  1625. XMVECTOR P = XMPlaneNormalize(ReflectionPlane);
  1626. XMVECTOR S = _mm_mul_ps(P,NegativeTwo);
  1627. XMVECTOR X = _mm_shuffle_ps(P,P,_MM_SHUFFLE(0,0,0,0));
  1628. XMVECTOR Y = _mm_shuffle_ps(P,P,_MM_SHUFFLE(1,1,1,1));
  1629. XMVECTOR Z = _mm_shuffle_ps(P,P,_MM_SHUFFLE(2,2,2,2));
  1630. P = _mm_shuffle_ps(P,P,_MM_SHUFFLE(3,3,3,3));
  1631. X = _mm_mul_ps(X,S);
  1632. Y = _mm_mul_ps(Y,S);
  1633. Z = _mm_mul_ps(Z,S);
  1634. P = _mm_mul_ps(P,S);
  1635. X = _mm_add_ps(X,g_XMIdentityR0);
  1636. Y = _mm_add_ps(Y,g_XMIdentityR1);
  1637. Z = _mm_add_ps(Z,g_XMIdentityR2);
  1638. P = _mm_add_ps(P,g_XMIdentityR3);
  1639. M.r[0] = X;
  1640. M.r[1] = Y;
  1641. M.r[2] = Z;
  1642. M.r[3] = P;
  1643. return M;
  1644. #else // _XM_VMX128_INTRINSICS_
  1645. #endif // _XM_VMX128_INTRINSICS_
  1646. }
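// For reference, with the normalized plane P = (a, b, c, d) the rows built
// above form the standard reflection matrix M = I - 2 * outer(P, (a, b, c, 0)):
//
//   | 1-2a^2   -2ab     -2ac     0 |
//   | -2ab     1-2b^2   -2bc     0 |
//   | -2ac     -2bc     1-2c^2   0 |
//   | -2ad     -2bd     -2cd     1 |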
  1647. //------------------------------------------------------------------------------
  1648. XMFINLINE XMMATRIX XMMatrixShadow
  1649. (
  1650. FXMVECTOR ShadowPlane,
  1651. FXMVECTOR LightPosition
  1652. )
  1653. {
  1654. #if defined(_XM_NO_INTRINSICS_)
  1655. XMVECTOR P;
  1656. XMVECTOR Dot;
  1657. XMVECTOR A, B, C, D;
  1658. XMMATRIX M;
  1659. static CONST XMVECTORU32 Select0001 = {XM_SELECT_0, XM_SELECT_0, XM_SELECT_0, XM_SELECT_1};
  1660. XMASSERT(!XMVector3Equal(ShadowPlane, XMVectorZero()));
  1661. XMASSERT(!XMPlaneIsInfinite(ShadowPlane));
  1662. P = XMPlaneNormalize(ShadowPlane);
  1663. Dot = XMPlaneDot(P, LightPosition);
  1664. P = XMVectorNegate(P);
  1665. D = XMVectorSplatW(P);
  1666. C = XMVectorSplatZ(P);
  1667. B = XMVectorSplatY(P);
  1668. A = XMVectorSplatX(P);
  1669. Dot = XMVectorSelect(Select0001.v, Dot, Select0001.v);
  1670. M.r[3] = XMVectorMultiplyAdd(D, LightPosition, Dot);
  1671. Dot = XMVectorRotateLeft(Dot, 1);
  1672. M.r[2] = XMVectorMultiplyAdd(C, LightPosition, Dot);
  1673. Dot = XMVectorRotateLeft(Dot, 1);
  1674. M.r[1] = XMVectorMultiplyAdd(B, LightPosition, Dot);
  1675. Dot = XMVectorRotateLeft(Dot, 1);
  1676. M.r[0] = XMVectorMultiplyAdd(A, LightPosition, Dot);
  1677. return M;
  1678. #elif defined(_XM_SSE_INTRINSICS_)
  1679. XMMATRIX M;
  1680. XMASSERT(!XMVector3Equal(ShadowPlane, XMVectorZero()));
  1681. XMASSERT(!XMPlaneIsInfinite(ShadowPlane));
  1682. XMVECTOR P = XMPlaneNormalize(ShadowPlane);
  1683. XMVECTOR Dot = XMPlaneDot(P,LightPosition);
  1684. // Negate
  1685. P = _mm_mul_ps(P,g_XMNegativeOne);
  1686. XMVECTOR X = _mm_shuffle_ps(P,P,_MM_SHUFFLE(0,0,0,0));
  1687. XMVECTOR Y = _mm_shuffle_ps(P,P,_MM_SHUFFLE(1,1,1,1));
  1688. XMVECTOR Z = _mm_shuffle_ps(P,P,_MM_SHUFFLE(2,2,2,2));
  1689. P = _mm_shuffle_ps(P,P,_MM_SHUFFLE(3,3,3,3));
  1690. Dot = _mm_and_ps(Dot,g_XMMaskW);
  1691. X = _mm_mul_ps(X,LightPosition);
  1692. Y = _mm_mul_ps(Y,LightPosition);
  1693. Z = _mm_mul_ps(Z,LightPosition);
  1694. P = _mm_mul_ps(P,LightPosition);
  1695. P = _mm_add_ps(P,Dot);
  1696. Dot = _mm_shuffle_ps(Dot,Dot,_MM_SHUFFLE(0,3,2,1));
  1697. Z = _mm_add_ps(Z,Dot);
  1698. Dot = _mm_shuffle_ps(Dot,Dot,_MM_SHUFFLE(0,3,2,1));
  1699. Y = _mm_add_ps(Y,Dot);
  1700. Dot = _mm_shuffle_ps(Dot,Dot,_MM_SHUFFLE(0,3,2,1));
  1701. X = _mm_add_ps(X,Dot);
  1702. // Store the resulting matrix
  1703. M.r[0] = X;
  1704. M.r[1] = Y;
  1705. M.r[2] = Z;
  1706. M.r[3] = P;
  1707. return M;
  1708. #else // _XM_VMX128_INTRINSICS_
  1709. #endif // _XM_VMX128_INTRINSICS_
  1710. }
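// For reference, with normalized plane P and light L the rows built above
// form the classic planar-projection matrix M = dot(P, L) * I - outer(P, L),
// which flattens geometry onto the plane along rays from the light
// (L.w = 0 treats L as a directional light, L.w = 1 as a point light).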
  1711. //------------------------------------------------------------------------------
  1712. // View and projection initialization operations
  1713. //------------------------------------------------------------------------------
  1714. //------------------------------------------------------------------------------
  1715. XMFINLINE XMMATRIX XMMatrixLookAtLH
  1716. (
  1717. FXMVECTOR EyePosition,
  1718. FXMVECTOR FocusPosition,
  1719. FXMVECTOR UpDirection
  1720. )
  1721. {
  1722. XMVECTOR EyeDirection;
  1723. XMMATRIX M;
  1724. EyeDirection = XMVectorSubtract(FocusPosition, EyePosition);
  1725. M = XMMatrixLookToLH(EyePosition, EyeDirection, UpDirection);
  1726. return M;
  1727. }
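// Usage sketch (illustrative values only): a camera five units behind the
// origin, looking at the origin, with +Y up:
//
//   XMVECTOR Eye = XMVectorSet(0.0f, 0.0f, -5.0f, 1.0f);
//   XMVECTOR At  = XMVectorSet(0.0f, 0.0f,  0.0f, 1.0f);
//   XMVECTOR Up  = XMVectorSet(0.0f, 1.0f,  0.0f, 0.0f);
//   XMMATRIX View = XMMatrixLookAtLH(Eye, At, Up);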
  1728. //------------------------------------------------------------------------------
  1729. XMFINLINE XMMATRIX XMMatrixLookAtRH
  1730. (
  1731. FXMVECTOR EyePosition,
  1732. FXMVECTOR FocusPosition,
  1733. FXMVECTOR UpDirection
  1734. )
  1735. {
  1736. XMVECTOR NegEyeDirection;
  1737. XMMATRIX M;
  1738. NegEyeDirection = XMVectorSubtract(EyePosition, FocusPosition);
  1739. M = XMMatrixLookToLH(EyePosition, NegEyeDirection, UpDirection);
  1740. return M;
  1741. }
  1742. //------------------------------------------------------------------------------
  1743. XMINLINE XMMATRIX XMMatrixLookToLH
  1744. (
  1745. FXMVECTOR EyePosition,
  1746. FXMVECTOR EyeDirection,
  1747. FXMVECTOR UpDirection
  1748. )
  1749. {
  1750. #if defined(_XM_NO_INTRINSICS_)
  1751. XMVECTOR NegEyePosition;
  1752. XMVECTOR D0, D1, D2;
  1753. XMVECTOR R0, R1, R2;
  1754. XMMATRIX M;
  1755. XMASSERT(!XMVector3Equal(EyeDirection, XMVectorZero()));
  1756. XMASSERT(!XMVector3IsInfinite(EyeDirection));
  1757. XMASSERT(!XMVector3Equal(UpDirection, XMVectorZero()));
  1758. XMASSERT(!XMVector3IsInfinite(UpDirection));
  1759. R2 = XMVector3Normalize(EyeDirection);
  1760. R0 = XMVector3Cross(UpDirection, R2);
  1761. R0 = XMVector3Normalize(R0);
  1762. R1 = XMVector3Cross(R2, R0);
  1763. NegEyePosition = XMVectorNegate(EyePosition);
  1764. D0 = XMVector3Dot(R0, NegEyePosition);
  1765. D1 = XMVector3Dot(R1, NegEyePosition);
  1766. D2 = XMVector3Dot(R2, NegEyePosition);
  1767. M.r[0] = XMVectorSelect(D0, R0, g_XMSelect1110.v);
  1768. M.r[1] = XMVectorSelect(D1, R1, g_XMSelect1110.v);
  1769. M.r[2] = XMVectorSelect(D2, R2, g_XMSelect1110.v);
  1770. M.r[3] = g_XMIdentityR3.v;
  1771. M = XMMatrixTranspose(M);
  1772. return M;
  1773. #elif defined(_XM_SSE_INTRINSICS_)
  1774. XMMATRIX M;
  1775. XMASSERT(!XMVector3Equal(EyeDirection, XMVectorZero()));
  1776. XMASSERT(!XMVector3IsInfinite(EyeDirection));
  1777. XMASSERT(!XMVector3Equal(UpDirection, XMVectorZero()));
  1778. XMASSERT(!XMVector3IsInfinite(UpDirection));
  1779. XMVECTOR R2 = XMVector3Normalize(EyeDirection);
  1780. XMVECTOR R0 = XMVector3Cross(UpDirection, R2);
  1781. R0 = XMVector3Normalize(R0);
  1782. XMVECTOR R1 = XMVector3Cross(R2,R0);
  1783. XMVECTOR NegEyePosition = _mm_mul_ps(EyePosition,g_XMNegativeOne);
  1784. XMVECTOR D0 = XMVector3Dot(R0,NegEyePosition);
  1785. XMVECTOR D1 = XMVector3Dot(R1,NegEyePosition);
  1786. XMVECTOR D2 = XMVector3Dot(R2,NegEyePosition);
  1787. R0 = _mm_and_ps(R0,g_XMMask3);
  1788. R1 = _mm_and_ps(R1,g_XMMask3);
  1789. R2 = _mm_and_ps(R2,g_XMMask3);
  1790. D0 = _mm_and_ps(D0,g_XMMaskW);
  1791. D1 = _mm_and_ps(D1,g_XMMaskW);
  1792. D2 = _mm_and_ps(D2,g_XMMaskW);
  1793. D0 = _mm_or_ps(D0,R0);
  1794. D1 = _mm_or_ps(D1,R1);
  1795. D2 = _mm_or_ps(D2,R2);
  1796. M.r[0] = D0;
  1797. M.r[1] = D1;
  1798. M.r[2] = D2;
  1799. M.r[3] = g_XMIdentityR3;
  1800. M = XMMatrixTranspose(M);
  1801. return M;
  1802. #else // _XM_VMX128_INTRINSICS_
  1803. #endif // _XM_VMX128_INTRINSICS_
  1804. }
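// The rows assembled above are the camera basis vectors (right, up, forward)
// with the -Eye dot products in w; the final transpose turns that into the
// conventional view matrix, with the basis in the first three columns and
// the translation terms in the fourth row.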
  1805. //------------------------------------------------------------------------------
  1806. XMFINLINE XMMATRIX XMMatrixLookToRH
  1807. (
  1808. FXMVECTOR EyePosition,
  1809. FXMVECTOR EyeDirection,
  1810. FXMVECTOR UpDirection
  1811. )
  1812. {
  1813. XMVECTOR NegEyeDirection;
  1814. XMMATRIX M;
  1815. NegEyeDirection = XMVectorNegate(EyeDirection);
  1816. M = XMMatrixLookToLH(EyePosition, NegEyeDirection, UpDirection);
  1817. return M;
  1818. }
  1819. //------------------------------------------------------------------------------
  1820. XMFINLINE XMMATRIX XMMatrixPerspectiveLH
  1821. (
  1822. FLOAT ViewWidth,
  1823. FLOAT ViewHeight,
  1824. FLOAT NearZ,
  1825. FLOAT FarZ
  1826. )
  1827. {
  1828. #if defined(_XM_NO_INTRINSICS_)
  1829. FLOAT TwoNearZ, fRange;
  1830. XMMATRIX M;
  1831. XMASSERT(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
  1832. XMASSERT(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
  1833. XMASSERT(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
  1834. TwoNearZ = NearZ + NearZ;
  1835. fRange = FarZ / (FarZ - NearZ);
  1836. M.m[0][0] = TwoNearZ / ViewWidth;
  1837. M.m[0][1] = 0.0f;
  1838. M.m[0][2] = 0.0f;
  1839. M.m[0][3] = 0.0f;
  1840. M.m[1][0] = 0.0f;
  1841. M.m[1][1] = TwoNearZ / ViewHeight;
  1842. M.m[1][2] = 0.0f;
  1843. M.m[1][3] = 0.0f;
  1844. M.m[2][0] = 0.0f;
  1845. M.m[2][1] = 0.0f;
  1846. M.m[2][2] = fRange;
  1847. M.m[2][3] = 1.0f;
  1848. M.m[3][0] = 0.0f;
  1849. M.m[3][1] = 0.0f;
  1850. M.m[3][2] = -fRange * NearZ;
  1851. M.m[3][3] = 0.0f;
  1852. return M;
  1853. #elif defined(_XM_SSE_INTRINSICS_)
  1854. XMASSERT(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
  1855. XMASSERT(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
  1856. XMASSERT(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
  1857. XMMATRIX M;
  1858. FLOAT TwoNearZ = NearZ + NearZ;
  1859. FLOAT fRange = FarZ / (FarZ - NearZ);
1860. // Note: This vector is constructed on the stack
  1861. XMVECTOR rMem = {
  1862. TwoNearZ / ViewWidth,
  1863. TwoNearZ / ViewHeight,
  1864. fRange,
  1865. -fRange * NearZ
  1866. };
  1867. // Copy from memory to SSE register
  1868. XMVECTOR vValues = rMem;
  1869. XMVECTOR vTemp = _mm_setzero_ps();
  1870. // Copy x only
  1871. vTemp = _mm_move_ss(vTemp,vValues);
  1872. // TwoNearZ / ViewWidth,0,0,0
  1873. M.r[0] = vTemp;
  1874. // 0,TwoNearZ / ViewHeight,0,0
  1875. vTemp = vValues;
  1876. vTemp = _mm_and_ps(vTemp,g_XMMaskY);
  1877. M.r[1] = vTemp;
  1878. // x=fRange,y=-fRange * NearZ,0,1.0f
  1879. vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
  1880. // 0,0,fRange,1.0f
  1881. vTemp = _mm_setzero_ps();
  1882. vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
  1883. M.r[2] = vTemp;
  1884. // 0,0,-fRange * NearZ,0
  1885. vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
  1886. M.r[3] = vTemp;
  1887. return M;
  1888. #else // _XM_VMX128_INTRINSICS_
  1889. #endif // _XM_VMX128_INTRINSICS_
  1890. }
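// The resulting projection maps view-space z in [NearZ, FarZ] onto [0, 1]
// after the perspective divide: z' = fRange*z - fRange*NearZ with w' = z,
// so z'/w' = fRange*(1 - NearZ/z), giving 0 at z = NearZ and 1 at z = FarZ.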
  1891. //------------------------------------------------------------------------------
  1892. XMFINLINE XMMATRIX XMMatrixPerspectiveRH
  1893. (
  1894. FLOAT ViewWidth,
  1895. FLOAT ViewHeight,
  1896. FLOAT NearZ,
  1897. FLOAT FarZ
  1898. )
  1899. {
  1900. #if defined(_XM_NO_INTRINSICS_)
  1901. FLOAT TwoNearZ, fRange;
  1902. XMMATRIX M;
  1903. XMASSERT(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
  1904. XMASSERT(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
  1905. XMASSERT(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
  1906. TwoNearZ = NearZ + NearZ;
  1907. fRange = FarZ / (NearZ - FarZ);
  1908. M.m[0][0] = TwoNearZ / ViewWidth;
  1909. M.m[0][1] = 0.0f;
  1910. M.m[0][2] = 0.0f;
  1911. M.m[0][3] = 0.0f;
  1912. M.m[1][0] = 0.0f;
  1913. M.m[1][1] = TwoNearZ / ViewHeight;
  1914. M.m[1][2] = 0.0f;
  1915. M.m[1][3] = 0.0f;
  1916. M.m[2][0] = 0.0f;
  1917. M.m[2][1] = 0.0f;
  1918. M.m[2][2] = fRange;
  1919. M.m[2][3] = -1.0f;
  1920. M.m[3][0] = 0.0f;
  1921. M.m[3][1] = 0.0f;
  1922. M.m[3][2] = fRange * NearZ;
  1923. M.m[3][3] = 0.0f;
  1924. return M;
  1925. #elif defined(_XM_SSE_INTRINSICS_)
  1926. XMASSERT(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
  1927. XMASSERT(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
  1928. XMASSERT(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
  1929. XMMATRIX M;
  1930. FLOAT TwoNearZ = NearZ + NearZ;
  1931. FLOAT fRange = FarZ / (NearZ-FarZ);
1932. // Note: This vector is constructed on the stack
  1933. XMVECTOR rMem = {
  1934. TwoNearZ / ViewWidth,
  1935. TwoNearZ / ViewHeight,
  1936. fRange,
  1937. fRange * NearZ
  1938. };
  1939. // Copy from memory to SSE register
  1940. XMVECTOR vValues = rMem;
  1941. XMVECTOR vTemp = _mm_setzero_ps();
  1942. // Copy x only
  1943. vTemp = _mm_move_ss(vTemp,vValues);
  1944. // TwoNearZ / ViewWidth,0,0,0
  1945. M.r[0] = vTemp;
  1946. // 0,TwoNearZ / ViewHeight,0,0
  1947. vTemp = vValues;
  1948. vTemp = _mm_and_ps(vTemp,g_XMMaskY);
  1949. M.r[1] = vTemp;
1950. // x=fRange,y=fRange * NearZ,0,-1.0f
  1951. vValues = _mm_shuffle_ps(vValues,g_XMNegIdentityR3,_MM_SHUFFLE(3,2,3,2));
  1952. // 0,0,fRange,-1.0f
  1953. vTemp = _mm_setzero_ps();
  1954. vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
  1955. M.r[2] = vTemp;
1956. // 0,0,fRange * NearZ,0
  1957. vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
  1958. M.r[3] = vTemp;
  1959. return M;
  1960. #else // _XM_VMX128_INTRINSICS_
  1961. #endif // _XM_VMX128_INTRINSICS_
  1962. }
  1963. //------------------------------------------------------------------------------
  1964. XMFINLINE XMMATRIX XMMatrixPerspectiveFovLH
  1965. (
  1966. FLOAT FovAngleY,
  1967. FLOAT AspectRatio,
  1968. FLOAT NearZ,
  1969. FLOAT FarZ
  1970. )
  1971. {
  1972. #if defined(_XM_NO_INTRINSICS_)
  1973. FLOAT SinFov;
  1974. FLOAT CosFov;
  1975. FLOAT Height;
  1976. FLOAT Width;
  1977. XMMATRIX M;
  1978. XMASSERT(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f));
  1979. XMASSERT(!XMScalarNearEqual(AspectRatio, 0.0f, 0.00001f));
  1980. XMASSERT(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
  1981. XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
  1982. Height = CosFov / SinFov;
  1983. Width = Height / AspectRatio;
  1984. M.r[0] = XMVectorSet(Width, 0.0f, 0.0f, 0.0f);
  1985. M.r[1] = XMVectorSet(0.0f, Height, 0.0f, 0.0f);
  1986. M.r[2] = XMVectorSet(0.0f, 0.0f, FarZ / (FarZ - NearZ), 1.0f);
  1987. M.r[3] = XMVectorSet(0.0f, 0.0f, -M.r[2].vector4_f32[2] * NearZ, 0.0f);
  1988. return M;
  1989. #elif defined(_XM_SSE_INTRINSICS_)
  1990. XMASSERT(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f));
  1991. XMASSERT(!XMScalarNearEqual(AspectRatio, 0.0f, 0.00001f));
  1992. XMASSERT(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
  1993. XMMATRIX M;
  1994. FLOAT SinFov;
  1995. FLOAT CosFov;
  1996. XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
  1997. FLOAT fRange = FarZ / (FarZ-NearZ);
1998. // Note: This vector is constructed on the stack
  1999. FLOAT Height = CosFov / SinFov;
  2000. XMVECTOR rMem = {
  2001. Height / AspectRatio,
  2002. Height,
  2003. fRange,
  2004. -fRange * NearZ
  2005. };
  2006. // Copy from memory to SSE register
  2007. XMVECTOR vValues = rMem;
  2008. XMVECTOR vTemp = _mm_setzero_ps();
  2009. // Copy x only
  2010. vTemp = _mm_move_ss(vTemp,vValues);
2011. // Height / AspectRatio,0,0,0
  2012. M.r[0] = vTemp;
2013. // 0,Height,0,0
  2014. vTemp = vValues;
  2015. vTemp = _mm_and_ps(vTemp,g_XMMaskY);
  2016. M.r[1] = vTemp;
  2017. // x=fRange,y=-fRange * NearZ,0,1.0f
  2018. vTemp = _mm_setzero_ps();
  2019. vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
  2020. // 0,0,fRange,1.0f
  2021. vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
  2022. M.r[2] = vTemp;
  2023. // 0,0,-fRange * NearZ,0.0f
  2024. vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
  2025. M.r[3] = vTemp;
  2026. return M;
  2027. #else // _XM_VMX128_INTRINSICS_
  2028. #endif // _XM_VMX128_INTRINSICS_
  2029. }
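// Usage sketch (illustrative values only): a 90-degree vertical field of
// view at a 16:9 aspect ratio over a [0.1, 1000] depth range:
//
//   XMMATRIX Proj = XMMatrixPerspectiveFovLH(XM_PIDIV2, 16.0f / 9.0f,
//                                            0.1f, 1000.0f);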
  2030. //------------------------------------------------------------------------------
  2031. XMFINLINE XMMATRIX XMMatrixPerspectiveFovRH
  2032. (
  2033. FLOAT FovAngleY,
  2034. FLOAT AspectRatio,
  2035. FLOAT NearZ,
  2036. FLOAT FarZ
  2037. )
  2038. {
  2039. #if defined(_XM_NO_INTRINSICS_)
  2040. FLOAT SinFov;
  2041. FLOAT CosFov;
  2042. FLOAT Height;
  2043. FLOAT Width;
  2044. XMMATRIX M;
  2045. XMASSERT(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f));
  2046. XMASSERT(!XMScalarNearEqual(AspectRatio, 0.0f, 0.00001f));
  2047. XMASSERT(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
  2048. XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
  2049. Height = CosFov / SinFov;
  2050. Width = Height / AspectRatio;
  2051. M.r[0] = XMVectorSet(Width, 0.0f, 0.0f, 0.0f);
  2052. M.r[1] = XMVectorSet(0.0f, Height, 0.0f, 0.0f);
  2053. M.r[2] = XMVectorSet(0.0f, 0.0f, FarZ / (NearZ - FarZ), -1.0f);
  2054. M.r[3] = XMVectorSet(0.0f, 0.0f, M.r[2].vector4_f32[2] * NearZ, 0.0f);
  2055. return M;
  2056. #elif defined(_XM_SSE_INTRINSICS_)
  2057. XMASSERT(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f));
  2058. XMASSERT(!XMScalarNearEqual(AspectRatio, 0.0f, 0.00001f));
  2059. XMASSERT(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
  2060. XMMATRIX M;
  2061. FLOAT SinFov;
  2062. FLOAT CosFov;
  2063. XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
  2064. FLOAT fRange = FarZ / (NearZ-FarZ);
2065. // Note: This vector is constructed on the stack
  2066. FLOAT Height = CosFov / SinFov;
  2067. XMVECTOR rMem = {
  2068. Height / AspectRatio,
  2069. Height,
  2070. fRange,
  2071. fRange * NearZ
  2072. };
  2073. // Copy from memory to SSE register
  2074. XMVECTOR vValues = rMem;
  2075. XMVECTOR vTemp = _mm_setzero_ps();
  2076. // Copy x only
  2077. vTemp = _mm_move_ss(vTemp,vValues);
2078. // Height / AspectRatio,0,0,0
  2079. M.r[0] = vTemp;
2080. // 0,Height,0,0
  2081. vTemp = vValues;
  2082. vTemp = _mm_and_ps(vTemp,g_XMMaskY);
  2083. M.r[1] = vTemp;
2084. // x=fRange,y=fRange * NearZ,0,-1.0f
  2085. vTemp = _mm_setzero_ps();
  2086. vValues = _mm_shuffle_ps(vValues,g_XMNegIdentityR3,_MM_SHUFFLE(3,2,3,2));
  2087. // 0,0,fRange,-1.0f
  2088. vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
  2089. M.r[2] = vTemp;
  2090. // 0,0,fRange * NearZ,0.0f
  2091. vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
  2092. M.r[3] = vTemp;
  2093. return M;
  2094. #else // _XM_VMX128_INTRINSICS_
  2095. #endif // _XM_VMX128_INTRINSICS_
  2096. }
  2097. //------------------------------------------------------------------------------
  2098. XMFINLINE XMMATRIX XMMatrixPerspectiveOffCenterLH
  2099. (
  2100. FLOAT ViewLeft,
  2101. FLOAT ViewRight,
  2102. FLOAT ViewBottom,
  2103. FLOAT ViewTop,
  2104. FLOAT NearZ,
  2105. FLOAT FarZ
  2106. )
  2107. {
  2108. #if defined(_XM_NO_INTRINSICS_)
  2109. FLOAT TwoNearZ;
  2110. FLOAT ReciprocalWidth;
  2111. FLOAT ReciprocalHeight;
  2112. XMMATRIX M;
  2113. XMASSERT(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
  2114. XMASSERT(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
  2115. XMASSERT(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
  2116. TwoNearZ = NearZ + NearZ;
  2117. ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
  2118. ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
  2119. M.r[0] = XMVectorSet(TwoNearZ * ReciprocalWidth, 0.0f, 0.0f, 0.0f);
  2120. M.r[1] = XMVectorSet(0.0f, TwoNearZ * ReciprocalHeight, 0.0f, 0.0f);
  2121. M.r[2] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
  2122. -(ViewTop + ViewBottom) * ReciprocalHeight,
  2123. FarZ / (FarZ - NearZ),
  2124. 1.0f);
  2125. M.r[3] = XMVectorSet(0.0f, 0.0f, -M.r[2].vector4_f32[2] * NearZ, 0.0f);
  2126. return M;
  2127. #elif defined(_XM_SSE_INTRINSICS_)
  2128. XMASSERT(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
  2129. XMASSERT(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
  2130. XMASSERT(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
  2131. XMMATRIX M;
  2132. FLOAT TwoNearZ = NearZ+NearZ;
  2133. FLOAT ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
  2134. FLOAT ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
  2135. FLOAT fRange = FarZ / (FarZ-NearZ);
2136. // Note: This vector is constructed on the stack
  2137. XMVECTOR rMem = {
  2138. TwoNearZ*ReciprocalWidth,
  2139. TwoNearZ*ReciprocalHeight,
  2140. -fRange * NearZ,
  2141. 0
  2142. };
  2143. // Copy from memory to SSE register
  2144. XMVECTOR vValues = rMem;
  2145. XMVECTOR vTemp = _mm_setzero_ps();
  2146. // Copy x only
  2147. vTemp = _mm_move_ss(vTemp,vValues);
  2148. // TwoNearZ*ReciprocalWidth,0,0,0
  2149. M.r[0] = vTemp;
  2150. // 0,TwoNearZ*ReciprocalHeight,0,0
  2151. vTemp = vValues;
  2152. vTemp = _mm_and_ps(vTemp,g_XMMaskY);
  2153. M.r[1] = vTemp;
2154. // -(ViewLeft+ViewRight)*ReciprocalWidth,-(ViewTop+ViewBottom)*ReciprocalHeight,fRange,1.0f
  2155. M.m[2][0] = -(ViewLeft + ViewRight) * ReciprocalWidth;
  2156. M.m[2][1] = -(ViewTop + ViewBottom) * ReciprocalHeight;
  2157. M.m[2][2] = fRange;
  2158. M.m[2][3] = 1.0f;
  2159. // 0,0,-fRange * NearZ,0.0f
  2160. vValues = _mm_and_ps(vValues,g_XMMaskZ);
  2161. M.r[3] = vValues;
  2162. return M;
  2163. #else // _XM_VMX128_INTRINSICS_
  2164. #endif // _XM_VMX128_INTRINSICS_
  2165. }
  2166. //------------------------------------------------------------------------------
  2167. XMFINLINE XMMATRIX XMMatrixPerspectiveOffCenterRH
  2168. (
  2169. FLOAT ViewLeft,
  2170. FLOAT ViewRight,
  2171. FLOAT ViewBottom,
  2172. FLOAT ViewTop,
  2173. FLOAT NearZ,
  2174. FLOAT FarZ
  2175. )
  2176. {
  2177. #if defined(_XM_NO_INTRINSICS_)
  2178. FLOAT TwoNearZ;
  2179. FLOAT ReciprocalWidth;
  2180. FLOAT ReciprocalHeight;
  2181. XMMATRIX M;
  2182. XMASSERT(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
  2183. XMASSERT(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
  2184. XMASSERT(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
  2185. TwoNearZ = NearZ + NearZ;
  2186. ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
  2187. ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
  2188. M.r[0] = XMVectorSet(TwoNearZ * ReciprocalWidth, 0.0f, 0.0f, 0.0f);
  2189. M.r[1] = XMVectorSet(0.0f, TwoNearZ * ReciprocalHeight, 0.0f, 0.0f);
  2190. M.r[2] = XMVectorSet((ViewLeft + ViewRight) * ReciprocalWidth,
  2191. (ViewTop + ViewBottom) * ReciprocalHeight,
  2192. FarZ / (NearZ - FarZ),
  2193. -1.0f);
  2194. M.r[3] = XMVectorSet(0.0f, 0.0f, M.r[2].vector4_f32[2] * NearZ, 0.0f);
  2195. return M;
  2196. #elif defined(_XM_SSE_INTRINSICS_)
  2197. XMASSERT(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
  2198. XMASSERT(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
  2199. XMASSERT(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
  2200. XMMATRIX M;
  2201. FLOAT TwoNearZ = NearZ+NearZ;
  2202. FLOAT ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
  2203. FLOAT ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
  2204. FLOAT fRange = FarZ / (NearZ-FarZ);
2205. // Note: This vector is constructed on the stack
  2206. XMVECTOR rMem = {
  2207. TwoNearZ*ReciprocalWidth,
  2208. TwoNearZ*ReciprocalHeight,
  2209. fRange * NearZ,
  2210. 0
  2211. };
  2212. // Copy from memory to SSE register
  2213. XMVECTOR vValues = rMem;
  2214. XMVECTOR vTemp = _mm_setzero_ps();
  2215. // Copy x only
  2216. vTemp = _mm_move_ss(vTemp,vValues);
  2217. // TwoNearZ*ReciprocalWidth,0,0,0
  2218. M.r[0] = vTemp;
  2219. // 0,TwoNearZ*ReciprocalHeight,0,0
  2220. vTemp = vValues;
  2221. vTemp = _mm_and_ps(vTemp,g_XMMaskY);
  2222. M.r[1] = vTemp;
2223. // (ViewLeft+ViewRight)*ReciprocalWidth,(ViewTop+ViewBottom)*ReciprocalHeight,fRange,-1.0f
  2224. M.m[2][0] = (ViewLeft + ViewRight) * ReciprocalWidth;
  2225. M.m[2][1] = (ViewTop + ViewBottom) * ReciprocalHeight;
  2226. M.m[2][2] = fRange;
  2227. M.m[2][3] = -1.0f;
2228. // 0,0,fRange * NearZ,0.0f
  2229. vValues = _mm_and_ps(vValues,g_XMMaskZ);
  2230. M.r[3] = vValues;
  2231. return M;
  2232. #else // _XM_VMX128_INTRINSICS_
  2233. #endif // _XM_VMX128_INTRINSICS_
  2234. }
  2235. //------------------------------------------------------------------------------
  2236. XMFINLINE XMMATRIX XMMatrixOrthographicLH
  2237. (
  2238. FLOAT ViewWidth,
  2239. FLOAT ViewHeight,
  2240. FLOAT NearZ,
  2241. FLOAT FarZ
  2242. )
  2243. {
  2244. #if defined(_XM_NO_INTRINSICS_)
  2245. FLOAT fRange;
  2246. XMMATRIX M;
  2247. XMASSERT(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
  2248. XMASSERT(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
  2249. XMASSERT(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
  2250. fRange = 1.0f / (FarZ-NearZ);
  2251. M.r[0] = XMVectorSet(2.0f / ViewWidth, 0.0f, 0.0f, 0.0f);
  2252. M.r[1] = XMVectorSet(0.0f, 2.0f / ViewHeight, 0.0f, 0.0f);
  2253. M.r[2] = XMVectorSet(0.0f, 0.0f, fRange, 0.0f);
  2254. M.r[3] = XMVectorSet(0.0f, 0.0f, -fRange * NearZ, 1.0f);
  2255. return M;
  2256. #elif defined(_XM_SSE_INTRINSICS_)
  2257. XMASSERT(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
  2258. XMASSERT(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
  2259. XMASSERT(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
  2260. XMMATRIX M;
  2261. FLOAT fRange = 1.0f / (FarZ-NearZ);
2262. // Note: This vector is constructed on the stack
  2263. XMVECTOR rMem = {
  2264. 2.0f / ViewWidth,
  2265. 2.0f / ViewHeight,
  2266. fRange,
  2267. -fRange * NearZ
  2268. };
  2269. // Copy from memory to SSE register
  2270. XMVECTOR vValues = rMem;
  2271. XMVECTOR vTemp = _mm_setzero_ps();
  2272. // Copy x only
  2273. vTemp = _mm_move_ss(vTemp,vValues);
  2274. // 2.0f / ViewWidth,0,0,0
  2275. M.r[0] = vTemp;
  2276. // 0,2.0f / ViewHeight,0,0
  2277. vTemp = vValues;
  2278. vTemp = _mm_and_ps(vTemp,g_XMMaskY);
  2279. M.r[1] = vTemp;
  2280. // x=fRange,y=-fRange * NearZ,0,1.0f
  2281. vTemp = _mm_setzero_ps();
  2282. vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
  2283. // 0,0,fRange,0.0f
  2284. vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,0,0,0));
  2285. M.r[2] = vTemp;
  2286. // 0,0,-fRange * NearZ,1.0f
  2287. vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,1,0,0));
  2288. M.r[3] = vTemp;
  2289. return M;
  2290. #else // _XM_VMX128_INTRINSICS_
  2291. #endif // _XM_VMX128_INTRINSICS_
  2292. }
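// Unlike the perspective forms above, the orthographic projection is affine:
// x and y scale by 2/ViewWidth and 2/ViewHeight, depth maps linearly as
// z' = (z - NearZ) / (FarZ - NearZ), and w' stays 1, so no divide occurs.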
  2293. //------------------------------------------------------------------------------
  2294. XMFINLINE XMMATRIX XMMatrixOrthographicRH
  2295. (
  2296. FLOAT ViewWidth,
  2297. FLOAT ViewHeight,
  2298. FLOAT NearZ,
  2299. FLOAT FarZ
  2300. )
  2301. {
  2302. #if defined(_XM_NO_INTRINSICS_)
  2303. XMMATRIX M;
  2304. XMASSERT(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
  2305. XMASSERT(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
  2306. XMASSERT(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
  2307. M.r[0] = XMVectorSet(2.0f / ViewWidth, 0.0f, 0.0f, 0.0f);
  2308. M.r[1] = XMVectorSet(0.0f, 2.0f / ViewHeight, 0.0f, 0.0f);
  2309. M.r[2] = XMVectorSet(0.0f, 0.0f, 1.0f / (NearZ - FarZ), 0.0f);
  2310. M.r[3] = XMVectorSet(0.0f, 0.0f, M.r[2].vector4_f32[2] * NearZ, 1.0f);
  2311. return M;
  2312. #elif defined(_XM_SSE_INTRINSICS_)
  2313. XMASSERT(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
  2314. XMASSERT(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
  2315. XMASSERT(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
  2316. XMMATRIX M;
  2317. FLOAT fRange = 1.0f / (NearZ-FarZ);
2318. // Note: This vector is constructed on the stack
  2319. XMVECTOR rMem = {
  2320. 2.0f / ViewWidth,
  2321. 2.0f / ViewHeight,
  2322. fRange,
  2323. fRange * NearZ
  2324. };
  2325. // Copy from memory to SSE register
  2326. XMVECTOR vValues = rMem;
  2327. XMVECTOR vTemp = _mm_setzero_ps();
  2328. // Copy x only
  2329. vTemp = _mm_move_ss(vTemp,vValues);
  2330. // 2.0f / ViewWidth,0,0,0
  2331. M.r[0] = vTemp;
  2332. // 0,2.0f / ViewHeight,0,0
  2333. vTemp = vValues;
  2334. vTemp = _mm_and_ps(vTemp,g_XMMaskY);
  2335. M.r[1] = vTemp;
  2336. // x=fRange,y=fRange * NearZ,0,1.0f
  2337. vTemp = _mm_setzero_ps();
  2338. vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
  2339. // 0,0,fRange,0.0f
  2340. vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,0,0,0));
  2341. M.r[2] = vTemp;
  2342. // 0,0,fRange * NearZ,1.0f
  2343. vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,1,0,0));
  2344. M.r[3] = vTemp;
  2345. return M;
  2346. #else // _XM_VMX128_INTRINSICS_
  2347. #endif // _XM_VMX128_INTRINSICS_
  2348. }
  2349. //------------------------------------------------------------------------------
  2350. XMFINLINE XMMATRIX XMMatrixOrthographicOffCenterLH
  2351. (
  2352. FLOAT ViewLeft,
  2353. FLOAT ViewRight,
  2354. FLOAT ViewBottom,
  2355. FLOAT ViewTop,
  2356. FLOAT NearZ,
  2357. FLOAT FarZ
  2358. )
  2359. {
  2360. #if defined(_XM_NO_INTRINSICS_)
  2361. FLOAT ReciprocalWidth;
  2362. FLOAT ReciprocalHeight;
  2363. XMMATRIX M;
  2364. XMASSERT(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
  2365. XMASSERT(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
  2366. XMASSERT(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
  2367. ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
  2368. ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
  2369. M.r[0] = XMVectorSet(ReciprocalWidth + ReciprocalWidth, 0.0f, 0.0f, 0.0f);
  2370. M.r[1] = XMVectorSet(0.0f, ReciprocalHeight + ReciprocalHeight, 0.0f, 0.0f);
  2371. M.r[2] = XMVectorSet(0.0f, 0.0f, 1.0f / (FarZ - NearZ), 0.0f);
  2372. M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
  2373. -(ViewTop + ViewBottom) * ReciprocalHeight,
  2374. -M.r[2].vector4_f32[2] * NearZ,
  2375. 1.0f);
  2376. return M;
  2377. #elif defined(_XM_SSE_INTRINSICS_)
// Match the parameter validation performed by the scalar path above
XMASSERT(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
XMASSERT(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
XMASSERT(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
2378. XMMATRIX M;
  2379. FLOAT fReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
  2380. FLOAT fReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
  2381. FLOAT fRange = 1.0f / (FarZ-NearZ);
2382. // Note: This vector is constructed on the stack
  2383. XMVECTOR rMem = {
  2384. fReciprocalWidth,
  2385. fReciprocalHeight,
  2386. fRange,
  2387. 1.0f
  2388. };
  2389. XMVECTOR rMem2 = {
  2390. -(ViewLeft + ViewRight),
  2391. -(ViewTop + ViewBottom),
  2392. -NearZ,
  2393. 1.0f
  2394. };
  2395. // Copy from memory to SSE register
  2396. XMVECTOR vValues = rMem;
  2397. XMVECTOR vTemp = _mm_setzero_ps();
  2398. // Copy x only
  2399. vTemp = _mm_move_ss(vTemp,vValues);
  2400. // fReciprocalWidth*2,0,0,0
  2401. vTemp = _mm_add_ss(vTemp,vTemp);
  2402. M.r[0] = vTemp;
  2403. // 0,fReciprocalHeight*2,0,0
  2404. vTemp = vValues;
  2405. vTemp = _mm_and_ps(vTemp,g_XMMaskY);
  2406. vTemp = _mm_add_ps(vTemp,vTemp);
  2407. M.r[1] = vTemp;
  2408. // 0,0,fRange,0.0f
  2409. vTemp = vValues;
  2410. vTemp = _mm_and_ps(vTemp,g_XMMaskZ);
  2411. M.r[2] = vTemp;
  2412. // -(ViewLeft + ViewRight)*fReciprocalWidth,-(ViewTop + ViewBottom)*fReciprocalHeight,fRange*-NearZ,1.0f
  2413. vValues = _mm_mul_ps(vValues,rMem2);
  2414. M.r[3] = vValues;
  2415. return M;
  2416. #else // _XM_VMX128_INTRINSICS_
  2417. #endif // _XM_VMX128_INTRINSICS_
  2418. }
  2419. //------------------------------------------------------------------------------
  2420. XMFINLINE XMMATRIX XMMatrixOrthographicOffCenterRH
  2421. (
  2422. FLOAT ViewLeft,
  2423. FLOAT ViewRight,
  2424. FLOAT ViewBottom,
  2425. FLOAT ViewTop,
  2426. FLOAT NearZ,
  2427. FLOAT FarZ
  2428. )
  2429. {
  2430. #if defined(_XM_NO_INTRINSICS_)
  2431. FLOAT ReciprocalWidth;
  2432. FLOAT ReciprocalHeight;
  2433. XMMATRIX M;
  2434. XMASSERT(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
  2435. XMASSERT(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
  2436. XMASSERT(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
  2437. ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
  2438. ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
  2439. M.r[0] = XMVectorSet(ReciprocalWidth + ReciprocalWidth, 0.0f, 0.0f, 0.0f);
  2440. M.r[1] = XMVectorSet(0.0f, ReciprocalHeight + ReciprocalHeight, 0.0f, 0.0f);
  2441. M.r[2] = XMVectorSet(0.0f, 0.0f, 1.0f / (NearZ - FarZ), 0.0f);
  2442. M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
  2443. -(ViewTop + ViewBottom) * ReciprocalHeight,
  2444. M.r[2].vector4_f32[2] * NearZ,
  2445. 1.0f);
  2446. return M;
  2447. #elif defined(_XM_SSE_INTRINSICS_)
// Match the parameter validation performed by the scalar path above
XMASSERT(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
XMASSERT(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
XMASSERT(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
2448. XMMATRIX M;
  2449. FLOAT fReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
  2450. FLOAT fReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
  2451. FLOAT fRange = 1.0f / (NearZ-FarZ);
2452. // Note: This vector is constructed on the stack
  2453. XMVECTOR rMem = {
  2454. fReciprocalWidth,
  2455. fReciprocalHeight,
  2456. fRange,
  2457. 1.0f
  2458. };
  2459. XMVECTOR rMem2 = {
  2460. -(ViewLeft + ViewRight),
  2461. -(ViewTop + ViewBottom),
  2462. NearZ,
  2463. 1.0f
  2464. };
  2465. // Copy from memory to SSE register
  2466. XMVECTOR vValues = rMem;
  2467. XMVECTOR vTemp = _mm_setzero_ps();
  2468. // Copy x only
  2469. vTemp = _mm_move_ss(vTemp,vValues);
  2470. // fReciprocalWidth*2,0,0,0
  2471. vTemp = _mm_add_ss(vTemp,vTemp);
  2472. M.r[0] = vTemp;
  2473. // 0,fReciprocalHeight*2,0,0
  2474. vTemp = vValues;
  2475. vTemp = _mm_and_ps(vTemp,g_XMMaskY);
  2476. vTemp = _mm_add_ps(vTemp,vTemp);
  2477. M.r[1] = vTemp;
  2478. // 0,0,fRange,0.0f
  2479. vTemp = vValues;
  2480. vTemp = _mm_and_ps(vTemp,g_XMMaskZ);
  2481. M.r[2] = vTemp;
2482. // -(ViewLeft + ViewRight)*fReciprocalWidth,-(ViewTop + ViewBottom)*fReciprocalHeight,fRange*NearZ,1.0f
  2483. vValues = _mm_mul_ps(vValues,rMem2);
  2484. M.r[3] = vValues;
  2485. return M;
  2486. #else // _XM_VMX128_INTRINSICS_
  2487. #endif // _XM_VMX128_INTRINSICS_
  2488. }
  2489. #ifdef __cplusplus
  2490. /****************************************************************************
  2491. *
  2492. * XMMATRIX operators and methods
  2493. *
  2494. ****************************************************************************/
  2495. //------------------------------------------------------------------------------
  2496. XMFINLINE _XMMATRIX::_XMMATRIX
  2497. (
  2498. FXMVECTOR R0,
  2499. FXMVECTOR R1,
  2500. FXMVECTOR R2,
  2501. CXMVECTOR R3
  2502. )
  2503. {
  2504. r[0] = R0;
  2505. r[1] = R1;
  2506. r[2] = R2;
  2507. r[3] = R3;
  2508. }
  2509. //------------------------------------------------------------------------------
  2510. XMFINLINE _XMMATRIX::_XMMATRIX
  2511. (
  2512. FLOAT m00, FLOAT m01, FLOAT m02, FLOAT m03,
  2513. FLOAT m10, FLOAT m11, FLOAT m12, FLOAT m13,
  2514. FLOAT m20, FLOAT m21, FLOAT m22, FLOAT m23,
  2515. FLOAT m30, FLOAT m31, FLOAT m32, FLOAT m33
  2516. )
  2517. {
  2518. r[0] = XMVectorSet(m00, m01, m02, m03);
  2519. r[1] = XMVectorSet(m10, m11, m12, m13);
  2520. r[2] = XMVectorSet(m20, m21, m22, m23);
  2521. r[3] = XMVectorSet(m30, m31, m32, m33);
  2522. }
  2523. //------------------------------------------------------------------------------
  2524. XMFINLINE _XMMATRIX::_XMMATRIX
  2525. (
  2526. CONST FLOAT* pArray
  2527. )
  2528. {
  2529. r[0] = XMLoadFloat4((XMFLOAT4*)pArray);
  2530. r[1] = XMLoadFloat4((XMFLOAT4*)(pArray + 4));
  2531. r[2] = XMLoadFloat4((XMFLOAT4*)(pArray + 8));
  2532. r[3] = XMLoadFloat4((XMFLOAT4*)(pArray + 12));
  2533. }
  2534. //------------------------------------------------------------------------------
  2535. XMFINLINE _XMMATRIX& _XMMATRIX::operator=
  2536. (
  2537. CONST _XMMATRIX& M
  2538. )
  2539. {
  2540. r[0] = M.r[0];
  2541. r[1] = M.r[1];
  2542. r[2] = M.r[2];
  2543. r[3] = M.r[3];
  2544. return *this;
  2545. }
  2546. //------------------------------------------------------------------------------
  2547. #ifndef XM_NO_OPERATOR_OVERLOADS
  2548. #if !defined(_XBOX_VER) && defined(_XM_ISVS2005_) && defined(_XM_X64_)
  2549. #pragma warning(push)
  2550. #pragma warning(disable : 4328)
  2551. #endif
  2552. XMFINLINE _XMMATRIX& _XMMATRIX::operator*=
  2553. (
  2554. CONST _XMMATRIX& M
  2555. )
  2556. {
  2557. *this = XMMatrixMultiply(*this, M);
  2558. return *this;
  2559. }
  2560. //------------------------------------------------------------------------------
  2561. XMFINLINE _XMMATRIX _XMMATRIX::operator*
  2562. (
  2563. CONST _XMMATRIX& M
  2564. ) CONST
  2565. {
  2566. return XMMatrixMultiply(*this, M);
  2567. }
  2568. #if !defined(_XBOX_VER) && defined(_XM_ISVS2005_) && defined(_XM_X64_)
  2569. #pragma warning(pop)
  2570. #endif
  2571. #endif // !XM_NO_OPERATOR_OVERLOADS
  2572. /****************************************************************************
  2573. *
  2574. * XMFLOAT3X3 operators
  2575. *
  2576. ****************************************************************************/
  2577. //------------------------------------------------------------------------------
  2578. XMFINLINE _XMFLOAT3X3::_XMFLOAT3X3
  2579. (
  2580. FLOAT m00, FLOAT m01, FLOAT m02,
  2581. FLOAT m10, FLOAT m11, FLOAT m12,
  2582. FLOAT m20, FLOAT m21, FLOAT m22
  2583. )
  2584. {
  2585. m[0][0] = m00;
  2586. m[0][1] = m01;
  2587. m[0][2] = m02;
  2588. m[1][0] = m10;
  2589. m[1][1] = m11;
  2590. m[1][2] = m12;
  2591. m[2][0] = m20;
  2592. m[2][1] = m21;
  2593. m[2][2] = m22;
  2594. }
  2595. //------------------------------------------------------------------------------
  2596. XMFINLINE _XMFLOAT3X3::_XMFLOAT3X3
  2597. (
  2598. CONST FLOAT* pArray
  2599. )
  2600. {
  2601. UINT Row;
  2602. UINT Column;
  2603. for (Row = 0; Row < 3; Row++)
  2604. {
  2605. for (Column = 0; Column < 3; Column++)
  2606. {
  2607. m[Row][Column] = pArray[Row * 3 + Column];
  2608. }
  2609. }
  2610. }
  2611. //------------------------------------------------------------------------------
  2612. XMFINLINE _XMFLOAT3X3& _XMFLOAT3X3::operator=
  2613. (
  2614. CONST _XMFLOAT3X3& Float3x3
  2615. )
  2616. {
  2617. _11 = Float3x3._11;
  2618. _12 = Float3x3._12;
  2619. _13 = Float3x3._13;
  2620. _21 = Float3x3._21;
  2621. _22 = Float3x3._22;
  2622. _23 = Float3x3._23;
  2623. _31 = Float3x3._31;
  2624. _32 = Float3x3._32;
  2625. _33 = Float3x3._33;
  2626. return *this;
  2627. }
  2628. /****************************************************************************
  2629. *
  2630. * XMFLOAT4X3 operators
  2631. *
  2632. ****************************************************************************/
  2633. //------------------------------------------------------------------------------
  2634. XMFINLINE _XMFLOAT4X3::_XMFLOAT4X3
  2635. (
  2636. FLOAT m00, FLOAT m01, FLOAT m02,
  2637. FLOAT m10, FLOAT m11, FLOAT m12,
  2638. FLOAT m20, FLOAT m21, FLOAT m22,
  2639. FLOAT m30, FLOAT m31, FLOAT m32
  2640. )
  2641. {
  2642. m[0][0] = m00;
  2643. m[0][1] = m01;
  2644. m[0][2] = m02;
  2645. m[1][0] = m10;
  2646. m[1][1] = m11;
  2647. m[1][2] = m12;
  2648. m[2][0] = m20;
  2649. m[2][1] = m21;
  2650. m[2][2] = m22;
  2651. m[3][0] = m30;
  2652. m[3][1] = m31;
  2653. m[3][2] = m32;
  2654. }
  2655. //------------------------------------------------------------------------------
  2656. XMFINLINE _XMFLOAT4X3::_XMFLOAT4X3
  2657. (
  2658. CONST FLOAT* pArray
  2659. )
  2660. {
  2661. UINT Row;
  2662. UINT Column;
  2663. for (Row = 0; Row < 4; Row++)
  2664. {
  2665. for (Column = 0; Column < 3; Column++)
  2666. {
  2667. m[Row][Column] = pArray[Row * 3 + Column];
  2668. }
  2669. }
  2670. }
  2671. //------------------------------------------------------------------------------
  2672. XMFINLINE _XMFLOAT4X3& _XMFLOAT4X3::operator=
  2673. (
  2674. CONST _XMFLOAT4X3& Float4x3
  2675. )
  2676. {
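// The 4x3 matrix is 12 contiguous floats, so three overlapping 4-float loads
// starting at _11, _22 and _33 cover the whole struct; the copies below
// intentionally straddle row boundaries.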
  2677. XMVECTOR V1 = XMLoadFloat4((XMFLOAT4*)&Float4x3._11);
  2678. XMVECTOR V2 = XMLoadFloat4((XMFLOAT4*)&Float4x3._22);
  2679. XMVECTOR V3 = XMLoadFloat4((XMFLOAT4*)&Float4x3._33);
  2680. XMStoreFloat4((XMFLOAT4*)&_11, V1);
  2681. XMStoreFloat4((XMFLOAT4*)&_22, V2);
  2682. XMStoreFloat4((XMFLOAT4*)&_33, V3);
  2683. return *this;
  2684. }
  2685. //------------------------------------------------------------------------------
  2686. XMFINLINE XMFLOAT4X3A& XMFLOAT4X3A::operator=
  2687. (
  2688. CONST XMFLOAT4X3A& Float4x3
  2689. )
  2690. {
  2691. XMVECTOR V1 = XMLoadFloat4A((XMFLOAT4A*)&Float4x3._11);
  2692. XMVECTOR V2 = XMLoadFloat4A((XMFLOAT4A*)&Float4x3._22);
  2693. XMVECTOR V3 = XMLoadFloat4A((XMFLOAT4A*)&Float4x3._33);
  2694. XMStoreFloat4A((XMFLOAT4A*)&_11, V1);
  2695. XMStoreFloat4A((XMFLOAT4A*)&_22, V2);
  2696. XMStoreFloat4A((XMFLOAT4A*)&_33, V3);
  2697. return *this;
  2698. }
  2699. /****************************************************************************
  2700. *
  2701. * XMFLOAT4X4 operators
  2702. *
  2703. ****************************************************************************/
  2704. //------------------------------------------------------------------------------
  2705. XMFINLINE _XMFLOAT4X4::_XMFLOAT4X4
  2706. (
  2707. FLOAT m00, FLOAT m01, FLOAT m02, FLOAT m03,
  2708. FLOAT m10, FLOAT m11, FLOAT m12, FLOAT m13,
  2709. FLOAT m20, FLOAT m21, FLOAT m22, FLOAT m23,
  2710. FLOAT m30, FLOAT m31, FLOAT m32, FLOAT m33
  2711. )
  2712. {
  2713. m[0][0] = m00;
  2714. m[0][1] = m01;
  2715. m[0][2] = m02;
  2716. m[0][3] = m03;
  2717. m[1][0] = m10;
  2718. m[1][1] = m11;
  2719. m[1][2] = m12;
  2720. m[1][3] = m13;
  2721. m[2][0] = m20;
  2722. m[2][1] = m21;
  2723. m[2][2] = m22;
  2724. m[2][3] = m23;
  2725. m[3][0] = m30;
  2726. m[3][1] = m31;
  2727. m[3][2] = m32;
  2728. m[3][3] = m33;
  2729. }
  2730. //------------------------------------------------------------------------------
  2731. XMFINLINE _XMFLOAT4X4::_XMFLOAT4X4
  2732. (
  2733. CONST FLOAT* pArray
  2734. )
  2735. {
  2736. UINT Row;
  2737. UINT Column;
  2738. for (Row = 0; Row < 4; Row++)
  2739. {
  2740. for (Column = 0; Column < 4; Column++)
  2741. {
  2742. m[Row][Column] = pArray[Row * 4 + Column];
  2743. }
  2744. }
  2745. }
  2746. //------------------------------------------------------------------------------
  2747. XMFINLINE _XMFLOAT4X4& _XMFLOAT4X4::operator=
  2748. (
  2749. CONST _XMFLOAT4X4& Float4x4
  2750. )
  2751. {
  2752. XMVECTOR V1 = XMLoadFloat4((XMFLOAT4*)&Float4x4._11);
  2753. XMVECTOR V2 = XMLoadFloat4((XMFLOAT4*)&Float4x4._21);
  2754. XMVECTOR V3 = XMLoadFloat4((XMFLOAT4*)&Float4x4._31);
  2755. XMVECTOR V4 = XMLoadFloat4((XMFLOAT4*)&Float4x4._41);
  2756. XMStoreFloat4((XMFLOAT4*)&_11, V1);
  2757. XMStoreFloat4((XMFLOAT4*)&_21, V2);
  2758. XMStoreFloat4((XMFLOAT4*)&_31, V3);
  2759. XMStoreFloat4((XMFLOAT4*)&_41, V4);
  2760. return *this;
  2761. }
  2762. //------------------------------------------------------------------------------
  2763. XMFINLINE XMFLOAT4X4A& XMFLOAT4X4A::operator=
  2764. (
  2765. CONST XMFLOAT4X4A& Float4x4
  2766. )
  2767. {
  2768. XMVECTOR V1 = XMLoadFloat4A((XMFLOAT4A*)&Float4x4._11);
  2769. XMVECTOR V2 = XMLoadFloat4A((XMFLOAT4A*)&Float4x4._21);
  2770. XMVECTOR V3 = XMLoadFloat4A((XMFLOAT4A*)&Float4x4._31);
  2771. XMVECTOR V4 = XMLoadFloat4A((XMFLOAT4A*)&Float4x4._41);
  2772. XMStoreFloat4A((XMFLOAT4A*)&_11, V1);
  2773. XMStoreFloat4A((XMFLOAT4A*)&_21, V2);
  2774. XMStoreFloat4A((XMFLOAT4A*)&_31, V3);
  2775. XMStoreFloat4A((XMFLOAT4A*)&_41, V4);
  2776. return *this;
  2777. }
  2778. #endif // __cplusplus
  2779. #endif // __XNAMATHMATRIX_INL__