Team Fortress 2 Source Code as on 22/4/2020
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

197 lines
4.7 KiB

  1. //========= Copyright Valve Corporation, All rights reserved. ============//
  2. //
  3. // Purpose: 3DNow Math primitives.
  4. //
  5. //=====================================================================================//
  6. #include <math.h>
  7. #include <float.h> // Needed for FLT_EPSILON
  8. #include "basetypes.h"
  9. #include <memory.h>
  10. #include "tier0/dbg.h"
  11. #include "mathlib/mathlib.h"
  12. #include "mathlib/amd3dx.h"
  13. #include "mathlib/vector.h"
  14. // memdbgon must be the last include file in a .cpp file!!!
  15. #include "tier0/memdbgon.h"
  16. #if !defined(COMPILER_MSVC64) && !defined(LINUX)
  17. // Implement for 64-bit Windows if needed.
  18. // Clang hits "fatal error: error in backend:" and other errors when trying
  19. // to compile the inline assembly below. 3DNow support is highly unlikely to
  20. // be useful/used, so it's not worth spending time on fixing.
  21. #pragma warning(disable:4244) // "conversion from 'const int' to 'float', possible loss of data"
  22. #pragma warning(disable:4730) // "mixing _m64 and floating point expressions may result in incorrect code"
  23. //-----------------------------------------------------------------------------
  24. // 3D Now Implementations of optimized routines:
  25. //-----------------------------------------------------------------------------
  //-----------------------------------------------------------------------------
  // Purpose: Approximate square root of x using 3DNow! instructions.
  //   The result is formed as x * PFRSQRT(x). No refinement step follows the
  //   PFRSQRT estimate in this routine, so the returned value only has the
  //   precision of that estimate (NOTE(review): confirm the precision against
  //   the AMD 3DNow! manual before relying on it).
  // Input  : x - value to take the square root of.
  // Output : the approximate square root, read back from an MMX register.
  // Notes  : - FEMMS is issued before and after the MMX register usage.
  //          - The PFxxx(a, b) macros are used here with the first argument as
  //            the destination register (the result is read from it afterwards).
  //          - The "#elif LINUX" branch cannot be selected while this function
  //            sits inside "#if ... && !defined(LINUX)"; a non-Windows build
  //            that gets this far hits the #error instead.
  //-----------------------------------------------------------------------------
  26. float _3DNow_Sqrt(float x)
  27. {
  28. Assert( s_bMathlibInitialized );
  29. float root = 0.f;
  30. #ifdef _WIN32
  31. _asm
  32. {
  33. femms // enter MMX state
  34. movd mm0, x // mm0 low dword = x
  35. PFRSQRT (mm1,mm0) // mm1 = reciprocal square root estimate of x
  36. punpckldq mm0, mm0 // copy low dword of mm0 into its high dword
  37. PFMUL (mm0, mm1) // mm0 = x * (1/sqrt(x)) ~= sqrt(x)
  38. movd root, mm0 // store low dword as the result
  39. femms // leave MMX state
  40. }
  41. #elif LINUX
  // GCC extended asm, AT&T operand order (source first, destination last).
  // Operands: %0 = root (output), %1 = x (output); the input x is tied to the
  // register of %0 by the "0" constraint.
  42. __asm __volatile__( "femms" );
  43. __asm __volatile__
  44. (
  45. "pfrsqrt %y0, %y1 \n\t"
  46. "punpckldq %y1, %y1 \n\t"
  47. "pfmul %y1, %y0 \n\t"
  48. : "=y" (root), "=y" (x)
  49. :"0" (x)
  50. );
  51. __asm __volatile__( "femms" );
  52. #else
  53. #error
  54. #endif
  55. return root;
  56. }
  57. // NJS FIXME: Need to test reciprocal square root performance and accuracy
  58. // on AMD processors before using the specialized instruction.
  59. float _3DNow_RSqrt(float x)
  60. {
  61. Assert( s_bMathlibInitialized );
  62. return 1.f / _3DNow_Sqrt(x);
  63. }
  //-----------------------------------------------------------------------------
  // Purpose: Normalizes vec in place using 3DNow! instructions and returns the
  //   length it had before normalization.
  // Input  : vec - vector to normalize; its three components are overwritten.
  // Output : the (approximate) original length, or 0.f when all three
  //   components test as zero, in which case vec is left untouched.
  // Notes  : - The scale factor comes from a single PFRSQRT estimate with no
  //            refinement step in this routine (NOTE(review): confirm the
  //            resulting precision against the AMD 3DNow! manual).
  //          - Register comments below follow the AMD instruction semantics;
  //            the PFxxx(a, b) macros are used with a as the destination.
  //          - The "#elif LINUX" branch cannot be selected while this function
  //            sits inside "#if ... && !defined(LINUX)".
  //-----------------------------------------------------------------------------
  64. float FASTCALL _3DNow_VectorNormalize (Vector& vec)
  65. {
  66. Assert( s_bMathlibInitialized );
  67. float *v = &vec[0];
  68. float radius = 0.f;
  // Skip the asm entirely for an all-zero vector; radius stays 0.f.
  69. if ( v[0] || v[1] || v[2] )
  70. {
  71. #ifdef _WIN32
  72. _asm
  73. {
  74. mov eax, v // eax -> first component
  75. femms // enter MMX state
  76. movq mm0, QWORD PTR [eax] // mm0 = (x, y)
  77. movd mm1, DWORD PTR [eax+8] // mm1 low dword = z
  78. movq mm2, mm0 // keep unscaled (x, y)
  79. movq mm3, mm1 // keep unscaled z
  80. PFMUL (mm0, mm0) // (x*x, y*y)
  81. PFMUL (mm1, mm1) // z*z
  82. PFACC (mm0, mm0) // x*x + y*y
  83. PFADD (mm1, mm0) // mm1 low = x*x + y*y + z*z
  84. PFRSQRT (mm0, mm1) // mm0 = 1/length estimate
  85. punpckldq mm1, mm1 // copy low dword of mm1 into its high dword
  86. PFMUL (mm1, mm0) // lengthSq * (1/length) ~= length
  87. PFMUL (mm2, mm0) // scale x and y
  88. PFMUL (mm3, mm0) // scale z
  89. movq QWORD PTR [eax], mm2 // write back x, y
  90. movd DWORD PTR [eax+8], mm3 // write back z
  91. movd radius, mm1 // length to return
  92. femms // leave MMX state
  93. }
  94. #elif LINUX
  // a and c each receive an 8-byte copy starting at vec[0]; b and d each
  // receive a 4-byte copy of vec[2]. c and d are the copies that get scaled
  // and are copied back into vec after the asm statement.
  95. long long a,c;
  96. int b,d;
  97. memcpy(&a,&vec[0],sizeof(a));
  98. memcpy(&b,&vec[2],sizeof(b));
  99. memcpy(&c,&vec[0],sizeof(c));
  100. memcpy(&d,&vec[2],sizeof(d));
  101. __asm __volatile__( "femms" );
  // AT&T operand order (source first, destination last).
  // Operands: %0 = radius, %1 = c, %2 = d (outputs); %3 = a (input); b, c and
  // d are additionally passed as inputs tied to the registers of %0, %1, %2.
  // NOTE(review): the template writes to %3, which is declared input-only.
  102. __asm __volatile__
  103. (
  104. "pfmul %y3, %y3\n\t"
  105. "pfmul %y0, %y0 \n\t"
  106. "pfacc %y3, %y3 \n\t"
  107. "pfadd %y3, %y0 \n\t"
  108. "pfrsqrt %y0, %y3 \n\t"
  109. "punpckldq %y0, %y0 \n\t"
  110. "pfmul %y3, %y0 \n\t"
  111. "pfmul %y3, %y2 \n\t"
  112. "pfmul %y3, %y1 \n\t"
  113. : "=y" (radius), "=y" (c), "=y" (d)
  114. : "y" (a), "0" (b), "1" (c), "2" (d)
  115. );
  116. memcpy(&vec[0],&c,sizeof(c));
  117. memcpy(&vec[2],&d,sizeof(d));
  118. __asm __volatile__( "femms" );
  119. #else
  120. #error
  121. #endif
  122. }
  123. return radius;
  124. }
  125. void FASTCALL _3DNow_VectorNormalizeFast (Vector& vec)
  126. {
  127. _3DNow_VectorNormalize( vec );
  128. }
  129. // JAY: This complains with the latest processor pack
  130. #pragma warning(disable: 4730)
  //-----------------------------------------------------------------------------
  // Purpose: Returns an approximation of 1 / max( 1.0, x*x + y*y + z*z ) for
  //   the three floats at v, using 3DNow! instructions (Windows path).
  // Input  : v - pointer to three consecutive floats; 8 bytes are read at v and
  //   4 more at v+8. v is not written.
  // Output : the reciprocal estimate produced by PFRCP; no refinement step
  //   follows it in this routine (NOTE(review): confirm the resulting
  //   precision against the AMD 3DNow! manual).
  // Notes  : - r2 starts at 1.f and doubles as the lower clamp fed to PFMAX.
  //          - Register comments below follow the AMD instruction semantics;
  //            the PFxxx(a, b) macros are used with a as the destination.
  //          - The "#elif LINUX" branch cannot be selected while this function
  //            sits inside "#if ... && !defined(LINUX)".
  //-----------------------------------------------------------------------------
  131. float _3DNow_InvRSquared(const float* v)
  132. {
  133. Assert( s_bMathlibInitialized );
  134. float r2 = 1.f;
  135. #ifdef _WIN32
  136. _asm { // AMD 3DNow only routine
  137. mov eax, v // eax -> first float
  138. femms // enter MMX state
  139. movq mm0, QWORD PTR [eax] // mm0 = (x, y)
  140. movd mm1, DWORD PTR [eax+8] // mm1 low dword = z
  141. movd mm2, [r2] // mm2 low dword = 1.0 (clamp value)
  142. PFMUL (mm0, mm0) // (x*x, y*y)
  143. PFMUL (mm1, mm1) // z*z
  144. PFACC (mm0, mm0) // x*x + y*y
  145. PFADD (mm1, mm0) // mm1 low = x*x + y*y + z*z
  146. PFMAX (mm1, mm2) // clamp squared length to at least 1.0
  147. PFRCP (mm0, mm1) // mm0 = reciprocal estimate of the clamped value
  148. movd [r2], mm0 // store result
  149. femms // leave MMX state
  150. }
  151. #elif LINUX
  // a and c each receive an 8-byte copy starting at v[0]; b receives a 4-byte
  // copy of v[2].
  152. long long a,c;
  153. int b;
  154. memcpy(&a,&v[0],sizeof(a));
  155. memcpy(&b,&v[2],sizeof(b));
  156. memcpy(&c,&v[0],sizeof(c));
  157. __asm __volatile__( "femms" );
  // AT&T operand order (source first, destination last).
  // Operands: %0 = r2 (output), %1 = r2 (input tied to %0), %2 = a, %3 = b,
  // %4 = c.
  // NOTE(review): PFMAX here uses %3 and %4 (b and c), not the 1.0 held in r2,
  // and the template writes to operands declared input-only; this does not
  // look equivalent to the Windows path. Verify before enabling this branch.
  158. __asm __volatile__
  159. (
  160. "PFMUL %y2, %y2 \n\t"
  161. "PFMUL %y3, %y3 \n\t"
  162. "PFACC %y2, %y2 \n\t"
  163. "PFADD %y2, %y3 \n\t"
  164. "PFMAX %y3, %y4 \n\t"
  165. "PFRCP %y3, %y2 \n\t"
  166. "movq %y2, %y0 \n\t"
  167. : "=y" (r2)
  168. : "0" (r2), "y" (a), "y" (b), "y" (c)
  169. );
  170. __asm __volatile__( "femms" );
  171. #else
  172. #error
  173. #endif
  174. return r2;
  175. }
  176. #endif // COMPILER_MSVC64