Counter Strike : Global Offensive Source Code

235 lines
10 KiB

  1. //========= Copyright © 1996-2006, Valve Corporation, All rights reserved. ============//
  2. //
  3. // Purpose: Fast low quality noise suitable for real time use
  4. //
  5. //=====================================================================================//
  6. #include <math.h>
  7. #include <float.h> // needed for flt_epsilon
  8. #include "basetypes.h"
  9. #include "tier0/dbg.h"
  10. #include "mathlib/mathlib.h"
  11. #include "mathlib/vector.h"
  12. #include "mathlib/ssemath.h"
  13. // memdbgon must be the last include file in a .cpp file!!!
  14. #include "tier0/memdbgon.h"
  15. #include "noisedata.h"
  16. #define MAGIC_NUMBER (1<<15) // gives 8 bits of fraction
  17. static fltx4 Four_MagicNumbers = { MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER };
  18. static ALIGN16 int32 idx_mask[4]= {0xffff, 0xffff, 0xffff, 0xffff};
  19. #define MASK255 (*((fltx4 *)(& idx_mask )))
  20. // returns 0..1
  21. static inline float GetLatticePointValue( int idx_x, int idx_y, int idx_z )
  22. {
  23. int ret_idx = perm_a[idx_x & 0xff];
  24. ret_idx = perm_b[( idx_y + ret_idx ) & 0xff];
  25. ret_idx = perm_c[( idx_z + ret_idx ) & 0xff];
  26. return impulse_xcoords[ret_idx];
  27. }
  28. fltx4 NoiseSIMD( const fltx4 & x, const fltx4 & y, const fltx4 & z )
  29. {
  30. // use magic to convert to integer index
  31. fltx4 x_idx = AndSIMD( MASK255, AddSIMD( x, Four_MagicNumbers ) );
  32. fltx4 y_idx = AndSIMD( MASK255, AddSIMD( y, Four_MagicNumbers ) );
  33. fltx4 z_idx = AndSIMD( MASK255, AddSIMD( z, Four_MagicNumbers ) );
  34. fltx4 lattice000 = Four_Zeros, lattice001 = Four_Zeros, lattice010 = Four_Zeros, lattice011 = Four_Zeros;
  35. fltx4 lattice100 = Four_Zeros, lattice101 = Four_Zeros, lattice110 = Four_Zeros, lattice111 = Four_Zeros;
  36. // FIXME: Converting the input vectors to int indices will cause load-hit-stores (48 bytes)
  37. // Converting the indexed noise values back to vectors will cause more (128 bytes)
  38. // The noise table could store vectors if we chunked it into 2x2x2 blocks.
  39. fltx4 xfrac = Four_Zeros, yfrac = Four_Zeros, zfrac = Four_Zeros;
  40. #define DOPASS(i) \
  41. { unsigned int xi = SubInt( x_idx, i ); \
  42. unsigned int yi = SubInt( y_idx, i ); \
  43. unsigned int zi = SubInt( z_idx, i ); \
  44. SubFloat( xfrac, i ) = (xi & 0xff)*(1.0/256.0); \
  45. SubFloat( yfrac, i ) = (yi & 0xff)*(1.0/256.0); \
  46. SubFloat( zfrac, i ) = (zi & 0xff)*(1.0/256.0); \
  47. xi>>=8; \
  48. yi>>=8; \
  49. zi>>=8; \
  50. \
  51. SubFloat( lattice000, i ) = GetLatticePointValue( xi,yi,zi ); \
  52. SubFloat( lattice001, i ) = GetLatticePointValue( xi,yi,zi+1 ); \
  53. SubFloat( lattice010, i ) = GetLatticePointValue( xi,yi+1,zi ); \
  54. SubFloat( lattice011, i ) = GetLatticePointValue( xi,yi+1,zi+1 ); \
  55. SubFloat( lattice100, i ) = GetLatticePointValue( xi+1,yi,zi ); \
  56. SubFloat( lattice101, i ) = GetLatticePointValue( xi+1,yi,zi+1 ); \
  57. SubFloat( lattice110, i ) = GetLatticePointValue( xi+1,yi+1,zi ); \
  58. SubFloat( lattice111, i ) = GetLatticePointValue( xi+1,yi+1,zi+1 ); \
  59. }
  60. DOPASS( 0 );
  61. DOPASS( 1 );
  62. DOPASS( 2 );
  63. DOPASS( 3 );
  64. // now, we have 8 lattice values for each of four points as m128s, and interpolant values for
  65. // each axis in m128 form in [xyz]frac. Perfom the trilinear interpolation as SIMD ops
  66. // first, do x interpolation
  67. fltx4 l2d00 = AddSIMD( lattice000, MulSIMD( xfrac, SubSIMD( lattice100, lattice000 ) ) );
  68. fltx4 l2d01 = AddSIMD( lattice001, MulSIMD( xfrac, SubSIMD( lattice101, lattice001 ) ) );
  69. fltx4 l2d10 = AddSIMD( lattice010, MulSIMD( xfrac, SubSIMD( lattice110, lattice010 ) ) );
  70. fltx4 l2d11 = AddSIMD( lattice011, MulSIMD( xfrac, SubSIMD( lattice111, lattice011 ) ) );
  71. // now, do y interpolation
  72. fltx4 l1d0 = AddSIMD( l2d00, MulSIMD( yfrac, SubSIMD( l2d10, l2d00 ) ) );
  73. fltx4 l1d1 = AddSIMD( l2d01, MulSIMD( yfrac, SubSIMD( l2d11, l2d01 ) ) );
  74. // final z interpolation
  75. fltx4 rslt = AddSIMD( l1d0, MulSIMD( zfrac, SubSIMD( l1d1, l1d0 ) ) );
  76. // map to 0..1
  77. return MulSIMD( Four_Twos, SubSIMD( rslt, Four_PointFives ) );
  78. }
  79. static inline void GetVectorLatticePointValue( int idx, fltx4 &x, fltx4 &y, fltx4 &z,
  80. int idx_x, int idx_y, int idx_z )
  81. {
  82. int ret_idx = perm_a[idx_x & 0xff];
  83. ret_idx = perm_b[( idx_y + ret_idx ) & 0xff];
  84. ret_idx = perm_c[( idx_z + ret_idx ) & 0xff];
  85. float const *pData = s_randomGradients + ret_idx * 3;
  86. SubFloat( x, idx ) = pData[0];
  87. SubFloat( y, idx ) = pData[1];
  88. SubFloat( z, idx ) = pData[2];
  89. }
  90. FourVectors DNoiseSIMD( const fltx4 & x, const fltx4 & y, const fltx4 & z )
  91. {
  92. // use magic to convert to integer index
  93. fltx4 x_idx = AndSIMD( MASK255, AddSIMD( x, Four_MagicNumbers ) );
  94. fltx4 y_idx = AndSIMD( MASK255, AddSIMD( y, Four_MagicNumbers ) );
  95. fltx4 z_idx = AndSIMD( MASK255, AddSIMD( z, Four_MagicNumbers ) );
  96. fltx4 xlattice000 = Four_Zeros, xlattice001 = Four_Zeros, xlattice010 = Four_Zeros, xlattice011 = Four_Zeros;
  97. fltx4 xlattice100 = Four_Zeros, xlattice101 = Four_Zeros, xlattice110 = Four_Zeros, xlattice111 = Four_Zeros;
  98. fltx4 ylattice000 = Four_Zeros, ylattice001 = Four_Zeros, ylattice010 = Four_Zeros, ylattice011 = Four_Zeros;
  99. fltx4 ylattice100 = Four_Zeros, ylattice101 = Four_Zeros, ylattice110 = Four_Zeros, ylattice111 = Four_Zeros;
  100. fltx4 zlattice000 = Four_Zeros, zlattice001 = Four_Zeros, zlattice010 = Four_Zeros, zlattice011 = Four_Zeros;
  101. fltx4 zlattice100 = Four_Zeros, zlattice101 = Four_Zeros, zlattice110 = Four_Zeros, zlattice111 = Four_Zeros;
  102. // FIXME: Converting the input vectors to int indices will cause load-hit-stores (48 bytes)
  103. // Converting the indexed noise values back to vectors will cause more (128 bytes)
  104. // The noise table could store vectors if we chunked it into 2x2x2 blocks.
  105. fltx4 xfrac = Four_Zeros, yfrac = Four_Zeros, zfrac = Four_Zeros;
  106. #define DODPASS(i) \
  107. { unsigned int xi = SubInt( x_idx, i ); \
  108. unsigned int yi = SubInt( y_idx, i ); \
  109. unsigned int zi = SubInt( z_idx, i ); \
  110. SubFloat( xfrac, i ) = (xi & 0xff)*(1.0/256.0); \
  111. SubFloat( yfrac, i ) = (yi & 0xff)*(1.0/256.0); \
  112. SubFloat( zfrac, i ) = (zi & 0xff)*(1.0/256.0); \
  113. xi>>=8; \
  114. yi>>=8; \
  115. zi>>=8; \
  116. \
  117. GetVectorLatticePointValue( i, xlattice000, ylattice000, zlattice000, xi,yi,zi ); \
  118. GetVectorLatticePointValue( i, xlattice001, ylattice001, zlattice001, xi,yi,zi+1 ); \
  119. GetVectorLatticePointValue( i, xlattice010, ylattice010, zlattice010, xi,yi+1,zi ); \
  120. GetVectorLatticePointValue( i, xlattice011, ylattice011, zlattice011, xi,yi+1,zi+1 ); \
  121. GetVectorLatticePointValue( i, xlattice100, ylattice100, zlattice100, xi+1,yi,zi ); \
  122. GetVectorLatticePointValue( i, xlattice101, ylattice101, zlattice101, xi+1,yi,zi+1 ); \
  123. GetVectorLatticePointValue( i, xlattice110, ylattice110, zlattice110, xi+1,yi+1,zi ); \
  124. GetVectorLatticePointValue( i, xlattice111, ylattice111, zlattice111, xi+1,yi+1,zi+1 ); \
  125. }
  126. DODPASS( 0 );
  127. DODPASS( 1 );
  128. DODPASS( 2 );
  129. DODPASS( 3 );
  130. // now, we have 8 lattice values for each of four points as m128s, and interpolant values for
  131. // each axis in m128 form in [xyz]frac. Perfom the trilinear interpolation as SIMD ops
  132. // first, do x interpolation
  133. fltx4 xl2d00 = AddSIMD( xlattice000, MulSIMD( xfrac, SubSIMD( xlattice100, xlattice000 ) ) );
  134. fltx4 xl2d01 = AddSIMD( xlattice001, MulSIMD( xfrac, SubSIMD( xlattice101, xlattice001 ) ) );
  135. fltx4 xl2d10 = AddSIMD( xlattice010, MulSIMD( xfrac, SubSIMD( xlattice110, xlattice010 ) ) );
  136. fltx4 xl2d11 = AddSIMD( xlattice011, MulSIMD( xfrac, SubSIMD( xlattice111, xlattice011 ) ) );
  137. // now, do y interpolation
  138. fltx4 xl1d0 = AddSIMD( xl2d00, MulSIMD( yfrac, SubSIMD( xl2d10, xl2d00 ) ) );
  139. fltx4 xl1d1 = AddSIMD( xl2d01, MulSIMD( yfrac, SubSIMD( xl2d11, xl2d01 ) ) );
  140. // final z interpolation
  141. FourVectors rslt;
  142. rslt.x = AddSIMD( xl1d0, MulSIMD( zfrac, SubSIMD( xl1d1, xl1d0 ) ) );
  143. fltx4 yl2d00 = AddSIMD( ylattice000, MulSIMD( xfrac, SubSIMD( ylattice100, ylattice000 ) ) );
  144. fltx4 yl2d01 = AddSIMD( ylattice001, MulSIMD( xfrac, SubSIMD( ylattice101, ylattice001 ) ) );
  145. fltx4 yl2d10 = AddSIMD( ylattice010, MulSIMD( xfrac, SubSIMD( ylattice110, ylattice010 ) ) );
  146. fltx4 yl2d11 = AddSIMD( ylattice011, MulSIMD( xfrac, SubSIMD( ylattice111, ylattice011 ) ) );
  147. // now, do y interpolation
  148. fltx4 yl1d0 = AddSIMD( yl2d00, MulSIMD( yfrac, SubSIMD( yl2d10, yl2d00 ) ) );
  149. fltx4 yl1d1 = AddSIMD( yl2d01, MulSIMD( yfrac, SubSIMD( yl2d11, yl2d01 ) ) );
  150. // final z interpolation
  151. rslt.y = AddSIMD( yl1d0, MulSIMD( zfrac, SubSIMD( yl1d1, yl1d0 ) ) );
  152. fltx4 zl2d00 = AddSIMD( zlattice000, MulSIMD( xfrac, SubSIMD( zlattice100, zlattice000 ) ) );
  153. fltx4 zl2d01 = AddSIMD( zlattice001, MulSIMD( xfrac, SubSIMD( zlattice101, zlattice001 ) ) );
  154. fltx4 zl2d10 = AddSIMD( zlattice010, MulSIMD( xfrac, SubSIMD( zlattice110, zlattice010 ) ) );
  155. fltx4 zl2d11 = AddSIMD( zlattice011, MulSIMD( xfrac, SubSIMD( zlattice111, zlattice011 ) ) );
  156. // now, do y interpolation
  157. fltx4 zl1d0 = AddSIMD( zl2d00, MulSIMD( yfrac, SubSIMD( zl2d10, zl2d00 ) ) );
  158. fltx4 zl1d1 = AddSIMD( zl2d01, MulSIMD( yfrac, SubSIMD( zl2d11, zl2d01 ) ) );
  159. // final z interpolation
  160. rslt.z = AddSIMD( zl1d0, MulSIMD( zfrac, SubSIMD( zl1d1, zl1d0 ) ) );
  161. return rslt;
  162. }
  163. fltx4 NoiseSIMD( FourVectors const &pos )
  164. {
  165. return NoiseSIMD( pos.x, pos.y, pos.z );
  166. }
  167. FourVectors DNoiseSIMD( FourVectors const &pos )
  168. {
  169. return DNoiseSIMD( pos.x, pos.y, pos.z );
  170. }
  171. FourVectors CurlNoiseSIMD( FourVectors const &pos )
  172. {
  173. FourVectors fl4Comp1 = DNoiseSIMD( pos );
  174. FourVectors fl4Pos = pos;
  175. fl4Pos.x = AddSIMD( fl4Pos.x, ReplicateX4( 43.256 ) );
  176. fl4Pos.y = AddSIMD( fl4Pos.y, ReplicateX4( -67.89 ) );
  177. fl4Pos.z = AddSIMD( fl4Pos.z, ReplicateX4( 1338.2 ) );
  178. FourVectors fl4Comp2 = DNoiseSIMD( fl4Pos );
  179. fl4Pos.x = AddSIMD( fl4Pos.x, ReplicateX4( -129.856 ) );
  180. fl4Pos.y = AddSIMD( fl4Pos.y, ReplicateX4( -967.23 ) );
  181. fl4Pos.z = AddSIMD( fl4Pos.z, ReplicateX4( 2338.98 ) );
  182. FourVectors fl4Comp3 = DNoiseSIMD( fl4Pos );
  183. // now we have the 3 derivatives of a vector valued field. return the curl of the field.
  184. FourVectors fl4Ret;
  185. fl4Ret.x = SubSIMD( fl4Comp3.y, fl4Comp2.z );
  186. fl4Ret.y = SubSIMD( fl4Comp1.z, fl4Comp3.x );
  187. fl4Ret.z = SubSIMD( fl4Comp2.x, fl4Comp1.y );
  188. return fl4Ret;
  189. }