Counter Strike : Global Offensive Source Code
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

906 lines
29 KiB

  1. //====== Copyright © 1996-2007, Valve Corporation, All rights reserved. =======//
  2. //
  3. // Purpose:
  4. //
  5. // $NoKeywords: $
  6. //
  7. // A Fixed-allocation class for maintaining a 1d or 2d or 3d array of data in a structure-of-arrays
  8. // (SOA) sse-friendly manner.
  9. // =============================================================================//
  10. #ifndef UTLSOACONTAINER_H
  11. #define UTLSOACONTAINER_H
  12. #ifdef _WIN32
  13. #pragma once
  14. #endif
  15. #include "tier0/platform.h"
  16. #include "tier0/dbg.h"
  17. #include "tier0/threadtools.h"
  18. #include "tier1/utlmemory.h"
  19. #include "tier1/utlblockmemory.h"
  20. #include "mathlib/ssemath.h"
  // Strided pointer: acts like a T*, except that ++ and += advance by a fixed stride instead of
  // by a single element. The stride is supplied in bytes and stored in units of sizeof( T ).
  template<class T> class CStridedPtr
  {
  protected:
      T *m_pData;             // current element
      size_t m_nStride;       // step between elements, in units of sizeof( T )

  public:
      // pData       - address of the first element.
      // nByteStride - distance between consecutive elements in bytes. It is divided by
      //               sizeof( T ), so any remainder is discarded.
      // (Constructors are named without the <T> template-id, which C++20 no longer accepts.)
      FORCEINLINE CStridedPtr( void *pData, size_t nByteStride )
          : m_pData( reinterpret_cast<T *>( pData ) ),
            m_nStride( nByteStride / sizeof( T ) )
      {
      }

      // A default-constructed pointer is null with a zero stride rather than indeterminate.
      FORCEINLINE CStridedPtr( void )
          : m_pData( NULL ),
            m_nStride( 0 )
      {
      }

      T *operator->( void ) const
      {
          return m_pData;
      }

      T &operator*( void ) const
      {
          return *m_pData;
      }

      // Conversion to a raw pointer. Const-qualified (like CStridedConstPtr's conversion) so it
      // can also be used on a const CStridedPtr; the pointee stays mutable.
      FORCEINLINE operator T *( void ) const
      {
          return m_pData;
      }

      // Step forward by one stride.
      FORCEINLINE CStridedPtr &operator++( void )
      {
          m_pData += m_nStride;
          return *this;
      }

      // Step forward by nNumElements strides.
      FORCEINLINE void operator+=( size_t nNumElements )
      {
          m_pData += nNumElements * m_nStride;
      }

      // Stride in units of sizeof( T ) (not bytes).
      FORCEINLINE size_t Stride( void ) const
      {
          return m_nStride;
      }
  };
  // Read-only strided pointer: acts like a const T*, except that ++ and += advance by a fixed
  // stride. The stride is supplied in bytes and stored in units of sizeof( T ).
  template<class T> class CStridedConstPtr
  {
  protected:
      const T *m_pData;       // current element
      size_t m_nStride;       // step between elements, in units of sizeof( T )

  public:
      // pData       - address of the first element.
      // nByteStride - distance between consecutive elements in bytes. It is divided by
      //               sizeof( T ), so any remainder is discarded.
      // (Constructors are named without the <T> template-id, which C++20 no longer accepts.)
      FORCEINLINE CStridedConstPtr( void const *pData, size_t nByteStride )
          : m_pData( reinterpret_cast<T const *>( pData ) ),
            m_nStride( nByteStride / sizeof( T ) )
      {
      }

      // A default-constructed pointer is null with a zero stride rather than indeterminate.
      FORCEINLINE CStridedConstPtr( void )
          : m_pData( NULL ),
            m_nStride( 0 )
      {
      }

      const T *operator->( void ) const
      {
          return m_pData;
      }

      const T &operator*( void ) const
      {
          return *m_pData;
      }

      // Conversion to a raw const pointer.
      FORCEINLINE operator const T *( void ) const
      {
          return m_pData;
      }

      // Step forward by one stride.
      FORCEINLINE CStridedConstPtr &operator++( void )
      {
          m_pData += m_nStride;
          return *this;
      }

      // Step forward by nNumElements strides.
      FORCEINLINE void operator+=( size_t nNumElements )
      {
          m_pData += nNumElements * m_nStride;
      }

      // Stride in units of sizeof( T ) (not bytes).
      FORCEINLINE size_t Stride( void ) const
      {
          return m_nStride;
      }
  };
  // Data types a field (attribute) may hold. The tables in the .cpp file depend on these exact
  // values, so they must be updated together with any change made here.
  enum EAttributeDataType
  {
      ATTRDATATYPE_NONE    = -1,  // pad and varargs ender
      ATTRDATATYPE_FLOAT   = 0,   // a float attribute
      ATTRDATATYPE_4V      = 1,   // vector data type, stored as class FourVectors
      ATTRDATATYPE_INT     = 2,   // integer. not especially sse-able on all architectures.
      ATTRDATATYPE_POINTER = 3,   // a pointer.
      ATTRDATATYPE_COUNT   = 4,   // one past the last data type
  };
  109. #define MAX_SOA_FIELDS 32
  110. class KMeansQuantizedValue;
  111. class IKMeansErrorMetric;
  112. typedef fltx4 (*UNARYSIMDFUNCTION)( fltx4 const & );
  113. typedef fltx4 (*BINARYSIMDFUNCTION)( fltx4 const &, fltx4 const & );
  114. class CSOAAttributeReference;
  /// Threading mode for a container. Normally set automatically based upon the container's
  /// dimensions, but it can be controlled explicitly via SetThreadMode.
  enum SOAThreadMode_t
  {
      SOATHREADMODE_AUTO               = -1,  // compute based upon dimensions
      SOATHREADMODE_NONE               = 0,
      SOATHREADMODE_BYROWS             = 1,
      SOATHREADMODE_BYSLICES           = 2,
      SOATHREADMODE_BYROWS_AND_SLICES  = 3,
  };
  125. class CSOAContainer
  126. {
  127. friend class CSOAAttributeReference;
  128. public:
  129. // Constructor, destructor
  130. CSOAContainer( void ); // an empty one with no attributes
  131. CSOAContainer( int nCols, int nRows, int nSlices, ... );
  132. ~CSOAContainer( void );
  133. // !!!!! UPDATE SERIALIZATION CODE WHENEVER THE STRUCTURE OF CSOAContainer CHANGES !!!!!
  134. // To avoid dependency on datamodel, serialization is implemented in utlsoacontainer_serialization.cpp, in dmxloader.lib
  135. //bool Serialize( CDmxElement *pRootElement );
  136. //bool Unserialize( const CDmxElement *pRootElement );
  137. // Set the data type for an attribute. If you set the data type, but tell it not to allocate,
  138. // the data type will be set but writes will assert, and reads will give you back zeros. if
  139. // AllocateData hasn't been called yet, this will set up for AllocateData to reserve space for
  140. // this attribute. If you have already called AllocateData, but wish to add an attribute, you
  141. // can also use this, which will result in separate memory being allocated for this attribute.
  142. void SetAttributeType( int nAttrIdx, EAttributeDataType nDataType, bool bAllocateMemory = true );
  143. EAttributeDataType GetAttributeType( int nAttrIdx ) const;
  144. // Set the attribute type for a field, if that field is not already present (potentially
  145. // allocating memory). You can use this, for instance, to make sure an already loaded image has
  146. // an alpha channel.
  147. void EnsureDataType( int nAttrIdx, EAttributeDataType nDataType );
  148. // set back to un-initted state, freeing memory
  149. void Purge( void );
  150. // Allocate, purge data
  151. void AllocateData( int nNCols, int nNRows, int nSlices = 1 ); // actually allocate the memory and set the pointers up
  152. void PurgeData( void );
  153. // Did the container allocate memory for this attribute?
  154. bool HasAllocatedMemory( int nAttrIdx ) const;
  155. // easy constructor for 2d using varargs. call like
  156. // #define ATTR_RED 0
  157. // #define ATTR_GREEN 1
  158. // #define ATTR_BLUE 2
  159. // CSOAContainer myimage( 256, 256, ATTR_RED, ATTRDATATYPE_FLOAT, ATTR_GREEN, ATTRDATATYPE_FLOAT,
  160. // ATTR_BLUE, ATTRDATATYPE_FLOAT, -1 );
  161. int NumCols( void ) const;
  162. int NumRows( void ) const;
  163. int NumSlices( void ) const;
  164. void AssertDataType( int nAttrIdx, EAttributeDataType nDataType ) const;
  165. // # of groups of 4 elements per row
  166. int NumQuadsPerRow( void ) const;
  167. int Count( void ) const; // for 1d data
  168. int NumElements( void ) const;
  169. // how much to step to go from the end of one row to the start of the next one. Basically, how
  170. // many bytes to add at the end of a row when iterating over the whole 2d array with ++
  171. size_t RowToRowStep( int nAttrIdx ) const;
  172. template<class T> T *RowPtr( int nAttributeIdx, int nRowNumber, int nSliceNumber = 0 ) const;
  173. void const *ConstRowPtr( int nAttributeIdx, int nRowNumber, int nSliceNumber = 0 ) const;
  174. template<class T> T *ElementPointer( int nAttributeIdx, int nX = 0, int nY = 0, int nZ = 0 ) const;
  175. FourVectors *ElementPointer4V( int nAttributeIdx, int nX = 0, int nY = 0, int nZ = 0 ) const;
  176. size_t ItemByteStride( int nAttributeIdx ) const;
  177. FORCEINLINE float &FloatValue( int nAttrIdx, int nX, int nY, int nZ ) const
  178. {
  179. AssertDataType( nAttrIdx, ATTRDATATYPE_FLOAT );
  180. return RowPtr<float>( nAttrIdx, nY, nZ )[nX];
  181. }
  182. // return a reference to an attribute, which can have operations performed on it. For instance,
  183. // this is valid code to zero out the red component of a whole image:
  184. // myImage[FBM_ATTR_RED] = 0.;
  185. CSOAAttributeReference operator[]( int nAttrIdx );
  186. // this is just an alias for readbaility w/ ptrs. instead of (*p)[FBM_ATTR_RED], you can do p->Attr( FBM_ATTR_RED );
  187. FORCEINLINE CSOAAttributeReference Attr( int nAttrIdx );
  188. // copy the attribute data from another soacontainer. must be compatible geometry.
  189. void CopyAttrFrom( CSOAContainer const &other, int nDestAttributeIdx, int nSrcAttributeIndex = -1 );
  190. // copy the attribute data from another attribute. must be compatible data format
  191. void CopyAttrToAttr( int nSrcAttributeIndex, int nDestAttributeIndex);
  192. // copy a subvolume of attribute data from one container to another.
  193. void CopyRegionFrom( CSOAContainer const &src, int nSrcAttr, int nDestAttr,
  194. int nSrcMinX, int nSrcMaxX, int nSrcMinY, int nSrcMaxY, int nSrcMinZ, int nSrcMaxZ,
  195. int nDestX, int nDestY, int nDestZ );
  196. // copy all fields from a region of src to this.
  197. void CopyRegionFrom( CSOAContainer const &src,
  198. int nSrcMinX, int nSrcMaxX, int nSrcMinY, int nSrcMaxY, int nSrcMinZ, int nSrcMaxZ,
  199. int nDestX, int nDestY, int nDestZ );
  200. // move all the data from one csoacontainer to another, leaving the source empty. this is just
  201. // a pointer copy.
  202. FORCEINLINE void MoveDataFrom( CSOAContainer other );
  203. // arithmetic and data filling functions. All SIMD and hopefully fast
  204. /// set all elements of a float attribute to random #s
  205. void RandomizeAttribute( int nAttr, float flMin, float flMax ) const;
  206. /// this.attr = vec
  207. void FillAttr( int nAttr, Vector const &vecValue );
  208. /// this.attr = float
  209. void FillAttr( int nAttr, float flValue );
  210. /// this.nDestAttr *= src.nSrcAttr
  211. void MulAttr( CSOAContainer const &src, int nSrcAttr, int nDestAttr );
  212. /// Returns the result of repeatedly combining attr values with the initial value using the specified function.
  213. /// For instance, SumAttributeValue is just ReduceAttr<AddSIMD>( attr, FOUR_ZEROS );
  214. template<BINARYSIMDFUNCTION fn> float ReduceAttr( int nSrcAttr, fltx4 const &fl4InitialValue ) const;
  215. template<BINARYSIMDFUNCTION fn> void ApplyBinaryFunctionToAttr( int nDestAttr, fltx4 const &flFnArg1 );
  216. /// this.attr = fn1( fn2( attr, arg2 ), arg1 )
  217. template<BINARYSIMDFUNCTION fn1, BINARYSIMDFUNCTION fn2> void ApplyTwoComposedBinaryFunctionsToAttr( int nDestAttr, fltx4 const &flFnArg1, fltx4 const &flFnArg2 );
  218. /// this.nDestAttr *= flValue
  219. void MulAttr( int nDestAttr, float flScale )
  220. {
  221. ApplyBinaryFunctionToAttr<MulSIMD>( nDestAttr, ReplicateX4( flScale ) );
  222. }
  223. void AddToAttr( int nDestAttr, float flAddend )
  224. {
  225. ApplyBinaryFunctionToAttr<AddSIMD>( nDestAttr, ReplicateX4( flAddend ) );
  226. }
  227. // this.attr = max( this.attr, flminvalue )
  228. void MaxAttr( int nDestAttr, float flMinValue )
  229. {
  230. ApplyBinaryFunctionToAttr<MaxSIMD>( nDestAttr, ReplicateX4( flMinValue ) );
  231. }
  232. /// this.attr = min( this.attr, flminvalue )
  233. void MinAttr( int nDestAttr, float flMaxValue )
  234. {
  235. ApplyBinaryFunctionToAttr<MinSIMD>( nDestAttr, ReplicateX4( flMaxValue ) );
  236. }
  237. void ClampAttr( int nDestAttr, float flMinValue, float flMaxValue )
  238. {
  239. ApplyTwoComposedBinaryFunctionsToAttr<MinSIMD, MaxSIMD>( nDestAttr, ReplicateX4( flMaxValue ), ReplicateX4( flMinValue ) );
  240. }
  241. /// this.attr = normalize( this.attr )
  242. void NormalizeAttr( int nAttr );
  243. /// fill 2d a rectangle with values interpolated from 4 corner values.
  244. void FillAttrWithInterpolatedValues( int nAttr, float flValue00, float flValue10, float flValue01, float flValue11 ) const;
  245. void FillAttrWithInterpolatedValues( int nAttr, Vector flValue00, Vector flValue10,
  246. Vector const &flValue01, Vector const &flValue11 ) const;
  247. /// grab 3 scalar attributes from one csoaa and fill in a fourvector attr in.
  248. void PackScalarAttributesToVectorAttribute( CSOAContainer *pInput,
  249. int nVecAttributeOut,
  250. int nScalarAttributeX,
  251. int nScalarAttributeY,
  252. int nScalarAttributeZ );
  253. /// grab the 3 components of a vector attribute and store in 3 scalar attributes.
  254. void UnPackVectorAttributeToScalarAttributes( CSOAContainer *pInput,
  255. int nVecAttributeIn,
  256. int nScalarAttributeX,
  257. int nScalarAttributeY,
  258. int nScalarAttributeZ );
  259. /// this.attrout = src.attrin * vec (component by component )
  260. void MultiplyVectorAttribute( CSOAContainer *pInput, int nAttributeIn, Vector const &vecScalar, int nAttributeOut );
  261. /// Given an soa container of a different dimension, resize one attribute from it to fit this
  262. /// table's geometry. point sampling only
  263. void ResampleAttribute( CSOAContainer &pInput, int nAttr );
  264. /// sum of all floats in an attribute
  265. float SumAttributeValue( int nAttr ) const;
  266. /// sum(attr) / ( w * h * d )
  267. float AverageFloatAttributeValue( int nAttr ) const;
  268. /// maximum float value in a float attr
  269. float MaxAttributeValue( int nAttr ) const;
  270. /// minimum float value in a float attr
  271. float MinAttributeValue( int nAttr ) const;
  272. /// scalartargetattribute += w*exp( vecdir dot ndirection)
  273. void AddGaussianSRBF( float flWeight, Vector vecDir, int nDirectionAttribute, int nScalarTargetAttribute );
  274. /// vec3targetattribute += w*exp( vecdir dot ndirection)
  275. void AddGaussianSRBF( Vector vecWeight, Vector vecDir, int nDirectionAttribute,
  276. int nVectorTargetAttribute );
  277. /// find the largest value of a vector attribute
  278. void FindLargestMagnitudeVector( int nAttr, int *nx, int *ny, int *nz );
  279. void KMeansQuantization( int const *pFieldIndices, int nNumFields,
  280. KMeansQuantizedValue *pOutValues,
  281. int nNumResultsDesired, IKMeansErrorMetric *pErrorCalculator,
  282. int nFieldToStoreIndexInto, int nNumIterations,
  283. int nChannelToReceiveErrorSignal = -1 );
  284. // Calculate the signed distance, in voxels, between all voxels and a surface boundary defined
  285. // by nSrcField being >0. Voxels with nSrcField <0 will end up with negative distances. Voxels
  286. // with nSrcField == 0 will get 0, and nSrcField >0 will yield positive distances. Note the
  287. // min/max x/y/z fields don't reflect the range to be written, but rather represent the bounds
  288. // of updated voxels that you want your distance field modified to take into account. This
  289. // volume will be bloated based upon the nMaxDistance parameter and simd padding. A
  290. // brute-force algorithm is used, but it is threaded and simd'd. Large "nMaxDistance" values
  291. // applied to large images can take a long time, as the execution time per output pixel is
  292. // proportional to maxdistance^2. The rect argument, if passed, will be modified to be the
  293. // entire rectangle modified by the operation.
  294. void GenerateDistanceField( int nSrcField, int nDestField,
  295. int nMaxDistance,
  296. Rect3D_t *pRect = NULL );
  297. void SetThreadMode( SOAThreadMode_t eThreadMode );
  298. protected:
  299. int m_nColumns; // # of rows and columns created with
  300. int m_nRows;
  301. int m_nSlices;
  302. int m_nPaddedColumns; // # of columns rounded up for sse
  303. int m_nNumQuadsPerRow; // # of groups of 4 elements per row
  304. uint8 *m_pDataMemory; // the actual data memory
  305. uint8 *m_pAttributePtrs[MAX_SOA_FIELDS];
  306. EAttributeDataType m_nDataType[MAX_SOA_FIELDS];
  307. size_t m_nStrideInBytes[MAX_SOA_FIELDS]; // stride from one field datum to another
  308. size_t m_nRowStrideInBytes[MAX_SOA_FIELDS]; // stride from one row datum to another per field
  309. size_t m_nSliceStrideInBytes[MAX_SOA_FIELDS]; // stride from one slice datum to another per field
  310. uint32 m_nFieldPresentMask;
  311. uint8 *m_pConstantDataMemory;
  312. uint8 *m_pSeparateDataMemory[MAX_SOA_FIELDS]; // for fields allocated separately from the main allocation
  313. SOAThreadMode_t m_eThreadMode; // set thread mode
  314. FORCEINLINE void Init( void )
  315. {
  316. memset( m_nDataType, 0xff, sizeof( m_nDataType ) );
  317. memset( m_pSeparateDataMemory, 0, sizeof( m_pSeparateDataMemory ) );
  318. #ifdef _DEBUG
  319. memset( m_pAttributePtrs, 0xFF, sizeof( m_pAttributePtrs ) );
  320. memset( m_nStrideInBytes, 0xFF, sizeof( m_nStrideInBytes ) );
  321. memset( m_nRowStrideInBytes, 0xFF, sizeof( m_nRowStrideInBytes ) );
  322. memset( m_nSliceStrideInBytes, 0xFF, sizeof( m_nSliceStrideInBytes ) );
  323. #endif
  324. m_pConstantDataMemory = NULL;
  325. m_pDataMemory = 0;
  326. m_nNumQuadsPerRow = 0;
  327. m_nColumns = m_nPaddedColumns = m_nRows = m_nSlices = 0;
  328. m_nFieldPresentMask = 0;
  329. m_eThreadMode = SOATHREADMODE_NONE;
  330. }
  331. void UpdateDistanceRow( int nSearchRadius, int nMinX, int nMaxX, int nY, int nZ,
  332. int nSrcField, int nDestField );
  333. // parallel helper functions. These do the work, and all take a row/column range as their first arguments.
  334. void CopyAttrFromPartial( int nStartRow, int nNumRows, int nStartSlice, int nEndSlice, CSOAContainer const *pOther, int nDestAttributeIndex, int nSrcAttributeIndex );
  335. void FillAttrPartial( int nStartRow, int nNumRows, int nStartSlice, int nEndSlice, int nAttr, fltx4 fl4Value );
  336. // Allocation utility funcs (NOTE: all allocs are multiples of 16, and are aligned allocs)
  337. size_t DataMemorySize( void ) const; // total bytes of data memory to allocate at m_pDataMemory (if all attributes were allocated in a single block)
  338. size_t ConstantMemorySize( void ) const; // total bytes of constant memory to allocate at m_pConstantDataMemory (if all constant attributes were allocated in a single block)
  339. size_t AttributeMemorySize( int nAttrIndex ) const; // total bytes of data memory allocated to a single attribute (constant or otherwise)
  340. void AllocateDataMemory( void );
  341. void AllocateConstantMemory( void );
  342. };
  343. // define binary op class to allow this construct without temps:
  344. // dest( FBM_ATTR_RED ) = src( FBM_ATTR_BLUE ) + src( FBM_ATTR_GREEN )
  345. template<BINARYSIMDFUNCTION fn> class CSOAAttributeReferenceBinaryOp;
  346. class CSOAAttributeReference
  347. {
  348. friend class CSOAContainer;
  349. class CSOAContainer *m_pContainer;
  350. int m_nAttributeID;
  351. public:
  352. FORCEINLINE void operator *=( float flScale ) const
  353. {
  354. m_pContainer->MulAttr( m_nAttributeID, flScale );
  355. }
  356. FORCEINLINE void operator +=( float flAddend ) const
  357. {
  358. m_pContainer->AddToAttr( m_nAttributeID, flAddend );
  359. }
  360. FORCEINLINE void operator -=( float flAddend ) const
  361. {
  362. m_pContainer->AddToAttr( m_nAttributeID, -flAddend );
  363. }
  364. FORCEINLINE void operator =( float flValue ) const
  365. {
  366. m_pContainer->FillAttr( m_nAttributeID, flValue );
  367. }
  368. FORCEINLINE void operator =( CSOAAttributeReference const &other ) const
  369. {
  370. m_pContainer->CopyAttrFrom( *other.m_pContainer, m_nAttributeID, other.m_nAttributeID );
  371. }
  372. template<BINARYSIMDFUNCTION fn> FORCEINLINE void operator =( CSOAAttributeReferenceBinaryOp<fn> const &op );
  373. FORCEINLINE void CopyTo( CSOAAttributeReference &other ) const; // since operator= is over-ridden
  374. };
  375. // define binary op class to allow this construct without temps:
  376. // dest( FBM_ATTR_RED ) = src( FBM_ATTR_BLUE ) + src( FBM_ATTR_GREEN )
  377. template<BINARYSIMDFUNCTION fn> class CSOAAttributeReferenceBinaryOp
  378. {
  379. public:
  380. CSOAAttributeReference m_opA;
  381. CSOAAttributeReference m_opB;
  382. CSOAAttributeReferenceBinaryOp( CSOAAttributeReference const &a, CSOAAttributeReference const & b )
  383. {
  384. a.CopyTo( m_opA );
  385. b.CopyTo( m_opB );
  386. }
  387. };
  388. #define DEFINE_OP( opname, fnname ) \
  389. FORCEINLINE CSOAAttributeReferenceBinaryOp<fnname> operator opname( CSOAAttributeReference const &left, CSOAAttributeReference const &right ) \
  390. { \
  391. return CSOAAttributeReferenceBinaryOp<fnname>( left, right ); \
  392. }
  393. // these operator overloads let you do
  394. // dst[ATT1] = src1[ATT] + src2[ATT] with no temporaries generated
  395. DEFINE_OP( +, AddSIMD );
  396. DEFINE_OP( *, MulSIMD );
  397. DEFINE_OP( -, SubSIMD );
  398. DEFINE_OP( /, DivSIMD );
  399. template<BINARYSIMDFUNCTION fn> FORCEINLINE void CSOAAttributeReference::operator =( CSOAAttributeReferenceBinaryOp<fn> const &op )
  400. {
  401. m_pContainer->AssertDataType( m_nAttributeID, ATTRDATATYPE_FLOAT );
  402. fltx4 *pOut = m_pContainer->RowPtr<fltx4>( m_nAttributeID, 0 );
  403. // GCC on PS3 gets confused by this code, so we literally have to break it into multiple statements
  404. CSOAContainer *pContainerA = op.m_opA.m_pContainer;
  405. CSOAContainer *pContainerB = op.m_opB.m_pContainer;
  406. fltx4 *pInA = pContainerA->RowPtr< fltx4 >( op.m_opA.m_nAttributeID, 0 );
  407. fltx4 *pInB = pContainerB->RowPtr< fltx4 >( op.m_opB.m_nAttributeID, 0 );
  408. size_t nRowToRowStride = m_pContainer->RowToRowStep( m_nAttributeID ) / sizeof( fltx4 );
  409. int nRowCtr = m_pContainer->NumRows() * m_pContainer->NumSlices();
  410. do
  411. {
  412. int nColCtr = m_pContainer->NumQuadsPerRow();
  413. do
  414. {
  415. *(pOut++) = fn( *( pInA++ ), *( pInB++ ) );
  416. } while ( --nColCtr );
  417. pOut += nRowToRowStride;
  418. pInA += nRowToRowStride;
  419. pInB += nRowToRowStride;
  420. } while ( --nRowCtr );
  421. }
  422. FORCEINLINE void CSOAAttributeReference::CopyTo( CSOAAttributeReference &other ) const
  423. {
  424. other.m_pContainer = m_pContainer;
  425. other.m_nAttributeID = m_nAttributeID;
  426. }
  427. FORCEINLINE CSOAAttributeReference CSOAContainer::operator[]( int nAttrIdx )
  428. {
  429. CSOAAttributeReference ret;
  430. ret.m_pContainer = this;
  431. ret.m_nAttributeID = nAttrIdx;
  432. return ret;
  433. }
  434. FORCEINLINE CSOAAttributeReference CSOAContainer::Attr( int nAttrIdx )
  435. {
  436. return (*this)[nAttrIdx];
  437. }
  438. template<BINARYSIMDFUNCTION fn1, BINARYSIMDFUNCTION fn2> void CSOAContainer::ApplyTwoComposedBinaryFunctionsToAttr( int nDestAttr, fltx4 const &fl4FnArg1, fltx4 const &fl4FnArg2 )
  439. {
  440. if ( m_nDataType[nDestAttr] == ATTRDATATYPE_4V )
  441. {
  442. FourVectors *pOut = RowPtr<FourVectors>( nDestAttr, 0 );
  443. size_t nRowToRowStride = RowToRowStep( nDestAttr ) / sizeof( FourVectors );
  444. int nRowCtr = NumRows() * NumSlices();
  445. do
  446. {
  447. int nColCtr = NumQuadsPerRow();
  448. do
  449. {
  450. pOut->x = fn1( fn2( pOut->x, fl4FnArg2 ), fl4FnArg1 );
  451. pOut->y = fn1( fn2( pOut->y, fl4FnArg2 ), fl4FnArg1 );
  452. pOut->z = fn1( fn2( pOut->z, fl4FnArg2 ), fl4FnArg1 );
  453. } while ( --nColCtr );
  454. pOut += nRowToRowStride;
  455. } while ( --nRowCtr );
  456. }
  457. else
  458. {
  459. AssertDataType( nDestAttr, ATTRDATATYPE_FLOAT );
  460. fltx4 *pOut = RowPtr<fltx4>( nDestAttr, 0 );
  461. size_t nRowToRowStride = RowToRowStep( nDestAttr ) / sizeof( fltx4 );
  462. int nRowCtr = NumRows() * NumSlices();
  463. do
  464. {
  465. int nColCtr = NumQuadsPerRow();
  466. do
  467. {
  468. *pOut = fn1( fn2( *pOut, fl4FnArg2 ), fl4FnArg1 );
  469. pOut++;
  470. } while ( --nColCtr );
  471. pOut += nRowToRowStride;
  472. } while ( --nRowCtr );
  473. }
  474. }
  475. template<BINARYSIMDFUNCTION fn> void CSOAContainer::ApplyBinaryFunctionToAttr( int nDestAttr, fltx4 const &fl4FnArg1 )
  476. {
  477. if ( m_nDataType[nDestAttr] == ATTRDATATYPE_4V )
  478. {
  479. FourVectors *pOut = RowPtr<FourVectors>( nDestAttr, 0 );
  480. size_t nRowToRowStride = RowToRowStep( nDestAttr ) / sizeof( FourVectors );
  481. int nRowCtr = NumRows() * NumSlices();
  482. do
  483. {
  484. int nColCtr = NumQuadsPerRow();
  485. do
  486. {
  487. pOut->x = fn( pOut->x, fl4FnArg1 );
  488. pOut->y = fn( pOut->y, fl4FnArg1 );
  489. pOut->z = fn( pOut->z, fl4FnArg1 );
  490. } while ( --nColCtr );
  491. pOut += nRowToRowStride;
  492. } while ( --nRowCtr );
  493. }
  494. else
  495. {
  496. AssertDataType( nDestAttr, ATTRDATATYPE_FLOAT );
  497. fltx4 *pOut = RowPtr<fltx4>( nDestAttr, 0 );
  498. size_t nRowToRowStride = RowToRowStep( nDestAttr ) / sizeof( fltx4 );
  499. int nRowCtr = NumRows() * NumSlices();
  500. do
  501. {
  502. int nColCtr = NumQuadsPerRow();
  503. do
  504. {
  505. *pOut = fn( *pOut, fl4FnArg1 );
  506. pOut++;
  507. } while ( --nColCtr );
  508. pOut += nRowToRowStride;
  509. } while ( --nRowCtr );
  510. }
  511. }
  512. template<BINARYSIMDFUNCTION fn> float CSOAContainer::ReduceAttr( int nSrcAttr, fltx4 const &fl4InitialValue ) const
  513. {
  514. AssertDataType( nSrcAttr, ATTRDATATYPE_FLOAT );
  515. fltx4 fl4Result = fl4InitialValue;
  516. fltx4 const *pIn = RowPtr<fltx4>( nSrcAttr, 0 );
  517. size_t nRowToRowStride = RowToRowStep( nSrcAttr ) / sizeof( fltx4 );
  518. int nRowCtr = NumRows() * NumSlices();
  519. bi32x4 fl4LastColumnMask = (bi32x4)LoadAlignedSIMD( g_SIMD_SkipTailMask[NumCols() & 3 ] );
  520. do
  521. {
  522. for( int i = 0; i < NumQuadsPerRow() - 1; i++ )
  523. {
  524. fl4Result = fn( fl4Result, *( pIn++ ) );
  525. }
  526. // handle the last column in case its not a multiple of 4 wide
  527. fl4Result = MaskedAssign( fl4LastColumnMask, fn( fl4Result, *( pIn++ ) ), fl4Result );
  528. pIn += nRowToRowStride;
  529. } while ( --nRowCtr );
  530. // now, combine the subfields
  531. fl4Result = fn(
  532. fn( fl4Result, SplatYSIMD( fl4Result ) ),
  533. fn( SplatZSIMD( fl4Result ), SplatWSIMD( fl4Result ) ) );
  534. return SubFloat( fl4Result, 0 );
  535. }
  536. #define QUANTIZER_NJOBS 1 // # of simultaneous subjobs to execute for kmeans quantizer
  537. // kmeans quantization classes
  538. // the array of quantized values returned by quantization
  539. class KMeansQuantizedValue
  540. {
  541. public:
  542. FourVectors m_vecValuePosition; // replicated
  543. fltx4 m_fl4Values[MAX_SOA_FIELDS]; // replicated
  544. float m_flValueAccumulators[QUANTIZER_NJOBS][MAX_SOA_FIELDS];
  545. float m_flWeightAccumulators[QUANTIZER_NJOBS];
  546. FORCEINLINE float operator()( int n )
  547. {
  548. return SubFloat( m_fl4Values[n], 0 );
  549. }
  550. };
  551. class KMeansSampleDescriptor
  552. {
  553. public:
  554. fltx4 *m_pInputValues[MAX_SOA_FIELDS];
  555. FORCEINLINE fltx4 const & operator()( int nField ) const
  556. {
  557. return *m_pInputValues[nField];
  558. }
  559. };
  560. class IKMeansErrorMetric
  561. {
  562. public:
  563. virtual void CalculateError( KMeansSampleDescriptor const &sampleAddresses,
  564. FourVectors const &v4SamplePositions,
  565. KMeansQuantizedValue const &valueToCompareAgainst,
  566. fltx4 *pfl4ErrOut ) =0;
  567. // for things like normalization, etc
  568. virtual void PostAdjustQuantizedValue( KMeansQuantizedValue &valueToAdjust )
  569. {
  570. }
  571. // for global fixup after each adjustment step
  572. virtual void PostStep( int const *pFieldIndices, int nNumFields,
  573. KMeansQuantizedValue *pValues, int nNumQuantizedValues,
  574. int nIndexField, CSOAContainer &data )
  575. {
  576. }
  577. };
  578. FORCEINLINE CSOAContainer::CSOAContainer( void )
  579. {
  580. Init();
  581. }
  582. //-----------------------------------------------------------------------------
  583. // Did the container allocate memory for this attribute?
  584. //-----------------------------------------------------------------------------
  585. FORCEINLINE bool CSOAContainer::HasAllocatedMemory( int nAttrIdx ) const
  586. {
  587. return ( m_nFieldPresentMask & ( 1 << nAttrIdx ) ) != 0;
  588. }
  589. FORCEINLINE EAttributeDataType CSOAContainer::GetAttributeType( int nAttrIdx ) const
  590. {
  591. Assert( ( nAttrIdx >= 0 ) && ( nAttrIdx < MAX_SOA_FIELDS ) );
  592. return m_nDataType[nAttrIdx];
  593. }
  594. FORCEINLINE void CSOAContainer::EnsureDataType( int nAttrIdx, EAttributeDataType nDataType )
  595. {
  596. if ( !HasAllocatedMemory( nAttrIdx ) )
  597. {
  598. SetAttributeType( nAttrIdx, nDataType );
  599. }
  600. }
  601. FORCEINLINE int CSOAContainer::NumRows( void ) const
  602. {
  603. return m_nRows;
  604. }
  605. FORCEINLINE int CSOAContainer::NumCols( void ) const
  606. {
  607. return m_nColumns;
  608. }
  609. FORCEINLINE int CSOAContainer::NumSlices( void ) const
  610. {
  611. return m_nSlices;
  612. }
  613. FORCEINLINE void CSOAContainer::AssertDataType( int nAttrIdx, EAttributeDataType nDataType ) const
  614. {
  615. Assert( nAttrIdx >= 0 );
  616. Assert( nAttrIdx < MAX_SOA_FIELDS );
  617. Assert( m_nDataType[ nAttrIdx ] == nDataType );
  618. }
  619. // # of groups of 4 elements per row
  620. FORCEINLINE int CSOAContainer::NumQuadsPerRow( void ) const
  621. {
  622. return m_nNumQuadsPerRow;
  623. }
  624. FORCEINLINE int CSOAContainer::Count( void ) const // for 1d data
  625. {
  626. return NumCols();
  627. }
  628. FORCEINLINE int CSOAContainer::NumElements( void ) const
  629. {
  630. return NumCols() * NumRows() * NumSlices();
  631. }
  632. // how much to step to go from the end of one row to the start of the next one. Basically, how
  633. // many bytes to add at the end of a row when iterating over the whole 2d array with ++
  634. FORCEINLINE size_t CSOAContainer::RowToRowStep( int nAttrIdx ) const
  635. {
  636. return 0;
  637. }
  638. template<class T> FORCEINLINE T *CSOAContainer::RowPtr( int nAttributeIdx, int nRowNumber, int nSliceNumber ) const
  639. {
  640. Assert( nRowNumber < m_nRows );
  641. Assert( nAttributeIdx < MAX_SOA_FIELDS );
  642. Assert( m_nDataType[nAttributeIdx] != ATTRDATATYPE_NONE );
  643. Assert( ( m_nFieldPresentMask & ( 1 << nAttributeIdx ) ) || ( ( nRowNumber == 0 ) && ( nSliceNumber == 0 ) ) );
  644. return reinterpret_cast<T *>(
  645. m_pAttributePtrs[nAttributeIdx] +
  646. + nRowNumber * m_nRowStrideInBytes[nAttributeIdx]
  647. + nSliceNumber * m_nSliceStrideInBytes[nAttributeIdx] );
  648. }
  649. FORCEINLINE void const *CSOAContainer::ConstRowPtr( int nAttributeIdx, int nRowNumber, int nSliceNumber ) const
  650. {
  651. Assert( nRowNumber < m_nRows );
  652. Assert( nAttributeIdx < MAX_SOA_FIELDS );
  653. Assert( m_nDataType[nAttributeIdx] != ATTRDATATYPE_NONE );
  654. return m_pAttributePtrs[nAttributeIdx]
  655. + nRowNumber * m_nRowStrideInBytes[nAttributeIdx]
  656. + nSliceNumber * m_nSliceStrideInBytes[nAttributeIdx];
  657. }
  658. template<class T> FORCEINLINE T *CSOAContainer::ElementPointer( int nAttributeIdx, int nX, int nY, int nZ ) const
  659. {
  660. Assert( nAttributeIdx < MAX_SOA_FIELDS );
  661. Assert( nX < m_nColumns );
  662. Assert( nY < m_nRows );
  663. Assert( nZ < m_nSlices );
  664. Assert( m_nDataType[nAttributeIdx] != ATTRDATATYPE_NONE );
  665. Assert( m_nDataType[nAttributeIdx] != ATTRDATATYPE_4V );
  666. return reinterpret_cast<T *>( m_pAttributePtrs[nAttributeIdx]
  667. + nX * m_nStrideInBytes[nAttributeIdx]
  668. + nY * m_nRowStrideInBytes[nAttributeIdx]
  669. + nZ * m_nSliceStrideInBytes[nAttributeIdx]
  670. );
  671. }
  672. FORCEINLINE FourVectors *CSOAContainer::ElementPointer4V( int nAttributeIdx, int nX, int nY, int nZ ) const
  673. {
  674. Assert( nAttributeIdx < MAX_SOA_FIELDS );
  675. Assert( nX < m_nColumns );
  676. Assert( nY < m_nRows );
  677. Assert( nZ < m_nSlices );
  678. Assert( m_nDataType[nAttributeIdx] == ATTRDATATYPE_4V );
  679. int nXIdx = nX / 4;
  680. uint8 *pRet = m_pAttributePtrs[nAttributeIdx]
  681. + nXIdx * 4 * m_nStrideInBytes[nAttributeIdx]
  682. + nY * m_nRowStrideInBytes[nAttributeIdx]
  683. + nZ * m_nSliceStrideInBytes[nAttributeIdx];
  684. pRet += 4 * ( nX & 3 );
  685. return reinterpret_cast<FourVectors *>( pRet );
  686. }
  687. FORCEINLINE size_t CSOAContainer::ItemByteStride( int nAttributeIdx ) const
  688. {
  689. Assert( nAttributeIdx < MAX_SOA_FIELDS );
  690. Assert( m_nDataType[nAttributeIdx] != ATTRDATATYPE_NONE );
  691. return m_nStrideInBytes[ nAttributeIdx ];
  692. }
  693. // move all the data from one csoacontainer to another, leaving the source empty.
  694. // this is just a pointer copy.
  695. FORCEINLINE void CSOAContainer::MoveDataFrom( CSOAContainer other )
  696. {
  697. (*this) = other;
  698. other.Init();
  699. }
  700. class CFltX4AttributeIterator : public CStridedConstPtr<fltx4>
  701. {
  702. FORCEINLINE CFltX4AttributeIterator( CSOAContainer const *pContainer, int nAttribute, int nRowNumber = 0 )
  703. : CStridedConstPtr<fltx4>( pContainer->ConstRowPtr( nAttribute, nRowNumber),
  704. pContainer->ItemByteStride( nAttribute ) )
  705. {
  706. }
  707. };
  708. class CFltX4AttributeWriteIterator : public CStridedPtr<fltx4>
  709. {
  710. FORCEINLINE CFltX4AttributeWriteIterator( CSOAContainer const *pContainer, int nAttribute, int nRowNumber = 0 )
  711. : CStridedPtr<fltx4>( pContainer->RowPtr<uint8>( nAttribute, nRowNumber),
  712. pContainer->ItemByteStride( nAttribute ) )
  713. {
  714. }
  715. };
  716. FORCEINLINE FourVectors CompressSIMD( FourVectors const &a, FourVectors const &b )
  717. {
  718. FourVectors ret;
  719. ret.x = CompressSIMD( a.x, b.x );
  720. ret.y = CompressSIMD( a.y, b.y );
  721. ret.z = CompressSIMD( a.z, b.z );
  722. return ret;
  723. }
  724. FORCEINLINE FourVectors Compress4SIMD( FourVectors const &a, FourVectors const &b,
  725. FourVectors const &c, FourVectors const &d )
  726. {
  727. FourVectors ret;
  728. ret.x = Compress4SIMD( a.x, b.x, c.x, d.x );
  729. ret.y = Compress4SIMD( a.y, b.y, c.y, d.y );
  730. ret.z = Compress4SIMD( a.z, b.z, c.z, d.z );
  731. return ret;
  732. }
  733. #endif