Counter Strike : Global Offensive Source Code
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1113 lines
34 KiB

  1. //===== Copyright © 1996-2007, Valve Corporation, All rights reserved. ======//
  2. //
  3. // $Header: $
  4. // $NoKeywords: $
  5. //
  6. // SOA container
  7. //===========================================================================//
  8. #include "utlsoacontainer.h"
  9. #include <stdio.h>
  10. #include <stdarg.h>
  11. #include <ctype.h>
  12. #include <stdlib.h>
  13. #include <limits.h>
  14. #include "mathlib/halton.h"
  15. #include "vstdlib/jobthread.h"
  16. #include "tier1/callqueue.h"
  17. // memdbgon must be the last include file in a .cpp file!!!
  18. #include "tier0/memdbgon.h"
  19. //-----------------------------------------------------------------------------
  20. // Globals
  21. //-----------------------------------------------------------------------------
  22. static size_t s_DataTypeByteSize[]=
  23. {
  24. sizeof( float ),
  25. 3 * sizeof( float ),
  26. sizeof( int ),
  27. sizeof( void * ),
  28. };
  29. static fltx4 s_ZeroFields[3];
  30. //-----------------------------------------------------------------------------
  31. // Constructor, destructor
  32. //-----------------------------------------------------------------------------
  33. CSOAContainer::CSOAContainer( int nCols, int nRows, int nSlices, ... )
  34. {
  35. COMPILE_TIME_ASSERT( ATTRDATATYPE_COUNT == ARRAYSIZE( s_DataTypeByteSize ) );
  36. Init();
  37. va_list args;
  38. va_start( args, nSlices );
  39. for(;;)
  40. {
  41. int nFieldNumber = va_arg( args, int );
  42. if ( nFieldNumber == -1 )
  43. break;
  44. EAttributeDataType nDataType = (EAttributeDataType)va_arg( args, int );
  45. SetAttributeType( nFieldNumber, nDataType );
  46. }
  47. va_end( args );
  48. AllocateData( nCols, nRows, nSlices );
  49. }
  50. CSOAContainer::~CSOAContainer( void )
  51. {
  52. Purge();
  53. }
  54. //-----------------------------------------------------------------------------
  55. // Purge
  56. //-----------------------------------------------------------------------------
  57. void CSOAContainer::Purge( void )
  58. {
  59. PurgeData();
  60. Init();
  61. }
  62. //-----------------------------------------------------------------------------
  63. // Allocate data, purge data
  64. //-----------------------------------------------------------------------------
  65. void CSOAContainer::AllocateData( int nNCols, int nNRows, int nSlices )
  66. {
  67. m_nColumns = nNCols;
  68. m_nRows = nNRows;
  69. m_nSlices = nSlices;
  70. m_nPaddedColumns = ( 3 + nNCols ) & ~3; // pad up for sse
  71. m_nNumQuadsPerRow = ( m_nPaddedColumns >> 2 );
  72. // Allocate data memory and constant memory
  73. AllocateDataMemory();
  74. AllocateConstantMemory();
  75. // now, fill in strides and pointers
  76. uint8 *pBasePtr = m_pDataMemory;
  77. uint8 *pConstantDataPtr = m_pConstantDataMemory;
  78. for( int i = 0; i < MAX_SOA_FIELDS; i++ )
  79. {
  80. if ( m_nDataType[i] == ATTRDATATYPE_NONE )
  81. {
  82. m_pAttributePtrs[i] = reinterpret_cast<uint8 *>( s_ZeroFields );
  83. m_nStrideInBytes[i] = 0;
  84. m_nRowStrideInBytes[i] = 0;
  85. m_nSliceStrideInBytes[i] = 0;
  86. continue;
  87. }
  88. if ( m_nFieldPresentMask & ( 1 << i ) )
  89. {
  90. m_pAttributePtrs[i] = pBasePtr;
  91. m_nStrideInBytes[i] = s_DataTypeByteSize[m_nDataType[i]];
  92. m_nRowStrideInBytes[i] = m_nPaddedColumns * m_nStrideInBytes[i];
  93. m_nSliceStrideInBytes[i] = m_nRowStrideInBytes[i] * m_nRows;
  94. pBasePtr += AttributeMemorySize( i );
  95. }
  96. else
  97. {
  98. m_pAttributePtrs[i] = pConstantDataPtr;
  99. m_nStrideInBytes[i] = 0;
  100. m_nRowStrideInBytes[i] = 0;
  101. m_nSliceStrideInBytes[i] = 0;
  102. pConstantDataPtr += AttributeMemorySize( i );
  103. }
  104. }
  105. SetThreadMode( SOATHREADMODE_AUTO );
  106. }
  107. void CSOAContainer::SetAttributeType( int nAttrIdx, EAttributeDataType nDataType, bool bAllocateMemory )
  108. {
  109. Assert( nAttrIdx < MAX_SOA_FIELDS );
  110. if ( !m_pDataMemory )
  111. {
  112. // Attributes will be allocated/setup later, when AllocateData is called
  113. if ( ( nDataType != ATTRDATATYPE_NONE ) && bAllocateMemory )
  114. m_nFieldPresentMask |= ( 1 << nAttrIdx );
  115. else
  116. m_nFieldPresentMask &= ~( 1 << nAttrIdx );
  117. m_nDataType[nAttrIdx] = nDataType;
  118. return;
  119. }
  120. // Attributes have already been allocated/setup by AllocateData
  121. if ( m_nDataType[nAttrIdx] != ATTRDATATYPE_NONE )
  122. {
  123. // This attribute was already setup, can't change it now!
  124. if ( m_nDataType[nAttrIdx] != nDataType )
  125. {
  126. Warning( "CSOAContainer::SetAttributeType - ERROR, trying to change type of previously-defined attribute %d!\n", nAttrIdx );
  127. Assert( 0 );
  128. }
  129. return;
  130. }
  131. // Add a new attribute with a separate allocation
  132. m_nDataType[nAttrIdx] = nDataType;
  133. if ( bAllocateMemory )
  134. {
  135. m_nFieldPresentMask |= ( 1 << nAttrIdx );
  136. m_nStrideInBytes[nAttrIdx] = s_DataTypeByteSize[nDataType];
  137. m_nRowStrideInBytes[nAttrIdx] = m_nStrideInBytes[nAttrIdx] * m_nPaddedColumns;
  138. m_nSliceStrideInBytes[nAttrIdx] = m_nRowStrideInBytes[nAttrIdx] * m_nRows;
  139. }
  140. else
  141. {
  142. // New attribute is constant
  143. m_nStrideInBytes[nAttrIdx] = 0;
  144. m_nRowStrideInBytes[nAttrIdx] = 0;
  145. m_nSliceStrideInBytes[nAttrIdx] = 0;
  146. }
  147. m_pSeparateDataMemory[nAttrIdx] = reinterpret_cast<uint8 *>( MemAlloc_AllocAligned( AttributeMemorySize( nAttrIdx ), 16 ) );
  148. m_pAttributePtrs[nAttrIdx] = m_pSeparateDataMemory[nAttrIdx];
  149. if ( !bAllocateMemory )
  150. {
  151. // Set constant memory to zero as the default value
  152. memset( m_pSeparateDataMemory[nAttrIdx], 0, AttributeMemorySize( nAttrIdx ) );
  153. }
  154. }
  155. void CSOAContainer::PurgeData( void )
  156. {
  157. if ( m_pConstantDataMemory )
  158. {
  159. MemAlloc_FreeAligned( m_pConstantDataMemory );
  160. m_pConstantDataMemory = NULL;
  161. }
  162. if ( m_pDataMemory )
  163. {
  164. MemAlloc_FreeAligned( m_pDataMemory );
  165. m_pDataMemory = NULL;
  166. }
  167. for( int i = 0; i < ARRAYSIZE( m_pSeparateDataMemory ); i++ )
  168. {
  169. if ( m_pSeparateDataMemory[i] )
  170. {
  171. MemAlloc_FreeAligned( m_pSeparateDataMemory[i] );
  172. m_pSeparateDataMemory[i] = NULL;
  173. }
  174. }
  175. }
  176. size_t CSOAContainer::AttributeMemorySize( int nAttrIndex ) const
  177. {
  178. EAttributeDataType nDataType = m_nDataType[ nAttrIndex ];
  179. if ( nDataType == ATTRDATATYPE_NONE )
  180. return 0;
  181. else if ( m_nFieldPresentMask & ( 1 << nAttrIndex ) )
  182. return ( s_DataTypeByteSize[ nDataType ] * m_nPaddedColumns * m_nRows * m_nSlices );
  183. else
  184. return ( 4 * s_DataTypeByteSize[ nDataType ] );
  185. }
  186. size_t CSOAContainer::DataMemorySize( void ) const
  187. {
  188. size_t nDataMemorySize = 0;
  189. for( int i = 0; i < MAX_SOA_FIELDS; i++ )
  190. {
  191. if ( !( m_nFieldPresentMask & ( 1 << i ) ) )
  192. continue;
  193. nDataMemorySize += AttributeMemorySize( i );
  194. }
  195. return nDataMemorySize;
  196. }
  197. void CSOAContainer::AllocateDataMemory( void )
  198. {
  199. Assert( !m_pDataMemory );
  200. size_t nMemorySize = DataMemorySize();
  201. if ( nMemorySize )
  202. {
  203. m_pDataMemory = reinterpret_cast<uint8 *> ( MemAlloc_AllocAligned( nMemorySize, 16 ) );
  204. }
  205. }
  206. size_t CSOAContainer::ConstantMemorySize( void ) const
  207. {
  208. size_t nConstantDataSize = 0;
  209. for( int i = 0; i < MAX_SOA_FIELDS; i++ )
  210. {
  211. if ( ( m_nDataType[i] == ATTRDATATYPE_NONE ) || ( m_nFieldPresentMask & ( 1 << i ) ) )
  212. continue;
  213. nConstantDataSize += AttributeMemorySize( i );
  214. }
  215. return nConstantDataSize;
  216. }
  217. void CSOAContainer::AllocateConstantMemory( void )
  218. {
  219. Assert( !m_pConstantDataMemory );
  220. size_t nConstantDataSize = ConstantMemorySize();
  221. if ( nConstantDataSize > 0 )
  222. {
  223. m_pConstantDataMemory = (uint8*)MemAlloc_AllocAligned( nConstantDataSize, 16 );
  224. memset( m_pConstantDataMemory, 0, nConstantDataSize );
  225. }
  226. }
  227. void CSOAContainer::SetThreadMode( SOAThreadMode_t eThreadMode )
  228. {
  229. if ( eThreadMode == SOATHREADMODE_AUTO )
  230. {
  231. eThreadMode = SOATHREADMODE_NONE;
  232. if ( NumRows() * NumCols() > ( 16 * 16 ) )
  233. {
  234. eThreadMode = SOATHREADMODE_BYROWS;
  235. }
  236. }
  237. m_eThreadMode = eThreadMode;
  238. }
  239. #define THREAD_NJOBS 32
  240. #define PARALLEL_DISPATCH( method, ... ) \
  241. { \
  242. if ( m_eThreadMode == SOATHREADMODE_NONE ) \
  243. { \
  244. method( 0, NumRows(), 0, NumSlices(), __VA_ARGS__ ); \
  245. } \
  246. else \
  247. { \
  248. CCallQueue workList; \
  249. int nStep = MAX( 1, ( NumRows() / THREAD_NJOBS ) ); \
  250. int nY = 0; \
  251. while( nY < NumRows() ) \
  252. { \
  253. nStep = MIN( nStep, NumRows() - nY ); \
  254. workList.QueueCall( this, &CSOAContainer::method, nY, nStep, 0, NumSlices(), __VA_ARGS__ ); \
  255. nY += nStep; \
  256. } \
  257. workList.ParallelCallQueued(); \
  258. } \
  259. }
  260. void CSOAContainer::CopyAttrFromPartial( int nStartRow, int nNumRows, int nStartSlice, int nEndSlice, CSOAContainer const *pOther, int nDestAttributeIndex, int nSrcAttributeIndex )
  261. {
  262. // copy a subregion in parallel
  263. for( int z = nStartSlice; z < nEndSlice; z++ )
  264. {
  265. size_t nCopySize = m_nRowStrideInBytes[nDestAttributeIndex] * nNumRows;
  266. memcpy( RowPtr<fltx4>( nDestAttributeIndex, nStartRow, z ),
  267. pOther->ConstRowPtr( nSrcAttributeIndex, nStartRow, z ),
  268. nCopySize );
  269. }
  270. }
  271. void CSOAContainer::CopyAttrFrom( CSOAContainer const &other, int nDestAttributeIndex, int nSrcAttributeIndex )
  272. {
  273. if ( nSrcAttributeIndex == -1 )
  274. {
  275. nSrcAttributeIndex = nDestAttributeIndex;
  276. }
  277. Assert( other.NumRows() == NumRows() );
  278. Assert( other.NumCols() == NumCols() );
  279. Assert( other.NumSlices() == NumSlices() );
  280. Assert( m_nDataType[nDestAttributeIndex] == other.m_nDataType[nSrcAttributeIndex] );
  281. if ( m_eThreadMode == SOATHREADMODE_NONE )
  282. {
  283. memcpy( m_pAttributePtrs[nDestAttributeIndex], other.m_pAttributePtrs[nSrcAttributeIndex], AttributeMemorySize( nDestAttributeIndex ) );
  284. }
  285. else
  286. {
  287. PARALLEL_DISPATCH( CopyAttrFromPartial, &other, nDestAttributeIndex, nSrcAttributeIndex );
  288. }
  289. }
  290. void CSOAContainer::CopyAttrToAttr( int nSrcAttributeIndex, int nDestAttributeIndex)
  291. {
  292. Assert( m_nDataType[nSrcAttributeIndex] == m_nDataType[nDestAttributeIndex] );
  293. memcpy( m_pAttributePtrs[nDestAttributeIndex], m_pAttributePtrs[nSrcAttributeIndex], AttributeMemorySize( nSrcAttributeIndex ) );
  294. }
  295. void CSOAContainer::PackScalarAttributesToVectorAttribute( CSOAContainer *pInput,
  296. int nVecAttributeOut,
  297. int nScalarAttributeX,
  298. int nScalarAttributeY,
  299. int nScalarAttributeZ )
  300. {
  301. AssertDataType( nVecAttributeOut, ATTRDATATYPE_4V );
  302. pInput->AssertDataType( nScalarAttributeX, ATTRDATATYPE_FLOAT );
  303. pInput->AssertDataType( nScalarAttributeY, ATTRDATATYPE_FLOAT );
  304. pInput->AssertDataType( nScalarAttributeZ, ATTRDATATYPE_FLOAT );
  305. FourVectors *pOut = RowPtr<FourVectors>( nVecAttributeOut, 0 );
  306. fltx4 *pInX = pInput->RowPtr<fltx4>( nScalarAttributeX, 0 );
  307. fltx4 *pInY = pInput->RowPtr<fltx4>( nScalarAttributeY, 0 );
  308. fltx4 *pInZ = pInput->RowPtr<fltx4>( nScalarAttributeZ, 0 );
  309. size_t nRowToRowStride = RowToRowStep( nVecAttributeOut ) / sizeof( FourVectors );
  310. size_t nRowToRowStrideX = pInput->RowToRowStep( nScalarAttributeX ) / sizeof( fltx4 );
  311. size_t nRowToRowStrideY = pInput->RowToRowStep( nScalarAttributeY ) / sizeof( fltx4 );
  312. size_t nRowToRowStrideZ = pInput->RowToRowStep( nScalarAttributeZ ) / sizeof( fltx4 );
  313. int nRowCtr = NumRows() * NumSlices();
  314. do
  315. {
  316. int nColCtr = NumQuadsPerRow();
  317. do
  318. {
  319. pOut->x = *( pInX++ );
  320. pOut->y = *( pInY++ );
  321. pOut->z = *( pInZ++ );
  322. pOut++;
  323. } while ( --nColCtr );
  324. pOut += nRowToRowStride;
  325. pInX += nRowToRowStrideX;
  326. pInY += nRowToRowStrideY;
  327. pInZ += nRowToRowStrideZ;
  328. } while ( --nRowCtr );
  329. }
  330. void CSOAContainer::UnPackVectorAttributeToScalarAttributes( CSOAContainer *pInput,
  331. int nVecAttributeIn,
  332. int nScalarAttributeX,
  333. int nScalarAttributeY,
  334. int nScalarAttributeZ )
  335. {
  336. pInput->AssertDataType( nVecAttributeIn, ATTRDATATYPE_4V );
  337. AssertDataType( nScalarAttributeX, ATTRDATATYPE_FLOAT );
  338. AssertDataType( nScalarAttributeY, ATTRDATATYPE_FLOAT );
  339. AssertDataType( nScalarAttributeZ, ATTRDATATYPE_FLOAT );
  340. Assert( pInput->NumCols() == NumCols() );
  341. Assert( pInput->NumRows() == NumRows() );
  342. Assert( pInput->NumSlices() == NumSlices() );
  343. FourVectors *pIn = pInput->RowPtr<FourVectors>( nVecAttributeIn, 0 );
  344. fltx4 *pX = RowPtr<fltx4>( nScalarAttributeX, 0 );
  345. fltx4 *pY = RowPtr<fltx4>( nScalarAttributeY, 0 );
  346. fltx4 *pZ = RowPtr<fltx4>( nScalarAttributeZ, 0 );
  347. size_t nRowToRowStride = pInput->RowToRowStep( nVecAttributeIn ) / sizeof( FourVectors );
  348. size_t nRowToRowStrideX = RowToRowStep( nScalarAttributeX ) / sizeof( fltx4 );
  349. size_t nRowToRowStrideY = RowToRowStep( nScalarAttributeY ) / sizeof( fltx4 );
  350. size_t nRowToRowStrideZ = RowToRowStep( nScalarAttributeZ ) / sizeof( fltx4 );
  351. int nRowCtr = NumRows() * NumSlices();
  352. do
  353. {
  354. int nColCtr = NumQuadsPerRow();
  355. do
  356. {
  357. *( pX++ ) = pIn->x;
  358. *( pY++ ) = pIn->y;
  359. *( pZ++ ) = pIn->z;
  360. pIn++;
  361. } while ( --nColCtr );
  362. pIn += nRowToRowStride;
  363. pX += nRowToRowStrideX;
  364. pY += nRowToRowStrideY;
  365. pZ += nRowToRowStrideZ;
  366. } while ( --nRowCtr );
  367. }
  368. void CSOAContainer::MultiplyVectorAttribute( CSOAContainer *pInput, int nAttributeIn,
  369. const Vector &vecScalar,
  370. int nAttributeOut )
  371. {
  372. Assert( pInput->NumCols() == NumCols() );
  373. Assert( pInput->NumRows() == NumRows() );
  374. FourVectors v4Scale;
  375. v4Scale.DuplicateVector( vecScalar );
  376. pInput->AssertDataType( nAttributeIn, ATTRDATATYPE_4V );
  377. AssertDataType( nAttributeOut, ATTRDATATYPE_4V );
  378. size_t nRowToRowStride = pInput->RowToRowStep( nAttributeIn ) / sizeof( FourVectors );
  379. size_t nRowToRowStrideOut = RowToRowStep( nAttributeOut ) / sizeof( FourVectors );
  380. int nRowCtr = NumRows() * NumSlices();
  381. FourVectors const *pIn = pInput->RowPtr<FourVectors>( nAttributeIn, 0 );
  382. FourVectors *pOut = RowPtr<FourVectors>( nAttributeOut, 0 );
  383. do
  384. {
  385. int nColCtr = NumQuadsPerRow();
  386. do
  387. {
  388. FourVectors v4In = *( pIn++ );
  389. v4In *= v4Scale;
  390. *(pOut++) = v4In;
  391. } while ( --nColCtr );
  392. pOut += nRowToRowStrideOut;
  393. pIn += nRowToRowStride;
  394. } while ( --nRowCtr );
  395. }
  396. void CSOAContainer::RandomizeAttribute( int nAttr, float flMin, float flMax ) const
  397. {
  398. AssertDataType( nAttr, ATTRDATATYPE_FLOAT );
  399. fltx4 *pOut = RowPtr<fltx4>( nAttr, 0 );
  400. size_t nRowToRowStride = RowToRowStep( nAttr ) / sizeof( fltx4 );
  401. int nContext = GetSIMDRandContext();
  402. int nRowCtr = NumRows() * NumSlices();
  403. fltx4 fl4Min = ReplicateX4( flMin );
  404. fltx4 fl4Domain = ReplicateX4( flMin - flMin );
  405. do
  406. {
  407. int nColCtr = NumQuadsPerRow();
  408. do
  409. {
  410. *(pOut++) = AddSIMD( fl4Min, MulSIMD( fl4Domain, RandSIMD( nContext ) ) );
  411. } while ( --nColCtr );
  412. pOut += nRowToRowStride;
  413. } while ( --nRowCtr );
  414. ReleaseSIMDRandContext( nContext );
  415. }
  416. void CSOAContainer::FillAttrWithInterpolatedValues( int nAttr, float flValue00, float flValue10, float flValue01, float flValue11 ) const
  417. {
  418. float ooWidth = 1.0 / ( NumCols() - 1 );
  419. float ooHeight = 1.0 / ( NumRows() - 1 );
  420. float flYDelta0 = ooHeight * ( flValue01 - flValue00 );
  421. float flYDelta1 = ooHeight * ( flValue11 - flValue10 );
  422. int nRowCtr = NumRows();
  423. fltx4 *pOut = RowPtr<fltx4>( nAttr, 0 );
  424. size_t nRowToRowStride = RowToRowStep( nAttr ) / sizeof( fltx4 );
  425. do
  426. {
  427. float flXDelta = ooWidth * ( flValue10 - flValue00 );
  428. fltx4 fl4Value;
  429. SubFloat( fl4Value, 0 ) = flValue00;
  430. SubFloat( fl4Value, 1 ) = flValue00 + flXDelta;
  431. SubFloat( fl4Value, 2 ) = flValue00 + flXDelta + flXDelta;
  432. SubFloat( fl4Value, 3 ) = flValue00 + flXDelta + flXDelta + flXDelta;
  433. fltx4 fl4XDelta = ReplicateX4( flXDelta * 4.0 );
  434. int nColCtr = NumQuadsPerRow();
  435. do
  436. {
  437. *( pOut++ ) = fl4Value;
  438. fl4Value = AddSIMD( fl4Value, fl4XDelta );
  439. } while( --nColCtr );
  440. pOut += nRowToRowStride;
  441. flValue00 += flYDelta0;
  442. flValue10 += flYDelta1;
  443. } while ( --nRowCtr );
  444. }
  445. void CSOAContainer::FillAttrWithInterpolatedValues( int nAttr, Vector vecValue00, Vector vecValue10, const Vector &vecValue01, const Vector &vecValue11 ) const
  446. {
  447. float ooWidth = 1.0 / ( NumCols() - 1 );
  448. float ooHeight = 1.0 / ( NumRows() - 1 );
  449. Vector vecYDelta0 = ooHeight * ( vecValue01 - vecValue00 );
  450. Vector vecYDelta1 = ooHeight * ( vecValue11 - vecValue10 );
  451. int nRowCtr = NumRows();
  452. FourVectors *pOut = RowPtr<FourVectors>( nAttr, 0 );
  453. size_t nRowToRowStride = RowToRowStep( nAttr ) / sizeof( FourVectors );
  454. do
  455. {
  456. Vector vecXDelta = ooWidth * ( vecValue10 - vecValue00 );
  457. FourVectors v4Value;
  458. v4Value.LoadAndSwizzle( vecValue00, vecValue00 + vecXDelta,
  459. vecValue00 + vecXDelta + vecXDelta, vecValue00 + vecXDelta + vecXDelta + vecXDelta );
  460. FourVectors v4XDelta;
  461. v4XDelta.DuplicateVector( vecXDelta * 4.0 );
  462. int nColCtr = NumQuadsPerRow();
  463. do
  464. {
  465. *( pOut++ ) = v4Value;
  466. v4Value += v4XDelta;
  467. } while( --nColCtr );
  468. pOut += nRowToRowStride;
  469. vecValue00 += vecYDelta0;
  470. vecValue10 += vecYDelta1;
  471. } while ( --nRowCtr );
  472. }
  473. void CSOAContainer::FillAttr( int nAttr, const Vector &vecValue )
  474. {
  475. FourVectors v4Fill;
  476. v4Fill.DuplicateVector( vecValue );
  477. if ( !HasAllocatedMemory( nAttr ) )
  478. {
  479. FourVectors *pOut = (FourVectors*)m_pAttributePtrs[ nAttr ];
  480. *pOut = v4Fill;
  481. return;
  482. }
  483. AssertDataType( nAttr, ATTRDATATYPE_4V );
  484. FourVectors *pOut = RowPtr<FourVectors>( nAttr, 0 );
  485. size_t nRowToRowStride = RowToRowStep( nAttr ) / sizeof( FourVectors );
  486. int nRowCtr = NumRows() * NumSlices();
  487. do
  488. {
  489. int nColCtr = NumQuadsPerRow();
  490. do
  491. {
  492. *(pOut++) = v4Fill;
  493. } while ( --nColCtr );
  494. pOut += nRowToRowStride;
  495. } while ( --nRowCtr );
  496. }
  497. void CSOAContainer::FillAttrPartial( int nStartRow, int nNumRows, int nStartSlice, int nEndSlice, int nAttr, fltx4 fl4Value )
  498. {
  499. for( int z = nStartSlice; z < nEndSlice; z++ )
  500. {
  501. fltx4 *pOut = RowPtr<fltx4>( nAttr, nStartRow, z );
  502. size_t nRowToRowStride = RowToRowStep( nAttr ) / sizeof( fltx4 );
  503. int nRowCtr = nNumRows;
  504. do
  505. {
  506. int nColCtr = NumQuadsPerRow();
  507. do
  508. {
  509. *(pOut++) = fl4Value;
  510. } while ( --nColCtr );
  511. pOut += nRowToRowStride;
  512. } while ( --nRowCtr );
  513. }
  514. }
  515. void CSOAContainer::FillAttr( int nAttr, float flValue )
  516. {
  517. fltx4 fl4Fill = ReplicateX4( flValue );
  518. if ( !HasAllocatedMemory( nAttr ) )
  519. {
  520. fltx4 *pOut = (fltx4*)m_pAttributePtrs[ nAttr ];
  521. *pOut = fl4Fill;
  522. return;
  523. }
  524. AssertDataType( nAttr, ATTRDATATYPE_FLOAT );
  525. PARALLEL_DISPATCH( FillAttrPartial, nAttr, fl4Fill );
  526. }
  527. float CSOAContainer::SumAttributeValue( int nAttr ) const
  528. {
  529. return ReduceAttr<AddSIMD>( nAttr, Four_Zeros );
  530. }
  531. float CSOAContainer::AverageFloatAttributeValue( int nAttr ) const
  532. {
  533. if ( HasAllocatedMemory( nAttr ) )
  534. {
  535. return SumAttributeValue( nAttr ) / ( NumCols() * NumRows() * NumSlices() );
  536. }
  537. else
  538. {
  539. return FloatValue( nAttr, 0, 0, 0 );
  540. }
  541. }
  542. float CSOAContainer::MaxAttributeValue( int nAttr ) const
  543. {
  544. return ReduceAttr<MaxSIMD>( nAttr, Four_Negative_FLT_MAX );
  545. }
  546. float CSOAContainer::MinAttributeValue( int nAttr ) const
  547. {
  548. return ReduceAttr<MinSIMD>( nAttr, Four_FLT_MAX );
  549. }
  550. void CSOAContainer::NormalizeAttr( int nAttr )
  551. {
  552. AssertDataType( nAttr, ATTRDATATYPE_4V );
  553. FourVectors *pOut = RowPtr<FourVectors>( nAttr, 0 );
  554. size_t nRowToRowStride = RowToRowStep( nAttr ) / sizeof( FourVectors );
  555. int nRowCtr = NumRows() * NumSlices();
  556. do
  557. {
  558. int nColCtr = NumQuadsPerRow();
  559. do
  560. {
  561. FourVectors v4Data = *pOut;
  562. v4Data.VectorNormalize();
  563. *( pOut++ ) = v4Data;
  564. } while ( --nColCtr );
  565. pOut += nRowToRowStride;
  566. } while ( --nRowCtr );
  567. }
  568. void CSOAContainer::MulAttr( CSOAContainer const &src, int nSrcAttr, int nDestAttr )
  569. {
  570. AssertDataType( nDestAttr, ATTRDATATYPE_4V );
  571. src.AssertDataType( nSrcAttr, ATTRDATATYPE_4V );
  572. FourVectors *pOut = RowPtr<FourVectors>( nDestAttr, 0 );
  573. FourVectors *pIn = src.RowPtr<FourVectors>( nSrcAttr, 0 );
  574. size_t nSrcRowToRowStride = src.RowToRowStep( nSrcAttr ) / sizeof( FourVectors );
  575. size_t nRowToRowStride = RowToRowStep( nDestAttr ) / sizeof( FourVectors );
  576. int nRowCtr = NumRows() * NumSlices();
  577. do
  578. {
  579. int nColCtr = NumQuadsPerRow();
  580. do
  581. {
  582. FourVectors rslt = *( pIn++ );
  583. rslt *= *pOut;
  584. *(pOut++) = rslt;
  585. } while ( --nColCtr );
  586. pOut += nRowToRowStride;
  587. pIn += nSrcRowToRowStride;
  588. } while ( --nRowCtr );
  589. }
  590. void CSOAContainer::AddGaussianSRBF( float flWeight, Vector vecDir, int nDirectionAttribute, int nScalarTargetAttribute )
  591. {
  592. AssertDataType( nDirectionAttribute, ATTRDATATYPE_4V );
  593. AssertDataType( nScalarTargetAttribute, ATTRDATATYPE_FLOAT );
  594. fltx4 fl4Weight = ReplicateX4( flWeight );
  595. FourVectors v4Dir;
  596. v4Dir.DuplicateVector( vecDir );
  597. FourVectors *pDirIn = RowPtr<FourVectors>( nDirectionAttribute, 0 );
  598. size_t nRowToRowStride = RowToRowStep( nDirectionAttribute ) / sizeof( FourVectors );
  599. fltx4 *pTarget = RowPtr<fltx4>( nScalarTargetAttribute, 0 );
  600. size_t nRowToRowStrideTarget = RowToRowStep( nScalarTargetAttribute ) / sizeof( fltx4 );
  601. int nRowCtr = NumRows() * NumSlices();
  602. do
  603. {
  604. int nColCtr = NumQuadsPerRow();
  605. do
  606. {
  607. FourVectors v4InDir = *( pDirIn++ );
  608. fltx4 fl4ExpDot = NatExpSIMD( v4Dir * v4InDir );
  609. fltx4 fl4Addend = MulSIMD( fl4Weight, fl4ExpDot );
  610. fl4Addend = AddSIMD( fl4Addend, *( pTarget ) );
  611. *( pTarget++ ) = fl4Addend;
  612. } while ( --nColCtr );
  613. pDirIn += nRowToRowStride;
  614. pTarget += nRowToRowStrideTarget;
  615. } while ( --nRowCtr );
  616. }
  617. void CSOAContainer::AddGaussianSRBF( Vector vecWeight, Vector vecDir, int nDirectionAttribute,
  618. int nVectorTargetAttribute )
  619. {
  620. AssertDataType( nDirectionAttribute, ATTRDATATYPE_4V );
  621. AssertDataType( nVectorTargetAttribute, ATTRDATATYPE_4V );
  622. FourVectors v4Weight;
  623. v4Weight.DuplicateVector( vecWeight );
  624. FourVectors v4Dir;
  625. v4Dir.DuplicateVector( vecDir );
  626. FourVectors *pDirIn = RowPtr<FourVectors>( nDirectionAttribute, 0 );
  627. size_t nRowToRowStride = RowToRowStep( nDirectionAttribute ) / sizeof( FourVectors );
  628. FourVectors *pTarget = RowPtr<FourVectors>( nVectorTargetAttribute, 0 );
  629. int nRowCtr = NumRows() * NumSlices();
  630. do
  631. {
  632. int nColCtr = NumQuadsPerRow();
  633. do
  634. {
  635. fltx4 fl4ExpDot = NatExpSIMD( *( pDirIn++ ) * v4Dir );
  636. FourVectors v4Addend = v4Weight;
  637. v4Addend *= fl4ExpDot;
  638. *( pTarget++ ) += v4Addend;
  639. } while ( --nColCtr );
  640. pDirIn += nRowToRowStride;
  641. pTarget += nRowToRowStride;
  642. } while ( --nRowCtr );
  643. }
  644. enum EResampleHorzMode {
  645. HMODE_DOWNSAMPLE_4X,
  646. HMODE_DOWNSAMPLE_2X,
  647. HMODE_DOWNSAMPLE_1X,
  648. };
  649. template<EResampleHorzMode M, class T> void ResampleAttributeInternal( CSOAContainer &src, CSOAContainer &dst, int nAttr )
  650. {
  651. // we'll just point sample in rows + slices. Within a row, we need do do simd expand/no-expand
  652. for( int s = 0; s < dst.NumSlices(); s++ )
  653. {
  654. int srcs = (int)RemapVal( s, 0, dst.NumSlices() - 1, 0, src.NumSlices() - 1 );
  655. for( int r = 0; r < dst.NumRows(); r++ )
  656. {
  657. int srcr = (int)RemapVal( r, 0, dst.NumRows() - 1, 0, src.NumRows() - 1 );
  658. T *pSrc = src.RowPtr<T>( nAttr, srcr, srcs );
  659. T *pDest = dst.RowPtr<T>( nAttr, r, s );
  660. int n = dst.NumQuadsPerRow();
  661. if ( M == HMODE_DOWNSAMPLE_4X )
  662. {
  663. do
  664. {
  665. *( pDest++ ) = Compress4SIMD( pSrc[0], pSrc[1], pSrc[2], pSrc[3] );
  666. pSrc += 4;
  667. } while( --n );
  668. }
  669. if ( M == HMODE_DOWNSAMPLE_2X )
  670. {
  671. do
  672. {
  673. *( pDest++ ) = CompressSIMD( pSrc[0], pSrc[1] );
  674. pSrc += 2;
  675. } while( --n );
  676. }
  677. if ( M == HMODE_DOWNSAMPLE_1X )
  678. {
  679. memcpy( pDest, pSrc, n * sizeof( T ) );
  680. }
  681. }
  682. }
  683. }
  684. template<class T> void ResampleAttributeInternalDType( CSOAContainer &src, CSOAContainer &dst, int nAttr )
  685. {
  686. int nSrcW = src.NumCols();
  687. int nDstW = dst.NumCols();
  688. if ( nSrcW == nDstW )
  689. {
  690. ResampleAttributeInternal<HMODE_DOWNSAMPLE_1X, T>( src, dst, nAttr );
  691. }
  692. else
  693. {
  694. if ( nSrcW == ( nDstW << 2 ) )
  695. {
  696. ResampleAttributeInternal<HMODE_DOWNSAMPLE_4X, T>( src, dst, nAttr );
  697. }
  698. else
  699. {
  700. if ( nSrcW == ( nDstW << 1 ) )
  701. {
  702. ResampleAttributeInternal<HMODE_DOWNSAMPLE_2X, T>( src, dst, nAttr );
  703. }
  704. }
  705. }
  706. }
  707. void CSOAContainer::ResampleAttribute( CSOAContainer &src, int nAttr )
  708. {
  709. if ( m_nDataType[nAttr] == ATTRDATATYPE_FLOAT )
  710. {
  711. ResampleAttributeInternalDType<fltx4>( src, *this, nAttr );
  712. }
  713. else
  714. {
  715. if ( m_nDataType[nAttr] == ATTRDATATYPE_4V )
  716. {
  717. ResampleAttributeInternalDType<FourVectors>( src, *this, nAttr );
  718. }
  719. }
  720. }
  721. struct KMeansQuantizationWorkUnit
  722. {
  723. CSOAContainer *m_pContainer;
  724. int m_nRowIndex;
  725. int m_nNumResultsDesired;
  726. IKMeansErrorMetric *m_pErrorCalculator;
  727. int const *m_pFieldIndices;
  728. int m_nNumFields;
  729. int m_nFieldToStoreIndexInto;
  730. KMeansQuantizedValue *m_pOutValues;
  731. int m_nErrorChannel;
  732. void Process( void );
  733. };
  734. static void DoKMeansWork( KMeansQuantizationWorkUnit &jobDesc )
  735. {
  736. jobDesc.Process();
  737. }
  738. void KMeansQuantizationWorkUnit::Process( void )
  739. {
  740. FourVectors v4SamplePositions;
  741. for( int nZ = 0; nZ < m_pContainer->NumSlices(); nZ++ )
  742. {
  743. v4SamplePositions.z = ReplicateX4( nZ );
  744. for( int nY = m_nRowIndex; nY < m_pContainer->NumRows(); nY += QUANTIZER_NJOBS )
  745. {
  746. v4SamplePositions.y = ReplicateX4( nY );
  747. KMeansSampleDescriptor samples;
  748. for( int c = 0; c < m_nNumFields; c++ )
  749. {
  750. samples.m_pInputValues[c] = m_pContainer->RowPtr<fltx4>( m_pFieldIndices[c], nY, nZ );
  751. }
  752. fltx4 *pIndexOut = m_pContainer->RowPtr<fltx4>( m_nFieldToStoreIndexInto, nY, nZ );
  753. fltx4 *pErrOut = NULL;
  754. if ( m_nErrorChannel != -1 )
  755. {
  756. pErrOut = m_pContainer->RowPtr<fltx4>( m_nErrorChannel, nY, nZ );
  757. }
  758. v4SamplePositions.x = g_SIMD_0123;
  759. // simd closest match search
  760. int nXSize = m_pContainer->NumQuadsPerRow();
  761. do
  762. {
  763. fltx4 fl4SampleIdx = Four_Zeros;
  764. fltx4 fl4ClosestError = Four_FLT_MAX;
  765. fltx4 fl4BestSampleIdx = Four_Zeros;
  766. for( int n = 0; n < m_nNumResultsDesired; n++ )
  767. {
  768. fltx4 fl4TrialError;
  769. m_pErrorCalculator->CalculateError( samples, v4SamplePositions, m_pOutValues[n], &fl4TrialError );
  770. // find which samples got a closest match from this comparison
  771. bi32x4 fl4BetterMask = CmpLeSIMD( fl4TrialError, fl4ClosestError );
  772. fl4BestSampleIdx = MaskedAssign( fl4BetterMask, fl4SampleIdx, fl4BestSampleIdx );
  773. fl4ClosestError = MaskedAssign( fl4BetterMask, fl4TrialError, fl4ClosestError );
  774. fl4SampleIdx = AddSIMD( fl4SampleIdx, Four_Ones );
  775. }
  776. // now, we have found the best match for 4 sample values. Need to update output indices and statistics
  777. *( pIndexOut++ ) = fl4BestSampleIdx;
  778. if ( pErrOut )
  779. {
  780. *( pErrOut++ ) = fl4ClosestError;
  781. }
  782. // unfortunately, we can not quite simd this because of needing scatter
  783. for( int s = 0; s < 4; s++ )
  784. {
  785. int nIdx = ( int )SubFloat( fl4BestSampleIdx, s );
  786. for( int c = 0; c < m_nNumFields; c++ )
  787. {
  788. m_pOutValues[nIdx].m_flValueAccumulators[m_nRowIndex][c] += SubFloat( *samples.m_pInputValues[c], s );
  789. }
  790. m_pOutValues[nIdx].m_flWeightAccumulators[m_nRowIndex] += 1.0;
  791. }
  792. for( int c = 0; c < m_nNumFields; c++ )
  793. {
  794. samples.m_pInputValues[c]++;
  795. }
  796. fl4SampleIdx = AddSIMD( fl4SampleIdx, Four_Ones );
  797. v4SamplePositions.x = AddSIMD( v4SamplePositions.x, Four_Fours );
  798. } while( -- nXSize );
  799. }
  800. }
  801. }
  802. // kmeans quantization
  803. void CSOAContainer:: KMeansQuantization( int const *pFieldIndices, int nNumFields,
  804. KMeansQuantizedValue *pOutValues,
  805. int nNumResultsDesired, IKMeansErrorMetric *pErrorCalculator,
  806. int nFieldToStoreIndexInto, int nNumIterations,
  807. int nChannelToReceiveErrorSignal )
  808. {
  809. // first, initialize trial samples randomly
  810. HaltonSequenceGenerator_t xSequence( 13 );
  811. HaltonSequenceGenerator_t ySequence( 17 );
  812. HaltonSequenceGenerator_t zSequence( 23 );
  813. for( int i = 0; i < nNumResultsDesired; i++ )
  814. {
  815. int nX = ( int )( ( NumCols() - 1 ) * xSequence.NextValue() );
  816. int nY = ( int )( ( NumRows() - 1 ) * ySequence.NextValue() );
  817. int nZ = ( int )( ( NumSlices() - 1 ) * zSequence.NextValue() );
  818. pOutValues[i].m_vecValuePosition.DuplicateVector( Vector( nX, nY, nZ ) );
  819. for( int c = 0; c < nNumFields; c++ )
  820. {
  821. pOutValues[i].m_fl4Values[c] = ReplicateX4( FloatValue( pFieldIndices[c], nX, nY, nZ ) );
  822. }
  823. }
  824. // now,. run iterations
  825. while( nNumIterations-- )
  826. {
  827. for( int i = 0; i < nNumResultsDesired; i++ )
  828. {
  829. memset( pOutValues[i].m_flValueAccumulators, 0, sizeof( pOutValues[i].m_flValueAccumulators ) );
  830. memset( pOutValues[i].m_flWeightAccumulators, 0, sizeof( pOutValues[i].m_flWeightAccumulators ) );
  831. }
  832. // now, find the closest matches for all data samples, in parallel
  833. KMeansQuantizationWorkUnit jobs[QUANTIZER_NJOBS];
  834. for( int i = 0; i < QUANTIZER_NJOBS; i++ )
  835. {
  836. jobs[i].m_pContainer = this;
  837. jobs[i].m_nRowIndex = i;
  838. jobs[i].m_nNumResultsDesired = nNumResultsDesired;
  839. jobs[i].m_pErrorCalculator = pErrorCalculator;
  840. jobs[i].m_pFieldIndices = pFieldIndices;
  841. jobs[i].m_nNumFields = nNumFields;
  842. jobs[i].m_nFieldToStoreIndexInto = nFieldToStoreIndexInto;
  843. jobs[i].m_pOutValues = pOutValues;
  844. jobs[i].m_nErrorChannel = nChannelToReceiveErrorSignal;
  845. }
  846. ParallelProcess( jobs, ARRAYSIZE( jobs ), DoKMeansWork );
  847. if ( nNumIterations ) // don't refine the results after the last pass
  848. {
  849. for( int n = 0; n < nNumResultsDesired; n++ )
  850. {
  851. // accumulate over all threads
  852. for( int j = 1; j < QUANTIZER_NJOBS; j++ )
  853. {
  854. pOutValues[n].m_flWeightAccumulators[0] += pOutValues[n].m_flWeightAccumulators[j];
  855. for( int c = 0; c < nNumFields; c++ )
  856. {
  857. pOutValues[n].m_flValueAccumulators[0][c] += pOutValues[n].m_flValueAccumulators[j][c];
  858. }
  859. }
  860. // re-adjust quantized values
  861. float flOOWeight = 1.0 / MAX( FLT_EPSILON, pOutValues[n].m_flWeightAccumulators[0] );
  862. for( int c = 0; c < nNumFields; c++ )
  863. {
  864. pOutValues[n].m_fl4Values[c] = ReplicateX4( pOutValues[n].m_flValueAccumulators[0][c] * flOOWeight );
  865. }
  866. pErrorCalculator->PostAdjustQuantizedValue( pOutValues[n] );
  867. }
  868. pErrorCalculator->PostStep( pFieldIndices, nNumFields, pOutValues, nNumResultsDesired, nFieldToStoreIndexInto, *this );
  869. }
  870. }
  871. }
  872. #define THRESH 0.9
  873. void CSOAContainer::UpdateDistanceRow( int nSearchRadius, int nMinX, int nMaxX, int nY, int nZ,
  874. int nSrcField, int nDestField )
  875. {
  876. float const *pDataIn = RowPtr<float>( nSrcField, nY, nZ ) + nMinX;
  877. float *pDataOut = RowPtr<float>( nDestField, nY, nZ ) + nMinX;
  878. int nStartY = MAX( 0, nY - nSearchRadius );
  879. int nEndY = MIN( NumRows() - 1, nY + nSearchRadius );
  880. int nStartZ = MAX( 0, nZ - nSearchRadius );
  881. int nEndZ = MIN( NumSlices() - 1, nZ + nSearchRadius );
  882. fltx4 fl4Thresh = ReplicateX4( THRESH );
  883. for( int x = nMinX; x <= nMaxX; x++ )
  884. {
  885. float flReferenceValue = *( pDataIn++ );
  886. // map it to 0 or 1
  887. fltx4 fl4ReferenceValue = ( flReferenceValue > THRESH ) ? Four_Ones: Four_Zeros;
  888. fltx4 fl4ClosestDistance = ReplicateX4( nSearchRadius );
  889. // now, we need to walk over a (3d) window around the sample
  890. int nStartX = MAX( 0, x - nSearchRadius );
  891. int nEndX = MIN( NumCols() - 1, x + nSearchRadius );
  892. // pad to simd values
  893. nStartX = nStartX & ~3;
  894. nEndX = nEndX & ~3;
  895. int nCount = 1 + ( ( nEndX - nStartX ) / 4 );
  896. for( int z1 = nStartZ; z1 <= nEndZ; z1++ )
  897. {
  898. for( int y1 = nStartY; y1 <= nEndY; y1++ )
  899. {
  900. fltx4 fl4YZDist = ReplicateX4( ( y1 - nY ) * ( y1 - nY ) + ( z1 - nZ ) * ( z1 - nZ ) );
  901. fltx4 fl4SrcXDiff = AddSIMD( ReplicateX4( nStartX - x ), g_SIMD_0123 );
  902. fltx4 *pfl4SrcData = RowPtr<fltx4>( nSrcField, y1, z1 ) + ( nStartX / 4 );
  903. for( int x1 = 0; x1 < nCount; x1++ )
  904. {
  905. // fetch the source data, mapping it to 1 or 0.
  906. fltx4 fl4SrcData = *( pfl4SrcData++ );
  907. fl4SrcData = MaskedAssign( CmpGtSIMD( fl4SrcData, fl4Thresh ), Four_Ones, Four_Zeros );
  908. fltx4 fl4Distance = SqrtSIMD( AddSIMD( MulSIMD( fl4SrcXDiff, fl4SrcXDiff ), fl4YZDist ) );
  909. fl4ClosestDistance = MaskedAssign(
  910. AndNotSIMD( CmpEqSIMD( fl4SrcData, fl4ReferenceValue ), CmpLtSIMD( fl4Distance, fl4ClosestDistance ) ),
  911. fl4Distance, fl4ClosestDistance );
  912. fl4SrcXDiff = AddSIMD( fl4SrcXDiff, Four_Fours );
  913. }
  914. }
  915. }
  916. // we have found the closest different voxel. store it
  917. float flClosestDistance = MIN( MIN( SubFloat( fl4ClosestDistance, 0 ), SubFloat( fl4ClosestDistance, 1 ) ),
  918. MIN( SubFloat( fl4ClosestDistance, 2 ), SubFloat( fl4ClosestDistance, 3 ) ) );
  919. flClosestDistance = MIN( flClosestDistance, nSearchRadius );
  920. if ( flReferenceValue <= THRESH )
  921. {
  922. flClosestDistance = -flClosestDistance;
  923. }
  924. *( pDataOut++ ) = flClosestDistance;
  925. }
  926. }
  927. void CSOAContainer::GenerateDistanceField( int nSrcField, int nDestField,
  928. int nMaxDistance,
  929. Rect3D_t *pRect )
  930. {
  931. int nMinX, nMaxX, nMinY, nMaxY, nMinZ, nMaxZ;
  932. if ( pRect )
  933. {
  934. nMinX = pRect->x;
  935. nMinY = pRect->y;
  936. nMinZ = pRect->z;
  937. nMaxX = nMinX + pRect->width - 1;
  938. nMaxY = nMinY + pRect->height - 1;
  939. nMaxZ = nMinZ + pRect->depth;
  940. }
  941. else
  942. {
  943. nMinX = nMinY = nMinZ = 0;
  944. nMaxX = NumCols() - 1;
  945. nMaxY = NumRows() - 1;
  946. nMaxZ = NumSlices() - 1;
  947. }
  948. nMinX -= nMaxDistance;
  949. nMinZ -= nMaxDistance;
  950. nMinY -= nMaxDistance;
  951. nMinX = MAX( 0, nMinX );
  952. nMinY = MAX( 0, nMinY );
  953. nMinZ = MAX( 0, nMinZ );
  954. nMaxX += nMaxDistance;
  955. nMaxY += nMaxDistance;
  956. nMaxZ += nMaxDistance;
  957. nMaxX = MIN( NumCols() - 1, nMaxX );
  958. nMaxY = MIN( NumRows() - 1, nMaxY );
  959. nMaxZ = MIN( NumSlices() - 1, nMaxZ );
  960. if ( pRect ) // update rect?
  961. {
  962. pRect->x = nMinX;
  963. pRect->y = nMinY;
  964. pRect->z = nMaxZ;
  965. pRect->width = 1 + nMaxX - nMinX;
  966. pRect->height = 1 + nMaxY - nMinY;
  967. pRect->depth = 1 + nMaxZ - nMinZ;
  968. }
  969. CCallQueue workList;
  970. for( int z = nMinZ; z <= nMaxZ; z++ )
  971. {
  972. for( int y = nMinY; y <= nMaxY; y++ )
  973. {
  974. workList.QueueCall( this, &CSOAContainer::UpdateDistanceRow,
  975. nMaxDistance, nMinX, nMaxX, y, z, nSrcField, nDestField );
  976. }
  977. }
  978. workList.ParallelCallQueued();
  979. }
  980. void CSOAContainer::CopyRegionFrom( CSOAContainer const &src, int nSrcAttr, int nDestAttr,
  981. int nSrcMinX, int nSrcMaxX, int nSrcMinY, int nSrcMaxY, int nSrcMinZ, int nSrcMaxZ,
  982. int nDestX, int nDestY, int nDestZ )
  983. {
  984. Assert( HasAllocatedMemory( nDestAttr ) );
  985. Assert( src.HasAllocatedMemory( nSrcAttr ) );
  986. Assert( ItemByteStride( nDestAttr ) == src.ItemByteStride( nSrcAttr ) );
  987. size_t nRowSize = ( 1 + nSrcMaxX - nSrcMinX ) * ItemByteStride( nDestAttr );
  988. for( int z = nSrcMinZ; z <= nSrcMaxZ; z++ )
  989. {
  990. for( int y = nSrcMinY; y <= nSrcMaxY; y++ )
  991. {
  992. uint8 const *pSrc = src.RowPtr<uint8>( nSrcAttr, y,z ) + nSrcMinX * ItemByteStride( nDestAttr );
  993. uint8 *pDest = RowPtr<uint8>( nDestAttr, y + nDestY - nSrcMinY, z + nDestZ - nSrcMinZ ) + nDestX * ItemByteStride( nDestAttr );
  994. memcpy( pDest, pSrc, nRowSize );
  995. }
  996. }
  997. }
  998. void CSOAContainer::CopyRegionFrom( CSOAContainer const &src,
  999. int nSrcMinX, int nSrcMaxX, int nSrcMinY, int nSrcMaxY, int nSrcMinZ, int nSrcMaxZ,
  1000. int nDestX, int nDestY, int nDestZ )
  1001. {
  1002. for( int i = 0; i < MAX_SOA_FIELDS; i++ )
  1003. {
  1004. if ( src.HasAllocatedMemory( i ) && ( HasAllocatedMemory( i ) ) && ( ItemByteStride( i ) == src.ItemByteStride( i ) ) )
  1005. {
  1006. CopyRegionFrom( src, i, i, nSrcMinX, nSrcMaxX, nSrcMinY, nSrcMaxY, nSrcMinZ, nSrcMaxZ, nDestX, nDestY, nDestZ );
  1007. }
  1008. }
  1009. }