Counter Strike : Global Offensive Source Code
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

753 lines
19 KiB

//========= Copyright (c) 1996-2004, Valve LLC, All rights reserved. ============
//
// This is the common include file to be included in SPU jobs.
// It takes care to remap/emulate some SPU-specific functionality on PPU
//
  6. #ifndef PS3_SPU_JOB_SHARED_HDR
  7. #define PS3_SPU_JOB_SHARED_HDR
  8. #ifdef _PS3
  9. #include <ps3/ps3_platform.h>
  10. #include <cell/spurs/job_chain.h>
  11. #include <cell/spurs/job_queue.h>
  12. #include <cell/spurs/job_queue_port2.h>
  13. #include <cell/dma/types.h>
  14. //
  15. // NOTE: Enable the following block for debugging GCM on SPU; works as of SDK 350
  16. //
  17. #if 0 && defined( __SPU__ )
  18. #include <cell/gcm/gcm_macros.h>
  19. #undef CELL_GCM_ASSERT
  20. #undef CELL_GCM_ASSERTS
  21. #define CELL_GCM_ASSERT(condition) Assert( condition )
  22. #define CELL_GCM_ASSERTS(condition, description) AssertSpuMsg( condition, description )
  23. #define CELL_GCM_ASSERT_ENABLE
  24. #endif
  25. enum DmaTagEnum_t
  26. {
  27. DMATAG_SYNC = 2, // used for synchronous transfers, where we need the transfer to finish very soon/immediately after issuing
  28. DMATAG_TEXTURES = 3,
  29. DMATAG_SHADERS = 4,
  30. DMATAG_SCRATCH = 5, // used for DMA PUTs from Scratch memory, so we need to wait for this to finish before job finishes
  31. // each jobchain needs 2 dma tags, up to tag 30
  32. // DMATAG_EDGE_JOBCHAIN = 8,
  33. // DMATAG_FPCP_JOBCHAIN = 10,
  34. // DMATAG_GCM_JOBCHAIN = 12,
  35. DMATAG_ANIM = 8, // non immediate dma's
  36. DMATAG_BUILDINDICES = 8,
  37. DMATAG_BUILDRENDERABLES = 8,
  38. }; // shouldn't overlap with the tags used by the workload
  39. // Enable this define to disable assert. This may be necessary to detect timing issues in DEBUG and RELEASE,
  40. // or incorrectly generated code from compiler. When LSGUARD is enabled, we disable asserts to force potential issues.
  41. #ifdef USE_LSGUARD
  42. # define DISABLE_ASSERT
  43. #endif
  44. template <typename T>
  45. inline T* AddBytes( T* p, int nBytes )
  46. {
  47. return ( T* )( int( p ) + nBytes );
  48. }
  49. template <typename T>
  50. inline T Min( T a, T b )
  51. {
  52. return a < b ? a : b;
  53. }
  54. template <typename T>
  55. inline T Max( T a, T b )
  56. {
  57. return a > b ? a : b;
  58. }
  59. template <typename T>
  60. inline void Swap( T& a , T & b )
  61. {
  62. T c = a; a = b; b = c;
  63. }
  64. // <sergiy> should I port platform.h to SPU?
  65. #ifdef SPU
  66. #include <cell/spurs/job_context.h>
  67. #include "cell/spurs/common.h"
  68. #include <cell/atomic.h>
  69. #include <spu_intrinsics.h>
  70. #include <vmx2spu.h>
  71. #define PPU_ONLY(X)
  72. #define SPU_ONLY(X) X
  73. #define vector __vector
  74. void CheckBufferOverflow_Impl();
  75. void CheckDmaGet_Impl( const void * pBuffer, size_t nSize );
  76. #if defined(_CERT) || defined(DISABLE_ASSERT)
  77. # define VjobSpuLog(...)
  78. # define DebuggerBreak()
  79. # define Warning(...)
  80. # define CheckBufferOverflow()
  81. # define CheckDmaGet(p, size)
  82. #else
  83. # include <spu_printf.h>
  84. # define VjobSpuLog( MSG, ... ) spu_printf( "[%d]" MSG, cellSpursGetCurrentSpuId(), ##__VA_ARGS__ )
  85. # define Msg( MSG, ... ) spu_printf( "[%d]" MSG, cellSpursGetCurrentSpuId(), ##__VA_ARGS__ )
  86. #ifndef BASETYPES_H
  87. #define DebuggerBreak() __asm volatile ("stopd $0,$0,$0")
  88. #endif
  89. # define Warning( MSG, ... ) spu_printf( "[%d] Warning: " MSG, cellSpursGetCurrentSpuId(), ##__VA_ARGS__ )
  90. # define CELL_DMA_ASSERT_VERBOSE
  91. # define CheckBufferOverflow() CheckBufferOverflow_Impl()
  92. # define CheckDmaGet(p, size) CheckDmaGet_Impl( p, size )
  93. #endif
  94. #define LWSYNC_PPU_ONLY()
  95. #define VJOB_IOBUFFER_DMATAG g_stInfo->dmaTag // fake DMA tag
  96. #include <cell/spurs/common.h>
  97. #define VjobDmaPut cellDmaPut
  98. #define VjobDmaGet cellDmaGet
  99. #define VjobDmaGetf cellDmaGetf
  100. #define VjobDmaListGet cellDmaListGet
  101. #define VjobDmaLargePut cellDmaLargePut
  102. #define VjobDmaLargePutf cellDmaLargePutf
  103. //#define VjobDmaLargePutb cellDmaLargePutb
  104. #define VjobDmaPutf cellDmaPutf
  105. #define VjobDmaSmallPut cellDmaSmallPut
  106. #define VjobDmaSmallPutf cellDmaSmallPutf
  107. //#define VjobDmaSmallPutb cellDmaSmallPutb
  108. #define VjobDmaSmallGet cellDmaSmallGet
  109. #define VjobWaitTagStatusAll cellDmaWaitTagStatusAll
  110. #define VjobWaitTagStatusImmediate cellDmaWaitTagStatusImmediate
  111. #define VjobDmaGetUint32 cellDmaGetUint32
  112. #define VjobDmaPutUint32 cellDmaPutUint32
  113. #define VjobDmaGetUint64 cellDmaGetUint64
  114. #define VjobDmaPutUint64 cellDmaPutUint64
  115. #define VjobDmaUnalignedPutf cellDmaUnalignedPutf
  116. #define VjobDmaUnalignedPut cellDmaUnalignedPut
  117. #define VjobDmaPutfUintTemplate(SIZE, value, ea, tag, tid, rid) \
  118. do { \
  119. uint64_t __cellDma_ea = ea; \
  120. uint32_t __cellDma_tag = tag; \
  121. qword _buf = (qword)spu_splats(value); \
  122. cellDmaDataAssert(__cellDma_ea,sizeof(uint##SIZE##_t),__cellDma_tag); \
  123. cellDmaAndWait(cellDmaEa2Ls(__cellDma_ea,&_buf),__cellDma_ea,sizeof(uint##SIZE##_t),__cellDma_tag,MFC_CMD_WORD(tid,rid,MFC_PUTF_CMD)); \
  124. } while(0)
  125. #define VjobDmaPutfUint8(value, ea, tag) cellDmaPutUintTemplate(8, ((uint8_t)value), ea, tag, 0, 0)
  126. #define VjobDmaPutfUint16(value, ea, tag) cellDmaPutUintTemplate(16, ((uint16_t)value), ea, tag, 0, 0)
  127. #define VjobDmaPutfUint32(value, ea, tag) cellDmaPutUintTemplate(32, ((uint32_t)value), ea, tag, 0, 0)
  128. #define VjobDmaPutfUint64(value, ea, tag) cellDmaPutUintTemplate(64, ((uint64_t)value), ea, tag, 0, 0)
  129. #define VjobSpuId() int( cellSpursGetCurrentSpuId() )
  130. #define V_memset __builtin_memset
  131. #define V_memcpy __builtin_memcpy
  132. #if !defined ARRAYSIZE
  133. #define ARRAYSIZE( ARRAY ) ( sizeof( ARRAY ) / sizeof( ( ARRAY )[0] ) )
  134. #endif
  135. typedef signed int int32;
  136. typedef unsigned int uint;
  137. typedef signed char int8;
  138. typedef unsigned char uint8;
  139. typedef signed short int16;
  140. typedef unsigned short uint16;
  141. typedef signed int int32;
  142. typedef unsigned int uint32;
  143. typedef signed long long int64;
  144. typedef unsigned long long uint64;
  145. typedef unsigned int uintp;
  146. typedef vector float fltx4 ;
  147. #define INT_MAX 0x7fffffff
  148. #define DECL_ALIGN(x) __attribute__( ( aligned( x ) ) )
  149. #ifndef BASETYPES_H
  150. #define ALIGN16 DECL_ALIGN(16)
  151. #define ALIGN16_POST
  152. #define ALIGN128 DECL_ALIGN(128)
  153. #define ALIGN128_POST
  154. template <typename T>
  155. inline T AlignValue( T val, uintp alignment )
  156. {
  157. return ( T )( ( ( uintp )val + alignment - 1 ) & ~( alignment - 1 ) );
  158. }
  159. #define ALIGN_VALUE( val, alignment ) ( ( val + alignment - 1 ) & ~( alignment - 1 ) )
  160. inline bool IsPowerOfTwo( uint x )
  161. {
  162. return ( x & ( x - 1 ) ) == 0;
  163. }
  164. #endif
  165. #define FORCEINLINE inline /* __attribute__ ((always_inline)) */
  166. #define IsPlatformPS3() 1
  167. #define IsPlatformPS3_PPU() 0
  168. #define IsPlatformPS3_SPU() 1
  169. #define IsPlatformX360() 0
  170. #define IsPlatformOSX() 0
  171. #if !defined RESTRICT
  172. #define RESTRICT
  173. #endif
  174. #define V_memset __builtin_memset
  175. #define V_memcpy __builtin_memcpy
  176. inline void VjobPpuRereadEA( uintp ea ){}
  177. #if defined(_CERT) || defined(DISABLE_ASSERT)
  178. #define Assert(x) ((void)(0))
  179. #define AssertSpuMsg(x,MSG,...)((void)0)
  180. #ifndef DBG_H
  181. #define COMPILE_TIME_ASSERT( pred ) // to avoid any unpredictable affects in the optimizer
  182. #endif
  183. #else
  184. #define DBGFLAG_ASSERT
  185. #ifndef DBG_H
  186. #define Assert(x) do{if( !( x ) ) { spu_printf( "Assert on SPU[%d](" #x ")\n", cellSpursGetCurrentSpuId() ); DebuggerBreak(); } }while(0)
  187. #endif
  188. #define AssertSpuMsg(x,MSG,...) do{if( !( x ) ) { spu_printf( "Assert on SPU[%d](" #x "), " MSG, cellSpursGetCurrentSpuId(), ## __VA_ARGS__ ); DebuggerBreak(); } }while(0)
  189. #ifndef DBG_H
  190. #define COMPILE_TIME_ASSERT( pred ) switch(0){case 0:case pred:;}
  191. #endif
  192. #endif
  193. // mimic the PPU class on SPU
  194. // template< int bytesAlignment, class T >
  195. // class CAlignedNewDelete : public T
  196. // {public:
  197. // }
  198. // WARNING: SLOWNESS. DO NOT USE IN PRODUCTION.
  199. inline void DebugMemcpyEa( uint eaDest, uint eaSrc, uint nSize, void *lsScratch )
  200. {
  201. Assert( ! ( 0xF & ( eaSrc | eaDest | nSize ) ) );
  202. uint nBytesLeft = nSize, nOffset = 0;
  203. while( nBytesLeft )
  204. {
  205. uint nChunk = Min<uint>( 16 * 1024, nBytesLeft );
  206. VjobDmaGet( lsScratch, eaSrc + nOffset, nChunk, DMATAG_SYNC, 0, 0 );
  207. VjobWaitTagStatusAll( 1 << DMATAG_SYNC );
  208. VjobDmaPut( lsScratch, eaDest + nOffset, nChunk, DMATAG_SYNC, 0, 0 );
  209. VjobWaitTagStatusAll( 1 << DMATAG_SYNC );
  210. nBytesLeft -= nChunk;
  211. nOffset += nChunk;
  212. }
  213. }
  214. #define vec_to_uint32(X) si_to_uint( ( qword )( X ) )
  215. #define VjobQueuePort2PushJob( eaPort, eaJob, sizeDesc, tag, dmaTag, flag ) cellSpursJobQueuePort2PushJob( (uintp)( eaPort ), (uintp)( eaJob ), ( sizeDesc ), ( tag ), ( dmaTag ), ( flag ) )
  216. #define VjobQueuePort2PushSync( eaPort2, tagMask, dmaTag, flag ) cellSpursJobQueuePort2PushSync( ( uintp ) ( eaPort2), ( tagMask ), ( dmaTag ), ( flag ) )
  217. inline void VjobQueuePort2PushJobBlocking( CellSpursJobQueuePort2 *eaPort2, CellSpursJobHeader *eaJob, size_t sizeDesc, uint nQueueTag, uint nDmaTag )
  218. {
  219. int nError;
  220. for(;;)
  221. {
  222. nError = cellSpursJobQueuePort2PushJob( uintp( eaPort2 ), uintp( eaJob ) , sizeDesc, nQueueTag, nDmaTag, CELL_SPURS_JOBQUEUE_FLAG_NON_BLOCKING );
  223. if( nError != CELL_SPURS_JOB_ERROR_AGAIN )
  224. {
  225. break;
  226. }
  227. }
  228. if ( nError != CELL_OK )
  229. {
  230. VjobSpuLog( "Cannot push job, error %d. RSX is going to hang, then SPUs, then PPU.\n", nError );
  231. DebuggerBreak();
  232. }
  233. }
  234. inline void VjobQueuePort2PushSyncBlocking( CellSpursJobQueuePort2 *eaPort2, unsigned tagMask, uint nDmaTag )
  235. {
  236. int nError;
  237. for(;;)
  238. {
  239. nError = cellSpursJobQueuePort2PushSync( uintp( eaPort2 ), tagMask, nDmaTag, CELL_SPURS_JOBQUEUE_FLAG_NON_BLOCKING );
  240. if( nError != CELL_SPURS_JOB_ERROR_AGAIN )
  241. {
  242. break;
  243. }
  244. }
  245. if ( nError != CELL_OK )
  246. {
  247. VjobSpuLog( "Cannot push job, error %d. RSX is going to hang, then SPUs, then PPU.\n", nError );
  248. DebuggerBreak();
  249. }
  250. }
  251. #else
  252. #include "tier0/platform.h"
  253. #include "tier1/strtools.h"
  254. #include "mathlib/ssemath.h"
  255. #include <altivec.h>
  256. #include <cell/spurs/job_context_types.h>
  257. inline uint32_t GetCurrentSpuId()
  258. {
  259. return 0xFFFFFFFF;
  260. }
  261. using namespace ::cell::Spurs;
  262. extern void VjobSpuLog( const char * p, ... );
  263. #define VJOB_IOBUFFER_DMATAG 0 // fake DMA tag
  264. #define PPU_ONLY(X) X
  265. #define SPU_ONLY(X)
  266. #ifdef _DEBUG
  267. #define AssertSpuMsg(x,MSG,...) do { if( !( x ) ) { Warning( "Assert(" #x "), " MSG, ## __VA_ARGS__ ); DebuggerBreak(); } }while( 0 )
  268. #else
  269. #define AssertSpuMsg(x,MSG,...)
  270. #endif
  271. #define VjobQueuePort2PushJob( eaPort, eaJob, sizeDesc, tag, dmaTag, flag ) cellSpursJobQueuePort2PushJob( (CellSpursJobQueuePort2 *)( eaPort ), (CellSpursJobHeader *)( eaJob ), ( sizeDesc ), ( tag ), ( flag ) )
  272. #define VjobQueuePort2PushSync( eaPort2, tagMask, dmaTag, flag ) cellSpursJobQueuePort2PushSync( (CellSpursJobQueuePort2 *) ( eaPort2), ( tagMask ), ( flag ) )
  273. inline void VjobQueuePort2PushJobBlocking( CellSpursJobQueuePort2 *eaPort2, CellSpursJobHeader *eaJob, size_t sizeDesc, uint nQueueTag, uint nDmaTag )
  274. {
  275. int nError = cellSpursJobQueuePort2PushJob( eaPort2, eaJob, sizeDesc, nQueueTag, 0 );// synchronous call
  276. (void) nError;
  277. Assert( nError == CELL_OK );
  278. }
  279. inline void VjobQueuePort2PushSyncBlocking( CellSpursJobQueuePort2 *eaPort2, unsigned tagMask, uint nDmaTag )
  280. {
  281. int nError = cellSpursJobQueuePort2PushSync( eaPort2, tagMask, 0 ); // synchronous call
  282. (void) nError;
  283. Assert( nError == CELL_OK );
  284. }
  285. #define VjobSpuId() -1
  286. #define LWSYNC_PPU_ONLY() __lwsync()
  287. extern void VjobDmaPut(
  288. const void * ls,
  289. uint64_t ea,
  290. uint32_t size,
  291. uint32_t tag,
  292. uint32_t tid,
  293. uint32_t rid
  294. );
  295. extern void VjobDmaGet(
  296. void * ls,
  297. uint64_t ea,
  298. uint32_t size,
  299. uint32_t tag,
  300. uint32_t tid,
  301. uint32_t rid
  302. );
  303. extern void VjobDmaGetf(
  304. void * ls,
  305. uint64_t ea,
  306. uint32_t size,
  307. uint32_t tag,
  308. uint32_t tid,
  309. uint32_t rid
  310. );
  311. extern void VjobDmaListGet(
  312. void *ls,
  313. uint64_t ea,
  314. const CellDmaListElement *list,
  315. uint32_t listSize,
  316. uint32_t tag,
  317. uint32_t tid,
  318. uint32_t rid
  319. );
  320. extern void VjobDmaLargePut(
  321. const void * ls,
  322. uint64_t ea,
  323. uint32_t size,
  324. uint32_t tag,
  325. uint32_t tid,
  326. uint32_t rid
  327. );
  328. extern void VjobDmaLargePutf(
  329. const void * ls,
  330. uint64_t ea,
  331. uint32_t size,
  332. uint32_t tag,
  333. uint32_t tid,
  334. uint32_t rid
  335. );
  336. extern void VjobDmaLargePutb(
  337. const void * ls,
  338. uint64_t ea,
  339. uint32_t size,
  340. uint32_t tag,
  341. uint32_t tid,
  342. uint32_t rid
  343. );
  344. extern void VjobDmaPutf(
  345. const void * ls,
  346. uint64_t ea,
  347. uint32_t size,
  348. uint32_t tag,
  349. uint32_t tid,
  350. uint32_t rid
  351. );
  352. extern void VjobDmaSmallPut(
  353. const void * ls,
  354. uint64_t ea,
  355. uint32_t size,
  356. uint32_t tag,
  357. uint32_t tid,
  358. uint32_t rid
  359. );
  360. extern void VjobDmaSmallGet(
  361. void * ls,
  362. uint64_t ea,
  363. uint32_t size,
  364. uint32_t tag,
  365. uint32_t tid,
  366. uint32_t rid
  367. );
  368. extern void VjobDmaSmallPutb(
  369. const void * ls,
  370. uint64_t ea,
  371. uint32_t size,
  372. uint32_t tag,
  373. uint32_t tid,
  374. uint32_t rid
  375. );
  376. extern void VjobDmaSmallPutf(
  377. const void * ls,
  378. uint64_t ea,
  379. uint32_t size,
  380. uint32_t tag,
  381. uint32_t tid,
  382. uint32_t rid
  383. );
  384. // NOTE: implementation must wait for tag
  385. uint32_t VjobDmaGetUint32(
  386. uint64_t ea,
  387. uint32_t tag,
  388. uint32_t tid,
  389. uint32_t rid
  390. );
  391. void VjobDmaPutUint32(
  392. uint32_t value,
  393. uint64_t ea,
  394. uint32_t tag,
  395. uint32_t tid,
  396. uint32_t rid
  397. );
  398. uint64_t VjobDmaGetUint64(
  399. uint64_t ea,
  400. uint32_t tag,
  401. uint32_t tid,
  402. uint32_t rid
  403. );
  404. void VjobDmaPutUint64(
  405. uint64_t value,
  406. uint64_t ea,
  407. uint32_t tag,
  408. uint32_t tid,
  409. uint32_t rid
  410. );
  411. void VjobDmaUnalignedPutf(
  412. const void *ls,
  413. uint64_t ea,
  414. uint32_t size,
  415. uint32_t tag,
  416. uint32_t tid,
  417. uint32_t rid
  418. );
  419. void VjobDmaUnalignedPut(
  420. const void *ls,
  421. uint64_t ea,
  422. uint32_t size,
  423. uint32_t tag,
  424. uint32_t tid,
  425. uint32_t rid
  426. );
  427. // These functions are empty because I'm too lazy to implement deferred DMA emulation ...
  428. inline uint VjobWaitTagStatusAll( uint nTagMask ){ return nTagMask;}
  429. inline uint VjobWaitTagStatusImmediate( uint nTagMask ) { return nTagMask ; }
  430. #define VjobDmaPutfUint8(value, ea, tag) *(uint8*)ea = (uint8)value
  431. #define VjobDmaPutfUint16(value, ea, tag) *(uint16*)ea = (uint16)value
  432. #define VjobDmaPutfUint32(value, ea, tag) *(uint32*)ea = (uint32)value
  433. #define VjobDmaPutfUint64(value, ea, tag) *(uint64*)ea = (uint64)value
  434. void VjobPushJob( void ( *pfnMain )( CellSpursJobContext2 * stInfo, CellSpursJob256 * job ), CellSpursJob128 * job );
  435. extern void VjobSpuLog( const char * p, ... );
  436. extern void VjobPpuRereadEA( uintp ea );
  437. inline void DebugMemcpyEa( uint eaDest, uint eaSrc, uint nSize, void *lsScratch )
  438. {
  439. Assert( ! ( 0xF & ( eaSrc | eaDest | nSize ) ) );
  440. memcpy( (void*)eaDest, (void*)eaSrc, nSize );
  441. }
  442. extern void TestAlignBuffer();
  443. #define vec_to_uint32(X) (*(uint32*)&(X))
  444. #endif // SPU
  445. #define VjobDmaEa2Ls16(ea, ls) ((uintptr_t)(ls)+((uint32_t)(ea)&15))
  446. #define VjobDmaEa2Ls128(ea, ls) ((uintptr_t)(ls)+((uint32_t)(ea)&127))
  447. inline uint32* PrepareSmallPut32( vector unsigned int * lsAligned, volatile uint32 * eaUnaligned, uint32 nInitialValue )
  448. {
  449. Assert( !( 3 & uint( lsAligned ) ) );
  450. uint32 * ls = ( uint32* )VjobDmaEa2Ls16( eaUnaligned, lsAligned );
  451. *ls = nInitialValue;
  452. return ls;
  453. }
  454. inline uint64* PrepareSmallPut64( vector unsigned int * lsAligned, volatile uint64 * eaUnaligned, uint64 nInitialValue )
  455. {
  456. Assert( !( 7 & uint( lsAligned ) ) );
  457. uint64 * ls = ( uint64* )VjobDmaEa2Ls16( eaUnaligned, lsAligned );
  458. *ls = nInitialValue;
  459. return ls;
  460. }
  461. extern CellSpursJobContext2* g_stInfo;
  462. #ifndef IsDebug
  463. # ifdef _DEBUG
  464. # define IsDebug() true
  465. # else
  466. # define IsDebug() false
  467. # endif
  468. #endif
  469. #ifndef IsCert
  470. # ifdef _CERT
  471. # define IsCert() true
  472. # else
  473. # define IsCert() false
  474. # endif
  475. #endif
  476. extern uint g_nBreakMask ;
  477. #ifdef _CERT
  478. # define BreakOn( nId )
  479. #else
  480. # define BreakOn( nId ) do \
  481. { \
  482. if( g_nBreakMask & ( 1 << nId ) ) \
  483. DebuggerBreak(); \
  484. }while( 0 )
  485. #endif
  486. inline void VjobDebugSpinCycles( uint nCycles )
  487. {
  488. if( !IsCert() )
  489. {
  490. #ifdef SPU
  491. uint nStart = spu_read_decrementer();
  492. while( nStart - spu_read_decrementer() < nCycles / 40 )
  493. continue;
  494. #else
  495. sys_timer_usleep( nCycles / 3200 );
  496. /*
  497. uint nStart = __mftb();
  498. while( __mftb() - nStart() < nCycles / 40 )
  499. continue;
  500. */
  501. #endif
  502. }
  503. }
  504. // this is the DMA list element without notify or reserved fields, so that it's easy to fill it in
  505. // and be sure there is no garbage left (in notify and reserved fields) and there are no bit field operations (to store size, which is effectively only 14-bit value)
  506. struct BasicDmaListElement_t
  507. {
  508. uint32 size;
  509. uint32 eal;
  510. };
  511. // shifts unaligned pBuffer of given size left by 0..15 bytes to make it aligned
  512. // returns the aligned pointer, pBuffer & -16
  513. extern void* AlignBuffer( void * pBuffer, uint nBytes);
  514. //
  515. // Adds constant nAdd to the given unaligned buffer of uint16's
  516. //
  517. extern void UnalignedBufferAddU16( uint16 * pBuffer, uint nCount, uint16 nAdd );
  518. // SpursJob_t must be one of CellSpursJob64, CellSpursJob128, CellSpursJob256,...
  519. // JobParam_t is the parameter structure passed to the job
  520. template < typename JobParam_t , typename SpursJob_t >
  521. inline JobParam_t * VjobGetJobParams( void * pJob )
  522. {
  523. Assert( sizeof( JobParam_t ) + sizeof( CellSpursJobHeader ) <= sizeof( SpursJob_t ) );
  524. JobParam_t * pJobParams = ( JobParam_t* ) ( uintp( pJob ) + ( sizeof( SpursJob_t ) - sizeof( JobParam_t ) ) );
  525. Assert( uintp( pJobParams + 1 ) == uintp( pJob ) + sizeof( SpursJob_t ) );
  526. return pJobParams;
  527. }
  528. extern void UnalignedBufferAddU16( );
  529. template <uint n> struct Log2{};
  530. template<>struct Log2<8> {enum{VALUE=3};};
  531. template<>struct Log2<16>{enum{VALUE=4};};
  532. template<>struct Log2<32>{enum{VALUE=5};};
  533. template<>struct Log2<256>{enum{VALUE=8};};
  534. #define COMPILE_TIME_LOG2(VAL) ( Log2<VAL>::VALUE )
  535. inline void ZeroMemAligned( void * p, uint nSize )
  536. {
  537. Assert( !( ( uintp( p ) | nSize ) & 15 ) );
  538. for( uint i = 0; i < nSize; i += 16 )
  539. {
  540. *( vec_uint4* )( uintp( p ) + i ) = (vec_uint4){0,0,0,0};
  541. }
  542. }
  543. inline void CopyMemAligned( void * pDst, const void * pSrc, uint nSize )
  544. {
  545. Assert( !( ( uintp( pDst ) | uintp( pSrc ) | nSize ) & 15 ) );
  546. for( uint i = 0; i < nSize; i += 16 )
  547. {
  548. *( vec_uint4* )( uintp( pDst ) + i ) = *( vec_uint4* )( uintp( pSrc ) + i );
  549. }
  550. }
  551. ///////////////////////////////////////////////////////////////////////////
  552. //
  553. // Reference implementation
  554. //
  555. template <uint nBitCount>
  556. class CBitArray
  557. {
  558. public:
  559. void Clear()
  560. {
  561. for( uint i = 0; i < ( nBitCount >> 7 ); ++i )
  562. {
  563. m_qword[i] = ( vec_uint4 ){0,0,0,0};
  564. }
  565. //m_nSetCount = 0;
  566. }
  567. void SetRange( uint nStart, uint nEnd )
  568. {
  569. nEnd = Min( nEnd, nBitCount );
  570. if( nStart > nEnd )
  571. return;
  572. //m_nSetCount = Max( nEnd, m_nSetCount );
  573. uint nMask = uint( -1 ) >> ( nStart & 0x1F );
  574. for( uint i = ( nStart >> 5 ); i < ( nEnd >> 5); ++i )
  575. {
  576. m_u32[i] |= nMask;
  577. nMask = uint( -1 );
  578. }
  579. nMask &= ~( uint( -1 ) >> ( nEnd & 0x1F ) );
  580. m_u32[ nEnd >> 5 ] |= nMask;
  581. }
  582. //uint GetSetCount()const{return m_nSetCount;}
  583. uint GetFirst1( uint nFrom )const
  584. {
  585. for( uint i = nFrom; i < nBitCount; ++i )
  586. if( GetBit( i ) )
  587. return i;
  588. return nBitCount;
  589. }
  590. uint GetFirst0( uint nFrom )const
  591. {
  592. for( uint i = nFrom; i < nBitCount; ++i )
  593. if( !GetBit( i ) )
  594. return i;
  595. return nBitCount;
  596. }
  597. uint GetBit( uint n )const
  598. {
  599. return m_u32[ n >> 5 ] & ( 0x80000000 >> ( n & 0x1F ) );
  600. }
  601. protected:
  602. union
  603. {
  604. vec_uint4 m_qword[ ( nBitCount + 127 ) / 128 ];
  605. uint32 m_u32[ ( nBitCount + 31 ) / 32 ];
  606. };
  607. //uint m_nSetCount;
  608. };
  609. #endif // _PS3
  610. #endif