Counter Strike : Global Offensive Source Code
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

434 lines
9.0 KiB

//========== Copyright (c) Valve Corporation, All rights reserved. ========
  2. #include "ps3/spu_job_shared.h"
// NOTE(review): not referenced anywhere in this translation unit - presumably
// a debug-break trigger mask consulted by SPU job debug code elsewhere; confirm.
uint g_nBreakMask = 0;
// Shifts the contents of an unaligned buffer down in memory so the data
// starts at the enclosing 16-byte boundary, and returns that aligned address.
// The data is MOVED in place (each adjacent vector pair is re-packed with
// vec_perm), so the caller must treat the returned pointer as the new start
// of the data; bytes past the shifted data are left as garbage.
// pBuffer - start of the (possibly unaligned) data
// nBytes  - number of data bytes
// NOTE(review): the loop loads one vector beyond the last data byte when the
// data does not end on a 16-byte boundary - assumes that read is safe here.
void* AlignBuffer( void * pBuffer, uint nBytes )
{
	// Already 16-byte aligned: nothing to move.
	if( !( uintp( pBuffer ) & 15 ) )
	{
		return pBuffer;
	}
	Assert( nBytes < 232*1024 ); // sanity check
	// pBegin: vector slot containing the first data byte (vector loads ignore
	// the low address bits); pEnd: one past the last slot holding data.
	vector int *pBegin = ( vector int * )( pBuffer ), *pEnd = ( vector int* )( uintp( pBuffer ) + nBytes );
	vector int vLast = *pBegin;
	vector int *pLast = pBegin;
	// Permute control from lvsl selects bytes starting at the misalignment
	// offset, i.e. shifts each concatenated vector pair left by (addr & 15).
	vector unsigned char vShuf = vec_lvsl( 0, (uint8*)pBuffer );
	// Walk forward; order matters: each slot is overwritten only after its
	// original contents were captured in vLast on the previous iteration.
	while( pLast < pEnd )
	{
		vector int * pNext = pLast + 1;
		vector int vNext = *pNext;
		*pLast = vec_perm( vLast, vNext, vShuf );
		pLast = pNext;
		vLast = vNext;
	}
	// The aligned base the data was shifted down to.
	return ( void* )( uintp( pBuffer ) & -16 );
}
//
// Adds constant nAdd to each element of the given unaligned buffer of
// nCount uint16's.
//
// On SPU, local store is only writable in aligned 16-byte quadwords, so the
// add is done vector-wise: the partial head/tail quadwords use a masked
// addend (lanes outside the buffer zeroed, adding 0 there), and the full
// interior quadwords get the unmasked vector add.
//
void UnalignedBufferAddU16( uint16 * pBuffer, uint nCount, uint16 nAdd )
{
#ifdef SPU
	if( nCount )
	{
		uint16 *pBufferEnd = pBuffer + nCount;
		// NOTE(review): vec_splat_u16 conventionally takes a 5-bit literal,
		// not a runtime value - confirm the SPU toolchain accepts this
		// (spu_splats would be the usual spelling).
		vector unsigned short vuAdd = vec_splat_u16( nAdd );
		// Addend with the byte lanes BEFORE pBuffer's quadword offset zeroed.
		vector unsigned short vuLeft = spu_rlmaskqwbyte( vuAdd, -( 0xF & int( pBuffer ) ) );
		// Addend with the byte lanes AT/AFTER pBufferEnd's offset zeroed.
		vector unsigned short vuRight = spu_slqwbyte( vuAdd, 0xF & -int( pBufferEnd ) );
		// Quadwords containing the first and the last element, respectively.
		vector unsigned short * pLeft = ( vector unsigned short * )( uintp( pBuffer ) & -16 ), * pRight = ( vector unsigned short* )( uintp( pBufferEnd - 1 ) & -16 );
		if( pLeft == pRight )
		{
			// Whole buffer fits in one quadword: intersect both masks.
			*pLeft = vec_add( *pLeft, vec_and( vuLeft, vuRight ) );
		}
		else
		{
			*pLeft = vec_add( *pLeft, vuLeft );
			*pRight = vec_add( *pRight, vuRight );
			// Full quadwords strictly between the head and tail.
			for( vector unsigned short * p = pLeft + 1; p < pRight; ++p )
			{
				*p = vec_add( *p, vuAdd );
			}
		}
	}
#else
	// PPU / generic path: plain scalar loop.
	for( uint i = 0; i < nCount; ++i )
	{
		pBuffer[i] += nAdd;
	}
#endif
}
  59. void TestUnalignedBufferAddU16( )
  60. {
  61. uint16 ALIGN16 test[8 * 6] ALIGN16_POST;
  62. for( uint l = 0; l <= 8; ++l )
  63. {
  64. for( uint e = l; e < ARRAYSIZE( test ); ++e )
  65. {
  66. V_memset( test, 0, sizeof( test ) );
  67. UnalignedBufferAddU16( test + l, e - l, e+1 );
  68. for( uint t = 0; t < l; ++ t )
  69. Assert( test[t] == 0 );
  70. for( uint t = l; t < e; ++t )
  71. Assert( test[t] == e+1 );
  72. for( uint t = e; t < ARRAYSIZE( test ); ++t )
  73. Assert( test[t] == 0 );
  74. }
  75. }
  76. }
  77. #ifndef SPU
  78. void TestAlignBuffer()
  79. {
  80. for( uint i = 0; i < 16; ++i )
  81. {
  82. uint8 ALIGN16 test[16 * 10] ALIGN16_POST;
  83. for( uint j = i; j < sizeof( test ); ++j )
  84. test[j] = uint8( j - i );
  85. uint8 * pBeginTest = (uint8*)AlignBuffer( test + i, sizeof( test ) - 16 );
  86. Assert( pBeginTest == test );
  87. for( uint j = 0; j < sizeof( test ) - 16; ++j )
  88. Assert( test[j] == uint8( j ) );
  89. }
  90. }
// Context of the job currently being executed via VjobPushJob;
// NULL while no emulated job is running.
CellSpursJobContext2* g_stInfo = NULL;
  92. static void SyncDmaListTransfer( void * pDmaList, uint nDmaListSize, void * pTarget, uint nTargetMaxSize )
  93. {
  94. Assert( !( nDmaListSize & 7 ) && !( uintp( pDmaList ) & 0xF ) );
  95. //uintp dmaTarget = ( uintp ) pTarget, dmaTargetEnd = dmaTarget + nTargetMaxSize;
  96. CellSpursJobInputList * pInputDmaList = ( CellSpursJobInputList* )pDmaList, *pInputDmaListEnd = ( CellSpursJobInputList * )( uintp( pDmaList ) + nDmaListSize );
  97. uintp lsDmaTarget = ( uintp ) pTarget, lsDmaTargetEnd = lsDmaTarget + nTargetMaxSize;
  98. for ( CellSpursJobInputList * pDmaElement = pInputDmaList; pDmaElement < pInputDmaListEnd; pDmaElement++ )
  99. {
  100. Assert( pDmaElement->asInputList.size <= 16 * 1024 ); // max size of a DMA element
  101. uintp lsDmaEnd = lsDmaTarget + pDmaElement->asInputList.size;
  102. Assert( lsDmaEnd <= lsDmaTargetEnd );
  103. V_memcpy( ( void* )lsDmaTarget, ( const void* ) pDmaElement->asInputList.eal, pDmaElement->asInputList.size );
  104. lsDmaTarget = AlignValue( lsDmaEnd, 16 ); // for small transfers, we must stalign every transfer by 16
  105. }
  106. }
// Runs a SPURS job synchronously on the PPU (SPU emulation path): allocates
// the job's IO buffer, performs its input DMA list into that buffer, then
// calls the job's entry point directly with a minimal job context.
// pfnMain - the job's main function (the SPU jobmain equivalent)
// job     - job descriptor; header.sizeInOrInOut / sizeDmaList / workArea
//           are consumed the way the SPURS kernel would consume them
void VjobPushJob( void ( *pfnMain )( CellSpursJobContext2 * stInfo, CellSpursJob256 * job ), CellSpursJob128 * job )
{
	CellSpursJobContext2 info;
	V_memset( &info, 0, sizeof( info ) );
	// IO buffer plays the role of the SPU local-store input/output area.
	void * ioBuffer = MemAlloc_AllocAligned( job->header.sizeInOrInOut, 16 );
	info.ioBuffer = ioBuffer;
	info.eaJobDescriptor = ( uintp ) job;
	// NOTE(review): jobCopy is written but never read. On SPU the descriptor
	// is DMA'd into local store, so pfnMain arguably should receive &jobCopy
	// rather than the live descriptor to emulate that isolation - confirm
	// intent before removing or rerouting.
	CellSpursJob256 jobCopy;
	V_memcpy( &jobCopy, job, sizeof( *job ) );
	SyncDmaListTransfer( job->workArea.dmaList, job->header.sizeDmaList, ioBuffer, job->header.sizeInOrInOut );
	// Publish the context for the duration of the job (see g_stInfo).
	g_stInfo = &info;
	pfnMain( &info, ( CellSpursJob256* ) job );
	g_stInfo = NULL;
	MemAlloc_FreeAligned( ioBuffer );
}
  122. void VjobSpuLog( const char * p, ... )
  123. {
  124. va_list args;
  125. va_start( args, p );
  126. char szBuffer[2048];
  127. V_vsnprintf( szBuffer, sizeof( szBuffer ), p, args );
  128. Msg( "SPU-on-PPU: %s\n", szBuffer );
  129. va_end( args );
  130. }
  131. #define Check(b) if(!(b))DebuggerBreak();
  132. void VjobDmaPut(
  133. const void * ls,
  134. uint64_t ea,
  135. uint32_t size,
  136. uint32_t tag,
  137. uint32_t tid,
  138. uint32_t rid
  139. )
  140. {
  141. Check( !( size & 0xF ) && size <= 16 * 1024 );
  142. Check( !( ea & 0xF ) && !( uintp( ls ) & 0xF ) );
  143. V_memcpy( ( void* )( uintp )ea, ls, size );
  144. }
  145. void VjobDmaLargePut(
  146. const void * ls,
  147. uint64_t ea,
  148. uint32_t size,
  149. uint32_t tag,
  150. uint32_t tid,
  151. uint32_t rid
  152. )
  153. {
  154. Check( !( size & 0xF ) && size <= 240 * 1024 );
  155. Check( !( ea & 0xF ) && !( uintp( ls ) & 0xF ) );
  156. V_memcpy( ( void* )( uintp )ea, ls, size );
  157. }
  158. void VjobDmaLargePutf(
  159. const void * ls,
  160. uint64_t ea,
  161. uint32_t size,
  162. uint32_t tag,
  163. uint32_t tid,
  164. uint32_t rid
  165. )
  166. {
  167. VjobDmaLargePut( ls, ea, size, tag, tid, rid );
  168. }
  169. void VjobDmaUnalignedPutf(
  170. const void *ls,
  171. uint64_t ea,
  172. uint32_t size,
  173. uint32_t tag,
  174. uint32_t tid,
  175. uint32_t rid
  176. )
  177. {
  178. Assert( 0 == ( 0xF & ( uintp( ls ) ^ ea ) ) );
  179. V_memcpy( (void*)(uintp)ea, ls, size );
  180. }
  181. void VjobDmaUnalignedPut(
  182. const void *ls,
  183. uint64_t ea,
  184. uint32_t size,
  185. uint32_t tag,
  186. uint32_t tid,
  187. uint32_t rid
  188. )
  189. {
  190. Assert( 0 == ( 0xF & ( uintp( ls ) ^ ea ) ) );
  191. V_memcpy( (void*)(uintp)ea, ls, size );
  192. }
  193. void VjobDmaLargePutb(
  194. const void * ls,
  195. uint64_t ea,
  196. uint32_t size,
  197. uint32_t tag,
  198. uint32_t tid,
  199. uint32_t rid
  200. )
  201. {
  202. VjobDmaLargePut( ls, ea, size, tag, tid, rid );
  203. }
  204. void VjobDmaPutf(
  205. const void * ls,
  206. uint64_t ea,
  207. uint32_t size,
  208. uint32_t tag,
  209. uint32_t tid,
  210. uint32_t rid
  211. )
  212. {
  213. Check( !( size & 0xF ) && size <= 16 * 1024 );
  214. Check( !( ea & 0xF ) && !( uintp( ls ) & 0xF ) );
  215. V_memcpy( ( void* )( uintp )ea, ls, size );
  216. }
  217. void VjobDmaSmallPut(
  218. const void * ls,
  219. uint64_t ea,
  220. uint32_t size,
  221. uint32_t tag,
  222. uint32_t tid,
  223. uint32_t rid
  224. )
  225. {
  226. Check( !( size & ( size - 1 ) ) );
  227. Check( !( 0xF & ( ea ^ uintp( ls ) ) ) );
  228. if ( size == 4 )
  229. {
  230. // special case to handle atomically, because we may use this to write RSX registers
  231. *( uint32* )( uintp )ea = *( uint32* )ls;
  232. }
  233. else
  234. {
  235. V_memcpy( ( void* )( uintp )ea, ls, size );
  236. }
  237. }
  238. void VjobDmaGet(
  239. void * ls,
  240. uint64_t ea,
  241. uint32_t size,
  242. uint32_t tag,
  243. uint32_t tid,
  244. uint32_t rid
  245. )
  246. {
  247. Check( !( size & ( size - 1 ) ) );
  248. Check( !( 0xF & ( ea | uintp( ls ) ) ) );
  249. if ( size == 4 )
  250. {
  251. // special case to handle atomically, because we may use this to read RSX registers
  252. *( uint32* )ls = *( uint32* )( uintp )ea;
  253. }
  254. else
  255. {
  256. V_memcpy( ls, ( const void* )( uintp )ea, size );
  257. }
  258. }
  259. void VjobDmaGetf(
  260. void * ls,
  261. uint64_t ea,
  262. uint32_t size,
  263. uint32_t tag,
  264. uint32_t tid,
  265. uint32_t rid
  266. )
  267. {
  268. VjobDmaGet( ls, ea, size, tag, tid, rid );
  269. }
  270. // NOTE: implementation must wait for tag
  271. uint32_t VjobDmaGetUint32(
  272. uint64_t ea,
  273. uint32_t tag,
  274. uint32_t tid,
  275. uint32_t rid
  276. )
  277. {
  278. return * ( volatile uint32 * )( uintp )ea;
  279. }
  280. void VjobDmaPutUint32(
  281. uint32_t value,
  282. uint64_t ea,
  283. uint32_t tag,
  284. uint32_t tid,
  285. uint32_t rid
  286. )
  287. {
  288. ( * ( volatile uint32 * )( uintp )ea ) = value;
  289. }
  290. uint64_t VjobDmaGetUint64(
  291. uint64_t ea,
  292. uint32_t tag,
  293. uint32_t tid,
  294. uint32_t rid
  295. )
  296. {
  297. return *( volatile uint64 * )( uintp )ea;
  298. }
  299. void VjobDmaPutUint64(
  300. uint64_t value,
  301. uint64_t ea,
  302. uint32_t tag,
  303. uint32_t tid,
  304. uint32_t rid
  305. )
  306. {
  307. ( * ( volatile uint64 * )( uintp )ea ) = value;
  308. }
  309. void VjobDmaListGet(
  310. void * ls,
  311. uint64_t ea,
  312. const CellDmaListElement * list,
  313. uint32_t listSize,
  314. uint32_t tag,
  315. uint32_t tid,
  316. uint32_t rid
  317. )
  318. {
  319. Check( !( listSize % 8 ) );
  320. uint8 * pLsTarget = ( uint8* )ls;
  321. for ( uint i = 0; i < listSize / 8; ++i )
  322. {
  323. uint64 nSize = list[i].size;
  324. VjobDmaGet( pLsTarget, ea + list[i].eal, ( uint32 )nSize, tag, tid, rid );
  325. }
  326. }
  327. void VjobDmaSmallGet(
  328. void * ls,
  329. uint64_t ea,
  330. uint32_t size,
  331. uint32_t tag,
  332. uint32_t tid,
  333. uint32_t rid
  334. )
  335. {
  336. Check( !( size & ( size - 1 ) ) );
  337. Check( !( 0xF & ( ea ^ uintp( ls ) ) ) );
  338. V_memcpy( ls, ( const void* )( uintp )ea, size );
  339. }
  340. void VjobDmaSmallPutf(
  341. const void * ls,
  342. uint64_t ea,
  343. uint32_t size,
  344. uint32_t tag,
  345. uint32_t tid,
  346. uint32_t rid
  347. )
  348. {
  349. Check( !( size & ( size - 1 ) ) );
  350. Check( !( 0xF & ( ea ^ uintp( ls ) ) ) );
  351. V_memcpy( ( void* )( uintp )ea, ls, size );
  352. }
  353. void VjobDmaSmallPutb(
  354. const void * ls,
  355. uint64_t ea,
  356. uint32_t size,
  357. uint32_t tag,
  358. uint32_t tid,
  359. uint32_t rid
  360. )
  361. {
  362. Check( !( size & ( size - 1 ) ) );
  363. Check( !( 0xF & ( ea ^ uintp( ls ) ) ) );
  364. V_memcpy( ( void* )( uintp )ea, ls, size );
  365. }
  366. void VjobPpuRereadEA( uintp ea )
  367. {
  368. __lwsync();
  369. int eaContent = *( volatile int * ) ea;
  370. __lwsync();
  371. }
  372. #endif