Counter Strike : Global Offensive Source Code


//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
//
//
//==================================================================================================

//--------------------------------------------------------------------------------------------------
// Headers
//--------------------------------------------------------------------------------------------------
#include "SpuMgr_spu.h"

#include <cell/atomic.h>

#ifndef _CERT
#include <libsn_spu.h>
#endif

#include <stdlib.h>
#include <string.h>

//--------------------------------------------------------------------------------------------------
// Globals
//--------------------------------------------------------------------------------------------------

// singleton instance
SpuMgr gSpuMgr __attribute__((aligned(128)));

unsigned char gUnalignedMem[16] __attribute__((aligned(16)));
MemCpyHeader gMemCpyHeader __attribute__((aligned(16)));

//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
void SPU_memcpy( void *pBuf1, void *pBuf2 )
{
    uint32_t header;
    gSpuMgr.ReadMailbox( &header );
    gSpuMgr.MemcpyLock();

    gSpuMgr.DmaGetUNSAFE( &gMemCpyHeader, header, sizeof( MemCpyHeader ), 0 );
    gSpuMgr.DmaDone( 0x1 );

    DEBUG_ERROR( ( gMemCpyHeader.src & 0xf ) == 0 );

    uint32_t sizeAligned;
    uint32_t sizeAlignedDown;
    uint32_t dstAlignedDown;
    uint32_t offset;

    memcpy( gUnalignedMem, gMemCpyHeader.cacheLine, 16 );

    while ( gMemCpyHeader.size > 8192 )
    {
        sizeAligned = 8192;

        dstAlignedDown = SPUMGR_ALIGN_DOWN( gMemCpyHeader.dst, 16 );
        offset = gMemCpyHeader.dst - dstAlignedDown;

        gSpuMgr.DmaGetUNSAFE( pBuf1, gMemCpyHeader.src, sizeAligned, 0 );
        gSpuMgr.DmaDone( 0x1 );

        if ( offset )
        {
            memcpy( pBuf2, gUnalignedMem, offset );
        }

        memcpy( (void *) ( (uint32_t) pBuf2 + offset ), pBuf1, sizeAligned );

        gSpuMgr.DmaSync();
        gSpuMgr.DmaPut( dstAlignedDown, pBuf2, SPUMGR_ALIGN_UP( sizeAligned + offset, 16 ), 0 );
        gSpuMgr.DmaDone( 0x1 );

        sizeAlignedDown = SPUMGR_ALIGN_DOWN( sizeAligned + offset, 16 );
        memcpy( gUnalignedMem, (void *) ( (uint32_t) pBuf2 + sizeAlignedDown ), 16 );

        gMemCpyHeader.size -= sizeAligned;
        gMemCpyHeader.dst += 8192;
        gMemCpyHeader.src += 8192;
    }

    sizeAligned = SPUMGR_ALIGN_UP( gMemCpyHeader.size, 16 );
    dstAlignedDown = SPUMGR_ALIGN_DOWN( gMemCpyHeader.dst, 16 );
    offset = gMemCpyHeader.dst - dstAlignedDown;

    gSpuMgr.DmaGetUNSAFE( pBuf1, gMemCpyHeader.src, sizeAligned, 0 );
    gSpuMgr.DmaDone( 0x1 );

    if ( offset )
    {
        memcpy( pBuf2, gUnalignedMem, offset );
    }

    memcpy( (void *) ( (uint32_t) pBuf2 + offset ), pBuf1, gMemCpyHeader.size );

    sizeAligned = SPUMGR_ALIGN_UP( gMemCpyHeader.size + offset, 16 );

    gSpuMgr.DmaSync();
    gSpuMgr.DmaPut( dstAlignedDown, pBuf2, sizeAligned, 0 );
    gSpuMgr.DmaDone( 0x1 );

    if ( gMemCpyHeader.blocking )
    {
        gSpuMgr.WriteMailbox( 0 );
    }

    gSpuMgr.MemcpyUnlock();
}
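// Illustrative sketch (not part of the original file): roughly what the PPU side has to set up
// before SPU_memcpy can run, based only on the fields this function reads. The MemCpyHeader layout
// lives in SpuMgr_spu.h; the field names below match their use above, but WritePpuMailbox()/
// ReadPpuMailbox() and pSource/pDest/numBytes are hypothetical placeholders for whatever PPU-side
// mailbox API and buffers the engine actually uses. The header must stay valid until the SPU has
// DMA'd it in.
#if 0
MemCpyHeader header __attribute__((aligned(16)));
header.src      = (uint32_t)pSource;          // must be 16-byte aligned (asserted above)
header.dst      = (uint32_t)pDest;            // may be unaligned; handled via cacheLine/gUnalignedMem
header.size     = numBytes;
header.blocking = 1;                          // ask the SPU to mail back when the copy is finished

// snapshot of the destination cache line, so the bytes in front of an unaligned dst are preserved
memcpy( header.cacheLine, (void *)SPUMGR_ALIGN_DOWN( (uint32_t)pDest, 16 ), 16 );

WritePpuMailbox( (uint32_t)(uintptr_t)&header );  // SPU_memcpy picks this up via ReadMailbox()
ReadPpuMailbox();                                 // wait for the completion mail (blocking == 1)
#endif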
//--------------------------------------------------------------------------------------------------
// DmaCheckAlignment
//
// Checks restrictions specified in SpuMgr::DmaGet
//--------------------------------------------------------------------------------------------------
int DmaCheckAlignment(uint32_t src, uint32_t dest, uint32_t size)
{
#if !defined( _CERT )
    uint32_t align = size;
    bool error = false;

    if (size >= 16 && ((size & 0xf) == 0))
    {
        // multiple of 16 bytes: both addresses just need 16-byte alignment
        align = 16;
    }
    else if (size == 8 || size == 4 || size == 2 || size == 1)
    {
        // small transfer: the low 4 bits of source and destination must match
        error = ((src & 0xF) != (dest & 0xF));
    }
    else
    {
        error = true; // bad size
    }

    return (!error && src && dest &&
            SPUMGR_IS_ALIGNED(src, align) &&
            SPUMGR_IS_ALIGNED(dest, align));
#else // _CERT
    return 1;
#endif // !_CERT
}
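// Illustrative sketch (not part of the original file): concrete inputs and what DmaCheckAlignment
// returns for them, assuming SPUMGR_IS_ALIGNED(x, a) tests divisibility by a. The addresses are
// made-up values chosen only to show each branch.
#if 0
DmaCheckAlignment( 0x00010000, 0x30100000, 256 );  // 1: multiple of 16, both 16-byte aligned
DmaCheckAlignment( 0x00010004, 0x30100004, 4   );  // 1: small size, low 4 bits of src/dest match
DmaCheckAlignment( 0x00010004, 0x30100008, 4   );  // 0: low 4 bits differ
DmaCheckAlignment( 0x00010000, 0x30100000, 24  );  // 0: not 1/2/4/8 and not a multiple of 16
#endif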
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
int SpuMgr::Init()
{
    // Start the decrementer since it is possible
    // that it has not been started by default
    const unsigned int kEventDec = 0x20;

    // Disable the decrementer event.
    unsigned int maskEvents = spu_readch(SPU_RdEventStatMask);
    spu_writech(SPU_WrEventMask, maskEvents & ~kEventDec);

    // Acknowledge any pending events and stop the decrementer.
    spu_writech(SPU_WrEventAck, kEventDec);

    // Write the decrementer value to start the decrementer.
    unsigned int decValue = spu_readch(SPU_RdDec);
    spu_writech(SPU_WrDec, decValue);

    // Enable events.
    spu_writech(SPU_WrEventMask, maskEvents | kEventDec);

    // Reset byte count
    ResetBytesTransferred();

    // reset malloc count
    m_mallocCount = 0;

    // Read the effective address of the SPU locks.
    ReadMailbox( &m_lockEA );
    ReadMailbox( &m_memcpyLockEA );

    return 0;
}
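// Illustrative sketch (not part of the original file): one reason for starting the decrementer
// here. Once running it counts down continuously, so elapsed time over a section of SPU code can
// be measured with two channel reads. kTimebase is a placeholder; the real timebase frequency
// comes from the system configuration, not from this file.
#if 0
unsigned int start = spu_readch( SPU_RdDec );
// ... work to be timed ...
unsigned int elapsedTicks = start - spu_readch( SPU_RdDec );   // decrementer counts down
float elapsedSeconds = (float)elapsedTicks / (float)kTimebase;
#endif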
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
void SpuMgr::Term()
{
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaGet
//
// DmaGet - alignment and size checking
// DmaGetUNSAFE - no alignment or size checking (but will assert in debug)
// _DmaGet - handles badly aligned DMAs, should be a private member really (doesn't handle small DMAs)
//
// DMA restrictions
// An MFC supports naturally aligned DMA transfer sizes of 1, 2, 4,
// 8, and 16 bytes and multiples of 16 bytes.
// Furthermore, if size is 1, 2, 4, or 8 bytes then the lower 4 bits
// of LS and EA must match.
//
// Note:
// Peak performance is achieved for transfers in which both the EA and
// the LSA are 128-byte aligned and the size of the transfer is a multiple
// of 128 bytes.
//--------------------------------------------------------------------------------------------------
void SpuMgr::DmaGetUNSAFE(void *ls, uint32_t ea, uint32_t size, uint32_t tagId)
{
    DEBUG_ERROR( ea < 0xd0000000 );
    DEBUG_ERROR( ea );
    DEBUG_ERROR(DmaCheckAlignment((uint32_t)ls, ea, size));

    // add up bytes transferred (before the loop below consumes size)
    m_bytesRequested += size;
    m_bytesTransferred += size;

    // do the dma
    while (size)
    {
        // the MFC moves at most 16 KiB per command, so split large requests
        uint32_t dmaSize = 0x4000;
        dmaSize = (size < dmaSize)? size: dmaSize;
        size -= dmaSize;

        // kick off dma
        spu_mfcdma64( (void*)ls, 0, ea, dmaSize, tagId, MFC_GET_CMD);
        m_numDMATransfers++;

        ls = (void*)((uint32_t)ls + dmaSize);
        ea += dmaSize;
    }
}
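// Illustrative sketch (not part of the original file): a 40 KiB get issued through the loop above
// becomes three MFC commands on the same tag (16 KiB + 16 KiB + 8 KiB); DmaDone() then waits on
// that tag. lsBuffer and kSrcEA are placeholders for a 16-byte-aligned local-store buffer and a
// valid, 16-byte-aligned main-memory effective address.
#if 0
gSpuMgr.DmaGetUNSAFE( lsBuffer, kSrcEA, 40 * 1024, 3 );
gSpuMgr.DmaDone( 1 << 3 );   // wait for tag 3 to drain before touching lsBuffer
#endif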
//--------------------------------------------------------------------------------------------------
// SpuMgr::_DmaGet
//
// Internal function - do not call this directly
//--------------------------------------------------------------------------------------------------
void SpuMgr::_DmaGet(void *ls, uint32_t ea, uint32_t size, uint32_t tagId)
{
    uint32_t unaligned = false;
    uint32_t eaAligned = (uint32_t)ea;
    uint32_t sizeAligned = size;
    uint32_t lsAligned = (uint32_t)ls;
    uint32_t sizeOffset = 0;
    char *pTempBuff = NULL;

    // check if src is unaligned
    if (eaAligned & 0xF)
    {
        eaAligned = eaAligned & ~0xF;   // round down
        sizeOffset = ea - eaAligned;
        sizeAligned += sizeOffset;
        unaligned = true;
    }

    // check if size is unaligned
    if (sizeAligned & 0xF)
    {
        sizeAligned = (sizeAligned + 0xF) & ~0xF;   // round up
        unaligned = true;
    }

    // if we have adjusted the size, or if ls is unaligned,
    // we need to alloc temp buffer
    if (unaligned || (lsAligned & 0xF))
    {
        pTempBuff = (char*)MemAlign(0x10, sizeAligned);
        lsAligned = (uint32_t)pTempBuff;
        unaligned = true;
    }

    // add up bytes transferred, for informational purposes
    m_bytesRequested += size;
    m_bytesTransferred += sizeAligned;

    // do the dma
    while (sizeAligned)
    {
        uint32_t dmaSize = 0x4000;
        dmaSize = (sizeAligned < dmaSize)? sizeAligned: dmaSize;
        sizeAligned -= dmaSize;

        // kick off dma
        spu_mfcdma64( (void*)lsAligned, 0, eaAligned, dmaSize, tagId, MFC_GET_CMD);
        m_numDMATransfers++;

        lsAligned += dmaSize;
        eaAligned += dmaSize;
    }

    if (unaligned)
    {
        // block for now till dma done because we do the memcpy right here
        DmaDone(1 << tagId);

        // copy data over
        memcpy(ls, pTempBuff + sizeOffset, size);

        // free temp buff
        Free(pTempBuff);
    }
}
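// Illustrative sketch (not part of the original file): the adjustment above worked through for a
// concrete unaligned request (hypothetical values). The caller's ls/size are untouched; only the
// DMA itself is widened to a 16-byte-aligned window, and the wanted bytes are copied out afterwards.
#if 0
uint32_t ea          = 0x00A00007;                        // unaligned source, 20 bytes wanted
uint32_t size        = 20;
uint32_t eaAligned   = ea & ~0xF;                         // 0x00A00000 (rounded down)
uint32_t sizeOffset  = ea - eaAligned;                    // 7
uint32_t sizeAligned = (size + sizeOffset + 0xF) & ~0xF;  // 27 rounded up to 32
// a 32-byte get lands in a 16-byte-aligned temp buffer; the 20 requested bytes are then
// memcpy'd from pTempBuff + 7 into the caller's ls pointer
#endif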
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaGetSAFE
//
// DMA restrictions (look at SpuMgr::DmaGetUNSAFE in this file) are
// handled transparently by this function
//--------------------------------------------------------------------------------------------------
void SpuMgr::DmaGetSAFE(void *ls, uint32_t ea, uint32_t size, uint32_t tagId)
{
    DEBUG_ERROR( ea );

    if( size < 0x10 )
    {
        // lowest 4 bits of address have to match regardless, &
        // size can only be 1, 2, 4 or 8 B
        if( size==0x1 || size==0x2 || size==0x4 || size==0x8 )
        {
            if( ((uint32_t)ls & 0xF) == (ea & 0xF) )
            {
                DmaGetUNSAFE(ls,ea,size,tagId);
            }
            else
            {
                // small get not aligned within a 16B block
                _DmaGet(ls,ea,size,tagId);
            }
        }
        else
        {
            // if < 16B can only get 1,2,4 or 8B
            _DmaGet(ls,ea,size,tagId);
        }
    }
    else
    {
        if( (!(size & 0xF)) &&          // has to be multiple of 16B, &
            (((uint32_t)ls&0xF)==0) &&  // ea and ls have to be 16B aligned
            ((ea&0xF)==0) )
        {
            // alignment is okay just dma
            DmaGetUNSAFE(ls,ea,size,tagId);
        }
        else
        {
            _DmaGet(ls,ea,size,tagId);
        }
    }
}
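// Illustrative sketch (not part of the original file): a typical round trip through the safe path.
// kObjectEA is a placeholder 16-byte-aligned effective address; with the local buffer also
// 16-byte aligned and the size a multiple of 16, the fast DmaGetUNSAFE branch above is taken.
#if 0
static uint8_t lsScratch[256] __attribute__((aligned(16)));

gSpuMgr.DmaGetSAFE( lsScratch, kObjectEA, sizeof( lsScratch ), 2 );
gSpuMgr.DmaDone( 1 << 2 );                   // wait for the get on tag 2

// ... work on lsScratch in local store ...

gSpuMgr.DmaPut( kObjectEA, lsScratch, sizeof( lsScratch ), 2 );
gSpuMgr.DmaDone( 1 << 2 );                   // wait for the put to retire before reusing lsScratch
#endif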
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaPut
//--------------------------------------------------------------------------------------------------
void SpuMgr::DmaPut(uint32_t ea, void *ls, uint32_t size, uint32_t tagId)
{
    DEBUG_ERROR( (ea!=0) && (ea<0xd0000000) );  // valid ea
    DEBUG_ERROR( (uint32_t)ls < 0x40000 );      // valid ls
    DEBUG_ERROR(DmaCheckAlignment((uint32_t)ls, ea, size));

    // do the dma
    while (size)
    {
        uint32_t dmaSize = 0x4000;
        dmaSize = (size < dmaSize)? size: dmaSize;
        size -= dmaSize;

        // initiate dma to ppu
        spu_mfcdma64( ls, 0, ea, dmaSize, tagId, MFC_PUT_CMD);

        ls = (void*)((uint32_t)ls + dmaSize);
        ea += dmaSize;
    }
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaSmallPut
//--------------------------------------------------------------------------------------------------
void SpuMgr::DmaSmallPut(uint32_t ea, void *ls, uint32_t size, uint32_t tagId)
{
    DEBUG_ERROR( (ea!=0) && (ea<0xd0000000) );  // valid ea
    DEBUG_ERROR( (uint32_t)ls < 0x40000 );      // valid ls
    DEBUG_ERROR(DmaCheckAlignment((uint32_t)ls, ea, size));

    // pick the largest legal small-transfer size that divides the total evenly
    uint32_t dmaSize = 1;

    if ((size % 8) == 0)
    {
        dmaSize = 8;
    }
    else if ((size % 4) == 0)
    {
        dmaSize = 4;
    }
    else if ((size % 2) == 0)
    {
        dmaSize = 2;
    }

    while (size)
    {
        size -= dmaSize;

        // initiate dma to ppu
        spu_mfcdma64( ls, 0, ea, dmaSize, tagId, MFC_PUT_CMD);

        ls = (void*)((uint32_t)ls + dmaSize);
        ea += dmaSize;
    }
}
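// Illustrative sketch (not part of the original file): how DmaSmallPut breaks a write into small
// naturally aligned elements. kStatusEA is a placeholder 16-byte-aligned effective address (so the
// DmaCheckAlignment assert above is satisfied for a 16-byte total).
#if 0
static uint32_t results[4] __attribute__((aligned(16)));   // 16 bytes total

// 16 % 8 == 0 -> element size 8, issued as two 8-byte MFC puts to kStatusEA and kStatusEA + 8
gSpuMgr.DmaSmallPut( kStatusEA, results, sizeof( results ), 5 );
gSpuMgr.DmaDone( 1 << 5 );
#endif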
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaGetList
//
// Gather data scattered around main mem. The MFC will run through the list and place the elements
// (based on ea address and size) contiguously in ls.
//
// NOTE: if an individual list element size is <16B, the data will still be dma'd but the following
// element will be placed on the next 16B boundary. So it is possible to get lots of small elements,
// but you will be left with gaps in ls.
//
// ls       - ls address of where items will be placed (contiguously)
// lsList   - ls address of actual list
// sizeList - size of list in bytes (each list element is 8B (sizeof(DMAList)), so sizeList should
//            be number of list elements * sizeof(DMAList))
// tagId    - works the same way as regular DMAs
//
// Alignment and Size Restrictions:
// - ls and lsList must be 8B aligned
// - size must be a multiple of 8B (sizeof(DMAList))
// - no more than 2048 list elements
//
// light error checking right now
//--------------------------------------------------------------------------------------------------
void SpuMgr::DmaGetList(void *ls, DMAList *pLS_List, uint32_t sizeList, uint32_t tagId)
{
    DEBUG_ERROR( ((uint32_t)pLS_List&0x7) == 0 );   // ls address must be 8B aligned
    DEBUG_ERROR( ((uint32_t)ls&0x7) == 0 );         // ea so aligned also, due to offset within 16B alignment restrictions
    DEBUG_ERROR( (sizeList&0x7) == 0 );             // list size is a multiple of 8B
    DEBUG_ERROR( sizeList<(2048*sizeof(DMAList)) ); // no more than 2048 list elements

    // initiate dma list
    spu_mfcdma64( ls, 0, (uint32_t)pLS_List, sizeList, tagId, MFC_GETL_CMD );
}
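// Illustrative sketch (not part of the original file): gathering three scattered 256-byte blocks
// into one contiguous local-store buffer. DMAList is defined in SpuMgr_spu.h; the field names
// size/ea used below are assumptions (a standard MFC list element pairs a transfer size with the
// low 32 bits of an effective address). kBlockEA0..2 are placeholder main-memory addresses.
#if 0
static DMAList gatherList[3] __attribute__((aligned(8)));
static uint8_t gatherBuf[3 * 256] __attribute__((aligned(16)));

gatherList[0].size = 256; gatherList[0].ea = kBlockEA0;
gatherList[1].size = 256; gatherList[1].ea = kBlockEA1;
gatherList[2].size = 256; gatherList[2].ea = kBlockEA2;

gSpuMgr.DmaGetList( gatherBuf, gatherList, 3 * sizeof( DMAList ), 4 );
gSpuMgr.DmaDone( 1 << 4 );   // the three blocks now sit back to back in gatherBuf
#endif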
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaPutList
//
// Scatter data held contiguously in ls, to main mem
//
// ls       - ls address of where items exist (contiguously) to be scattered back to main mem
// lsList   - ls address of actual list
// sizeList - size of list in bytes (each list element is 8B (sizeof(DMAList)), so sizeList should
//            be number of list elements * sizeof(DMAList))
// tagId    - works the same way as regular DMAs
//
// Alignment and Size Restrictions:
// ls and lsList must be 8B aligned, size must be a multiple of 8B (sizeof(DMAList))
//
// light error checking right now
//--------------------------------------------------------------------------------------------------
void SpuMgr::DmaPutList(void *ls, DMAList* pLS_List, uint32_t sizeList, uint32_t tagId)
{
    DEBUG_ERROR( ((uint32_t)pLS_List&0x7) == 0 );   // ls address must be 8B aligned
    DEBUG_ERROR( ((uint32_t)ls&0x7) == 0 );         // ea so aligned also, due to offset within 16B alignment restrictions
    DEBUG_ERROR( (sizeList&0x7) == 0 );             // list size is a multiple of 8B
    DEBUG_ERROR( sizeList<(2048*sizeof(DMAList)) ); // no more than 2048 list elements

    // initiate dma list
    spu_mfcdma64( ls, 0, (uint32_t)pLS_List, sizeList, tagId, MFC_PUTL_CMD );
}
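// Illustrative sketch (not part of the original file): writing the gathered blocks from the
// previous sketch back to their original addresses after processing them in place. The same
// gatherList/gatherBuf names and assumed DMAList fields from that sketch are reused here.
#if 0
// ... process gatherBuf in local store ...

gSpuMgr.DmaPutList( gatherBuf, gatherList, 3 * sizeof( DMAList ), 4 );
gSpuMgr.DmaDone( 1 << 4 );   // all three blocks are back in main memory
#endif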