Counter Strike : Global Offensive Source Code
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1010 lines
25 KiB

  1. //========= Copyright © 1996-2006, Valve Corporation, All rights reserved. ============//
  2. //
  3. // Purpose: Proxy for D3DX routines
  4. //
  5. // $NoKeywords: $
  6. //
  7. //=============================================================================//
  8. //
  9. #define WIN32_LEAN_AND_MEAN
  10. #include <windows.h>
  11. #include <wincrypt.h>
  12. #include <vector>
  13. #include <string>
  14. #include <algorithm>
  15. #include "../../dx10sdk/include/d3dx10.h"
  16. typedef D3D10_SHADER_MACRO D3DXMACRO;
  17. typedef LPD3D10INCLUDE LPD3DXINCLUDE;
  18. typedef ID3D10Include ID3DXInclude;
  19. typedef D3D10_INCLUDE_TYPE D3DXINCLUDE_TYPE;
  20. typedef ID3D10Blob* LPD3DXBUFFER;
  21. typedef void* LPD3DXCONSTANTTABLE;
  22. #include "filememcache.h"
  23. #include "dxincludeimpl.h"
  24. #include "cgc.h"
  25. #include "SCEShaderPerf.h"
  26. typedef unsigned int uint;
  27. typedef unsigned __int64 uint64;
  28. #include "../../public/ps3shaderoptimizer/ps3optimalschedulesfmt.h"
  // Candidate values for the fragment-shader "randomSched" compiler option.
  // The optimal-schedule search compiles every value in this table with
  // NUM_RANDOM_SCHEDULE_SEEDS different seeds.
  const int g_nRandSched[] =
  {
      // 17 good scheduler settings, found empirically by Sony.
      8,   10,  15,  4,
      32,  2,   1,   64,
      13,  14,  16,  17,  18,  19,
      128, 256, 512,

      // 6 extra scheduler settings.
      6,   100, 192, 3,   384, 24
  };

  // Number of schedule values / seeds tried per shader during the search.
  #define NUM_RANDOM_SCHEDULE_VALUES ARRAYSIZE( g_nRandSched )
  #define NUM_RANDOM_SCHEDULE_SEEDS 12

  // Faster settings, for testing purposes (currently takes around 11 minutes):
  //#define NUM_RANDOM_SCHEDULE_VALUES 8
  //#define NUM_RANDOM_SCHEDULE_SEEDS 1

  //#define NUM_RANDOM_SCHEDULE_VALUES 1
  //#define NUM_RANDOM_SCHEDULE_SEEDS 1

  // Value passed to the Cg compiler as "-O<level>".
  #define CGC_COMPILER_OPTIMIZATION_LEVEL 1
  47. // Aux function prototype
  48. const char * WINAPI GetDllVersion( void );
  49. void* CgMalloc( void* arg, size_t size ); // Memory allocation callback
  50. void CgFree( void* arg, void* ptr ); // Memory freeing callback
  51. HANDLE g_mutexDebug = NULL;
  52. void DebugLog( const char * pMsg, ...)
  53. {
  54. (void)pMsg;
  55. #ifdef _DEBUG
  56. FILE * f = fopen( "c:\\dx_proxy_ps3.log", "at" );
  57. if( f )
  58. {
  59. if( g_mutexDebug )
  60. WaitForSingleObject( g_mutexDebug, INFINITE );
  61. va_list args;
  62. va_start(args,pMsg);
  63. SYSTEMTIME lt;
  64. GetLocalTime( &lt );
  65. fprintf( f, "%02d:%02d:%02d.%04d[%d.%d]", lt.wHour, lt.wMinute, lt.wSecond, lt.wMilliseconds,
  66. GetCurrentProcessId(), GetCurrentThreadId() );
  67. vfprintf(f, pMsg, args );
  68. fputs( "\n", f );
  69. va_end( args );
  70. fclose( f );
  71. if( g_mutexDebug )
  72. ReleaseMutex( g_mutexDebug );
  73. }
  74. #endif
  75. }
  76. //
  77. // ExtractDependencies
  78. //
  79. // Retrieves all the additional required binaries from the resources and
  80. // places them to a temporary location. Then the binaries are mapped into
  81. // the address space of the calling process.
  82. //
  83. static BOOL ExtractDependencies( void )
  84. {
  85. return TRUE;
  86. }
  87. class CgContextWrapper
  88. {
  89. public:
  90. CGCcontext *m_cgc;
  91. CgContextWrapper()
  92. {
  93. CGCmem mem;
  94. mem.malloc = CgMalloc;
  95. mem.free = CgFree;
  96. m_cgc = sceCgcNewContext( &mem );
  97. }
  98. ~CgContextWrapper()
  99. {
  100. sceCgcDeleteContext( m_cgc );
  101. }
  102. operator CGCcontext * () { return m_cgc ; }
  103. };
  104. // DLL entry point: DllMain
  105. BOOL WINAPI DllMain(
  106. HINSTANCE hinstDLL,
  107. DWORD fdwReason,
  108. LPVOID lpvReserved
  109. )
  110. {
  111. /*UNUSED_ALWAYS*/( hinstDLL );
  112. /*UNUSED_ALWAYS*/( lpvReserved );
  113. switch ( fdwReason )
  114. {
  115. case DLL_PROCESS_ATTACH:
  116. {
  117. g_mutexDebug = CreateMutex( NULL, FALSE, "DxProxyPs3DebugLog" );
  118. }
  119. // Process is attaching - make sure it can find the dependencies
  120. return ExtractDependencies();
  121. case DLL_PROCESS_DETACH:
  122. if( g_mutexDebug )
  123. CloseHandle( g_mutexDebug );
  124. break;
  125. }
  126. return TRUE;
  127. }
  128. // Obtain DLL version
  129. #pragma comment(linker, "/EXPORT:GetDllVersionLong=?GetDllVersionLong@@YGPBDXZ")
  130. const char * WINAPI GetDllVersionLong( void )
  131. {
  132. #if defined( _DEBUG )
  133. return "{DX_PROXY for PS3_V00_PC DEBUG}";
  134. #else
  135. return "{DX_PROXY for PS3_V00_PC RELEASE}";
  136. #endif
  137. }
  138. #pragma comment(linker, "/EXPORT:GetDllVersion=?GetDllVersion@@YGPBDXZ")
  139. const char * WINAPI GetDllVersion( void )
  140. {
  141. #ifdef _DEBUG
  142. return "DXPRX_PS3_V00_d";
  143. #else
  144. return "DXPRX_PS3_V00_r";
  145. #endif
  146. }
  147. LPD3DXINCLUDE g_pInclude = NULL;
  148. uint g_nCgAllocated = 0;
  149. int CgcIncludeOpen( SCECGC_INCLUDE_TYPE type,
  150. const char* filename,
  151. char** data, size_t* size )
  152. {
  153. D3DXINCLUDE_TYPE typeD3d = D3D10_INCLUDE_LOCAL;
  154. if( type == SCECGC_SYSTEM_INCLUDE )
  155. typeD3d = D3D10_INCLUDE_SYSTEM;
  156. HRESULT hr = g_pInclude->Open( typeD3d, filename, NULL, (LPCVOID*)data, size );
  157. return ( S_OK == hr );
  158. }
  159. void* CgMalloc( void* arg, size_t size ) // Memory allocation callback
  160. {
  161. g_nCgAllocated += size;
  162. uint * pData = (uint*)malloc( size + sizeof( uint ) );
  163. *pData = size;
  164. //DebugLog("alloc %d->%p", size, pData+1);
  165. return pData + 1;
  166. }
  167. void CgFree( void* arg, void* ptr ) // Memory freeing callback
  168. {
  169. uint * pData = ( ( uint* ) ptr ) - 1;
  170. //if( *pData > 0x1000000 && IsDebuggerPresent() )
  171. // _asm{int 3 ;};
  172. //DebugLog("free %p->%u", ptr, *pData);
  173. g_nCgAllocated -= *pData;
  174. free( pData );
  175. }
  176. //
  177. // return values:
  178. // 1 - Include file successfully closed.
  179. //
  180. // 0 - Failure closing an include file.
  181. //
  182. int CgcIncludeClose( const char* data )
  183. {
  184. HRESULT hr = g_pInclude->Close( data );
  185. return ( S_OK == hr );
  186. }
  187. class BlobAdaptor: public ID3D10Blob
  188. {
  189. public:
  190. uint m_nRefCount;
  191. CGCbin *m_bin;
  192. char * m_pMemory;
  193. uint m_nSize;
  194. BlobAdaptor( ID3D10Blob * pLeft, ID3D10Blob * pRight )
  195. {
  196. m_bin = NULL;
  197. m_nRefCount = 1;
  198. m_nSize = pLeft->GetBufferSize( ) + pRight->GetBufferSize() ;
  199. m_pMemory = new char [m_nSize + 1];
  200. memcpy(m_pMemory, pLeft->GetBufferPointer(), pLeft->GetBufferSize( ));
  201. memcpy(m_pMemory + pLeft->GetBufferSize(), pRight->GetBufferPointer(), pRight->GetBufferSize( ) );
  202. m_pMemory[m_nSize] = '\0';
  203. }
  204. BlobAdaptor()
  205. {
  206. m_pMemory = NULL;
  207. m_nSize = 0;
  208. CGCmem mem;
  209. mem.malloc = CgMalloc;
  210. mem.free = CgFree;
  211. m_bin = sceCgcNewBin( &mem );
  212. m_nRefCount = 1;
  213. }
  214. ~BlobAdaptor()
  215. {
  216. if( m_bin )
  217. sceCgcDeleteBin( m_bin );
  218. if( m_pMemory )
  219. delete[]m_pMemory;
  220. }
  221. void Bake()
  222. {
  223. if( m_bin )
  224. {
  225. m_nSize = sceCgcGetBinSize( m_bin );
  226. m_pMemory = new char [m_nSize + 1];
  227. memcpy( m_pMemory, sceCgcGetBinData( m_bin ), m_nSize );
  228. m_pMemory[m_nSize] = '\0';
  229. sceCgcDeleteBin( m_bin );
  230. m_bin = NULL;
  231. }
  232. }
  233. STDMETHOD(QueryInterface)(THIS_ REFIID iid, __deref_out LPVOID *ppv)
  234. {
  235. if( iid == IID_IUnknown || iid == IID_ID3D10Blob )
  236. {
  237. AddRef();
  238. *ppv = this;
  239. return S_OK;
  240. }
  241. *ppv = NULL;
  242. return E_NOINTERFACE;
  243. }
  244. STDMETHOD_( ULONG, AddRef )(THIS)
  245. {
  246. return ++m_nRefCount;
  247. }
  248. STDMETHOD_( ULONG, Release )(THIS)
  249. {
  250. if( --m_nRefCount )
  251. return m_nRefCount;
  252. delete this;
  253. return 0;
  254. }
  255. // ID3DXBuffer
  256. STDMETHOD_(__out LPVOID, GetBufferPointer)(THIS)
  257. {
  258. if( m_bin )
  259. return sceCgcGetBinData( m_bin );
  260. else
  261. return m_pMemory;
  262. }
  263. STDMETHOD_(DWORD, GetBufferSize)(THIS)
  264. {
  265. if( m_bin )
  266. return sceCgcGetBinSize( m_bin );
  267. else
  268. return m_nSize;
  269. }
  270. };
  271. static inline bool operator< ( const SceSpMeasurementResult& target, const SceSpMeasurementResult& reference )
  272. {
  273. if ( target.nResult != SCESP_OK )
  274. return false;
  275. if ( target.nCycles < reference.nCycles )
  276. return true;
  277. else if ( target.nCycles == reference.nCycles )
  278. {
  279. if ( target.nRRegisters < reference.nRRegisters )
  280. return true;
  281. else
  282. return false;
  283. }
  284. else
  285. return false;
  286. }
  287. // Use the Win32 crypto API to create a 64-bit GUID. (This sucks, but it avoids creating dependencies against tier0/tier1 into a DLL that is not expected to have such dependencies.)
  288. static uint64 CreateGUID64()
  289. {
  290. uint64 nResult = 0;
  291. HCRYPTPROV hCryptProv;
  292. if ( CryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT | CRYPT_MACHINE_KEYSET ) )
  293. {
  294. CryptGenRandom( hCryptProv, sizeof( nResult ), (BYTE*)&nResult );
  295. CryptReleaseContext( hCryptProv, 0 );
  296. }
  297. return nResult;
  298. }
  299. static bool HashBuffer( const void *pBuf, uint nLen, uint64 &nHashLow, uint64 &nHashHigh )
  300. {
  301. bool bResult = false;
  302. nHashLow = 0;
  303. nHashHigh = 0;
  304. HCRYPTPROV hCryptProv;
  305. if ( CryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT | CRYPT_MACHINE_KEYSET ) )
  306. {
  307. HCRYPTHASH hHash;
  308. if ( CryptCreateHash( hCryptProv, CALG_MD5, 0, 0, &hHash ) )
  309. {
  310. if ( CryptHashData( hHash, static_cast< const BYTE * >( pBuf ), nLen, 0 ) )
  311. {
  312. BYTE bHash[16];
  313. DWORD dwHashLen = 16;
  314. if ( CryptGetHashParam( hHash, HP_HASHVAL, bHash, &dwHashLen, 0 ) )
  315. {
  316. nHashLow = *reinterpret_cast< uint64 * >( &bHash[0] );
  317. nHashHigh = *reinterpret_cast< uint64 * >( &bHash[8] );
  318. bResult = true;
  319. }
  320. }
  321. CryptDestroyHash( hHash );
  322. }
  323. CryptReleaseContext( hCryptProv, 0 );
  324. }
  325. return bResult;
  326. }
  327. static uint64 ComputeComboHash( LPCSTR pSrcFile, CONST D3DXMACRO *pDefines, LPCSTR pFunctionName )
  328. {
  329. std::vector< std::string > defines;
  330. CONST D3DXMACRO *pCurDefine = pDefines;
  331. while ( ( pCurDefine->Name ) && ( pCurDefine->Definition ) )
  332. {
  333. char buf[1024];
  334. sprintf_s( buf, sizeof( buf ), "%s=%s", pCurDefine->Name, pCurDefine->Definition );
  335. defines.push_back( std::string( buf ) );
  336. pCurDefine++;
  337. }
  338. std::sort( defines.begin(), defines.end() );
  339. std::vector< uint8 > shaderSigBuf;
  340. shaderSigBuf.reserve( 1024 );
  341. shaderSigBuf.insert( shaderSigBuf.end(), (uint8 *)pSrcFile, (uint8 *)pSrcFile + strlen( pSrcFile ) );
  342. shaderSigBuf.insert( shaderSigBuf.end(), (uint8 *)pFunctionName, (uint8 *)pFunctionName + strlen( pFunctionName ) );
  343. for ( uint i = 0; i < defines.size(); ++i )
  344. {
  345. const char *pDefineStr = defines[i].c_str();
  346. shaderSigBuf.insert( shaderSigBuf.end(), (uint8 *)pDefineStr, (uint8 *)pDefineStr + strlen( pDefineStr ) );
  347. }
  348. uint64 nHashLow = 0, nHashHigh = 0;
  349. if ( shaderSigBuf.size() )
  350. {
  351. HashBuffer( &shaderSigBuf[0], shaderSigBuf.size(), nHashLow, nHashHigh );
  352. }
  353. return nHashLow ^ nHashHigh;
  354. }
  355. static void WriteToCompileLogFile( const char *pMsg )
  356. {
  357. char szLogFilename[MAX_PATH];
  358. if ( !GetEnvironmentVariableA( "PS3COMPILELOG", szLogFilename, sizeof( szLogFilename ) ) )
  359. return;
  360. HANDLE hMutex = CreateMutex( NULL, FALSE, "PS3COMPILELOGMUTEX" );
  361. if ( ( hMutex == NULL ) || ( WaitForSingleObject( hMutex, 10000 ) != WAIT_OBJECT_0 ) )
  362. return;
  363. FILE *pFile = fopen( szLogFilename, "a+" );
  364. if ( !pFile )
  365. {
  366. ReleaseMutex( hMutex );
  367. return;
  368. }
  369. fputs( pMsg, pFile );
  370. fclose( pFile );
  371. ReleaseMutex( hMutex );
  372. }
  373. static void UpdateCompileLogFile(
  374. LPCSTR pSrcFile,
  375. uint64 nComboHash,
  376. const SceSpMeasurementResult &origStatistics,
  377. const SceSpMeasurementResult &bestStatistics,
  378. int nBestSchedule, uint nBestSeed,
  379. int nShaderSchedulerSourceIndex,
  380. int nDbgStatusIndex )
  381. {
  382. char szComputerName[512];
  383. DWORD nSize = sizeof( szComputerName );
  384. GetComputerNameA( szComputerName, &nSize );
  385. uint64 nGUID = CreateGUID64();
  386. char msg[1024];
  387. sprintf_s( msg, sizeof( msg ), "%s,%016I64X,\"%s\",%016I64X,%u,%u,%u,%u,%i,%i,%i,%i\n",
  388. szComputerName,
  389. nGUID,
  390. pSrcFile,
  391. nComboHash,
  392. origStatistics.nCycles, origStatistics.nRRegisters,
  393. bestStatistics.nCycles, bestStatistics.nRRegisters,
  394. nBestSchedule, nBestSeed,
  395. nShaderSchedulerSourceIndex,
  396. nDbgStatusIndex );
  397. WriteToCompileLogFile( msg );
  398. }
  399. class COptimalComboFile
  400. {
  401. public:
  402. COptimalComboFile() :
  403. g_bTriedToLoadOptimalCombos( false )
  404. {
  405. InitializeCriticalSection( &m_CS );
  406. }
  407. ~COptimalComboFile()
  408. {
  409. DeleteCriticalSection( &m_CS );
  410. }
  411. bool Load( const char *pFilename )
  412. {
  413. Lock();
  414. if ( g_OptimalCombos.empty() )
  415. {
  416. if ( g_bTriedToLoadOptimalCombos )
  417. {
  418. Unlock();
  419. return false;
  420. }
  421. g_bTriedToLoadOptimalCombos = true;
  422. FILE *pFile = fopen( pFilename, "rb" );
  423. if ( !pFile )
  424. {
  425. Unlock();
  426. return false;
  427. }
  428. fseek( pFile, 0, SEEK_END );
  429. const uint nFilesize = ftell( pFile );
  430. fseek( pFile, 0, SEEK_SET );
  431. g_OptimalCombos.resize( nFilesize );
  432. if ( fread( &g_OptimalCombos[0], nFilesize, 1, pFile) != 1 )
  433. {
  434. fclose( pFile );
  435. g_OptimalCombos.clear();
  436. Unlock();
  437. return false;
  438. }
  439. fclose( pFile );
  440. const OptimalComboScheduleFileHeader_t *pHeader = reinterpret_cast< const OptimalComboScheduleFileHeader_t * >( &g_OptimalCombos[0] );
  441. if ( ( pHeader->m_nID != OPTIMAL_COMBO_SCHEDULE_FILE_HEADER_ID ) || ( !pHeader->m_nNumCombos ) )
  442. {
  443. g_OptimalCombos.clear();
  444. Unlock();
  445. return false;
  446. }
  447. }
  448. Unlock();
  449. return true;
  450. }
  451. bool GetOptimalScheduleForCombo( uint64 nComboHash, int &nBestSchedule, int &nBestSeed, SceSpMeasurementResult &bestStatistics )
  452. {
  453. if ( g_OptimalCombos.empty() )
  454. return false;
  455. const OptimalComboScheduleFileHeader_t *pHeader = reinterpret_cast< const OptimalComboScheduleFileHeader_t * >( &g_OptimalCombos[0] );
  456. const OptimalComboScheduleFileRecord_t *pCombos = reinterpret_cast< const OptimalComboScheduleFileRecord_t * >( &g_OptimalCombos[sizeof( OptimalComboScheduleFileHeader_t )] );
  457. int low = 0;
  458. int high = pHeader->m_nNumCombos - 1;
  459. while ( low <= high )
  460. {
  461. const int mid = ( low + high ) >> 1;
  462. const OptimalComboScheduleFileRecord_t &combo = pCombos[mid];
  463. if ( nComboHash == combo.m_nComboHash )
  464. {
  465. if ( combo.m_nOptSchedule == OptimalComboScheduleFileRecord_t::cDefaultScheduleIndex )
  466. {
  467. nBestSchedule = -1;
  468. nBestSeed = 0;
  469. }
  470. else
  471. {
  472. nBestSchedule = combo.m_nOptSchedule;
  473. nBestSeed = combo.m_nOptSeed;
  474. }
  475. bestStatistics.nResult = SCESP_OK;
  476. bestStatistics.nCycles = combo.m_nOptCycles;
  477. bestStatistics.nRRegisters = 100; // bogus value - shouldn't matter
  478. bestStatistics.nThroughput = 1; // bogus value - shouldn't matter
  479. return true;
  480. }
  481. else if ( nComboHash < combo.m_nComboHash )
  482. {
  483. high = mid - 1;
  484. }
  485. else
  486. {
  487. low = mid + 1;
  488. }
  489. }
  490. return false;
  491. }
  492. private:
  493. void Lock() { EnterCriticalSection( &m_CS ); }
  494. void Unlock() { LeaveCriticalSection( &m_CS ); }
  495. CRITICAL_SECTION m_CS;
  496. std::vector< uint8 > g_OptimalCombos;
  497. bool g_bTriedToLoadOptimalCombos;
  498. };
  499. class CCompiledShader
  500. {
  501. // Purposely undefined.
  502. CCompiledShader( const CCompiledShader & );
  503. CCompiledShader& operator= ( const CCompiledShader & );
  504. public:
  505. CCompiledShader() :
  506. m_pShader( NULL ),
  507. m_pErrorMsgs( NULL ),
  508. m_last_hres( E_FAIL ),
  509. m_nSchedule( -1 ),
  510. m_nSeed( 0 ),
  511. m_nOptLevel( 1 )
  512. {
  513. memset( &m_Statistics, 0, sizeof( m_Statistics ) );
  514. m_Statistics.nResult = SCESP_ERROR_UNKNOWN;
  515. }
  516. ~CCompiledShader()
  517. {
  518. Clear();
  519. }
  520. void Clear()
  521. {
  522. if ( m_pShader )
  523. {
  524. m_pShader->Release();
  525. m_pShader = NULL;
  526. }
  527. if ( m_pErrorMsgs )
  528. {
  529. m_pErrorMsgs->Release();
  530. m_pErrorMsgs = NULL;
  531. }
  532. memset( &m_Statistics, 0, sizeof( m_Statistics ) );
  533. m_Statistics.nResult = SCESP_ERROR_UNKNOWN;
  534. m_last_hres = E_FAIL;
  535. m_nSchedule = -1;
  536. m_nSeed = 0;
  537. m_nOptLevel = 1;
  538. }
  539. LPD3DXBUFFER GetShader() { return m_pShader; }
  540. LPD3DXBUFFER GetErrorMsgs() { return m_pErrorMsgs; }
  541. LPD3DXBUFFER GetShaderAndReleaseOwnership() { LPD3DXBUFFER pShader = m_pShader; m_pShader = NULL; return pShader; }
  542. LPD3DXBUFFER GetErrorMsgsAndReleaseOwnership() { LPD3DXBUFFER pErrorMsgs = m_pErrorMsgs; m_pErrorMsgs = NULL; return pErrorMsgs; }
  543. const SceSpMeasurementResult &GetStatistics() const { return m_Statistics; }
  544. HRESULT GetLastHRESULT() const { return m_last_hres; }
  545. int GetSchedule() const { return m_nSchedule; }
  546. int GetSeed() const { return m_nSeed; }
  547. int GetOptLevel() const { return m_nOptLevel; }
  548. // Proxied routines
  549. HRESULT Compile( LPCSTR pSrcFile,
  550. CONST D3DXMACRO* pDefines,
  551. LPD3DXINCLUDE pInclude,
  552. LPCSTR pFunctionName,
  553. LPCSTR pProfile,
  554. DWORD Flags,
  555. int nRandSched = -1,
  556. int nRandSeed = -1,
  557. int nOptLevel = 1,
  558. int *pDbgStatusIndex = NULL )
  559. {
  560. Clear();
  561. m_nSchedule = nRandSched;
  562. m_nSeed = nRandSeed;
  563. m_nOptLevel = nOptLevel;
  564. LPD3DXBUFFER *ppShader = &m_pShader;
  565. LPD3DXBUFFER *ppErrorMsgs = &m_pErrorMsgs;
  566. bool bFragmentShader = false;
  567. const char * pRsxProfile = pProfile;
  568. if ( *pProfile == 'v' ) // guessing it's a vertex shader profile
  569. {
  570. pRsxProfile = "sce_vp_rsx";
  571. }
  572. else if ( *pProfile == 'p' ) // guessing it's a pixel shader profile
  573. {
  574. pRsxProfile = "sce_fp_rsx";
  575. bFragmentShader = true;
  576. }
  577. if ( !pInclude )
  578. pInclude = &s_incDxImpl;
  579. // Open the top-level file via our include interface
  580. LPCVOID lpcvData;
  581. UINT numBytes;
  582. HRESULT hr = pInclude->Open( ( D3DXINCLUDE_TYPE ) 0, pSrcFile, NULL, &lpcvData, &numBytes );
  583. if ( FAILED( hr ) )
  584. {
  585. m_last_hres = hr;
  586. return hr;
  587. }
  588. LPCSTR pShaderData = ( LPCSTR ) lpcvData;
  589. g_pInclude = pInclude;
  590. CGCinclude incWrap;
  591. incWrap.close = CgcIncludeClose;
  592. incWrap.open = CgcIncludeOpen;
  593. std::vector<std::string> options;
  594. if ( pDefines )
  595. {
  596. for ( const D3DXMACRO * pMacro = pDefines; pMacro->Name; pMacro++ )
  597. {
  598. std::string strOpt = "-D";
  599. strOpt += pMacro->Name;
  600. if( pMacro->Definition && *pMacro->Definition )
  601. {
  602. if ( !strncmp( pMacro->Name, "PS3REGCOUNT", 11 ) )
  603. {
  604. options.push_back( "-regcount" );
  605. options.push_back( pMacro->Name + 11 );
  606. continue;
  607. }
  608. // Common case:
  609. strOpt += "=";
  610. strOpt += pMacro->Definition;
  611. }
  612. options.push_back( strOpt );
  613. }
  614. }
  615. char buf[512];
  616. if ( ( bFragmentShader ) && ( nRandSched >= 1 ) )
  617. {
  618. options.push_back( "-po" );
  619. sprintf( buf, "randomSched=%i", nRandSched );
  620. options.push_back( std::string( buf ) );
  621. options.push_back( "-po" );
  622. sprintf( buf, "randomSeed=%i", nRandSeed );
  623. options.push_back( std::string( buf ) );
  624. }
  625. options.push_back( "-inline" );
  626. options.push_back( "all" );
  627. options.push_back( "-fastmath" );
  628. sprintf( buf, "-O%i", nOptLevel );
  629. options.push_back( buf );
  630. const char ** ppOptions = (const char**)stackalloc( sizeof(char*) * ( options.size() + 1 ) );
  631. for( uint i = 0; i < options.size(); ++i )
  632. ppOptions[i] = options[i].c_str();
  633. ppOptions[options.size()] = NULL;
  634. DebugLog("%s:%s/%s", pSrcFile, pProfile, pRsxProfile );
  635. CgContextWrapper cgcc;
  636. BlobAdaptor *pCompiledShader = new BlobAdaptor(), *pMessages = new BlobAdaptor(), *asciiOutput = new BlobAdaptor();
  637. int status = sceCgcCompileString( cgcc, pShaderData, pRsxProfile, pFunctionName, ppOptions, pCompiledShader->m_bin, pMessages->m_bin, asciiOutput->m_bin, &incWrap );
  638. if ( ( !status ) && ( pCompiledShader ) && ( pCompiledShader->m_bin ) )
  639. {
  640. const char* optStr[] = { NULL };
  641. char *pBinData = static_cast< char * >( sceCgcGetBinData( pCompiledShader->m_bin ) );
  642. int nBinSize = sceCgcGetBinSize( pCompiledShader->m_bin );
  643. SceSpResult res = sceShaderPerfMeasure( pBinData, nBinSize, optStr, &m_Statistics );
  644. if ( res != SCESP_OK )
  645. {
  646. DebugLog( "sceShaderPerfMeasure failed with status %i", res );
  647. if ( pDbgStatusIndex )
  648. {
  649. *pDbgStatusIndex = -1;
  650. }
  651. }
  652. }
  653. pCompiledShader->Bake();
  654. *ppShader = pCompiledShader;
  655. *ppErrorMsgs = new BlobAdaptor( pMessages, asciiOutput );
  656. #ifdef _DEBUG
  657. if( status )
  658. DebugLog( "Error %d:\n%s\n%s", status, pMessages->GetBufferPointer(), asciiOutput->GetBufferPointer() );
  659. else
  660. DebugLog( "Success %d bytes", pCompiledShader->GetBufferSize() );
  661. #endif
  662. pMessages->Release();
  663. asciiOutput->Release();
  664. hr = ( status == SCECGC_OK ? S_OK : 0x80000005 );
  665. // Close the file
  666. pInclude->Close( lpcvData );
  667. m_last_hres = hr;
  668. return hr;
  669. }
  670. CCompiledShader &TakeOwnership( CCompiledShader &src )
  671. {
  672. if ( this == &src )
  673. return *this;
  674. Clear();
  675. m_last_hres = src.m_last_hres;
  676. m_pShader = src.m_pShader;
  677. src.m_pShader = NULL;
  678. m_pErrorMsgs = src.m_pErrorMsgs;
  679. src.m_pErrorMsgs = NULL;
  680. m_Statistics = src.m_Statistics;
  681. m_nSchedule = src.m_nSchedule;
  682. m_nSeed = src.m_nSeed;
  683. m_nOptLevel = src.m_nOptLevel;
  684. return *this;
  685. }
  686. private:
  687. HRESULT m_last_hres;
  688. LPD3DXBUFFER m_pShader;
  689. LPD3DXBUFFER m_pErrorMsgs;
  690. SceSpMeasurementResult m_Statistics;
  691. int m_nSchedule;
  692. int m_nSeed;
  693. int m_nOptLevel;
  694. };
  695. COptimalComboFile g_OptimalComboFile;
  696. // Proxied routines
  697. #pragma comment(linker, "/EXPORT:Proxy_D3DXCompileShaderFromFile=?Proxy_D3DXCompileShaderFromFile@@YGJPBDPBU_D3D_SHADER_MACRO@@PAUID3DInclude@@00KPAPAUID3D10Blob@@3PAPAX@Z")
  698. HRESULT WINAPI
  699. Proxy_D3DXCompileShaderFromFile(LPCSTR pSrcFile,
  700. CONST D3DXMACRO* pDefines,
  701. LPD3DXINCLUDE pInclude,
  702. LPCSTR pFunctionName,
  703. LPCSTR pProfile,
  704. DWORD Flags,
  705. LPD3DXBUFFER* ppShader,
  706. LPD3DXBUFFER* ppErrorMsgs,
  707. LPD3DXCONSTANTTABLE* ppConstantTable )
  708. {
  709. *ppShader = NULL;
  710. *ppErrorMsgs = NULL;
  711. if ( ppConstantTable ) *ppConstantTable = NULL;
  712. static bool bInitializedShaderPerfLib;
  713. if ( !bInitializedShaderPerfLib )
  714. {
  715. bInitializedShaderPerfLib = true;
  716. sceShaderPerfInit();
  717. }
  718. if ( *pProfile == 'v' )
  719. {
  720. CCompiledShader compiledShader;
  721. HRESULT hres = compiledShader.Compile( pSrcFile, pDefines, pInclude, pFunctionName, pProfile, Flags, -1, -1, 1 );
  722. if ( FAILED( hres ) )
  723. {
  724. *ppErrorMsgs = compiledShader.GetErrorMsgsAndReleaseOwnership();
  725. return hres;
  726. }
  727. *ppShader = compiledShader.GetShaderAndReleaseOwnership();
  728. return S_OK;
  729. }
  730. const uint nStartTime = GetTickCount();
  731. const uint64 nComboHash = ComputeComboHash( pSrcFile, pDefines, pFunctionName );
  732. char szOptimalScheduleFile[MAX_PATH];
  733. const bool bUseOptimalSchedulingFile = GetEnvironmentVariableA( "PS3OPTIMALSCHEDULESFILE", szOptimalScheduleFile, sizeof( szOptimalScheduleFile ) ) && szOptimalScheduleFile[0];
  734. char szFindOptimalSchedulesValue[MAX_PATH];
  735. const bool bFindOptimalScheduling = !bUseOptimalSchedulingFile && ( GetEnvironmentVariableA( "PS3FINDOPTIMALSCHEDULES", szFindOptimalSchedulesValue, sizeof( szFindOptimalSchedulesValue ) ) && ( szFindOptimalSchedulesValue[0] == '1' ) );
  736. ShaderSchedulerParamSource_t nShaderSchedulerSourceIndex = SHADER_SCHEDULER_PARAM_SOURCE_UNOPTIMIZED;
  737. SceSpMeasurementResult trainedScheduleResults;
  738. memset( &trainedScheduleResults, 0, sizeof( trainedScheduleResults ) );
  739. int nTrainedSchedule = -1;
  740. int nTrainedSeed = 0;
  741. int nDbgStatusIndex = 0;
  742. if ( ( bUseOptimalSchedulingFile ) && ( g_OptimalComboFile.Load( szOptimalScheduleFile ) ) )
  743. {
  744. if ( g_OptimalComboFile.GetOptimalScheduleForCombo( nComboHash, nTrainedSchedule, nTrainedSeed, trainedScheduleResults ) )
  745. {
  746. nShaderSchedulerSourceIndex = SHADER_SCHEDULER_PARAM_SOURCE_FROM_SCHEDULER_FILE;
  747. nDbgStatusIndex = 1;
  748. }
  749. }
  750. uint nTotalCompiles = 0;
  751. CCompiledShader defaultShader;
  752. nTotalCompiles++;
  753. HRESULT hres = defaultShader.Compile( pSrcFile, pDefines, pInclude, pFunctionName, pProfile, Flags, nTrainedSchedule, nTrainedSeed, CGC_COMPILER_OPTIMIZATION_LEVEL, &nDbgStatusIndex );
  754. if ( FAILED( hres ) )
  755. {
  756. *ppErrorMsgs = defaultShader.GetErrorMsgsAndReleaseOwnership();
  757. return hres;
  758. }
  759. CCompiledShader bestShader;
  760. bestShader.TakeOwnership( defaultShader );
  761. if ( ( nShaderSchedulerSourceIndex == SHADER_SCHEDULER_PARAM_SOURCE_FROM_SCHEDULER_FILE ) && ( defaultShader.GetStatistics().nCycles > trainedScheduleResults.nCycles ) )
  762. {
  763. // The optimal schedule params stored in the ps3optimalschedules.bin file didn't produce the expected results (the shader was modified since the
  764. // schedules where optimized), so try falling back to the compiler's default scheduling. (Which may not be any better, but at least we'll never get worse than the default schedule.)
  765. nTotalCompiles++;
  766. CCompiledShader alternateShader;
  767. HRESULT hres = alternateShader.Compile( pSrcFile, pDefines, pInclude, pFunctionName, pProfile, Flags, -1, 0, CGC_COMPILER_OPTIMIZATION_LEVEL, &nDbgStatusIndex );
  768. if ( FAILED( hres ) )
  769. {
  770. *ppErrorMsgs = alternateShader.GetErrorMsgsAndReleaseOwnership();
  771. return hres;
  772. }
  773. if ( alternateShader.GetStatistics() < bestShader.GetStatistics() )
  774. {
  775. nShaderSchedulerSourceIndex = SHADER_SCHEDULER_PARAM_SOURCE_UNOPTIMIZED_FALLBACK;
  776. bestShader.TakeOwnership( alternateShader );
  777. nDbgStatusIndex = 2;
  778. }
  779. }
  780. SceSpMeasurementResult origStatistics( bestShader.GetStatistics() );
  781. // Don't bother trying to optimize tiny shaders, the potential gain is not worth it (and they're probably fill bound anyway).
  782. if ( ( bFindOptimalScheduling ) && ( ( bestShader.GetStatistics().nCycles > 5 ) || ( bestShader.GetStatistics().nRRegisters > 2 ) ) )
  783. {
  784. // Important: Watch the ranges of rand_schedule and rand_seed. See COMBO_SEED_BITS and COMBO_SCHEDULE_BITS.
  785. for ( int nRandSchedIndex = 0; nRandSchedIndex < NUM_RANDOM_SCHEDULE_VALUES; ++nRandSchedIndex )
  786. {
  787. const int nRandSched = g_nRandSched[nRandSchedIndex];
  788. for ( int nTrial = 0; nTrial < NUM_RANDOM_SCHEDULE_SEEDS; ++nTrial )
  789. {
  790. const int nRandSeed = 10 + nTrial * 8;
  791. nTotalCompiles++;
  792. CCompiledShader trialShader;
  793. HRESULT hres = trialShader.Compile( pSrcFile, pDefines, pInclude, pFunctionName, pProfile, Flags, nRandSched, nRandSeed, CGC_COMPILER_OPTIMIZATION_LEVEL, &nDbgStatusIndex );
  794. if ( FAILED( hres ) )
  795. {
  796. *ppErrorMsgs = trialShader.GetErrorMsgsAndReleaseOwnership();
  797. return hres;
  798. }
  799. if ( trialShader.GetStatistics() < bestShader.GetStatistics() )
  800. {
  801. bestShader.TakeOwnership( trialShader );
  802. nShaderSchedulerSourceIndex = SHADER_SCHEDULER_PARAM_SOURCE_FOUND_OPTIMAL;
  803. nDbgStatusIndex = 3;
  804. }
  805. }
  806. }
  807. }
  808. *ppShader = bestShader.GetShaderAndReleaseOwnership();
  809. const uint nEndTime = GetTickCount();
  810. double flTotalTime = ( nEndTime - nStartTime ) * .001f;
  811. flTotalTime;
  812. UpdateCompileLogFile( pSrcFile, nComboHash, origStatistics, bestShader.GetStatistics(), bestShader.GetSchedule(), bestShader.GetSeed(), nShaderSchedulerSourceIndex, nDbgStatusIndex );
  813. #if 0
  814. printf( "Orig cycles/registers: %u (%u), Optimized cycles/registers: %u (%u), Total compiles: %u, ms per compile: %f\n",
  815. origStatistics.nCycles, origStatistics.nRRegisters,
  816. bestShader.GetStatistics().nCycles, bestShader.GetStatistics().nRRegisters,
  817. nTotalCompiles,
  818. 1000.0f * ( flTotalTime / nTotalCompiles ) );
  819. #endif
  820. return S_OK;
  821. }