Team Fortress 2 Source Code as on 22/4/2020
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

3838 lines
114 KiB

  1. //========= Copyright Valve Corporation, All rights reserved. ============//
  2. // TOGL CODE LICENSE
  3. //
  4. // Copyright 2011-2014 Valve Corporation
  5. // All Rights Reserved.
  6. //
  7. // Permission is hereby granted, free of charge, to any person obtaining a copy
  8. // of this software and associated documentation files (the "Software"), to deal
  9. // in the Software without restriction, including without limitation the rights
  10. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11. // copies of the Software, and to permit persons to whom the Software is
  12. // furnished to do so, subject to the following conditions:
  13. //
  14. // The above copyright notice and this permission notice shall be included in
  15. // all copies or substantial portions of the Software.
  16. //
  17. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23. // THE SOFTWARE.
  24. //------------------------------------------------------------------------------
  25. // DX9AsmToGL2.cpp
  26. //------------------------------------------------------------------------------
  27. // Immediately include gl.h, etc. here to avoid compilation warnings.
  28. #include <GL/gl.h>
  29. #include <GL/glext.h>
  30. #include "togl/rendermechanism.h"
  31. #include "tier0/dbg.h"
  32. #include "tier1/strtools.h"
  33. #include "tier1/utlbuffer.h"
  34. #include "dx9asmtogl2.h"
  35. #include "materialsystem/IShader.h"
  36. // memdbgon must be the last include file in a .cpp file!!!
  37. #include "tier0/memdbgon.h"
  // POSIX has no strcat_s; route it to V_strcat. Note the last two arguments are swapped:
  // strcat_s( dest, size, src ) becomes V_strcat( dest, src, size ).
  38. #ifdef POSIX
  39. #define strcat_s( a, b, c) V_strcat( a, c, b )
  40. #endif
  // Register-role selectors (presumably the values passed as dwSourceOrDest to GetParameterString - confirm against callers).
  41. #define DST_REGISTER 0
  42. #define SRC_REGISTER 1
  43. // Flags to PrintUsageAndIndexToString.
  44. #define SEMANTIC_OUTPUT 0x01
  45. #define SEMANTIC_INPUT 0x02
  // Sentinel values; both share the same bit pattern. NOTE(review): their users are outside this part of the file.
  46. #define UNDECLARED_OUTPUT 0xFFFFFFFF
  47. #define UNDECLARED_INPUT 0xFFFFFFFF
  // On non-POSIX builds Debugger() is just a failed assertion.
  48. #ifndef POSIX
  49. #define Debugger() Assert(0)
  50. #endif
  51. //#define Assert(n) if( !(n) ){ TranslationError(); }
  // GLSL literals for all-zero / all-one values. Slot 0 is NULL; slots 1..4 hold the float, vec2, vec3 and vec4 forms.
  52. static char *g_szVecZeros[] = { NULL, "0.0", "vec2( 0.0, 0.0 )", "vec3( 0.0, 0.0, 0.0 )", "vec4( 0.0, 0.0, 0.0, 0.0 )" };
  53. static char *g_szVecOnes[] = { NULL, "1.0", "vec2( 1.0, 1.0 )", "vec3( 1.0, 1.0, 1.0 )", "vec4( 1.0, 1.0, 1.0, 1.0 )" };
  // Identity swizzle; GetSwizzleComponent indexes into it when a parameter carries no swizzle of its own.
  54. static char *g_szDefaultSwizzle = "xyzw";
  // The same four components as individual strings.
  55. static char *g_szDefaultSwizzleStrings[] = { "x", "y", "z", "w" };
  // Sampler dimensionality names. NOTE(review): the indexing scheme is not visible in this part of the file.
  56. static char *g_szSamplerStrings[] = { "2D", "CUBE", "3D" };
  // Fixed identifier strings. g_pTangentAttributeName is what PrintUsageAndIndexToString emits for D3DDECLUSAGE_TANGENT.
  57. static const char *g_pAtomicTempVarName = "atomic_temp_var";
  58. static const char *g_pTangentAttributeName = "g_tangent";
  59. int __cdecl SortInts( const int *a, const int *b )
  60. {
  61. if ( *a < *b )
  62. return -1;
  63. else if ( *a > *b )
  64. return 1;
  65. else
  66. return 0;
  67. }
  68. void StripExtraTrailingZeros( char *pStr )
  69. {
  70. int len = (int)V_strlen( pStr );
  71. while ( len >= 2 && pStr[len-1] == '0' && pStr[len-2] != '.' )
  72. {
  73. pStr[len-1] = 0;
  74. --len;
  75. }
  76. }
  77. void D3DToGL::PrintToBufWithIndents( CUtlBuffer &buf, const char *pFormat, ... )
  78. {
  79. va_list marker;
  80. va_start( marker, pFormat );
  81. char szTemp[1024];
  82. V_vsnprintf( szTemp, sizeof( szTemp ), pFormat, marker );
  83. va_end( marker );
  84. PrintIndentation( (char*)buf.Base(), buf.Size() );
  85. strcat_s( (char*)buf.Base(), buf.Size(), szTemp );
  86. }
  87. void PrintToBuf( CUtlBuffer &buf, const char *pFormat, ... )
  88. {
  89. va_list marker;
  90. va_start( marker, pFormat );
  91. char szTemp[1024];
  92. V_vsnprintf( szTemp, sizeof( szTemp ), pFormat, marker );
  93. va_end( marker );
  94. strcat_s( (char*)buf.Base(), buf.Size(), szTemp );
  95. }
  96. void PrintToBuf( char *pOut, int nOutSize, const char *pFormat, ... )
  97. {
  98. int nStrlen = V_strlen( pOut );
  99. pOut += nStrlen;
  100. nOutSize -= nStrlen;
  101. va_list marker;
  102. va_start( marker, pFormat );
  103. V_vsnprintf( pOut, nOutSize, pFormat, marker );
  104. va_end( marker );
  105. }
  106. // Return the number of letters following the dot.
  107. // Returns 4 if there is no dot.
  108. // (So "r0.xy" returns 2 and "r0" returns 4).
  109. int GetNumWriteMaskEntries( const char *pParam )
  110. {
  111. const char *pDot = strchr( pParam, '.' );
  112. if ( pDot )
  113. return V_strlen( pDot + 1 );
  114. else
  115. return 4;
  116. }
  117. const char* GetSwizzleDot( const char *pParam )
  118. {
  119. const char *pDot = strrchr( pParam, '.' );
  120. const char *pSquareClose = strrchr( pParam, ']' );
  121. if ( pSquareClose )
  122. {
  123. // The test against ']' catches cases like, so we point to the last dot vc[int(va_r.x) + 29].x
  124. if ( pDot && ( pSquareClose < pDot ) )
  125. return pDot;
  126. else
  127. return NULL;
  128. }
  129. // Make sure the next character is a valid swizzle since we want to treat strings like vec4( gl_Normal, 0.0 ) as a whole param name.
  130. if ( pDot && ( ( *(pDot+1) == 'x' ) || ( *(pDot+1) == 'y' ) || ( *(pDot+1) == 'z' ) || ( *(pDot+1) == 'w' ) ||
  131. ( *(pDot+1) == 'r' ) || ( *(pDot+1) == 'g' ) || ( *(pDot+1) == 'b' ) || ( *(pDot+1) == 'z' ) ) )
  132. {
  133. return pDot;
  134. }
  135. return NULL;
  136. }
  137. int GetNumSwizzleComponents( const char *pParam )
  138. {
  139. // Special scalar output which won't accept a swizzle
  140. if ( !V_stricmp( pParam, "gl_FogFragCoord" ) )
  141. return 1;
  142. // Special scalar output which won't accept a swizzle
  143. if ( !V_stricmp( pParam, "gl_FragDepth" ) )
  144. return 1;
  145. // Special scalar output which won't accept a swizzle
  146. if ( !V_stricmp( pParam, "a0" ) )
  147. return 1;
  148. const char *pDot = GetSwizzleDot( pParam );
  149. if ( pDot )
  150. {
  151. pDot++; // Step over the dot
  152. int nNumSwizzleComponents = 0;
  153. while ( ( *pDot == 'x' ) || ( *pDot == 'y' ) || ( *pDot == 'z' ) || ( *pDot == 'w' ) ||
  154. ( *pDot == 'r' ) || ( *pDot == 'g' ) || ( *pDot == 'b' ) || ( *pDot == 'z' ) )
  155. {
  156. nNumSwizzleComponents++;
  157. pDot++;
  158. }
  159. return nNumSwizzleComponents;
  160. }
  161. return 0;
  162. }
  163. char GetSwizzleComponent( const char *pParam, int n )
  164. {
  165. Assert( n < 4 );
  166. const char *pDot = GetSwizzleDot( pParam );
  167. if ( pDot )
  168. {
  169. ++pDot;
  170. int nComponents = (int)V_strlen( pDot );
  171. Assert( nComponents > 0 );
  172. if ( n < nComponents )
  173. return pDot[n];
  174. else
  175. return pDot[nComponents-1];
  176. }
  177. return g_szDefaultSwizzle[n];
  178. }
  179. // Replace the parameter name and leave the swizzle intact.
  180. // So "somevar.xyz" becomes "othervar.xyz".
  181. void ReplaceParamName( const char *pSrc, const char *pNewParamName, char *pOut, int nOutLen )
  182. {
  183. // Start with the new parameter name.
  184. V_strncpy( pOut, pNewParamName, nOutLen );
  185. // Now add the swizzle if necessary.
  186. const char *pDot = GetSwizzleDot( pSrc );
  187. if ( pDot )
  188. {
  189. V_strncat( pOut, pDot, nOutLen );
  190. }
  191. }
  192. void GetParamNameWithoutSwizzle( const char *pParam, char *pOut, int nOutLen )
  193. {
  194. char *pParamStart = (char *) pParam;
  195. const char *pDot = GetSwizzleDot( pParam ); // dot followed by valid swizzle characters
  196. bool bAbsWrapper = false;
  197. // Check for abs() or -abs() wrapper and strip it off during the fixup
  198. if ( !V_strncmp( pParam, "abs(", 4 ) || !V_strncmp( pParam, "-abs(", 5 ) )
  199. {
  200. const char *pOpenParen = strchr( pParam, '(' ); // FIRST opening paren
  201. const char *pClosingParen = strrchr( pParam, ')' ); // LAST closing paren
  202. Assert ( pOpenParen && pClosingParen );
  203. pClosingParen; // hush compiler
  204. pParamStart = (char *) pOpenParen;
  205. pParamStart++;
  206. bAbsWrapper = true;
  207. }
  208. if ( pDot )
  209. {
  210. int nToCopy = MIN( nOutLen-1, pDot - pParamStart );
  211. memcpy( pOut, pParamStart, nToCopy );
  212. pOut[nToCopy] = 0;
  213. }
  214. else
  215. {
  216. V_strncpy( pOut, pParamStart, bAbsWrapper ? nOutLen - 1 : nOutLen );
  217. }
  218. }
  219. bool DoParamNamesMatch( const char *pParam1, const char *pParam2 )
  220. {
  221. char szTemp[2][256];
  222. GetParamNameWithoutSwizzle( pParam1, szTemp[0], sizeof( szTemp[0] ) );
  223. GetParamNameWithoutSwizzle( pParam2, szTemp[1], sizeof( szTemp[1] ) );
  224. return ( V_stricmp( szTemp[0], szTemp[1] ) == 0 );
  225. }
  226. // Extract the n'th component of the swizzle mask.
  227. // If n would exceed the length of the swizzle mask, then it looks up into "xyzw".
  228. void WriteParamWithSingleMaskEntry( const char *pParam, int n, char *pOut, int nOutLen )
  229. {
  230. bool bCloseParen = false;
  231. if ( !V_strncmp( pParam, "-abs(", 5 ) )
  232. {
  233. V_strcpy( pOut, "-abs(" );
  234. bCloseParen = true;
  235. pOut += 5; nOutLen -= 5;
  236. }
  237. else if ( !V_strncmp( pParam, "abs(", 4 ) )
  238. {
  239. V_strcpy( pOut, "abs(" );
  240. bCloseParen = true;
  241. pOut += 4; nOutLen -= 4;
  242. }
  243. GetParamNameWithoutSwizzle( pParam, pOut, nOutLen );
  244. PrintToBuf( pOut, nOutLen, "." );
  245. PrintToBuf( pOut, nOutLen, "%c", GetSwizzleComponent( pParam, n ) );
  246. if ( bCloseParen )
  247. {
  248. PrintToBuf( pOut, nOutLen, ")" );
  249. }
  250. }
  251. float uint32ToFloat( uint32 dw )
  252. {
  253. return *((float*)&dw);
  254. }
  255. CUtlString EnsureNumSwizzleComponents( const char *pSrcRegisterName, int nComponents )
  256. {
  257. int nExisting = GetNumSwizzleComponents( pSrcRegisterName );
  258. if ( nExisting == nComponents )
  259. return pSrcRegisterName;
  260. bool bAbsWrapper = false; // Parameter wrapped in an abs()
  261. bool bAbsNegative = false; // -abs()
  262. char szSrcRegister[128];
  263. V_strncpy( szSrcRegister, pSrcRegisterName, sizeof(szSrcRegister) );
  264. // Check for abs() or -abs() wrapper and strip it off during the fixup
  265. if ( !V_strncmp( pSrcRegisterName, "abs(", 4 ) || !V_strncmp( pSrcRegisterName, "-abs(", 5 ) )
  266. {
  267. bAbsWrapper = true;
  268. bAbsNegative = pSrcRegisterName[0] == '-';
  269. const char *pOpenParen = strchr( pSrcRegisterName, '(' ); // FIRST opening paren
  270. const char *pClosingParen = strrchr( pSrcRegisterName, ')' ); // LAST closing paren
  271. Assert ( pOpenParen && pClosingParen ); // If we start with abs( and don't get both parens, something is very wrong
  272. // Copy out just the register name with no abs()
  273. int nRegNameLength = pClosingParen - pOpenParen - 1;
  274. V_strncpy( szSrcRegister, pOpenParen+1, nRegNameLength + 1 ); // Kind of a weird function...copy more than you need and slam the last char to NULL-terminate
  275. }
  276. char szReg[256];
  277. GetParamNameWithoutSwizzle( szSrcRegister, szReg, sizeof( szReg ) );
  278. if ( nComponents == 0 )
  279. return szReg;
  280. PrintToBuf( szReg, sizeof( szReg ), "." );
  281. if ( nExisting > nComponents )
  282. {
  283. // DX ASM will sometimes have statements like "NRM r0.xyz, r1.yzww", where it just doesn't use the last part of r1. So we won't either.
  284. for ( int i=0; i < nComponents; i++ )
  285. {
  286. PrintToBuf( szReg, sizeof( szReg ), "%c", GetSwizzleComponent( szSrcRegister, i ) );
  287. }
  288. }
  289. else
  290. {
  291. if ( nExisting == 0 )
  292. {
  293. // We've got something like r0 and need N more components, so add as much of "xyzw" is needed.
  294. for ( int i=0; i < nComponents; i++ )
  295. PrintToBuf( szReg, sizeof( szReg ), "%c", g_szDefaultSwizzle[i] );
  296. }
  297. else
  298. {
  299. // We've got something like r0.x and need N more components, so replicate the X so it looks like r0.xxx
  300. V_strncpy( szReg, szSrcRegister, sizeof( szReg ) );
  301. char cLast = szSrcRegister[ V_strlen( szSrcRegister ) - 1 ];
  302. for ( int i=nExisting; i < nComponents; i++ )
  303. {
  304. PrintToBuf( szReg, sizeof( szReg ), "%c", cLast );
  305. }
  306. }
  307. }
  308. if ( bAbsWrapper )
  309. {
  310. char szTemp[128];
  311. V_strncpy( szTemp, szReg, sizeof(szTemp) );
  312. V_snprintf( szReg, sizeof( szReg ), "%sabs(%s)", bAbsNegative ? "-" : "", szTemp ) ;
  313. }
  314. return szReg;
  315. }
  316. static void TranslationError()
  317. {
  318. GLMDebugPrintf( "D3DToGL: GLSL translation error!\n" );
  319. DebuggerBreakIfDebugging();
  320. Error( "D3DToGL: GLSL translation error!\n" );
  321. }
  322. D3DToGL::D3DToGL()
  323. {
  324. }
  325. uint32 D3DToGL::GetNextToken( void )
  326. {
  327. uint32 dwToken = *m_pdwNextToken;
  328. m_pdwNextToken++;
  329. return dwToken;
  330. }
  331. void D3DToGL::SkipTokens( uint32 numToSkip )
  332. {
  333. m_pdwNextToken += numToSkip;
  334. }
  335. uint32 D3DToGL::Opcode( uint32 dwToken )
  336. {
  337. return ( dwToken & D3DSI_OPCODE_MASK );
  338. }
  339. uint32 D3DToGL::OpcodeSpecificData (uint32 dwToken)
  340. {
  341. return ( ( dwToken & D3DSP_OPCODESPECIFICCONTROL_MASK ) >> D3DSP_OPCODESPECIFICCONTROL_SHIFT );
  342. }
  343. uint32 D3DToGL::TextureType ( uint32 dwToken )
  344. {
  345. return ( dwToken & D3DSP_TEXTURETYPE_MASK ); // Note this one doesn't shift due to weird D3DSAMPLER_TEXTURE_TYPE enum
  346. }
  347. // Print GLSL intrinsic corresponding to particular instruction
  348. bool D3DToGL::OpenIntrinsic( uint32 inst, char* buff, int nBufLen, uint32 destDimension, uint32 nArgumentDimension )
  349. {
  350. // Some GLSL intrinsics need type conversion, which we do in this routine
  351. // As a result, the caller must sometimes close both parentheses, not just one
  352. bool bDoubleClose = false;
  353. if ( nArgumentDimension == 0 )
  354. {
  355. nArgumentDimension = 4;
  356. }
  357. switch ( inst )
  358. {
  359. case D3DSIO_RSQ:
  360. V_snprintf( buff, nBufLen, "inversesqrt( " );
  361. break;
  362. case D3DSIO_DP3:
  363. case D3DSIO_DP4:
  364. if ( destDimension == 1 )
  365. {
  366. V_snprintf( buff, nBufLen, "dot( " );
  367. }
  368. else
  369. {
  370. if ( !destDimension )
  371. destDimension = 4;
  372. V_snprintf( buff, nBufLen, "vec%d( dot( ", destDimension );
  373. bDoubleClose = true;
  374. }
  375. break;
  376. case D3DSIO_MIN:
  377. V_snprintf( buff, nBufLen, "min( " );
  378. break;
  379. case D3DSIO_MAX:
  380. V_snprintf( buff, nBufLen, "max( " );
  381. break;
  382. case D3DSIO_SLT:
  383. if ( nArgumentDimension == 1 )
  384. {
  385. V_snprintf( buff, nBufLen, "float( " ); // lessThan doesn't have a scalar version
  386. }
  387. else
  388. {
  389. Assert( nArgumentDimension > 1 );
  390. V_snprintf( buff, nBufLen, "vec%d( lessThan( ", nArgumentDimension );
  391. bDoubleClose = true;
  392. }
  393. break;
  394. case D3DSIO_SGE:
  395. if ( nArgumentDimension == 1 )
  396. {
  397. V_snprintf( buff, nBufLen, "float( " ); // greaterThanEqual doesn't have a scalar version
  398. }
  399. else
  400. {
  401. Assert( nArgumentDimension > 1 );
  402. V_snprintf( buff, nBufLen, "vec%d( greaterThanEqual( ", nArgumentDimension );
  403. bDoubleClose = true;
  404. }
  405. break;
  406. case D3DSIO_EXP:
  407. V_snprintf( buff, nBufLen, "exp( " ); // exp2 ?
  408. break;
  409. case D3DSIO_LOG:
  410. V_snprintf( buff, nBufLen, "log( " ); // log2 ?
  411. break;
  412. case D3DSIO_LIT:
  413. TranslationError();
  414. V_snprintf( buff, nBufLen, "lit( " ); // gonna have to write this one
  415. break;
  416. case D3DSIO_DST:
  417. V_snprintf( buff, nBufLen, "dst( " ); // gonna have to write this one
  418. break;
  419. case D3DSIO_LRP:
  420. Assert( !m_bVertexShader );
  421. V_snprintf( buff, nBufLen, "mix( " );
  422. break;
  423. case D3DSIO_FRC:
  424. V_snprintf( buff, nBufLen, "fract( " );
  425. break;
  426. case D3DSIO_M4x4:
  427. TranslationError();
  428. V_snprintf( buff, nBufLen, "m4x4" );
  429. break;
  430. case D3DSIO_M4x3:
  431. case D3DSIO_M3x4:
  432. case D3DSIO_M3x3:
  433. case D3DSIO_M3x2:
  434. case D3DSIO_CALL:
  435. case D3DSIO_CALLNZ:
  436. case D3DSIO_LOOP:
  437. case D3DSIO_RET:
  438. case D3DSIO_ENDLOOP:
  439. case D3DSIO_LABEL:
  440. case D3DSIO_DCL:
  441. TranslationError();
  442. break;
  443. case D3DSIO_POW:
  444. V_snprintf( buff, nBufLen, "pow( " );
  445. break;
  446. case D3DSIO_CRS:
  447. V_snprintf( buff, nBufLen, "cross( " );
  448. break;
  449. case D3DSIO_SGN:
  450. TranslationError();
  451. V_snprintf( buff, nBufLen, "sign( " );
  452. break;
  453. case D3DSIO_ABS:
  454. V_snprintf( buff, nBufLen, "abs( " );
  455. break;
  456. case D3DSIO_NRM:
  457. TranslationError();
  458. V_snprintf( buff, nBufLen, "normalize( " );
  459. break;
  460. case D3DSIO_SINCOS:
  461. TranslationError();
  462. V_snprintf( buff, nBufLen, "sincos( " ); // gonna have to write this one
  463. break;
  464. case D3DSIO_REP:
  465. case D3DSIO_ENDREP:
  466. case D3DSIO_IF:
  467. case D3DSIO_IFC:
  468. case D3DSIO_ELSE:
  469. case D3DSIO_ENDIF:
  470. case D3DSIO_BREAK:
  471. case D3DSIO_BREAKC: // TODO: these are the reason we even need GLSL...gotta make these work
  472. TranslationError();
  473. break;
  474. case D3DSIO_DEFB:
  475. case D3DSIO_DEFI:
  476. TranslationError();
  477. break;
  478. case D3DSIO_TEXCOORD:
  479. V_snprintf( buff, nBufLen, "texcoord" );
  480. break;
  481. case D3DSIO_TEXKILL:
  482. V_snprintf( buff, nBufLen, "kill( " ); // wrap the discard instruction?
  483. break;
  484. case D3DSIO_TEX:
  485. TranslationError();
  486. V_snprintf( buff, nBufLen, "TEX" ); // We shouldn't get here
  487. break;
  488. case D3DSIO_TEXBEM:
  489. case D3DSIO_TEXBEML:
  490. case D3DSIO_TEXREG2AR:
  491. case D3DSIO_TEXREG2GB:
  492. case D3DSIO_TEXM3x2PAD:
  493. case D3DSIO_TEXM3x2TEX:
  494. case D3DSIO_TEXM3x3PAD:
  495. case D3DSIO_TEXM3x3TEX:
  496. case D3DSIO_TEXM3x3SPEC:
  497. case D3DSIO_TEXM3x3VSPEC:
  498. TranslationError();
  499. break;
  500. case D3DSIO_EXPP:
  501. V_snprintf( buff, nBufLen, "exp( " );
  502. break;
  503. case D3DSIO_LOGP:
  504. V_snprintf( buff, nBufLen, "log( " );
  505. break;
  506. case D3DSIO_CND:
  507. TranslationError();
  508. break;
  509. case D3DSIO_DEF:
  510. TranslationError();
  511. V_snprintf( buff, nBufLen, "DEF" );
  512. break;
  513. case D3DSIO_TEXREG2RGB:
  514. case D3DSIO_TEXDP3TEX:
  515. case D3DSIO_TEXM3x2DEPTH:
  516. case D3DSIO_TEXDP3:
  517. case D3DSIO_TEXM3x3:
  518. TranslationError();
  519. break;
  520. case D3DSIO_TEXDEPTH:
  521. V_snprintf( buff, nBufLen, "texdepth" );
  522. break;
  523. case D3DSIO_CMP:
  524. TranslationError();
  525. Assert( !m_bVertexShader );
  526. V_snprintf( buff, nBufLen, "CMP" );
  527. break;
  528. case D3DSIO_BEM:
  529. TranslationError();
  530. break;
  531. case D3DSIO_DP2ADD:
  532. TranslationError();
  533. break;
  534. case D3DSIO_DSX:
  535. case D3DSIO_DSY:
  536. TranslationError();
  537. break;
  538. case D3DSIO_TEXLDD:
  539. V_snprintf( buff, nBufLen, "texldd" );
  540. break;
  541. case D3DSIO_SETP:
  542. TranslationError();
  543. break;
  544. case D3DSIO_TEXLDL:
  545. V_snprintf( buff, nBufLen, "texldl" );
  546. break;
  547. case D3DSIO_BREAKP:
  548. case D3DSIO_PHASE:
  549. TranslationError();
  550. break;
  551. }
  552. return bDoubleClose;
  553. }
  554. const char* D3DToGL::GetGLSLOperatorString( uint32 inst )
  555. {
  556. if ( inst == D3DSIO_ADD )
  557. return "+";
  558. else if ( inst == D3DSIO_SUB )
  559. return "-";
  560. else if ( inst == D3DSIO_MUL )
  561. return "*";
  562. Error( "GetGLSLOperatorString: unknown operator" );
  563. return "zzzz";
  564. }
  565. // Print ASM opcode
  566. void D3DToGL::PrintOpcode( uint32 inst, char* buff, int nBufLen )
  567. {
  568. switch ( inst )
  569. {
  570. case D3DSIO_NOP:
  571. V_snprintf( buff, nBufLen, "NOP" );
  572. TranslationError();
  573. break;
  574. case D3DSIO_MOV:
  575. V_snprintf( buff, nBufLen, "MOV" );
  576. break;
  577. case D3DSIO_ADD:
  578. V_snprintf( buff, nBufLen, "ADD" );
  579. break;
  580. case D3DSIO_SUB:
  581. V_snprintf( buff, nBufLen, "SUB" );
  582. break;
  583. case D3DSIO_MAD:
  584. V_snprintf( buff, nBufLen, "MAD" );
  585. break;
  586. case D3DSIO_MUL:
  587. V_snprintf( buff, nBufLen, "MUL" );
  588. break;
  589. case D3DSIO_RCP:
  590. V_snprintf( buff, nBufLen, "RCP" );
  591. break;
  592. case D3DSIO_RSQ:
  593. V_snprintf( buff, nBufLen, "RSQ" );
  594. break;
  595. case D3DSIO_DP3:
  596. V_snprintf( buff, nBufLen, "DP3" );
  597. break;
  598. case D3DSIO_DP4:
  599. V_snprintf( buff, nBufLen, "DP4" );
  600. break;
  601. case D3DSIO_MIN:
  602. V_snprintf( buff, nBufLen, "MIN" );
  603. break;
  604. case D3DSIO_MAX:
  605. V_snprintf( buff, nBufLen, "MAX" );
  606. break;
  607. case D3DSIO_SLT:
  608. V_snprintf( buff, nBufLen, "SLT" );
  609. break;
  610. case D3DSIO_SGE:
  611. V_snprintf( buff, nBufLen, "SGE" );
  612. break;
  613. case D3DSIO_EXP:
  614. V_snprintf( buff, nBufLen, "EX2" );
  615. break;
  616. case D3DSIO_LOG:
  617. V_snprintf( buff, nBufLen, "LG2" );
  618. break;
  619. case D3DSIO_LIT:
  620. V_snprintf( buff, nBufLen, "LIT" );
  621. break;
  622. case D3DSIO_DST:
  623. V_snprintf( buff, nBufLen, "DST" );
  624. break;
  625. case D3DSIO_LRP:
  626. Assert( !m_bVertexShader );
  627. V_snprintf( buff, nBufLen, "LRP" );
  628. break;
  629. case D3DSIO_FRC:
  630. V_snprintf( buff, nBufLen, "FRC" );
  631. break;
  632. case D3DSIO_M4x4:
  633. V_snprintf( buff, nBufLen, "m4x4" );
  634. break;
  635. case D3DSIO_M4x3:
  636. case D3DSIO_M3x4:
  637. case D3DSIO_M3x3:
  638. case D3DSIO_M3x2:
  639. case D3DSIO_CALL:
  640. case D3DSIO_CALLNZ:
  641. case D3DSIO_LOOP:
  642. case D3DSIO_RET:
  643. case D3DSIO_ENDLOOP:
  644. case D3DSIO_LABEL:
  645. TranslationError();
  646. break;
  647. case D3DSIO_DCL:
  648. V_snprintf( buff, nBufLen, "DCL" );
  649. break;
  650. case D3DSIO_POW:
  651. V_snprintf( buff, nBufLen, "POW" );
  652. break;
  653. case D3DSIO_CRS:
  654. V_snprintf( buff, nBufLen, "XPD" );
  655. break;
  656. case D3DSIO_SGN:
  657. TranslationError();
  658. V_snprintf( buff, nBufLen, "SGN" );
  659. break;
  660. case D3DSIO_ABS:
  661. V_snprintf( buff, nBufLen, "ABS" );
  662. break;
  663. case D3DSIO_NRM:
  664. TranslationError();
  665. V_snprintf( buff, nBufLen, "NRM" );
  666. break;
  667. case D3DSIO_SINCOS:
  668. Assert( !m_bVertexShader );
  669. V_snprintf( buff, nBufLen, "SCS" );
  670. break;
  671. case D3DSIO_REP:
  672. case D3DSIO_ENDREP:
  673. case D3DSIO_IF:
  674. case D3DSIO_IFC:
  675. case D3DSIO_ELSE:
  676. case D3DSIO_ENDIF:
  677. case D3DSIO_BREAK:
  678. case D3DSIO_BREAKC:
  679. TranslationError();
  680. break;
  681. case D3DSIO_MOVA:
  682. Assert( m_bVertexShader );
  683. V_snprintf( buff, nBufLen, "MOV" ); // We're always moving into a temp instead, so this is MOV instead of ARL
  684. break;
  685. case D3DSIO_DEFB:
  686. case D3DSIO_DEFI:
  687. TranslationError();
  688. break;
  689. case D3DSIO_TEXCOORD:
  690. V_snprintf( buff, nBufLen, "texcoord" );
  691. break;
  692. case D3DSIO_TEXKILL:
  693. V_snprintf( buff, nBufLen, "KIL" );
  694. break;
  695. case D3DSIO_TEX:
  696. V_snprintf( buff, nBufLen, "TEX" );
  697. break;
  698. case D3DSIO_TEXBEM:
  699. case D3DSIO_TEXBEML:
  700. case D3DSIO_TEXREG2AR:
  701. case D3DSIO_TEXREG2GB:
  702. case D3DSIO_TEXM3x2PAD:
  703. case D3DSIO_TEXM3x2TEX:
  704. case D3DSIO_TEXM3x3PAD:
  705. case D3DSIO_TEXM3x3TEX:
  706. case D3DSIO_TEXM3x3SPEC:
  707. case D3DSIO_TEXM3x3VSPEC:
  708. TranslationError();
  709. break;
  710. case D3DSIO_EXPP:
  711. V_snprintf( buff, nBufLen, "EXP" );
  712. break;
  713. case D3DSIO_LOGP:
  714. V_snprintf( buff, nBufLen, "LOG" );
  715. break;
  716. case D3DSIO_CND:
  717. TranslationError();
  718. break;
  719. case D3DSIO_DEF:
  720. V_snprintf( buff, nBufLen, "DEF" );
  721. break;
  722. case D3DSIO_TEXREG2RGB:
  723. case D3DSIO_TEXDP3TEX:
  724. case D3DSIO_TEXM3x2DEPTH:
  725. case D3DSIO_TEXDP3:
  726. case D3DSIO_TEXM3x3:
  727. TranslationError();
  728. break;
  729. case D3DSIO_TEXDEPTH:
  730. V_snprintf( buff, nBufLen, "texdepth" );
  731. break;
  732. case D3DSIO_CMP:
  733. Assert( !m_bVertexShader );
  734. V_snprintf( buff, nBufLen, "CMP" );
  735. break;
  736. case D3DSIO_BEM:
  737. TranslationError();
  738. break;
  739. case D3DSIO_DP2ADD:
  740. TranslationError();
  741. break;
  742. case D3DSIO_DSX:
  743. case D3DSIO_DSY:
  744. TranslationError();
  745. break;
  746. case D3DSIO_TEXLDD:
  747. V_snprintf( buff, nBufLen, "texldd" );
  748. break;
  749. case D3DSIO_SETP:
  750. TranslationError();
  751. break;
  752. case D3DSIO_TEXLDL:
  753. V_snprintf( buff, nBufLen, "texldl" );
  754. break;
  755. case D3DSIO_BREAKP:
  756. case D3DSIO_PHASE:
  757. TranslationError();
  758. break;
  759. }
  760. }
  761. CUtlString D3DToGL::GetUsageAndIndexString( uint32 dwToken, int fSemanticFlags )
  762. {
  763. char szTemp[1024];
  764. PrintUsageAndIndexToString( dwToken, szTemp, sizeof( szTemp ), fSemanticFlags );
  765. return szTemp;
  766. }
  767. //------------------------------------------------------------------------------
  768. // Helper function which prints ASCII representation of usage-usageindex pair to string
  769. //
  770. // Strictly used by vertex shaders
  771. // not used any more now that we have attribmap metadata
  772. //------------------------------------------------------------------------------
  773. void D3DToGL::PrintUsageAndIndexToString( uint32 dwToken, char* strUsageUsageIndexName, int nBufLen, int fSemanticFlags )
  774. {
  775. uint32 dwUsage = ( dwToken & D3DSP_DCL_USAGE_MASK );
  776. uint32 dwUsageIndex = ( dwToken & D3DSP_DCL_USAGEINDEX_MASK ) >> D3DSP_DCL_USAGEINDEX_SHIFT;
  777. switch ( dwUsage )
  778. {
  779. case D3DDECLUSAGE_POSITION:
  780. if ( m_bVertexShader )
  781. {
  782. if ( fSemanticFlags & SEMANTIC_OUTPUT )
  783. V_snprintf( strUsageUsageIndexName, nBufLen, "vTempPos" ); // effectively gl_Position
  784. else
  785. V_snprintf( strUsageUsageIndexName, nBufLen, "gl_Vertex" );
  786. }
  787. else
  788. {
  789. // .xy = position in viewport coordinates
  790. // .z = depth
  791. V_snprintf( strUsageUsageIndexName, nBufLen, "gl_FragCoord" );
  792. }
  793. break;
  794. case D3DDECLUSAGE_BLENDWEIGHT:
  795. V_snprintf( strUsageUsageIndexName, nBufLen, "vertex.attrib[1]" ); // "vertex.attrib[12]" ); // or [1]
  796. break;
  797. case D3DDECLUSAGE_BLENDINDICES:
  798. V_snprintf( strUsageUsageIndexName, nBufLen, "vertex.attrib[13]" ); // "vertex.attrib[13]" ); // or [ 7 ]
  799. break;
  800. case D3DDECLUSAGE_NORMAL:
  801. V_snprintf( strUsageUsageIndexName, nBufLen, "vec4( gl_Normal, 0.0 )" );
  802. break;
  803. case D3DDECLUSAGE_PSIZE:
  804. TranslationError();
  805. V_snprintf( strUsageUsageIndexName, nBufLen, "_psize" ); // no analog
  806. break;
  807. case D3DDECLUSAGE_TEXCOORD:
  808. V_snprintf( strUsageUsageIndexName, nBufLen, "oT%d", dwUsageIndex );
  809. break;
  810. case D3DDECLUSAGE_TANGENT:
  811. NoteTangentInputUsed();
  812. V_strncpy( strUsageUsageIndexName, g_pTangentAttributeName, nBufLen );
  813. break;
  814. case D3DDECLUSAGE_BINORMAL:
  815. V_snprintf( strUsageUsageIndexName, nBufLen, "vertex.attrib[14]" ); // aka texc[6]
  816. break;
  817. // case D3DDECLUSAGE_TESSFACTOR:
  818. // TranslationError();
  819. // V_snprintf( strUsageUsageIndexName, nBufLen, "_position" ); // no analog
  820. // break;
  821. // case D3DDECLUSAGE_POSITIONT:
  822. // TranslationError();
  823. // V_snprintf( strUsageUsageIndexName, nBufLen, "_positiont" ); // no analog
  824. // break;
  825. case D3DDECLUSAGE_COLOR:
  826. Assert( dwUsageIndex <= 1 );
  827. // if ( fSemanticFlags & SEMANTIC_OUTPUT )
  828. // V_snprintf( strUsageUsageIndexName, nBufLen, dwUsageIndex != 0 ? "gl_BackColor" : "gl_FrontColor" );
  829. // else
  830. V_snprintf( strUsageUsageIndexName, nBufLen, dwUsageIndex != 0 ? "gl_SecondaryColor" : "gl_Color" );
  831. break;
  832. case D3DDECLUSAGE_FOG:
  833. TranslationError();
  834. break;
  835. case D3DDECLUSAGE_DEPTH:
  836. TranslationError();
  837. V_snprintf( strUsageUsageIndexName, nBufLen, "_depth" ); // no analog
  838. break;
  839. case D3DDECLUSAGE_SAMPLE:
  840. TranslationError();
  841. V_snprintf( strUsageUsageIndexName, nBufLen, "_sample" ); // no analog
  842. break;
  843. default:
  844. Debugger();
  845. break;
  846. }
  847. }
  848. uint32 D3DToGL::GetRegType( uint32 dwRegToken )
  849. {
  850. return ( ( dwRegToken & D3DSP_REGTYPE_MASK2 ) >> D3DSP_REGTYPE_SHIFT2 ) | ( ( dwRegToken & D3DSP_REGTYPE_MASK ) >> D3DSP_REGTYPE_SHIFT );
  851. }
  852. void D3DToGL::PrintIndentation( char *pBuf, int nBufLen )
  853. {
  854. for( int i=0; i<m_NumIndentTabs; i++ )
  855. {
  856. strcat_s( pBuf, nBufLen, "\t" );
  857. }
  858. }
  859. CUtlString D3DToGL::GetParameterString( uint32 dwToken, uint32 dwSourceOrDest, bool bForceScalarSource, int *pARLDestReg )
  860. {
  861. char szTemp[1024];
  862. PrintParameterToString( dwToken, dwSourceOrDest, szTemp, sizeof( szTemp ), bForceScalarSource, pARLDestReg );
  863. return szTemp;
  864. }
  865. // If the register happens to end with ".xyzw", then this strips off the mask.
  866. void SimplifyFourParamRegister( char *pRegister )
  867. {
  868. int nLen = V_strlen( pRegister );
  869. if ( nLen > 5 && V_strcmp( &pRegister[nLen-5], ".xyzw" ) == 0 )
  870. pRegister[nLen-5] = 0;
  871. }
  872. // This returns 0 for x, 1 for y, 2 for z, and 3 for w.
  873. int GetSwizzleComponentVectorIndex( char chMask )
  874. {
  875. if ( chMask == 'x' )
  876. return 0;
  877. else if ( chMask == 'y' )
  878. return 1;
  879. else if ( chMask == 'z' )
  880. return 2;
  881. else if ( chMask == 'w' )
  882. return 3;
  883. Error( "GetSwizzleComponentVectorIndex( '%c' ) - invalid parameter.\n", chMask );
  884. return 0;
  885. }
  886. // GLSL needs the # of src masks to match the dest write mask.
  887. //
  888. // So this:
  889. // r0.xy = r1 + r2;
  890. // becomes:
  891. // r0.xy = r1.xy + r2.xy;
  892. //
  893. //
  894. // Also, and this is the trickier one: GLSL reads the source registers from their first component on
  895. // whereas D3D reads them as referenced in the dest register mask!
  896. //
  897. // So this code in D3D:
  898. // r0.yz = c0.x + c1.wxyz
  899. // Really means:
  900. // r0.y = c0.x + c1.x
  901. // r0.z = c0.x + c1.y
  902. // So we translate it to this in GLSL:
  903. // r0.yz = c0.xx + c1.wx
  904. // r0.yz = c0.xx + c1.xy
  905. //
  906. CUtlString D3DToGL::FixGLSLSwizzle( const char *pDestRegisterName, const char *pSrcRegisterName )
  907. {
  908. bool bAbsWrapper = false; // Parameter wrapped in an abs()
  909. bool bAbsNegative = false; // -abs()
  910. char szSrcRegister[128];
  911. V_strncpy( szSrcRegister, pSrcRegisterName, sizeof(szSrcRegister) );
  912. // Check for abs() or -abs() wrapper and strip it off during the fixup
  913. if ( !V_strncmp( pSrcRegisterName, "abs(", 4 ) || !V_strncmp( pSrcRegisterName, "-abs(", 5 ) )
  914. {
  915. bAbsWrapper = true;
  916. bAbsNegative = pSrcRegisterName[0] == '-';
  917. const char *pOpenParen = strchr( pSrcRegisterName, '(' ); // FIRST opening paren
  918. const char *pClosingParen = strrchr( pSrcRegisterName, ')' ); // LAST closing paren
  919. Assert ( pOpenParen && pClosingParen ); // If we start with abs( and don't get both parens, something is very wrong
  920. // Copy out just the register name with no abs()
  921. int nRegNameLength = pClosingParen - pOpenParen - 1;
  922. V_strncpy( szSrcRegister, pOpenParen+1, nRegNameLength + 1 ); // Kind of a weird function...copy more than you need and slam the last char to NULL-terminate
  923. }
  924. int nSwizzlesInDest = GetNumSwizzleComponents( pDestRegisterName );
  925. if ( nSwizzlesInDest == 0 )
  926. nSwizzlesInDest = 4;
  927. char szFixedSrcRegister[128];
  928. GetParamNameWithoutSwizzle( szSrcRegister, szFixedSrcRegister, sizeof( szFixedSrcRegister ) );
  929. V_strncat( szFixedSrcRegister, ".", sizeof( szFixedSrcRegister ) );
  930. for ( int i=0; i < nSwizzlesInDest; i++ )
  931. {
  932. char chDestWriteMask = GetSwizzleComponent( pDestRegisterName, i );
  933. int nVectorIndex = GetSwizzleComponentVectorIndex( chDestWriteMask );
  934. char ch[2];
  935. ch[0] = GetSwizzleComponent( szSrcRegister, nVectorIndex );
  936. ch[1] = 0;
  937. V_strncat( szFixedSrcRegister, ch, sizeof( szFixedSrcRegister ) );
  938. }
  939. SimplifyFourParamRegister( szFixedSrcRegister );
  940. if ( bAbsWrapper )
  941. {
  942. char szTempSrcRegister[128];
  943. V_strncpy( szTempSrcRegister, szFixedSrcRegister, sizeof(szTempSrcRegister) );
  944. V_snprintf( szFixedSrcRegister, sizeof( szFixedSrcRegister ), "%sabs(%s)", bAbsNegative ? "-" : "", szTempSrcRegister ) ;
  945. }
  946. return szFixedSrcRegister;
  947. }
  948. // Weird encoding...bits are split apart in the dwToken
  949. inline uint32 GetRegTypeFromToken( uint32 dwToken )
  950. {
  951. return ( ( dwToken & D3DSP_REGTYPE_MASK2 ) >> D3DSP_REGTYPE_SHIFT2 ) | ( ( dwToken & D3DSP_REGTYPE_MASK ) >> D3DSP_REGTYPE_SHIFT );
  952. }
  953. void D3DToGL::FlagIndirectRegister( uint32 dwToken, int *pARLDestReg )
  954. {
  955. if ( !pARLDestReg )
  956. return;
  957. switch ( dwToken & D3DVS_SWIZZLE_MASK & D3DVS_X_W )
  958. {
  959. case D3DVS_X_X:
  960. *pARLDestReg = ARL_DEST_X;
  961. break;
  962. case D3DVS_X_Y:
  963. *pARLDestReg = ARL_DEST_Y;
  964. break;
  965. case D3DVS_X_Z:
  966. *pARLDestReg = ARL_DEST_Z;
  967. break;
  968. case D3DVS_X_W:
  969. *pARLDestReg = ARL_DEST_W;
  970. break;
  971. }
  972. }
  973. //------------------------------------------------------------------------------
  974. // PrintParameterToString()
  975. //
  976. // Helper function which prints ASCII representation of passed Parameter dwToken
  977. // to string. Token defines parameter details. The dwSourceOrDest parameter says
  978. // whether or not this is a source or destination register
  979. //------------------------------------------------------------------------------
  980. void D3DToGL::PrintParameterToString ( uint32 dwToken, uint32 dwSourceOrDest, char *pRegisterName, int nBufLen, bool bForceScalarSource, int *pARLDestReg )
  981. {
  982. char buff[32];
  983. bool bAllowWriteMask = true;
  984. bool bAllowSwizzle = true;
  985. uint32 dwRegNum = dwToken & D3DSP_REGNUM_MASK;
  986. uint32 dwRegType, dwSwizzle;
  987. uint32 dwSrcModifier = D3DSPSM_NONE;
  988. // Clear string to zero length
  989. pRegisterName[ 0 ] = 0;
  990. dwRegType = GetRegTypeFromToken( dwToken );
  991. // If this is a dest register
  992. if ( dwSourceOrDest == DST_REGISTER )
  993. {
  994. // Instruction modifiers
  995. if ( dwToken & D3DSPDM_PARTIALPRECISION )
  996. {
  997. // strcat_s( pRegisterName, nBufLen, "_pp" );
  998. }
  999. if ( dwToken & D3DSPDM_MSAMPCENTROID)
  1000. {
  1001. // strcat_s( pRegisterName, nBufLen, "_centroid" );
  1002. }
  1003. }
  1004. // If this is a source register
  1005. if ( dwSourceOrDest == SRC_REGISTER )
  1006. {
  1007. dwSrcModifier = dwToken & D3DSP_SRCMOD_MASK;
  1008. // If there are any source modifiers, check to see if they're at
  1009. // least partially "prefix" and prepend appropriately
  1010. if ( dwSrcModifier != D3DSPSM_NONE )
  1011. {
  1012. switch ( dwSrcModifier )
  1013. {
  1014. // These four start with just minus... (some may result in "postfix" notation as well later on)
  1015. case D3DSPSM_NEG: // negate
  1016. strcat_s( pRegisterName, nBufLen, "-" );
  1017. break;
  1018. case D3DSPSM_BIASNEG: // bias and negate
  1019. case D3DSPSM_SIGNNEG: // sign and negate
  1020. case D3DSPSM_X2NEG: // *2 and negate
  1021. TranslationError();
  1022. strcat_s( pRegisterName, nBufLen, "-" );
  1023. break;
  1024. case D3DSPSM_COMP: // complement
  1025. TranslationError();
  1026. strcat_s( pRegisterName, nBufLen, "1-" );
  1027. break;
  1028. case D3DSPSM_ABS: // abs()
  1029. strcat_s( pRegisterName, nBufLen, "abs(" );
  1030. break;
  1031. case D3DSPSM_ABSNEG: // -abs()
  1032. strcat_s( pRegisterName, nBufLen, "-abs(" );
  1033. break;
  1034. case D3DSPSM_NOT: // for predicate register: "!p0"
  1035. TranslationError();
  1036. strcat_s( pRegisterName, nBufLen, "!" );
  1037. break;
  1038. }
  1039. }
  1040. }
  1041. // Register name (from type and number)
  1042. switch ( dwRegType )
  1043. {
  1044. case D3DSPR_TEMP:
  1045. V_snprintf( buff, sizeof( buff ), "r%d", dwRegNum );
  1046. strcat_s( pRegisterName, nBufLen, buff );
  1047. m_dwTempUsageMask |= 0x00000001 << dwRegNum; // Keep track of the use of this temp
  1048. break;
  1049. case D3DSPR_INPUT:
  1050. if ( !m_bVertexShader && ( dwSourceOrDest == SRC_REGISTER ) )
  1051. {
  1052. if ( m_dwMajorVersion == 3 )
  1053. {
  1054. V_snprintf( buff, sizeof( buff ), "oTempT%d", dwRegNum );
  1055. }
  1056. else
  1057. {
  1058. V_snprintf( buff, sizeof( buff ), dwRegNum == 0 ? "gl_Color" : "gl_SecondaryColor" );
  1059. }
  1060. strcat_s( pRegisterName, nBufLen, buff );
  1061. }
  1062. else
  1063. {
  1064. V_snprintf( buff, sizeof( buff ), "v%d", dwRegNum );
  1065. strcat_s( pRegisterName, nBufLen, buff );
  1066. }
  1067. break;
  1068. case D3DSPR_CONST:
  1069. if ( m_bConstantRegisterDefined[dwRegNum] )
  1070. {
  1071. char szConstantRegName[3];
  1072. if ( m_bVertexShader )
  1073. {
  1074. V_snprintf( szConstantRegName, 3, "vd" );
  1075. }
  1076. else
  1077. {
  1078. V_snprintf( szConstantRegName, 3, "pd" );
  1079. }
  1080. // Put defined constants into their own namespace "d"
  1081. V_snprintf( buff, sizeof( buff ), "%s%d", szConstantRegName, dwRegNum );
  1082. strcat_s( pRegisterName, nBufLen, buff );
  1083. }
  1084. else if ( dwToken & D3DSHADER_ADDRESSMODE_MASK ) // Indirect addressing (e.g. skinning in a vertex shader)
  1085. {
  1086. char szConstantRegName[16];
  1087. if ( m_bVertexShader )
  1088. {
  1089. V_snprintf( szConstantRegName, 3, "vc" );
  1090. }
  1091. else // No indirect addressing in PS, this shouldn't happen
  1092. {
  1093. TranslationError();
  1094. V_snprintf( szConstantRegName, 3, "pc" );
  1095. }
  1096. if ( ( m_bGenerateBoneUniformBuffer ) && ( dwRegNum >= DXABSTRACT_VS_FIRST_BONE_SLOT ) )
  1097. {
  1098. if( dwRegNum < DXABSTRACT_VS_LAST_BONE_SLOT )
  1099. {
  1100. dwRegNum -= DXABSTRACT_VS_FIRST_BONE_SLOT;
  1101. V_strcpy( szConstantRegName, "vcbones" );
  1102. m_nHighestBoneRegister = ( DXABSTRACT_VS_PARAM_SLOTS - 1 ) - DXABSTRACT_VS_FIRST_BONE_SLOT;
  1103. }
  1104. else
  1105. {
  1106. dwRegNum -= ( DXABSTRACT_VS_LAST_BONE_SLOT + 1 ) - DXABSTRACT_VS_FIRST_BONE_SLOT;
  1107. m_nHighestRegister = m_bGenerateBoneUniformBuffer ? ( ( DXABSTRACT_VS_PARAM_SLOTS - 1 ) - ( ( DXABSTRACT_VS_LAST_BONE_SLOT + 1 ) - DXABSTRACT_VS_FIRST_BONE_SLOT ) ): ( DXABSTRACT_VS_PARAM_SLOTS - 1 );
  1108. }
  1109. }
  1110. else
  1111. {
  1112. m_nHighestRegister = m_bGenerateBoneUniformBuffer ? ( ( DXABSTRACT_VS_PARAM_SLOTS - 1 ) - ( ( DXABSTRACT_VS_LAST_BONE_SLOT + 1 ) - DXABSTRACT_VS_FIRST_BONE_SLOT ) ): ( DXABSTRACT_VS_PARAM_SLOTS - 1 );
  1113. }
  1114. // Index into single pc/vc[] register array with relative addressing
  1115. int nDstReg = -1;
  1116. FlagIndirectRegister( GetNextToken(), &nDstReg );
  1117. if ( pARLDestReg )
  1118. *pARLDestReg = nDstReg;
  1119. Assert( nDstReg != ARL_DEST_NONE );
  1120. int nSrcSwizzle = 'x';
  1121. if ( nDstReg == ARL_DEST_Y )
  1122. nSrcSwizzle = 'y';
  1123. else if ( nDstReg == ARL_DEST_Z )
  1124. nSrcSwizzle = 'z';
  1125. else if ( nDstReg == ARL_DEST_W )
  1126. nSrcSwizzle = 'w';
  1127. V_snprintf( buff, sizeof( buff ), "%s[int(va_r.%c) + %d]", szConstantRegName, nSrcSwizzle, dwRegNum );
  1128. strcat_s( pRegisterName, nBufLen, buff );
  1129. // Must allow swizzling, otherwise this example doesn't compile right: mad r3.xyz, c27[a0.w].w, r3, r7
  1130. //bAllowSwizzle = false;
  1131. }
  1132. else // Direct addressing of constant array
  1133. {
  1134. char szConstantRegName[16];
  1135. V_snprintf( szConstantRegName, 3, m_bVertexShader ? "vc" : "pc" );
  1136. if ( ( m_bGenerateBoneUniformBuffer ) && ( dwRegNum >= DXABSTRACT_VS_FIRST_BONE_SLOT ) )
  1137. {
  1138. if( dwRegNum < DXABSTRACT_VS_LAST_BONE_SLOT )
  1139. {
  1140. dwRegNum -= DXABSTRACT_VS_FIRST_BONE_SLOT;
  1141. V_strcpy( szConstantRegName, "vcbones" );
  1142. m_nHighestBoneRegister = MAX( m_nHighestBoneRegister, (int)dwRegNum );
  1143. }
  1144. else
  1145. {
  1146. // handles case where constants after the bones are used (c217 onwards), these are to be concatenated with those before the bones (c0-c57)
  1147. // keep track of regnum for concatenated array
  1148. dwRegNum -= ( DXABSTRACT_VS_LAST_BONE_SLOT + 1 ) - DXABSTRACT_VS_FIRST_BONE_SLOT;
  1149. m_nHighestRegister = MAX( m_nHighestRegister, dwRegNum );
  1150. }
  1151. }
  1152. else
  1153. {
  1154. //// NOGO if (dwRegNum != 255) // have seen cases where dwRegNum is 0xFF... need to figure out where those opcodes are coming from
  1155. {
  1156. m_nHighestRegister = MAX( m_nHighestRegister, dwRegNum );
  1157. }
  1158. Assert( m_nHighestRegister < DXABSTRACT_VS_PARAM_SLOTS );
  1159. }
  1160. // Index into single pc/vc[] register array with absolute addressing, same for GLSL and ASM
  1161. V_snprintf( buff, sizeof( buff ), "%s[%d]", szConstantRegName, dwRegNum );
  1162. strcat_s( pRegisterName, nBufLen, buff );
  1163. }
  1164. break;
  1165. case D3DSPR_ADDR: // aliases to D3DSPR_TEXTURE
  1166. if ( m_bVertexShader )
  1167. {
  1168. Assert( dwRegNum == 0 );
  1169. V_snprintf( buff, sizeof( buff ), "va_r" );
  1170. }
  1171. else // D3DSPR_TEXTURE in the pixel shader
  1172. {
  1173. // If dest reg, this is an iterator/varying declaration
  1174. if ( dwSourceOrDest == DST_REGISTER )
  1175. {
  1176. // Is this iterator centroid?
  1177. if ( m_nCentroidMask & ( 0x00000001 << dwRegNum ) )
  1178. {
  1179. V_snprintf( buff, sizeof( buff ), "centroid varying vec4 oT%d", dwRegNum ); // centroid varying
  1180. }
  1181. else
  1182. {
  1183. V_snprintf( buff, sizeof( buff ), "varying vec4 oT%d", dwRegNum );
  1184. }
  1185. bAllowWriteMask = false;
  1186. }
  1187. else // source register
  1188. {
  1189. V_snprintf( buff, sizeof( buff ), "oT%d", dwRegNum );
  1190. }
  1191. }
  1192. strcat_s( pRegisterName, nBufLen, buff );
  1193. break;
  1194. case D3DSPR_RASTOUT: // vertex shader oPos
  1195. Assert( m_bVertexShader );
  1196. Assert( m_dwMajorVersion == 2 );
  1197. switch( dwRegNum )
  1198. {
  1199. case D3DSRO_POSITION:
  1200. strcat_s( pRegisterName, nBufLen, "vTempPos" ); // In GLSL, this ends up in gl_Position later on
  1201. m_bDeclareVSOPos = true;
  1202. break;
  1203. case D3DSRO_FOG:
  1204. strcat_s( pRegisterName, nBufLen, "gl_FogFragCoord" );
  1205. m_bDeclareVSOFog = true;
  1206. break;
  1207. default:
  1208. printf( "\nD3DSPR_RASTOUT: dwRegNum is %08x and token is %08x", dwRegNum, dwToken );
  1209. TranslationError();
  1210. break;
  1211. }
  1212. break;
  1213. case D3DSPR_ATTROUT:
  1214. Assert( m_bVertexShader );
  1215. Assert( m_dwMajorVersion == 2 );
  1216. if ( dwRegNum == 0 )
  1217. {
  1218. V_snprintf( buff, sizeof( buff ), "gl_FrontColor" );
  1219. }
  1220. else if ( dwRegNum == 1 )
  1221. {
  1222. V_snprintf( buff, sizeof( buff ), "gl_FrontSecondaryColor" );
  1223. }
  1224. else
  1225. {
  1226. Error( "Invalid D3DSPR_ATTROUT index" );
  1227. }
  1228. strcat_s( pRegisterName, nBufLen, buff );
  1229. break;
  1230. case D3DSPR_TEXCRDOUT: // aliases to D3DSPR_OUTPUT
  1231. if ( m_bVertexShader )
  1232. {
  1233. if ( m_nVSPositionOutput == (int32) dwRegNum )
  1234. {
  1235. V_snprintf( buff, sizeof( buff ), "vTempPos" ); // This output varying is the position
  1236. }
  1237. else if ( m_dwMajorVersion == 3 )
  1238. {
  1239. V_snprintf( buff, sizeof( buff ), "oTempT%d", dwRegNum );
  1240. }
  1241. else
  1242. {
  1243. V_snprintf( buff, sizeof( buff ), "oT%d", dwRegNum );
  1244. }
  1245. m_dwTexCoordOutMask |= ( 0x00000001 << dwRegNum );
  1246. }
  1247. else
  1248. {
  1249. V_snprintf( buff, sizeof( buff ), "oC%d", dwRegNum );
  1250. }
  1251. strcat_s( pRegisterName, nBufLen, buff );
  1252. break;
  1253. case D3DSPR_CONSTINT:
  1254. V_snprintf( buff, sizeof( buff ), "i%d", dwRegNum ); // Loops use these
  1255. strcat_s( pRegisterName, nBufLen, buff );
  1256. m_dwConstIntUsageMask |= 0x00000001 << dwRegNum; // Keep track of the use of this integer constant
  1257. break;
  1258. case D3DSPR_COLOROUT:
  1259. V_snprintf( buff, sizeof( buff ), "gl_FragData[%d]", dwRegNum );
  1260. strcat_s( pRegisterName, nBufLen, buff );
  1261. m_bOutputColorRegister[dwRegNum] = true;
  1262. break;
  1263. case D3DSPR_DEPTHOUT:
  1264. V_snprintf( buff, sizeof( buff ), "gl_FragDepth" );
  1265. strcat_s( pRegisterName, nBufLen, buff );
  1266. m_bOutputDepthRegister = true;
  1267. break;
  1268. case D3DSPR_SAMPLER:
  1269. V_snprintf( buff, sizeof( buff ), "sampler%d", dwRegNum );
  1270. strcat_s( pRegisterName, nBufLen, buff );
  1271. break;
  1272. case D3DSPR_CONST2:
  1273. TranslationError();
  1274. V_snprintf( buff, sizeof( buff ), "c%d", dwRegNum+2048);
  1275. strcat_s( pRegisterName, nBufLen, buff );
  1276. break;
  1277. case D3DSPR_CONST3:
  1278. TranslationError();
  1279. V_snprintf( buff, sizeof( buff ), "c%d", dwRegNum+4096);
  1280. strcat_s( pRegisterName, nBufLen, buff );
  1281. break;
  1282. case D3DSPR_CONST4:
  1283. TranslationError();
  1284. V_snprintf( buff, sizeof( buff ), "c%d", dwRegNum+6144);
  1285. strcat_s( pRegisterName, nBufLen, buff );
  1286. break;
  1287. case D3DSPR_CONSTBOOL:
  1288. V_snprintf( buff, sizeof( buff ), m_bVertexShader ? "b%d" : "fb%d", dwRegNum );
  1289. strcat_s( pRegisterName, nBufLen, buff );
  1290. m_dwConstBoolUsageMask |= 0x00000001 << dwRegNum; // Keep track of the use of this bool constant
  1291. break;
  1292. case D3DSPR_LOOP:
  1293. TranslationError();
  1294. V_snprintf( buff, sizeof( buff ), "aL%d", dwRegNum );
  1295. strcat_s( pRegisterName, nBufLen, buff );
  1296. break;
  1297. case D3DSPR_TEMPFLOAT16:
  1298. TranslationError();
  1299. V_snprintf( buff, sizeof( buff ), "temp_float16_xxx%d", dwRegNum );
  1300. strcat_s( pRegisterName, nBufLen, buff );
  1301. break;
  1302. case D3DSPR_MISCTYPE:
  1303. Assert( dwRegNum == 0 ); // So far, we know that MISC[0] is gl_FragCoord (aka vPos in DX ASM parlance), but we don't know about any other MISC registers
  1304. V_snprintf( buff, sizeof( buff ), "gl_FragCoord" );
  1305. strcat_s( pRegisterName, nBufLen, buff );
  1306. break;
  1307. case D3DSPR_LABEL:
  1308. TranslationError();
  1309. V_snprintf( buff, sizeof( buff ), "label%d", dwRegNum );
  1310. strcat_s( pRegisterName, nBufLen, buff );
  1311. break;
  1312. case D3DSPR_PREDICATE:
  1313. TranslationError();
  1314. V_snprintf( buff, sizeof( buff ), "p%d", dwRegNum );
  1315. strcat_s( pRegisterName, nBufLen, buff );
  1316. break;
  1317. }
  1318. // If this is a dest register
  1319. if ( dwSourceOrDest == DST_REGISTER )
  1320. {
  1321. //
  1322. // Write masks
  1323. //
  1324. // If some (not all, not none) of the write masks are set, we should include them
  1325. //
  1326. if ( bAllowWriteMask && ( !((dwToken & D3DSP_WRITEMASK_ALL) == D3DSP_WRITEMASK_ALL) || ((dwToken & D3DSP_WRITEMASK_ALL) == 0x00000000) ) )
  1327. {
  1328. // Put the dot on there
  1329. strcat_s( pRegisterName, nBufLen, "." );
  1330. // Optionally put on the x, y, z or w
  1331. int nMasksWritten = 0;
  1332. if ( dwToken & D3DSP_WRITEMASK_0 )
  1333. {
  1334. strcat_s( pRegisterName, nBufLen, "x" );
  1335. ++nMasksWritten;
  1336. }
  1337. if ( dwToken & D3DSP_WRITEMASK_1 )
  1338. {
  1339. strcat_s( pRegisterName, nBufLen, "y" );
  1340. ++nMasksWritten;
  1341. }
  1342. if ( dwToken & D3DSP_WRITEMASK_2 )
  1343. {
  1344. strcat_s( pRegisterName, nBufLen, "z" );
  1345. ++nMasksWritten;
  1346. }
  1347. if ( dwToken & D3DSP_WRITEMASK_3 )
  1348. {
  1349. strcat_s( pRegisterName, nBufLen, "w" );
  1350. ++nMasksWritten;
  1351. }
  1352. }
  1353. }
  1354. else // must be a source register
  1355. {
  1356. if ( bAllowSwizzle ) // relative addressing hard-codes the swizzle on a0.x
  1357. {
  1358. uint32 dwXSwizzle, dwYSwizzle, dwZSwizzle, dwWSwizzle;
  1359. // Mask out the swizzle modifier
  1360. dwSwizzle = dwToken & D3DVS_SWIZZLE_MASK;
  1361. // If there are any swizzles at all, tack on the appropriate notation
  1362. if ( dwSwizzle != D3DVS_NOSWIZZLE )
  1363. {
  1364. // Separate out the two-bit codes for each component swizzle
  1365. dwXSwizzle = dwSwizzle & D3DVS_X_W;
  1366. dwYSwizzle = dwSwizzle & D3DVS_Y_W;
  1367. dwZSwizzle = dwSwizzle & D3DVS_Z_W;
  1368. dwWSwizzle = dwSwizzle & D3DVS_W_W;
  1369. // Put on the dot
  1370. strcat_s( pRegisterName, nBufLen, "." );
  1371. // See where X comes from
  1372. switch ( dwXSwizzle )
  1373. {
  1374. case D3DVS_X_X:
  1375. strcat_s( pRegisterName, nBufLen, "x" );
  1376. break;
  1377. case D3DVS_X_Y:
  1378. strcat_s( pRegisterName, nBufLen, "y" );
  1379. break;
  1380. case D3DVS_X_Z:
  1381. strcat_s( pRegisterName, nBufLen, "z" );
  1382. break;
  1383. case D3DVS_X_W:
  1384. strcat_s( pRegisterName, nBufLen, "w" );
  1385. break;
  1386. }
  1387. if ( !bForceScalarSource )
  1388. {
  1389. // If the source of the remaining components are aren't
  1390. // identical to the source of x, continue with swizzle
  1391. if ( ((dwXSwizzle >> D3DVS_SWIZZLE_SHIFT) != (dwYSwizzle >> (D3DVS_SWIZZLE_SHIFT + 2))) || // X and Y sources match?
  1392. ((dwXSwizzle >> D3DVS_SWIZZLE_SHIFT) != (dwZSwizzle >> (D3DVS_SWIZZLE_SHIFT + 4))) || // X and Z sources match?
  1393. ((dwXSwizzle >> D3DVS_SWIZZLE_SHIFT) != (dwWSwizzle >> (D3DVS_SWIZZLE_SHIFT + 6)))) // X and W sources match?
  1394. {
  1395. // OpenGL seems to want us to have either 1 or 4 components in a swizzle, so just plow on through the rest
  1396. switch ( dwYSwizzle )
  1397. {
  1398. case D3DVS_Y_X:
  1399. strcat_s( pRegisterName, nBufLen, "x" );
  1400. break;
  1401. case D3DVS_Y_Y:
  1402. strcat_s( pRegisterName, nBufLen, "y" );
  1403. break;
  1404. case D3DVS_Y_Z:
  1405. strcat_s( pRegisterName, nBufLen, "z" );
  1406. break;
  1407. case D3DVS_Y_W:
  1408. strcat_s( pRegisterName, nBufLen, "w" );
  1409. break;
  1410. }
  1411. switch ( dwZSwizzle )
  1412. {
  1413. case D3DVS_Z_X:
  1414. strcat_s( pRegisterName, nBufLen, "x" );
  1415. break;
  1416. case D3DVS_Z_Y:
  1417. strcat_s( pRegisterName, nBufLen, "y" );
  1418. break;
  1419. case D3DVS_Z_Z:
  1420. strcat_s( pRegisterName, nBufLen, "z" );
  1421. break;
  1422. case D3DVS_Z_W:
  1423. strcat_s( pRegisterName, nBufLen, "w" );
  1424. break;
  1425. }
  1426. switch ( dwWSwizzle )
  1427. {
  1428. case D3DVS_W_X:
  1429. strcat_s( pRegisterName, nBufLen, "x" );
  1430. break;
  1431. case D3DVS_W_Y:
  1432. strcat_s( pRegisterName, nBufLen, "y" );
  1433. break;
  1434. case D3DVS_W_Z:
  1435. strcat_s( pRegisterName, nBufLen, "z" );
  1436. break;
  1437. case D3DVS_W_W:
  1438. strcat_s( pRegisterName, nBufLen, "w" );
  1439. break;
  1440. }
  1441. }
  1442. } // end !bForceScalarSource
  1443. }
  1444. else // dwSwizzle == D3DVS_NOSWIZZLE
  1445. {
  1446. // If this is a MOVA / ARL, GL on the Mac requires us to tack the .x onto the source register
  1447. if ( bForceScalarSource )
  1448. {
  1449. strcat_s( pRegisterName, nBufLen, ".x" );
  1450. }
  1451. }
  1452. } // bAllowSwizzle
  1453. // If there are any source modifiers, check to see if they're at
  1454. // least partially "postfix" and tack them on as appropriate
  1455. if ( dwSrcModifier != D3DSPSM_NONE )
  1456. {
  1457. switch ( dwSrcModifier )
  1458. {
  1459. case D3DSPSM_BIAS: // bias
  1460. case D3DSPSM_BIASNEG: // bias and negate
  1461. TranslationError();
  1462. strcat_s( pRegisterName, nBufLen, "_bx2" );
  1463. break;
  1464. case D3DSPSM_SIGN: // sign
  1465. case D3DSPSM_SIGNNEG: // sign and negate
  1466. TranslationError();
  1467. strcat_s( pRegisterName, nBufLen, "_sgn" );
  1468. break;
  1469. case D3DSPSM_X2: // *2
  1470. case D3DSPSM_X2NEG: // *2 and negate
  1471. TranslationError();
  1472. strcat_s( pRegisterName, nBufLen, "_x2" );
  1473. break;
  1474. case D3DSPSM_ABS: // abs()
  1475. case D3DSPSM_ABSNEG: // -abs()
  1476. strcat_s( pRegisterName, nBufLen, ")" );
  1477. break;
  1478. case D3DSPSM_DZ: // divide through by z component
  1479. TranslationError();
  1480. strcat_s( pRegisterName, nBufLen, "_dz" );
  1481. break;
  1482. case D3DSPSM_DW: // divide through by w component
  1483. TranslationError();
  1484. strcat_s( pRegisterName, nBufLen, "_dw" );
  1485. break;
  1486. }
  1487. } // end postfix modifiers (really only ps.1.x)
  1488. }
  1489. }
  1490. void D3DToGL::RecordInputAndOutputPositions()
  1491. {
  1492. // Remember where we are in the token stream.
  1493. m_pRecordedInputTokenStart = m_pdwNextToken;
  1494. // Remember where our outputs are.
  1495. m_nRecordedParamCodeStrlen = V_strlen( (char*)m_pBufParamCode->Base() );
  1496. m_nRecordedALUCodeStrlen = V_strlen( (char*)m_pBufALUCode->Base() );
  1497. m_nRecordedAttribCodeStrlen = V_strlen( (char*)m_pBufAttribCode->Base() );
  1498. }
  1499. void D3DToGL::AddTokenHexCodeToBuffer( char *pBuffer, int nSize, int nLastStrlen )
  1500. {
  1501. int nCurStrlen = V_strlen( pBuffer );
  1502. if ( nCurStrlen == nLastStrlen )
  1503. return;
  1504. // Build a string with all the hex codes of the tokens since last time.
  1505. char szHex[512];
  1506. szHex[0] = '\n';
  1507. V_snprintf( &szHex[1], sizeof( szHex )-1, HEXCODE_HEADER );
  1508. int nTokens = MIN( 10, m_pdwNextToken - m_pRecordedInputTokenStart );
  1509. for ( int i=0; i < nTokens; i++ )
  1510. {
  1511. char szTemp[32];
  1512. V_snprintf( szTemp, sizeof( szTemp ), "0x%x ", m_pRecordedInputTokenStart[i] );
  1513. V_strncat( szHex, szTemp, sizeof( szHex ) );
  1514. }
  1515. V_strncat( szHex, "\n", sizeof( szHex ) );
  1516. // Insert the hex codes into the string.
  1517. int nBytesToInsert = V_strlen( szHex );
  1518. if ( nCurStrlen + nBytesToInsert + 1 >= nSize )
  1519. Error( "Buffer overflow writing token hex codes" );
  1520. if ( m_bPutHexCodesAfterLines )
  1521. {
  1522. // Put it at the end of the last line.
  1523. if ( pBuffer[nCurStrlen-1] == '\n' )
  1524. pBuffer[nCurStrlen-1] = 0;
  1525. V_strncat( pBuffer, &szHex[1], nSize );
  1526. }
  1527. else
  1528. {
  1529. memmove( pBuffer + nLastStrlen + nBytesToInsert, pBuffer + nLastStrlen, nCurStrlen - nLastStrlen + 1 );
  1530. memcpy( pBuffer + nLastStrlen, szHex, nBytesToInsert );
  1531. }
  1532. }
  1533. void D3DToGL::AddTokenHexCode()
  1534. {
  1535. if ( m_pdwNextToken > m_pRecordedInputTokenStart )
  1536. {
  1537. AddTokenHexCodeToBuffer( (char*)m_pBufParamCode->Base(), m_pBufParamCode->Size(), m_nRecordedParamCodeStrlen );
  1538. AddTokenHexCodeToBuffer( (char*)m_pBufALUCode->Base(), m_pBufALUCode->Size(), m_nRecordedALUCodeStrlen );
  1539. AddTokenHexCodeToBuffer( (char*)m_pBufAttribCode->Base(), m_pBufAttribCode->Size(), m_nRecordedAttribCodeStrlen );
  1540. }
  1541. }
  1542. uint32 D3DToGL::MaintainAttributeMap( uint32 dwToken, uint32 dwRegToken )
  1543. {
  1544. // Check that this reg index has not been used before - if it has, let Houston know
  1545. uint dwRegIndex = dwRegToken & D3DSP_REGNUM_MASK;
  1546. if ( m_dwAttribMap[ dwRegIndex ] == 0xFFFFFFFF )
  1547. {
  1548. // log it
  1549. // semantic/usage in the higher nibble
  1550. // usage index in the low nibble
  1551. uint usage = dwToken & D3DSP_DCL_USAGE_MASK;
  1552. uint usageindex = ( dwToken & D3DSP_DCL_USAGEINDEX_MASK ) >> D3DSP_DCL_USAGEINDEX_SHIFT;
  1553. m_dwAttribMap[ dwRegIndex ] = ( usage << 4 ) | usageindex;
  1554. // avoid writing 0xBB since runtime code uses that for an 'unused' marker
  1555. if ( m_dwAttribMap[ dwRegIndex ] == 0xBB )
  1556. {
  1557. Debugger();
  1558. }
  1559. }
  1560. else
  1561. {
  1562. //not OK
  1563. Debugger();
  1564. }
  1565. return dwRegIndex;
  1566. }
  1567. void D3DToGL::Handle_DCL()
  1568. {
  1569. uint32 dwToken = GetNextToken(); // What kind of dcl is this...
  1570. uint32 dwRegToken = GetNextToken(); // Look ahead to register token
  1571. uint32 dwUsage = ( dwToken & D3DSP_DCL_USAGE_MASK );
  1572. uint32 dwUsageIndex = ( dwToken & D3DSP_DCL_USAGEINDEX_MASK ) >> D3DSP_DCL_USAGEINDEX_SHIFT;
  1573. uint32 dwRegNum = dwRegToken & D3DSP_REGNUM_MASK;
  1574. uint32 nRegType = GetRegTypeFromToken( dwRegToken );
  1575. if ( m_bVertexShader )
  1576. {
  1577. // If this is an output, remember the index (what the ASM code calls o0, o1, o2..) and the semantic.
  1578. // When GetParameterString( DST_REGISTER ) hits this one, we'll return "oN".
  1579. // At the end of the main() function, we'll insert a bunch of statements like "gl_Color = o2" based on what we remembered here.
  1580. if ( ( m_dwMajorVersion >= 3 ) && ( nRegType == D3DSPR_OUTPUT ) )
  1581. {
  1582. // uint32 dwRegComponents = ( dwRegToken & D3DSP_WRITEMASK_ALL ) >> 16; // Components used by the output register (1 means float, 3 means vec2, 7 means vec3, f means vec4)
  1583. if ( dwRegNum >= MAX_DECLARED_OUTPUTS )
  1584. Error( "Output register number (%d) too high (only %d supported).", dwRegNum, MAX_DECLARED_OUTPUTS );
  1585. if ( m_DeclaredOutputs[dwRegNum] != UNDECLARED_OUTPUT )
  1586. Error( "Output dcl_ hit for register #%d more than once!", dwRegNum );
  1587. Assert( dwToken != UNDECLARED_OUTPUT );
  1588. m_DeclaredOutputs[dwRegNum] = dwToken;
  1589. //uint32 dwUsage = ( dwToken & D3DSP_DCL_USAGE_MASK );
  1590. //uint32 dwUsageIndex = ( dwToken & D3DSP_DCL_USAGEINDEX_MASK ) >> D3DSP_DCL_USAGEINDEX_SHIFT;
  1591. // Flag which o# output register maps to gl_Position
  1592. if ( dwUsage == D3DDECLUSAGE_POSITION )
  1593. {
  1594. m_nVSPositionOutput = dwUsageIndex;
  1595. m_bDeclareVSOPos = true;
  1596. }
  1597. if ( m_bAddHexCodeComments )
  1598. {
  1599. CUtlString sParam2 = GetUsageAndIndexString( dwToken, SEMANTIC_OUTPUT );
  1600. PrintToBuf( *m_pBufHeaderCode, "// [GL remembering that oT%d maps to %s]\n", dwRegNum, sParam2.String() );
  1601. }
  1602. }
  1603. else if ( GetRegType( dwRegToken ) == D3DSPR_SAMPLER )
  1604. {
  1605. // We can support vertex texturing if necessary, but I can't find a use case in any branch. (HW morphing in L4D2 isn't enabled, and the comments indicate that r_hwmorph isn't compatible with mat_queue_mode anyway, and CS:GO/DoTA don't use vertex shader texturing.)
  1606. TranslationError();
  1607. int nRegNum = dwRegToken & D3DSP_REGNUM_MASK;
  1608. switch ( TextureType( dwToken ) )
  1609. {
  1610. default:
  1611. case D3DSTT_UNKNOWN:
  1612. case D3DSTT_2D:
  1613. m_dwSamplerTypes[nRegNum] = SAMPLER_TYPE_2D;
  1614. break;
  1615. case D3DSTT_CUBE:
  1616. m_dwSamplerTypes[nRegNum] = SAMPLER_TYPE_CUBE;
  1617. break;
  1618. case D3DSTT_VOLUME:
  1619. m_dwSamplerTypes[nRegNum] = SAMPLER_TYPE_3D;
  1620. break;
  1621. }
  1622. // Track sampler declarations
  1623. m_dwSamplerUsageMask |= 1 << nRegNum;
  1624. }
  1625. else
  1626. {
  1627. Assert( GetRegType( dwRegToken ) == D3DSPR_INPUT);
  1628. CUtlString sParam1 = GetParameterString( dwRegToken, DST_REGISTER, false, NULL );
  1629. CUtlString sParam2 = GetUsageAndIndexString( dwToken, SEMANTIC_INPUT );
  1630. sParam2 = FixGLSLSwizzle( sParam1, sParam2 );
  1631. PrintToBuf( *m_pBufHeaderCode, "attribute vec4 %s; // ", sParam1.String() );
  1632. MaintainAttributeMap( dwToken, dwRegToken );
  1633. char temp[128];
  1634. // regnum goes straight into the vertex.attrib[n] index
  1635. sprintf( temp, "%08x %08x\n", dwToken, dwRegToken );
  1636. StrcatToHeaderCode( temp );
  1637. }
  1638. }
  1639. else // Pixel shader
  1640. {
  1641. // If the register is a sampler, the dcl has a dimension decorator that we have to save for subsequent TEX instructions
  1642. uint32 nRegType = GetRegType( dwRegToken );
  1643. if ( nRegType == D3DSPR_SAMPLER )
  1644. {
  1645. int nRegNum = dwRegToken & D3DSP_REGNUM_MASK;
  1646. switch ( TextureType( dwToken ) )
  1647. {
  1648. default:
  1649. case D3DSTT_UNKNOWN:
  1650. case D3DSTT_2D:
  1651. m_dwSamplerTypes[nRegNum] = SAMPLER_TYPE_2D;
  1652. break;
  1653. case D3DSTT_CUBE:
  1654. m_dwSamplerTypes[nRegNum] = SAMPLER_TYPE_CUBE;
  1655. break;
  1656. case D3DSTT_VOLUME:
  1657. m_dwSamplerTypes[nRegNum] = SAMPLER_TYPE_3D;
  1658. break;
  1659. }
  1660. // Track sampler declarations
  1661. m_dwSamplerUsageMask |= 1 << nRegNum;
  1662. }
  1663. else // Not a sampler, we're going to generate varying declaration code
  1664. {
  1665. // In pixel shaders we only declare texture coordinate varyings since they may be using centroid
  1666. if ( ( m_dwMajorVersion == 3 ) && ( nRegType == D3DSPR_INPUT ) )
  1667. {
  1668. Assert( m_DeclaredInputs[dwRegNum] == UNDECLARED_INPUT );
  1669. m_DeclaredInputs[dwRegNum] = dwToken;
  1670. if ( ( dwUsage != D3DDECLUSAGE_COLOR ) && ( dwUsage != D3DDECLUSAGE_TEXCOORD ) )
  1671. {
  1672. TranslationError(); // Not supported yet, but can be if we need it.
  1673. }
  1674. if ( dwUsage == D3DDECLUSAGE_TEXCOORD )
  1675. {
  1676. char buf[256];
  1677. if ( m_nCentroidMask & ( 0x00000001 << dwUsageIndex ) )
  1678. {
  1679. V_snprintf( buf, sizeof( buf ), "centroid varying vec4 oT%d;\n", dwUsageIndex ); // centroid varying
  1680. }
  1681. else
  1682. {
  1683. V_snprintf( buf, sizeof( buf ), "varying vec4 oT%d;\n", dwUsageIndex );
  1684. }
  1685. StrcatToHeaderCode( buf );
  1686. }
  1687. }
  1688. else if ( nRegType == D3DSPR_TEXTURE )
  1689. {
  1690. char buff[256];
  1691. PrintParameterToString( dwRegToken, DST_REGISTER, buff, sizeof( buff ), false, NULL );
  1692. PrintToBuf( *m_pBufHeaderCode, "%s;\n",buff );
  1693. }
  1694. else
  1695. {
  1696. // No need to declare anything (probably D3DSPR_MISCTYPE either VPOS or VFACE)
  1697. }
  1698. }
  1699. }
  1700. }
  // Returns true when the 8-bit exponent field of f is all ones. Despite the name,
  // that covers both NaNs and +/- infinity; every finite value (including zero and
  // denormals) returns false.
  static bool IsFloatNaN( float f )
  {
      // Copy the bits out with memcpy: reading a float through an unsigned int pointer
      // violates strict aliasing and may be miscompiled by an optimizing compiler.
      unsigned int nBits = 0;
      memcpy( &nBits, &f, sizeof( nBits ) );

      const unsigned int nExponent = ( nBits >> 23 ) & 0xFF;
      return ( nExponent == 255 );
  }
  1707. static inline bool EqualTol( double a, double b, double t )
  1708. {
  1709. return fabs( a - b ) <= ( ( MAX( fabs( a ), fabs( b ) ) + 1.0 ) * t );
  1710. }
  // Originally written by Bruce Dawson, see:
  // http://randomascii.wordpress.com/2012/03/08/float-precisionfrom-zero-to-100-digits-2/
  1713. // This class represents a very limited high-precision number with 'count' 32-bit
  1714. // unsigned elements.
  1715. template <int count>
  1716. struct HighPrec
  1717. {
  1718. typedef unsigned T;
  1719. typedef unsigned long long Product_t;
  1720. static const int kWordShift = 32;
  1721. HighPrec()
  1722. {
  1723. memset(m_data, 0, sizeof(m_data));
  1724. m_nLowestNonZeroIndex = ARRAYSIZE(m_data);
  1725. }
  1726. // Insert the bits from value into m_data, shifted in from the bottom (least
  1727. // significant end) by the specified number of bits. A shift of zero or less
  1728. // means that none of the bits will be shifted in. A shift of one means that
  1729. // the high bit of value will be in the bottom of the last element of m_data -
  1730. // the least significant bit. A shift of kWordShift means that value will be
  1731. // in the least significant element of m_data, and so on.
  1732. void InsertLowBits(T value, int shiftAmount)
  1733. {
  1734. if (shiftAmount <= 0)
  1735. return;
  1736. int subShift = shiftAmount & (kWordShift - 1);
  1737. int bigShift = shiftAmount / kWordShift;
  1738. Product_t result = (Product_t)value << subShift;
  1739. T resultLow = (T)result;
  1740. T resultHigh = result >> kWordShift;
  1741. // Use an unsigned type so that negative numbers will become large,
  1742. // which makes the range checking below simpler.
  1743. unsigned highIndex = ARRAYSIZE(m_data) - 1 - bigShift;
  1744. // Write the results to the data array. If the index is too large
  1745. // then that means that the data was shifted off the edge.
  1746. if ( (highIndex < ARRAYSIZE(m_data)) && ( resultHigh ) )
  1747. {
  1748. m_data[highIndex] |= resultHigh;
  1749. m_nLowestNonZeroIndex = MIN( m_nLowestNonZeroIndex, highIndex );
  1750. }
  1751. if ( ( highIndex + 1 < ARRAYSIZE(m_data)) && ( resultLow ) )
  1752. {
  1753. m_data[highIndex + 1] |= resultLow;
  1754. m_nLowestNonZeroIndex = MIN( m_nLowestNonZeroIndex, highIndex + 1 );
  1755. }
  1756. }
  1757. // Insert the bits from value into m_data, shifted in from the top (most
  1758. // significant end) by the specified number of bits. A shift of zero or less
  1759. // means that none of the bits will be shifted in. A shift of one means that
  1760. // the low bit of value will be in the top of the first element of m_data -
  1761. // the most significant bit. A shift of kWordShift means that value will be
  1762. // in the most significant element of m_data, and so on.
  1763. void InsertTopBits(T value, int shiftAmount)
  1764. {
  1765. InsertLowBits(value, (ARRAYSIZE(m_data) + 1) * kWordShift - shiftAmount);
  1766. }
  1767. // Return true if all elements of m_data are zero.
  1768. bool IsZero() const
  1769. {
  1770. bool bIsZero = ( m_nLowestNonZeroIndex == ARRAYSIZE(m_data) );
  1771. #ifdef DEBUG
  1772. for (int i = 0; i < ARRAYSIZE(m_data); ++i)
  1773. {
  1774. if (m_data[i])
  1775. {
  1776. Assert( !bIsZero );
  1777. return false;
  1778. }
  1779. }
  1780. Assert( bIsZero );
  1781. #endif
  1782. return bIsZero;
  1783. }
  1784. // Divide by div and return the remainder, from 0 to div-1.
  1785. // Standard long-division algorithm.
  1786. T DivReturnRemainder(T divisor)
  1787. {
  1788. T remainder = 0;
  1789. #ifdef DEBUG
  1790. for (uint j = 0; j < m_nLowestNonZeroIndex; ++j)
  1791. {
  1792. Assert( m_data[j] == 0 );
  1793. }
  1794. #endif
  1795. int nNewLowestNonZeroIndex = ARRAYSIZE(m_data);
  1796. for (int i = m_nLowestNonZeroIndex; i < ARRAYSIZE(m_data); ++i)
  1797. {
  1798. Product_t dividend = ((Product_t)remainder << kWordShift) + m_data[i];
  1799. Product_t result = dividend / divisor;
  1800. remainder = T(dividend % divisor);
  1801. m_data[i] = T(result);
  1802. if ( ( result ) && ( nNewLowestNonZeroIndex == ARRAYSIZE(m_data) ) )
  1803. nNewLowestNonZeroIndex = i;
  1804. }
  1805. m_nLowestNonZeroIndex = nNewLowestNonZeroIndex;
  1806. return remainder;
  1807. }
  1808. // The individual 'digits' (32-bit unsigned integers actually) that
  1809. // make up the number. The most-significant digit is in m_data[0].
  1810. T m_data[count];
  1811. uint m_nLowestNonZeroIndex;
  1812. };
  // Overlays a double with a signed 64-bit integer so the sign, exponent and
  // mantissa fields of the bit pattern can be picked apart.
  union Double_t
  {
      Double_t( double num = 0.0f ) : f( num ) {}

      // True when the top bit (bit 63) of the pattern is set.
      bool Negative() const { return i < 0; }
      // The low 52 bits of the pattern, as stored (no implied leading one).
      int64_t RawMantissa() const { return i & 0x000FFFFFFFFFFFFFLL; }
      // Bits 52..62 of the pattern: the exponent field, 0..2047, still biased.
      int64_t RawExponent() const { return ( i >> 52 ) & 0x7FF; }

      int64_t i;
      double f;
  };
  1823. static uint PrintDoubleInt( char *pBuf, uint nBufSize, double f, uint nMinChars )
  1824. {
  1825. static const char *pDigits = "00010203040506070809101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899";
  1826. Assert( !nMinChars || ( ( nMinChars % 6 ) == 0 ) );
  1827. char *pLastChar = pBuf + nBufSize - 1;
  1828. char *pDst = pLastChar;
  1829. *pDst-- = '\0';
  1830. // Put the double in our magic union so we can grab the components.
  1831. union Double_t num(f);
  1832. // Get the character that represents the sign.
  1833. // Check for NaNs or infinity.
  1834. if (num.RawExponent() == 2047)
  1835. {
  1836. TranslationError();
  1837. }
  1838. // Adjust for the exponent bias.
  1839. int exponentValue = int(num.RawExponent() - 1023);
  1840. // Add the implied one to the mantissa.
  1841. uint64_t mantissaValue = (1ll << 52) + num.RawMantissa();
  1842. // Special-case for denormals - no special exponent value and
  1843. // no implied one.
  1844. if (num.RawExponent() == 0)
  1845. {
  1846. exponentValue = -1022;
  1847. mantissaValue = num.RawMantissa();
  1848. }
  1849. uint32_t mantissaHigh = mantissaValue >> 32;
  1850. uint32_t mantissaLow = mantissaValue & 0xFFFFFFFF;
  1851. // The first bit of the mantissa has an implied value of one and this can
  1852. // be shifted 1023 positions to the left, so that's 1024 bits to the left
  1853. // of the binary point, or 32 32-bit words for the integer part.
  1854. HighPrec<32> intPart;
  1855. // When our exponentValue is zero (a number in the 1.0 to 2.0 range)
  1856. // we have a 53-bit mantissa and the implied value of the highest bit
  1857. // is 1. We need to shift 12 bits in from the bottom to get that 53rd bit
  1858. // into the ones spot in the integral portion.
  1859. // To complicate it a bit more we have to insert the mantissa as two parts.
  1860. intPart.InsertLowBits(mantissaHigh, 12 + exponentValue);
  1861. intPart.InsertLowBits(mantissaLow, 12 + exponentValue - 32);
  1862. bool bAnyDigitsLeft;
  1863. do
  1864. {
  1865. uint remainder = intPart.DivReturnRemainder( 1000000 ); // 10^6
  1866. uint origRemainer = remainder; (void)origRemainer;
  1867. bAnyDigitsLeft = !intPart.IsZero();
  1868. if ( bAnyDigitsLeft )
  1869. {
  1870. uint n = remainder % 100U; remainder /= 100U; *reinterpret_cast<uint16*>(pDst - 1) = reinterpret_cast<const uint16*>(pDigits)[n];
  1871. n = remainder % 100U; remainder /= 100U; *reinterpret_cast<uint16*>(pDst - 1 - 2) = reinterpret_cast<const uint16*>(pDigits)[n];
  1872. Assert( remainder < 100U );
  1873. *reinterpret_cast<uint16*>(pDst - 1 - 4) = reinterpret_cast<const uint16*>(pDigits)[remainder];
  1874. pDst -= 6;
  1875. }
  1876. else
  1877. {
  1878. uint n = remainder % 100U; remainder /= 100U; *reinterpret_cast<uint16*>(pDst - 1) = reinterpret_cast<const uint16*>(pDigits)[n]; --pDst; if ( ( n >= 10 ) || ( remainder ) ) --pDst;
  1879. if ( remainder )
  1880. {
  1881. n = remainder % 100U; remainder /= 100U; *reinterpret_cast<uint16*>(pDst - 1) = reinterpret_cast<const uint16*>(pDigits)[n]; --pDst; if ( ( n >= 10 ) || ( remainder ) ) --pDst;
  1882. if ( remainder )
  1883. {
  1884. Assert( remainder < 100U );
  1885. *reinterpret_cast<uint16*>(pDst - 1) = reinterpret_cast<const uint16*>(pDigits)[remainder]; --pDst; if ( remainder >= 10 ) --pDst;
  1886. }
  1887. }
  1888. }
  1889. } while ( bAnyDigitsLeft );
  1890. uint l = pLastChar - pDst;
  1891. while ( ( l - 1 ) < nMinChars )
  1892. {
  1893. *pDst-- = '0';
  1894. l++;
  1895. }
  1896. Assert( (int)l == ( pLastChar - pDst ) );
  1897. Assert( l <= nBufSize );
  1898. memmove( pBuf, pDst + 1, l );
  1899. return l - 1;
  1900. }
  1901. // FloatToString is equivalent to sprintf( "%.12f" ), but doesn't have any dependencies on the current locale setting.
  1902. // Unfortunately, high accuracy radix conversion is actually pretty tricky to do right.
  1903. // Most importantly, this function has the same max roundtrip (IEEE->ASCII->IEEE) error as the MS CRT functions and can reliably handle extremely large inputs.
  1904. static void FloatToString( char *pBuf, uint nBufSize, double fConst )
  1905. {
  1906. char *pEnd = pBuf + nBufSize;
  1907. char *pDst = pBuf;
  1908. double flVal = fConst;
  1909. if ( IsFloatNaN( flVal ) )
  1910. {
  1911. flVal = 0;
  1912. }
  1913. if ( flVal < 0.0f )
  1914. {
  1915. *pDst++ = '-';
  1916. flVal = -flVal;
  1917. }
  1918. double flInt;
  1919. double flFract = modf( flVal, &flInt );
  1920. flFract = floor( flFract * 1000000000000.0 + .5 );
  1921. if ( !flInt )
  1922. {
  1923. *pDst++ = '0';
  1924. }
  1925. else
  1926. {
  1927. uint l = PrintDoubleInt( pDst, pEnd - pDst, flInt, 0 );
  1928. pDst += l;
  1929. }
  1930. *pDst++ = '.';
  1931. if ( !flFract )
  1932. {
  1933. *pDst++ = '0';
  1934. *pDst++ = '\0';
  1935. }
  1936. else
  1937. {
  1938. uint l = PrintDoubleInt( pDst, pEnd - pDst, flFract, 12 );
  1939. pDst += l;
  1940. StripExtraTrailingZeros( pBuf ); // Turn 1.00000 into 1.0
  1941. }
  1942. }
  #if 0
  #include "vstdlib/random.h"
  // Disabled soak test; loops forever. Each pass picks a random value, converts
  // it with FloatToString() and with V_snprintf( "%.12f" ), parses both strings
  // back with atof() and tracks the worst round-trip error of each path.
  static void TestFloatConversion()
  {
      while ( true )
      {
          // Choose one of four value ranges at random.
          double flInput;
          const int nRange = rand() % 4;
          if ( nRange == 0 )
          {
              flInput = RandomFloat( -1e-30, 1e+30 );
          }
          else if ( nRange == 1 )
          {
              flInput = RandomFloat( -1e-10, 1e+10 );
          }
          else if ( nRange == 2 )
          {
              flInput = RandomFloat( -1e-5, 1e+5 );
          }
          else
          {
              flInput = RandomFloat( -1, 1 );
          }

          static double s_flWorstErrOurs;
          static double s_flWorstErrCRT;

          // Path 1: FloatToString does not rely on V_snprintf(), so it can't be
          // affected by the current locale setting.
          char szOurs[1024];
          FloatToString( szOurs, sizeof( szOurs ), flInput );
          const double flBackOurs = atof( szOurs );
          const double flErrOurs = fabs( flBackOurs - flInput );
          s_flWorstErrOurs = MAX( s_flWorstErrOurs, flErrOurs );
          Assert( EqualTol( flBackOurs, flInput, .000000125 ) );

          // Path 2: the CRT formatter, normalised to the same textual shape.
          char szCRT[256];
          V_snprintf( szCRT, sizeof( szCRT ), "%.12f", flInput );
          StripExtraTrailingZeros( szCRT );
          if ( !strchr( szCRT, '.' ) )
          {
              V_strncat( szCRT, ".0", sizeof( szCRT ) );
          }
          const double flBackCRT = atof( szCRT );
          const double flErrCRT = fabs( flBackCRT - flInput );
          s_flWorstErrCRT = MAX( s_flWorstErrCRT, flErrCRT );
          Assert( EqualTol( flBackCRT, flInput, .000000125 ) );

          // Flag any pass where our worst error exceeds the CRT's.
          if ( s_flWorstErrOurs > s_flWorstErrCRT )
          {
              GLMDebugPrintf( "!\n" );
          }
      }
  }
  #endif
  1989. void D3DToGL::Handle_DEFIB( uint32 instruction )
  1990. {
  1991. Assert( ( instruction == D3DSIO_DEFI ) || ( instruction == D3DSIO_DEFB ) );
  1992. // which register is being defined
  1993. uint32 dwToken = GetNextToken();
  1994. uint32 nRegNum = dwToken & D3DSP_REGNUM_MASK;
  1995. uint32 regType = GetRegTypeFromToken( dwToken );
  1996. if ( regType == D3DSPR_CONSTINT )
  1997. {
  1998. m_dwDefConstIntUsageMask |= ( 1 << nRegNum );
  1999. uint x = GetNextToken();
  2000. uint y = GetNextToken();
  2001. uint z = GetNextToken();
  2002. uint w = GetNextToken();
  2003. NOTE_UNUSED(y); NOTE_UNUSED(z); NOTE_UNUSED(w);
  2004. Assert( nRegNum < 32 );
  2005. if ( nRegNum < 32 )
  2006. {
  2007. m_dwDefConstIntIterCount[nRegNum] = x;
  2008. }
  2009. }
  2010. else
  2011. {
  2012. TranslationError();
  2013. }
  2014. }
  2015. void D3DToGL::Handle_DEF()
  2016. {
  2017. //TestFloatConversion();
  2018. //
  2019. // JasonM TODO: catch D3D's sincos-specific D3DSINCOSCONST1 and D3DSINCOSCONST2 constants and filter them out here
  2020. //
  2021. // Which register is being defined
  2022. uint32 dwToken = GetNextToken();
  2023. // Note that this constant was explicitly defined
  2024. m_bConstantRegisterDefined[dwToken & D3DSP_REGNUM_MASK] = true;
  2025. CUtlString sParamName = GetParameterString( dwToken, DST_REGISTER, false, NULL );
  2026. PrintIndentation( (char*)m_pBufParamCode->Base(), m_pBufParamCode->Size() );
  2027. PrintToBuf( *m_pBufParamCode, "vec4 %s = vec4( ", sParamName.String() );
  2028. // Run through the 4 floats
  2029. for ( int i=0; i < 4; i++ )
  2030. {
  2031. float fConst = uint32ToFloat( GetNextToken() );
  2032. char szTemp[1024];
  2033. FloatToString( szTemp, sizeof( szTemp ), fConst );
  2034. #if 0
  2035. static double flMaxErr1;
  2036. static double flMaxErr2;
  2037. // Compare FloatToString()'s results vs. V_snprintf()'s, also track maximum error of each.
  2038. double flCheck = atof( szTemp );
  2039. double flErr = fabs( flCheck - fConst );
  2040. flMaxErr1 = MAX( flMaxErr1, flErr );
  2041. Assert( EqualTol( flCheck, fConst, .000000125 ) );
  2042. char szTemp2[256];
  2043. V_snprintf( szTemp2, sizeof( szTemp2 ), "%.12f", fConst );
  2044. StripExtraTrailingZeros( szTemp2 );
  2045. if ( !strchr( szTemp2, '.' ) )
  2046. {
  2047. V_strncat( szTemp2, ".0", sizeof( szTemp2 ) );
  2048. }
  2049. double flCheck2 = atof( szTemp2 );
  2050. double flErr2 = fabs( flCheck2 - fConst );
  2051. flMaxErr2 = MAX( flMaxErr2, flErr2 );
  2052. Assert( EqualTol( flCheck2, fConst, .000000125 ) );
  2053. if ( flMaxErr1 > flMaxErr2 )
  2054. {
  2055. GLMDebugPrintf( "!\n" );
  2056. }
  2057. #endif
  2058. PrintToBuf( *m_pBufParamCode, i != 3 ? "%s, " : "%s", szTemp ); // end with comma-space
  2059. }
  2060. PrintToBuf( *m_pBufParamCode, " );\n" );
  2061. }
  2062. void D3DToGL::Handle_MAD( uint32 nInstruction )
  2063. {
  2064. uint32 nDestToken = GetNextToken();
  2065. CUtlString sParam1 = GetParameterString( nDestToken, DST_REGISTER, false, NULL );
  2066. int nARLComp0 = ARL_DEST_NONE;
  2067. CUtlString sParam2 = GetParameterString( GetNextToken(), SRC_REGISTER, false, &nARLComp0 );
  2068. int nARLComp1 = ARL_DEST_NONE;
  2069. CUtlString sParam3 = GetParameterString( GetNextToken(), SRC_REGISTER, false, &nARLComp1 );
  2070. int nARLComp2 = ARL_DEST_NONE;
  2071. CUtlString sParam4 = GetParameterString( GetNextToken(), SRC_REGISTER, false, &nARLComp2 );
  2072. // This optionally inserts a move from our dummy address register to the .x component of the real one
  2073. InsertMoveFromAddressRegister( m_pBufALUCode, nARLComp0, nARLComp1, nARLComp2 );
  2074. sParam2 = FixGLSLSwizzle( sParam1, sParam2 );
  2075. sParam3 = FixGLSLSwizzle( sParam1, sParam3 );
  2076. sParam4 = FixGLSLSwizzle( sParam1, sParam4 );
  2077. PrintToBufWithIndents( *m_pBufALUCode, "%s = %s * %s + %s;\n", sParam1.String(), sParam2.String(), sParam3.String(), sParam4.String() );
  2078. // If the _SAT instruction modifier is used, then do a saturate here.
  2079. if ( nDestToken & D3DSPDM_SATURATE )
  2080. {
  2081. int nComponents = GetNumSwizzleComponents( sParam1.String() );
  2082. if ( nComponents == 0 )
  2083. nComponents = 4;
  2084. PrintToBufWithIndents( *m_pBufALUCode, "%s = clamp( %s, %s, %s );\n", sParam1.String(), sParam1.String(), g_szVecZeros[nComponents], g_szVecOnes[nComponents] );
  2085. }
  2086. }
  2087. void D3DToGL::Handle_DP2ADD()
  2088. {
  2089. char pDestReg[64], pSrc0Reg[64], pSrc1Reg[64], pSrc2Reg[64];
  2090. uint32 nDestToken = GetNextToken();
  2091. PrintParameterToString( nDestToken, DST_REGISTER, pDestReg, sizeof( pDestReg ), false, NULL );
  2092. PrintParameterToString( GetNextToken(), SRC_REGISTER, pSrc0Reg, sizeof( pSrc0Reg ), false, NULL );
  2093. PrintParameterToString( GetNextToken(), SRC_REGISTER, pSrc1Reg, sizeof( pSrc1Reg ), false, NULL );
  2094. PrintParameterToString( GetNextToken(), SRC_REGISTER, pSrc2Reg, sizeof( pSrc2Reg ), false, NULL );
  2095. // We should only be assigning to a single component of the dest.
  2096. Assert( GetNumSwizzleComponents( pDestReg ) == 1 );
  2097. Assert( GetNumSwizzleComponents( pSrc2Reg ) == 1 );
  2098. // This is a 2D dot product, so we only want two entries from the middle components.
  2099. CUtlString sArg0 = EnsureNumSwizzleComponents( pSrc0Reg, 2 );
  2100. CUtlString sArg1 = EnsureNumSwizzleComponents( pSrc1Reg, 2 );
  2101. PrintToBufWithIndents( *m_pBufALUCode, "%s = dot( %s, %s ) + %s;\n", pDestReg, sArg0.String(), sArg1.String(), pSrc2Reg );
  2102. // If the _SAT instruction modifier is used, then do a saturate here.
  2103. if ( nDestToken & D3DSPDM_SATURATE )
  2104. {
  2105. int nComponents = GetNumSwizzleComponents( pDestReg );
  2106. if ( nComponents == 0 )
  2107. nComponents = 4;
  2108. PrintToBufWithIndents( *m_pBufALUCode, "%s = clamp( %s, %s, %s );\n", pDestReg, pDestReg, g_szVecZeros[nComponents], g_szVecOnes[nComponents] );
  2109. }
  2110. }
  2111. void D3DToGL::Handle_SINCOS()
  2112. {
  2113. char pDestReg[64], pSrc0Reg[64];
  2114. PrintParameterToString( GetNextToken(), DST_REGISTER, pDestReg, sizeof( pDestReg ), false, NULL );
  2115. PrintParameterToString( GetNextToken(), SRC_REGISTER, pSrc0Reg, sizeof( pSrc0Reg ), true, NULL );
  2116. m_bNeedsSinCosDeclarations = true;
  2117. CUtlString sDest( pDestReg );
  2118. CUtlString sArg0 = EnsureNumSwizzleComponents( pSrc0Reg, 1 );// Ensure input is scalar
  2119. CUtlString sResult( "vSinCosTmp.xy" ); // Always going to populate this
  2120. sResult = FixGLSLSwizzle( sDest, sResult ); // Make sure we match the desired output reg
  2121. PrintToBufWithIndents( *m_pBufALUCode, "vSinCosTmp.z = %s * %s;\n", sArg0.String(), sArg0.String() );
  2122. PrintToBufWithIndents( *m_pBufALUCode, "vSinCosTmp.xy = vSinCosTmp.zz * scA.xy + scA.wz;\n" );
  2123. PrintToBufWithIndents( *m_pBufALUCode, "vSinCosTmp.xy = vSinCosTmp.xy * vSinCosTmp.zz + scB.xy;\n" );
  2124. PrintToBufWithIndents( *m_pBufALUCode, "vSinCosTmp.xy = vSinCosTmp.xy * vSinCosTmp.zz + scB.wz;\n" );
  2125. PrintToBufWithIndents( *m_pBufALUCode, "vSinCosTmp.x = vSinCosTmp.x * %s;\n", sArg0.String() );
  2126. PrintToBufWithIndents( *m_pBufALUCode, "vSinCosTmp.xy = vSinCosTmp.xy * vSinCosTmp.xx;\n" );
  2127. PrintToBufWithIndents( *m_pBufALUCode, "vSinCosTmp.xy = vSinCosTmp.xy + vSinCosTmp.xy;\n" );
  2128. PrintToBufWithIndents( *m_pBufALUCode, "vSinCosTmp.x = -vSinCosTmp.x + scB.z;\n" );
  2129. PrintToBufWithIndents( *m_pBufALUCode, "%s = %s;\n", sDest.String(), sResult.String() );
  2130. if ( m_dwMajorVersion < 3 )
  2131. {
  2132. // Eat two more tokens since D3D defines Taylor series constants that we won't need
  2133. // Only valid for pixel and vertex shader version earlier than 3_0
  2134. // (http://msdn.microsoft.com/en-us/library/windows/hardware/ff569710(v=vs.85).aspx)
  2135. SkipTokens( 2 );
  2136. }
  2137. }
  2138. void D3DToGL::Handle_LRP( uint32 nInstruction )
  2139. {
  2140. uint32 nDestToken = GetNextToken();
  2141. CUtlString sDest = GetParameterString( nDestToken, DST_REGISTER, false, NULL );
  2142. int nARLComp0 = ARL_DEST_NONE;
  2143. CUtlString sParam0 = GetParameterString( GetNextToken(), SRC_REGISTER, false, &nARLComp0 );
  2144. int nARLComp1 = ARL_DEST_NONE;
  2145. CUtlString sParam1 = GetParameterString( GetNextToken(), SRC_REGISTER, false, &nARLComp1 );
  2146. int nARLComp2 = ARL_DEST_NONE;
  2147. CUtlString sParam2 = GetParameterString( GetNextToken(), SRC_REGISTER, false, &nARLComp2 );
  2148. // This optionally inserts a move from our dummy address register to the .x component of the real one
  2149. InsertMoveFromAddressRegister( m_pBufALUCode, nARLComp0, nARLComp1, nARLComp2 );
  2150. sParam0 = FixGLSLSwizzle( sDest, sParam0 );
  2151. sParam1 = FixGLSLSwizzle( sDest, sParam1 );
  2152. sParam2 = FixGLSLSwizzle( sDest, sParam2 );
  2153. // dest = src0 * (src1 - src2) + src2;
  2154. PrintToBufWithIndents( *m_pBufALUCode, "%s = %s * ( %s - %s ) + %s;\n", sDest.String(), sParam0.String(), sParam1.String(), sParam2.String(), sParam2.String() );
  2155. // If the _SAT instruction modifier is used, then do a saturate here.
  2156. if ( nDestToken & D3DSPDM_SATURATE )
  2157. {
  2158. int nComponents = GetNumSwizzleComponents( sDest.String() );
  2159. if ( nComponents == 0 )
  2160. nComponents = 4;
  2161. PrintToBufWithIndents( *m_pBufALUCode, "%s = clamp( %s, %s, %s );\n", sDest.String(), sDest.String(), g_szVecZeros[nComponents], g_szVecOnes[nComponents] );
  2162. }
  2163. }
  2164. void D3DToGL::Handle_TEX( uint32 dwToken, bool bIsTexLDL )
  2165. {
  2166. char pDestReg[64], pSrc0Reg[64], pSrc1Reg[64];
  2167. PrintParameterToString( GetNextToken(), DST_REGISTER, pDestReg, sizeof( pDestReg ), false, NULL );
  2168. PrintParameterToString( GetNextToken(), SRC_REGISTER, pSrc0Reg, sizeof( pSrc0Reg ), false, NULL );
  2169. DWORD dwSrc1Token = GetNextToken();
  2170. PrintParameterToString( dwSrc1Token, SRC_REGISTER, pSrc1Reg, sizeof( pSrc1Reg ), false, NULL );
  2171. Assert( (dwSrc1Token & D3DSP_REGNUM_MASK) < ARRAYSIZE( m_dwSamplerTypes ) );
  2172. uint32 nSamplerType = m_dwSamplerTypes[dwSrc1Token & D3DSP_REGNUM_MASK];
  2173. if ( nSamplerType == SAMPLER_TYPE_2D )
  2174. {
  2175. const bool bIsShadowSampler = ( ( 1 << ( (int) ( dwSrc1Token & D3DSP_REGNUM_MASK ) ) ) & m_nShadowDepthSamplerMask ) != 0;
  2176. if ( bIsTexLDL )
  2177. {
  2178. CUtlString sCoordVar = EnsureNumSwizzleComponents( pSrc0Reg, bIsShadowSampler ? 3 : 2 );
  2179. // Strip out the W component of the pSrc0Reg and pass that as the LOD to texture2DLod.
  2180. char szLOD[128], szExtra[8];
  2181. GetParamNameWithoutSwizzle( pSrc0Reg, szLOD, sizeof( szLOD ) );
  2182. V_snprintf( szExtra, sizeof( szExtra ), ".%c", GetSwizzleComponent( pSrc0Reg, 3 ) );
  2183. V_strncat( szLOD, szExtra, sizeof( szLOD ) );
  2184. PrintToBufWithIndents( *m_pBufALUCode, "%s = %s( %s, %s, %s );\n", pDestReg, bIsShadowSampler ? "shadow2DLod" : "texture2DLod", pSrc1Reg, sCoordVar.String(), szLOD );
  2185. }
  2186. else if ( bIsShadowSampler )
  2187. {
  2188. // .z is meant to contain the object depth, while .xy contains the 2D tex coords
  2189. CUtlString sCoordVar3D = EnsureNumSwizzleComponents( pSrc0Reg, 3 );
  2190. PrintToBufWithIndents( *m_pBufALUCode, "%s = shadow2D( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar3D.String() );
  2191. Assert( m_dwSamplerTypes[dwSrc1Token & D3DSP_REGNUM_MASK] == SAMPLER_TYPE_2D );
  2192. }
  2193. else if( ( OpcodeSpecificData( dwToken ) << D3DSP_OPCODESPECIFICCONTROL_SHIFT ) == D3DSI_TEXLD_PROJECT )
  2194. {
  2195. // This projective case is after the shadow case intentionally, due to the way that "projective"
  2196. // loads are overloaded in our D3D shaders for shadow lookups.
  2197. //
  2198. // We use the vec4 variant of texture2DProj() intentionally here, since it lines up well with Direct3D.
  2199. CUtlString s4DProjCoords = EnsureNumSwizzleComponents( pSrc0Reg, 4 ); // Ensure vec4 variant
  2200. PrintToBufWithIndents( *m_pBufALUCode, "%s = texture2DProj( %s, %s );\n", pDestReg, pSrc1Reg, s4DProjCoords.String() );
  2201. }
  2202. else
  2203. {
  2204. CUtlString sCoordVar = EnsureNumSwizzleComponents( pSrc0Reg, bIsShadowSampler ? 3 : 2 );
  2205. PrintToBufWithIndents( *m_pBufALUCode, "%s = texture2D( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() );
  2206. }
  2207. }
  2208. else if ( nSamplerType == SAMPLER_TYPE_3D )
  2209. {
  2210. if ( bIsTexLDL )
  2211. {
  2212. TranslationError();
  2213. }
  2214. CUtlString sCoordVar = EnsureNumSwizzleComponents( pSrc0Reg, 3 );
  2215. PrintToBufWithIndents( *m_pBufALUCode, "%s = texture3D( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() );
  2216. }
  2217. else if ( nSamplerType == SAMPLER_TYPE_CUBE )
  2218. {
  2219. if ( bIsTexLDL )
  2220. {
  2221. TranslationError();
  2222. }
  2223. CUtlString sCoordVar = EnsureNumSwizzleComponents( pSrc0Reg, 3 );
  2224. PrintToBufWithIndents( *m_pBufALUCode, "%s = textureCube( %s, %s );\n", pDestReg, pSrc1Reg, sCoordVar.String() );
  2225. }
  2226. else
  2227. {
  2228. Error( "TEX instruction: unsupported sampler type used" );
  2229. }
  2230. }
  2231. void D3DToGL::StrcatToHeaderCode( const char *pBuf )
  2232. {
  2233. strcat_s( (char*)m_pBufHeaderCode->Base(), m_pBufHeaderCode->Size(), pBuf );
  2234. }
  2235. void D3DToGL::StrcatToALUCode( const char *pBuf )
  2236. {
  2237. PrintIndentation( (char*)m_pBufALUCode->Base(), m_pBufALUCode->Size() );
  2238. strcat_s( (char*)m_pBufALUCode->Base(), m_pBufALUCode->Size(), pBuf );
  2239. }
  2240. void D3DToGL::StrcatToParamCode( const char *pBuf )
  2241. {
  2242. strcat_s( (char*)m_pBufParamCode->Base(), m_pBufParamCode->Size(), pBuf );
  2243. }
  2244. void D3DToGL::StrcatToAttribCode( const char *pBuf )
  2245. {
  2246. strcat_s( (char*)m_pBufAttribCode->Base(), m_pBufAttribCode->Size(), pBuf );
  2247. }
  2248. void D3DToGL::Handle_TexLDD( uint32 nInstruction )
  2249. {
  2250. TranslationError(); // Not supported yet, but can be if we need it.
  2251. }
  2252. void D3DToGL::Handle_TexCoord()
  2253. {
  2254. TranslationError();
  2255. // If ps_1_4, this is texcrd
  2256. if ( (m_dwMajorVersion == 1) && (m_dwMinorVersion == 4) && (!m_bVertexShader) )
  2257. {
  2258. StrcatToALUCode( "texcrd" );
  2259. }
  2260. else // else it's texcoord
  2261. {
  2262. TranslationError();
  2263. StrcatToALUCode( "texcoord" );
  2264. }
  2265. char buff[256];
  2266. PrintParameterToString( GetNextToken(), DST_REGISTER, buff, sizeof( buff ), false, NULL );
  2267. StrcatToALUCode( buff );
  2268. // If ps_1_4, texcrd also has a source parameter
  2269. if ((m_dwMajorVersion == 1) && (m_dwMinorVersion == 4) && (!m_bVertexShader))
  2270. {
  2271. StrcatToALUCode( ", " );
  2272. PrintParameterToString( GetNextToken(), SRC_REGISTER, buff, sizeof( buff ), false, NULL );
  2273. StrcatToALUCode( buff );
  2274. }
  2275. StrcatToALUCode( ";\n" );
  2276. }
  2277. void D3DToGL::Handle_BREAKC( uint32 dwToken )
  2278. {
  2279. uint nComparison = ( dwToken & D3DSHADER_COMPARISON_MASK ) >> D3DSHADER_COMPARISON_SHIFT;
  2280. const char *pComparison = "?";
  2281. switch ( nComparison )
  2282. {
  2283. case D3DSPC_GT: pComparison = ">"; break;
  2284. case D3DSPC_EQ: pComparison = "=="; break;
  2285. case D3DSPC_GE: pComparison = ">="; break;
  2286. case D3DSPC_LT: pComparison = "<"; break;
  2287. case D3DSPC_NE: pComparison = "!="; break;
  2288. case D3DSPC_LE: pComparison = "<="; break;
  2289. default:
  2290. TranslationError();
  2291. }
  2292. char src0[256];
  2293. uint32 src0Token = GetNextToken();
  2294. PrintParameterToString( src0Token, SRC_REGISTER, src0, sizeof( src0 ), false, NULL );
  2295. char src1[256];
  2296. uint32 src1Token = GetNextToken();
  2297. PrintParameterToString( src1Token, SRC_REGISTER, src1, sizeof( src1 ), false, NULL );
  2298. PrintToBufWithIndents( *m_pBufALUCode, "if (%s %s %s) break;\n", src0, pComparison, src1 );
  2299. }
  2300. void D3DToGL::HandleBinaryOp_GLSL( uint32 nInstruction )
  2301. {
  2302. uint32 nDestToken = GetNextToken();
  2303. CUtlString sParam1 = GetParameterString( nDestToken, DST_REGISTER, false, NULL );
  2304. int nARLComp0 = ARL_DEST_NONE;
  2305. CUtlString sParam2 = GetParameterString( GetNextToken(), SRC_REGISTER, false, &nARLComp0 );
  2306. int nARLComp1 = ARL_DEST_NONE;
  2307. CUtlString sParam3 = GetParameterString( GetNextToken(), SRC_REGISTER, false, &nARLComp1 );
  2308. // This optionally inserts a move from our dummy address register to the .x component of the real one
  2309. InsertMoveFromAddressRegister( m_pBufALUCode, nARLComp0, nARLComp1 );
  2310. // Since DP3 and DP4 have a scalar as the dest and vectors as the src, don't screw with the swizzle specifications.
  2311. if ( nInstruction == D3DSIO_DP3 )
  2312. {
  2313. sParam2 = EnsureNumSwizzleComponents( sParam2, 3 );
  2314. sParam3 = EnsureNumSwizzleComponents( sParam3, 3 );
  2315. }
  2316. else if ( nInstruction == D3DSIO_DP4 )
  2317. {
  2318. sParam2 = EnsureNumSwizzleComponents( sParam2, 4 );
  2319. sParam3 = EnsureNumSwizzleComponents( sParam3, 4 );
  2320. }
  2321. else if ( nInstruction == D3DSIO_DST )
  2322. {
  2323. m_bUsesDSTInstruction = true;
  2324. sParam2 = EnsureNumSwizzleComponents( sParam2, 4 );
  2325. sParam3 = EnsureNumSwizzleComponents( sParam3, 4 );
  2326. }
  2327. else
  2328. {
  2329. sParam2 = FixGLSLSwizzle( sParam1, sParam2 );
  2330. sParam3 = FixGLSLSwizzle( sParam1, sParam3 );
  2331. }
  2332. char buff[256];
  2333. if ( nInstruction == D3DSIO_ADD || nInstruction == D3DSIO_SUB || nInstruction == D3DSIO_MUL )
  2334. {
  2335. // These all look like x = y op z
  2336. PrintToBufWithIndents( *m_pBufALUCode, "%s = %s %s %s;\n", sParam1.String(), sParam2.String(), GetGLSLOperatorString( nInstruction ), sParam3.String() );
  2337. }
  2338. else
  2339. {
  2340. int nDestComponents = GetNumSwizzleComponents( sParam1.String() );
  2341. int nSrcComponents = GetNumSwizzleComponents( sParam2.String() );
  2342. // All remaining instructions can use GLSL intrinsics like dot() and cross().
  2343. bool bDoubleClose = OpenIntrinsic( nInstruction, buff, sizeof( buff ), nDestComponents, nSrcComponents );
  2344. if ( ( nSrcComponents == 1 ) && ( nInstruction == D3DSIO_SGE ) )
  2345. {
  2346. PrintToBufWithIndents( *m_pBufALUCode, "%s = %s%s >= %s );\n", sParam1.String(), buff, sParam2.String(), sParam3.String() );
  2347. }
  2348. else if ( ( nSrcComponents == 1 ) && ( nInstruction == D3DSIO_SLT ) )
  2349. {
  2350. PrintToBufWithIndents( *m_pBufALUCode, "%s = %s%s < %s );\n", sParam1.String(), buff, sParam2.String(), sParam3.String() );
  2351. }
  2352. else
  2353. {
  2354. PrintToBufWithIndents( *m_pBufALUCode, "%s = %s%s, %s %s;\n", sParam1.String(), buff, sParam2.String(), sParam3.String(), bDoubleClose ? ") )" : ")" );
  2355. }
  2356. }
  2357. // If the _SAT instruction modifier is used, then do a saturate here.
  2358. if ( nDestToken & D3DSPDM_SATURATE )
  2359. {
  2360. int nComponents = GetNumSwizzleComponents( sParam1.String() );
  2361. if ( nComponents == 0 )
  2362. nComponents = 4;
  2363. PrintToBufWithIndents( *m_pBufALUCode, "%s = clamp( %s, %s, %s );\n", sParam1.String(), sParam1.String(), g_szVecZeros[nComponents], g_szVecOnes[nComponents] );
  2364. }
  2365. }
  2366. void D3DToGL::HandleBinaryOp_ASM( uint32 nInstruction )
  2367. {
  2368. CUtlString sParam1 = GetParameterString( GetNextToken(), DST_REGISTER, false, NULL );
  2369. int nARLComp0 = ARL_DEST_NONE;
  2370. CUtlString sParam2 = GetParameterString( GetNextToken(), SRC_REGISTER, false, &nARLComp0 );
  2371. int nARLComp1 = ARL_DEST_NONE;
  2372. CUtlString sParam3 = GetParameterString( GetNextToken(), SRC_REGISTER, false, &nARLComp1 );
  2373. // This optionally inserts a move from our dummy address register to the .x component of the real one
  2374. InsertMoveFromAddressRegister( m_pBufALUCode, nARLComp0, nARLComp1 );
  2375. char buff[256];
  2376. PrintOpcode( nInstruction, buff, sizeof( buff ) );
  2377. PrintToBufWithIndents( *m_pBufALUCode, "%s%s, %s, %s;\n", buff, sParam1.String(), sParam2.String(), sParam3.String() );
  2378. }
  2379. void D3DToGL::WriteGLSLCmp( const char *pDestReg, const char *pSrc0Reg, const char *pSrc1Reg, const char *pSrc2Reg )
  2380. {
  2381. int nWriteMaskEntries = GetNumWriteMaskEntries( pDestReg );
  2382. for ( int i=0; i < nWriteMaskEntries; i++ )
  2383. {
  2384. char params[4][256];
  2385. WriteParamWithSingleMaskEntry( pDestReg, i, params[0], sizeof( params[0] ) );
  2386. WriteParamWithSingleMaskEntry( pSrc0Reg, i, params[1], sizeof( params[1] ) );
  2387. WriteParamWithSingleMaskEntry( pSrc1Reg, i, params[2], sizeof( params[2] ) );
  2388. WriteParamWithSingleMaskEntry( pSrc2Reg, i, params[3], sizeof( params[3] ) );
  2389. PrintToBufWithIndents( *m_pBufALUCode, "%s = ( %s >= 0.0 ) ? %s : %s;\n", params[0], params[1], params[2], params[3] );
  2390. }
  2391. }
  2392. void D3DToGL::Handle_CMP()
  2393. {
  2394. // In Direct3D, result = (src0 >= 0.0) ? src1 : src2
  2395. // In OpenGL, result = (src0 < 0.0) ? src1 : src2
  2396. //
  2397. // As a result, arguments are effectively in a different order than Direct3D! !#$&*!%#$&
  2398. char pDestReg[64], pSrc0Reg[64], pSrc1Reg[64], pSrc2Reg[64];
  2399. uint32 nDestToken = GetNextToken();
  2400. PrintParameterToString( nDestToken, DST_REGISTER, pDestReg, sizeof( pDestReg ), false, NULL );
  2401. PrintParameterToString( GetNextToken(), SRC_REGISTER, pSrc0Reg, sizeof( pSrc0Reg ), false, NULL );
  2402. PrintParameterToString( GetNextToken(), SRC_REGISTER, pSrc1Reg, sizeof( pSrc1Reg ), false, NULL );
  2403. PrintParameterToString( GetNextToken(), SRC_REGISTER, pSrc2Reg, sizeof( pSrc2Reg ), false, NULL );
  2404. // These are a tricky case.. we have to expand it out into multiple statements.
  2405. char szDestBase[256];
  2406. GetParamNameWithoutSwizzle( pDestReg, szDestBase, sizeof( szDestBase ) );
  2407. V_strncpy( pSrc0Reg, FixGLSLSwizzle( pDestReg, pSrc0Reg ), sizeof( pSrc0Reg ) );
  2408. V_strncpy( pSrc1Reg, FixGLSLSwizzle( pDestReg, pSrc1Reg ), sizeof( pSrc1Reg ) );
  2409. V_strncpy( pSrc2Reg, FixGLSLSwizzle( pDestReg, pSrc2Reg ), sizeof( pSrc2Reg ) );
  2410. // This isn't reliable!
  2411. //if ( DoParamNamesMatch( pDestReg, pSrc0Reg ) && GetNumSwizzleComponents( pDestReg ) > 1 )
  2412. if ( 1 )
  2413. {
  2414. // So the dest register is the same as the comparand. We're in danger of screwing up our results.
  2415. //
  2416. // For example, this code:
  2417. // CMP r0.xy, r0.xx, r1, r2
  2418. // would generate this:
  2419. // r0.x = (r0.x >= 0) ? r1.x : r2.x;
  2420. // r0.y = (r0.x >= 0) ? r1.x : r2.x;
  2421. //
  2422. // But the first lines changes r0.x and thus screws the atomicity of the CMP instruction for the second line.
  2423. // So we assign r0 to a temporary first and then write to the temporary.
  2424. PrintToBufWithIndents( *m_pBufALUCode, "%s = %s;\n", g_pAtomicTempVarName, szDestBase );
  2425. char szTempVar[256];
  2426. ReplaceParamName( pDestReg, g_pAtomicTempVarName, szTempVar, sizeof( szTempVar ) );
  2427. WriteGLSLCmp( szTempVar, pSrc0Reg, pSrc1Reg, pSrc2Reg );
  2428. PrintToBufWithIndents( *m_pBufALUCode, "%s = %s;\n", szDestBase, g_pAtomicTempVarName );
  2429. m_bUsedAtomicTempVar = true;
  2430. }
  2431. else
  2432. {
  2433. // Just write out the simple expanded version of the CMP. No need to use atomic_temp_var.
  2434. WriteGLSLCmp( pDestReg, pSrc0Reg, pSrc1Reg, pSrc2Reg );
  2435. }
  2436. // If the _SAT instruction modifier is used, then do a saturate here.
  2437. if ( nDestToken & D3DSPDM_SATURATE )
  2438. {
  2439. int nComponents = GetNumSwizzleComponents( pDestReg );
  2440. if ( nComponents == 0 )
  2441. nComponents = 4;
  2442. PrintToBufWithIndents( *m_pBufALUCode, "%s = clamp( %s, %s, %s );\n", pDestReg, pDestReg, g_szVecZeros[nComponents], g_szVecOnes[nComponents] );
  2443. }
  2444. }
  2445. void D3DToGL::Handle_NRM()
  2446. {
  2447. char pDestReg[64];
  2448. char pSrc0Reg[64];
  2449. PrintParameterToString( GetNextToken(), DST_REGISTER, pDestReg, sizeof( pDestReg ), false, NULL );
  2450. int nARLSrcComp = ARL_DEST_NONE;
  2451. PrintParameterToString( GetNextToken(), SRC_REGISTER, pSrc0Reg, sizeof( pSrc0Reg ), false, &nARLSrcComp );
  2452. if ( nARLSrcComp != -1 )
  2453. {
  2454. InsertMoveFromAddressRegister( m_pBufALUCode, nARLSrcComp, -1, -1 );
  2455. }
  2456. CUtlString sSrc = EnsureNumSwizzleComponents( pSrc0Reg, 3 );
  2457. PrintToBufWithIndents( *m_pBufALUCode, "%s = normalize( %s );\n", pDestReg, sSrc.String() );
  2458. }
  2459. void D3DToGL::Handle_UnaryOp( uint32 nInstruction )
  2460. {
  2461. uint32 nDestToken = GetNextToken();
  2462. CUtlString sParam1 = GetParameterString( nDestToken, DST_REGISTER, false, NULL );
  2463. CUtlString sParam2 = GetParameterString( GetNextToken(), SRC_REGISTER, false, NULL );
  2464. sParam2 = FixGLSLSwizzle( sParam1, sParam2 );
  2465. if ( nInstruction == D3DSIO_MOV )
  2466. {
  2467. PrintToBufWithIndents( *m_pBufALUCode, "%s = %s;\n", sParam1.String(), sParam2.String() );
  2468. }
  2469. else if ( nInstruction == D3DSIO_RSQ )
  2470. {
  2471. PrintToBufWithIndents( *m_pBufALUCode, "%s = inversesqrt( %s );\n", sParam1.String(), sParam2.String() );
  2472. }
  2473. else if ( nInstruction == D3DSIO_RCP )
  2474. {
  2475. PrintToBufWithIndents( *m_pBufALUCode, "%s = 1.0 / %s;\n", sParam1.String(), sParam2.String() );
  2476. }
  2477. else if ( nInstruction == D3DSIO_EXP )
  2478. {
  2479. PrintToBufWithIndents( *m_pBufALUCode, "%s = exp2( %s );\n", sParam1.String(), sParam2.String() );
  2480. }
  2481. else if ( nInstruction == D3DSIO_FRC )
  2482. {
  2483. PrintToBufWithIndents( *m_pBufALUCode, "%s = fract( %s );\n", sParam1.String(), sParam2.String() );
  2484. }
  2485. else if ( nInstruction == D3DSIO_LOG ) // d3d 'log' is log base 2
  2486. {
  2487. PrintToBufWithIndents( *m_pBufALUCode, "%s = log2( %s );\n", sParam1.String(), sParam2.String() );
  2488. }
  2489. else if ( nInstruction == D3DSIO_ABS ) // rbarris did this one, Jason please check
  2490. {
  2491. PrintToBufWithIndents( *m_pBufALUCode, "%s = abs( %s );\n", sParam1.String(), sParam2.String() );
  2492. }
  2493. else if ( nInstruction == D3DSIO_MOVA )
  2494. {
  2495. m_bDeclareAddressReg = true;
  2496. PrintToBufWithIndents( *m_pBufALUCode, "%s = %s;\n", sParam1.String(), sParam2.String() );
  2497. if ( !m_bGenerateBoneUniformBuffer )
  2498. {
  2499. m_nHighestRegister = DXABSTRACT_VS_PARAM_SLOTS - 1;
  2500. }
  2501. }
  2502. else
  2503. {
  2504. Error( "Unsupported instruction" );
  2505. }
  2506. // If the _SAT instruction modifier is used, then do a saturate here.
  2507. if ( nDestToken & D3DSPDM_SATURATE )
  2508. {
  2509. int nComponents = GetNumSwizzleComponents( sParam1.String() );
  2510. if ( nComponents == 0 )
  2511. {
  2512. nComponents = 4;
  2513. }
  2514. PrintToBufWithIndents( *m_pBufALUCode, "%s = clamp( %s, %s, %s );\n", sParam1.String(), sParam1.String(), g_szVecZeros[nComponents], g_szVecOnes[nComponents] );
  2515. }
  2516. }
  2517. void D3DToGL::WriteGLSLSamplerDefinitions()
  2518. {
  2519. int nSamplersWritten = 0;
  2520. for ( int i=0; i < ARRAYSIZE( m_dwSamplerTypes ); i++ )
  2521. {
  2522. if ( m_dwSamplerTypes[i] == SAMPLER_TYPE_2D )
  2523. {
  2524. if ( ( ( 1 << i ) & m_nShadowDepthSamplerMask ) != 0 )
  2525. {
  2526. PrintToBuf( *m_pBufHeaderCode, "uniform sampler2DShadow sampler%d;\n", i );
  2527. }
  2528. else
  2529. {
  2530. PrintToBuf( *m_pBufHeaderCode, "uniform sampler2D sampler%d;\n", i );
  2531. }
  2532. ++nSamplersWritten;
  2533. }
  2534. else if ( m_dwSamplerTypes[i] == SAMPLER_TYPE_3D )
  2535. {
  2536. PrintToBuf( *m_pBufHeaderCode, "uniform sampler3D sampler%d;\n", i );
  2537. ++nSamplersWritten;
  2538. }
  2539. else if ( m_dwSamplerTypes[i] == SAMPLER_TYPE_CUBE )
  2540. {
  2541. PrintToBuf( *m_pBufHeaderCode, "uniform samplerCube sampler%d;\n", i );
  2542. ++nSamplersWritten;
  2543. }
  2544. else if ( m_dwSamplerTypes[i] != SAMPLER_TYPE_UNUSED )
  2545. {
  2546. Error( "Unknown sampler type." );
  2547. }
  2548. }
  2549. if ( nSamplersWritten > 0 )
  2550. PrintToBuf( *m_pBufHeaderCode, "\n\n" );
  2551. }
  2552. void D3DToGL::WriteGLSLOutputVariableAssignments()
  2553. {
  2554. if ( m_bVertexShader )
  2555. {
  2556. // Map output "oN" registers back to GLSL output variables.
  2557. if ( m_bAddHexCodeComments )
  2558. {
  2559. PrintToBuf( *m_pBufAttribCode, "\n// Now we're storing the oN variables from the output dcl_ statements back into their GLSL equivalents.\n" );
  2560. }
  2561. for ( int i=0; i < ARRAYSIZE( m_DeclaredOutputs ); i++ )
  2562. {
  2563. if ( m_DeclaredOutputs[i] == UNDECLARED_OUTPUT )
  2564. continue;
  2565. if ( ( m_dwTexCoordOutMask & ( 1 << i ) ) == 0 )
  2566. continue;
  2567. uint32 dwToken = m_DeclaredOutputs[i];
  2568. uint32 dwUsage = ( dwToken & D3DSP_DCL_USAGE_MASK );
  2569. uint32 dwUsageIndex = ( dwToken & D3DSP_DCL_USAGEINDEX_MASK ) >> D3DSP_DCL_USAGEINDEX_SHIFT;
  2570. if ( ( dwUsage == D3DDECLUSAGE_FOG ) || ( dwUsage == D3DDECLUSAGE_PSIZE ) )
  2571. {
  2572. TranslationError(); // Not supported yet, but can be if we need it.
  2573. }
  2574. if ( dwUsage == D3DDECLUSAGE_COLOR )
  2575. {
  2576. PrintToBufWithIndents( *m_pBufALUCode, "%s = oTempT%d;\n", dwUsageIndex ? "gl_FrontSecondaryColor" : "gl_FrontColor", i );
  2577. }
  2578. else if ( dwUsage == D3DDECLUSAGE_TEXCOORD )
  2579. {
  2580. char buf[256];
  2581. if ( m_nCentroidMask & ( 0x00000001 << dwUsageIndex ) )
  2582. {
  2583. V_snprintf( buf, sizeof( buf ), "centroid varying vec4 oT%d;\n", dwUsageIndex ); // centroid varying
  2584. }
  2585. else
  2586. {
  2587. V_snprintf( buf, sizeof( buf ), "varying vec4 oT%d;\n", dwUsageIndex );
  2588. }
  2589. StrcatToHeaderCode( buf );
  2590. PrintToBufWithIndents( *m_pBufALUCode, "oT%d = oTempT%d;\n", dwUsageIndex, i );
  2591. }
  2592. }
  2593. }
  2594. }
  2595. void D3DToGL::WriteGLSLInputVariableAssignments()
  2596. {
  2597. if ( m_bVertexShader )
  2598. return;
  2599. for ( int i=0; i < ARRAYSIZE( m_DeclaredInputs ); i++ )
  2600. {
  2601. if ( m_DeclaredInputs[i] == UNDECLARED_INPUT )
  2602. continue;
  2603. uint32 dwToken = m_DeclaredInputs[i];
  2604. uint32 dwUsage = ( dwToken & D3DSP_DCL_USAGE_MASK );
  2605. uint32 dwUsageIndex = ( dwToken & D3DSP_DCL_USAGEINDEX_MASK ) >> D3DSP_DCL_USAGEINDEX_SHIFT;
  2606. if ( dwUsage == D3DDECLUSAGE_COLOR )
  2607. {
  2608. PrintToBufWithIndents( *m_pBufAttribCode, "vec4 oTempT%d = %s;\n", i, dwUsageIndex ? "gl_SecondaryColor" : "gl_Color" );
  2609. }
  2610. else if ( dwUsage == D3DDECLUSAGE_TEXCOORD )
  2611. {
  2612. PrintToBufWithIndents( *m_pBufAttribCode, "vec4 oTempT%d = oT%d;\n", i, dwUsageIndex );
  2613. }
  2614. }
  2615. }
  2616. void D3DToGL::Handle_DeclarativeNonDclOp( uint32 nInstruction )
  2617. {
  2618. char buff[128];
  2619. uint32 dwToken = GetNextToken();
  2620. PrintParameterToString( dwToken, DST_REGISTER, buff, sizeof( buff ), false, NULL );
  2621. if ( nInstruction == D3DSIO_TEXKILL )
  2622. {
  2623. // TEXKILL is supposed to discard the pixel if any of the src register's X, Y, or Z components are less than zero.
  2624. // We have to translate it to something like:
  2625. // if ( r0.x < 0.0 || r0.y < 0.0 )
  2626. // discard;
  2627. char c[3];
  2628. c[0] = GetSwizzleComponent( buff, 0 );
  2629. c[1] = GetSwizzleComponent( buff, 1 );
  2630. c[2] = GetSwizzleComponent( buff, 2 );
  2631. // Get the unique components.
  2632. char cUnique[3];
  2633. cUnique[0] = c[0];
  2634. int nUnique = 1;
  2635. if ( c[1] != c[0] )
  2636. cUnique[nUnique++] = c[1];
  2637. if ( c[2] != c[1] && c[2] != c[0] )
  2638. cUnique[nUnique++] = c[2];
  2639. // Get the src register base name.
  2640. char szBase[256];
  2641. GetParamNameWithoutSwizzle( buff, szBase, sizeof( szBase ) );
  2642. PrintToBufWithIndents( *m_pBufALUCode, "if ( %s.%c < 0.0 ", szBase, cUnique[0] );
  2643. for ( int i=1; i < nUnique; i++ )
  2644. {
  2645. PrintToBuf( *m_pBufALUCode, "|| %s.%c < 0.0 ", szBase, cUnique[i] );
  2646. }
  2647. PrintToBuf( *m_pBufALUCode, ")\n{\n\tdiscard;\n}\n" );
  2648. }
  2649. else
  2650. {
  2651. char szOpcode[128];
  2652. PrintOpcode( nInstruction, szOpcode, sizeof( szOpcode ) );
  2653. StrcatToALUCode( szOpcode );
  2654. StrcatToALUCode( buff );
  2655. StrcatToALUCode( ";\n" );
  2656. }
  2657. }
  2658. void D3DToGL::NoteTangentInputUsed()
  2659. {
  2660. if ( !m_bTangentInputUsed )
  2661. {
  2662. m_bTangentInputUsed = true;
  2663. // PrintToBuf( *m_pBufParamCode, "attribute vec4 %s;\n", g_pTangentAttributeName );
  2664. }
  2665. }
  2666. // These are the only ARL instructions that should appear in the instruction stream
  2667. void D3DToGL::InsertMoveInstruction( CUtlBuffer *pCode, int nARLComponent )
  2668. {
  2669. PrintIndentation( ( char * )pCode->Base(), pCode->Size() );
  2670. switch ( nARLComponent )
  2671. {
  2672. case ARL_DEST_X:
  2673. strcat_s( ( char * )pCode->Base(), pCode->Size(), "a0 = int( va_r.x );\n" );
  2674. break;
  2675. case ARL_DEST_Y:
  2676. strcat_s( ( char * )pCode->Base(), pCode->Size(), "a0 = int( va_r.y );\n" );
  2677. break;
  2678. case ARL_DEST_Z:
  2679. strcat_s( ( char * )pCode->Base(), pCode->Size(), "a0 = int( va_r.z );\n" );
  2680. break;
  2681. case ARL_DEST_W:
  2682. strcat_s( ( char * )pCode->Base(), pCode->Size(), "a0 = int( va_r.w );\n" );
  2683. break;
  2684. }
  2685. }
  2686. // This optionally inserts a move from our dummy address register to the .x component of the real one
  2687. void D3DToGL::InsertMoveFromAddressRegister( CUtlBuffer *pCode, int nARLComp0, int nARLComp1, int nARLComp2 /* = ARL_DEST_NONE */ )
  2688. {
  2689. // We no longer need to do this in GLSL - we put the cast to int from the dummy address register va_r.x, va_r.y, etc. directly into the instruction
  2690. return;
  2691. }
  2692. //------------------------------------------------------------------------------
  2693. // TranslateShader()
  2694. //
  2695. // This is the main function that the outside world sees. A pointer to the
  2696. // uint32 stream returned from the D3DX compile routine is parsed and used
  2697. // to write human-readable asm code into the character array pointed to by
  2698. // pDisassembledCode. An error code is returned.
  2699. //------------------------------------------------------------------------------
  2700. int D3DToGL::TranslateShader( uint32* code, CUtlBuffer *pBufDisassembledCode, bool *bVertexShader, uint32 options, int32 nShadowDepthSamplerMask, uint32 nCentroidMask, char *debugLabel )
  2701. {
  2702. CUtlString sLine, sParamName;
  2703. uint32 i, dwToken, nInstruction, nNumTokensToSkip;
  2704. char buff[256];
  2705. // obey options
  2706. m_bUseEnvParams = (options & D3DToGL_OptionUseEnvParams) != 0;
  2707. m_bDoFixupZ = (options & D3DToGL_OptionDoFixupZ) != 0;
  2708. m_bDoFixupY = (options & D3DToGL_OptionDoFixupY) != 0;
  2709. m_bDoUserClipPlanes = (options & D3DToGL_OptionDoUserClipPlanes) != 0;
  2710. m_bAddHexCodeComments = (options & D3DToGL_AddHexComments) != 0;
  2711. m_bPutHexCodesAfterLines = (options & D3DToGL_PutHexCommentsAfterLines) != 0;
  2712. m_bGeneratingDebugText = (options & D3DToGL_GeneratingDebugText) != 0;
  2713. m_bGenerateSRGBWriteSuffix = (options & D3DToGL_OptionSRGBWriteSuffix) != 0;
  2714. m_NumIndentTabs = 1; // start code indented one tab
  2715. m_nLoopDepth = 0;
  2716. // debugging
  2717. m_bSpew = (options & D3DToGL_OptionSpew) != 0;
  2718. // These buffers are not accessed below in a way that will cause them to grow, so
  2719. // we could overflow these and/or the buffer pointed to by pBufDisassembledCode
  2720. m_pBufAttribCode = new CUtlBuffer( 100, 10000, CUtlBuffer::TEXT_BUFFER );
  2721. m_pBufParamCode = new CUtlBuffer( 100, 10000, CUtlBuffer::TEXT_BUFFER );
  2722. m_pBufALUCode = new CUtlBuffer( 100, 60000, CUtlBuffer::TEXT_BUFFER );
  2723. // Pointers to text buffers for assembling sections of the program
  2724. m_pBufHeaderCode = pBufDisassembledCode;
  2725. char *pAttribMapStart = NULL;
  2726. ((char*)m_pBufHeaderCode->Base())[0] = 0;
  2727. ((char*)m_pBufAttribCode->Base())[0] = 0;
  2728. ((char*)m_pBufParamCode->Base())[0] = 0;
  2729. ((char*)m_pBufALUCode->Base())[0] = 0;
  2730. for ( i=0; i<MAX_SHADER_CONSTANTS; i++ )
  2731. {
  2732. m_bConstantRegisterDefined[i] = false;
  2733. }
  2734. // Track shadow sampler usage for proper declaration
  2735. m_nShadowDepthSamplerMask = nShadowDepthSamplerMask;
  2736. m_bDeclareShadowOption = false;
  2737. // Various flags set while parsing code to drive various declaration instructions
  2738. m_bNeedsD2AddTemp = false;
  2739. m_bNeedsLerpTemp = false;
  2740. m_bNeedsNRMTemp = false;
  2741. m_bNeedsSinCosDeclarations = false;
  2742. m_bDeclareAddressReg = false;
  2743. m_bDeclareVSOPos = false;
  2744. m_bDeclareVSOFog = false;
  2745. m_dwTexCoordOutMask = 0x00000000;
  2746. m_nVSPositionOutput = -1;
  2747. m_bOutputColorRegister[0] = false;
  2748. m_bOutputColorRegister[1] = false;
  2749. m_bOutputColorRegister[2] = false;
  2750. m_bOutputColorRegister[3] = false;
  2751. m_bOutputDepthRegister = false;
  2752. m_bTangentInputUsed = false;
  2753. m_bUsesDSTInstruction = false;
  2754. m_dwTempUsageMask = 0x00000000;
  2755. m_dwSamplerUsageMask = 0x00000000;
  2756. m_dwConstIntUsageMask = 0x00000000;
  2757. m_dwDefConstIntUsageMask = 0x00000000;
  2758. memset( m_dwDefConstIntIterCount, 0, sizeof( m_dwDefConstIntIterCount ) );
  2759. m_dwConstBoolUsageMask = 0x00000000;
  2760. m_nCentroidMask = nCentroidMask;
  2761. m_nHighestRegister = 0;
  2762. m_nHighestBoneRegister = -1;
  2763. m_bGenerateBoneUniformBuffer = false;
  2764. m_bUseBindlessTexturing = ((options & D3DToGL_OptionUseBindlessTexturing) != 0);
  2765. m_bUsedAtomicTempVar = false;
  2766. for ( int i=0; i < ARRAYSIZE( m_dwSamplerTypes ); i++ )
  2767. {
  2768. m_dwSamplerTypes[i] = SAMPLER_TYPE_UNUSED;
  2769. }
  2770. for ( int i=0; i < ARRAYSIZE( m_DeclaredOutputs ); i++ )
  2771. {
  2772. m_DeclaredOutputs[i] = UNDECLARED_OUTPUT;
  2773. }
  2774. for ( int i=0; i < ARRAYSIZE( m_DeclaredInputs ); i++ )
  2775. {
  2776. m_DeclaredInputs[i] = UNDECLARED_INPUT;
  2777. }
  2778. memset( m_dwAttribMap, 0xFF, sizeof(m_dwAttribMap) );
  2779. m_pdwBaseToken = m_pdwNextToken = code; // Initialize dwToken pointers
  2780. dwToken = GetNextToken();
  2781. m_dwMajorVersion = D3DSHADER_VERSION_MAJOR( dwToken );
  2782. m_dwMinorVersion = D3DSHADER_VERSION_MINOR( dwToken );
  2783. // If pixel shader
  2784. const char *glslExtText = "#extension GL_ARB_shader_texture_lod : require\n";//m_bUseBindlessTexturing ? "#extension GL_NV_bindless_texture : require\n" : "";
  2785. // 7ls
  2786. const char *glslVersionText = m_bUseBindlessTexturing ? "330 compatibility" : "120";
  2787. if ( ( dwToken & 0xFFFF0000 ) == 0xFFFF0000 )
  2788. {
  2789. // must explicitly enable extensions if emitting GLSL
  2790. V_snprintf( (char *)m_pBufHeaderCode->Base(), m_pBufHeaderCode->Size(), "#version %s\n%s", glslVersionText, glslExtText );
  2791. m_bVertexShader = false;
  2792. }
  2793. else // vertex shader
  2794. {
  2795. m_bGenerateSRGBWriteSuffix = false;
  2796. V_snprintf( (char *)m_pBufHeaderCode->Base(), m_pBufHeaderCode->Size(), "#version %s\n%s//ATTRIBMAP-xx-xx-xx-xx-xx-xx-xx-xx-xx-xx-xx-xx-xx-xx-xx-xx\n", glslVersionText, glslExtText );
  2797. // find that first '-xx' which is where the attrib map will be written later.
  2798. pAttribMapStart = strstr( (char *)m_pBufHeaderCode->Base(), "-xx" ) + 1;
  2799. m_bVertexShader = true;
  2800. }
  2801. *bVertexShader = m_bVertexShader;
  2802. m_bGenerateBoneUniformBuffer = m_bVertexShader && ((options & D3DToGL_OptionGenerateBoneUniformBuffer) != 0);
  2803. if ( m_bAddHexCodeComments )
  2804. {
  2805. RecordInputAndOutputPositions();
  2806. }
  2807. if ( m_bSpew )
  2808. {
  2809. printf("\n************* translating shader " );
  2810. }
  2811. int opcounter = 0;
  2812. // Loop until we hit the end dwToken...note that D3DPS_END() == D3DVS_END() so this works for either
  2813. while ( dwToken != D3DPS_END() )
  2814. {
  2815. if ( m_bAddHexCodeComments )
  2816. {
  2817. AddTokenHexCode();
  2818. RecordInputAndOutputPositions();
  2819. }
  2820. #ifdef POSIX
  2821. int tokenIndex = m_pdwNextToken - code;
  2822. #endif
  2823. int aluCodeLength0 = V_strlen( (char *) m_pBufALUCode->Base() );
  2824. dwToken = GetNextToken(); // Get next dwToken in the stream
  2825. nInstruction = Opcode( dwToken ); // Mask out the instruction opcode
  2826. if ( m_bSpew )
  2827. {
  2828. #ifdef POSIX
  2829. printf("\n** token# %04x inst# %04d opcode %s (%08x)", tokenIndex, opcounter, GLMDecode(eD3D_SIO, nInstruction), dwToken );
  2830. #endif
  2831. opcounter++;
  2832. }
  2833. switch ( nInstruction )
  2834. {
  2835. // -- No arguments at all -----------------------------------------------
  2836. case D3DSIO_NOP:
  2837. // D3D compiler outputs NOPs when shader debugging/optimizations are disabled.
  2838. break;
  2839. case D3DSIO_PHASE:
  2840. case D3DSIO_RET:
  2841. case D3DSIO_ENDLOOP:
  2842. case D3DSIO_BREAK:
  2843. TranslationError();
  2844. PrintOpcode( nInstruction, buff, sizeof( buff ) );
  2845. StrcatToALUCode( buff );
  2846. StrcatToALUCode( ";\n" );
  2847. break;
  2848. // -- "Declarative" non dcl ops ----------------------------------------
  2849. case D3DSIO_TEXDEPTH:
  2850. case D3DSIO_TEXKILL:
  2851. Handle_DeclarativeNonDclOp( nInstruction );
  2852. break;
  2853. // -- Unary ops -------------------------------------------------
  2854. case D3DSIO_BEM:
  2855. case D3DSIO_TEXBEM:
  2856. case D3DSIO_TEXBEML:
  2857. case D3DSIO_TEXDP3:
  2858. case D3DSIO_TEXDP3TEX:
  2859. case D3DSIO_TEXM3x2DEPTH:
  2860. case D3DSIO_TEXM3x2TEX:
  2861. case D3DSIO_TEXM3x3:
  2862. case D3DSIO_TEXM3x3PAD:
  2863. case D3DSIO_TEXM3x3TEX:
  2864. case D3DSIO_TEXM3x3VSPEC:
  2865. case D3DSIO_TEXREG2AR:
  2866. case D3DSIO_TEXREG2GB:
  2867. case D3DSIO_TEXREG2RGB:
  2868. case D3DSIO_LABEL:
  2869. case D3DSIO_CALL:
  2870. case D3DSIO_LOOP:
  2871. case D3DSIO_BREAKP:
  2872. case D3DSIO_DSX:
  2873. case D3DSIO_DSY:
  2874. TranslationError();
  2875. break;
  2876. case D3DSIO_IFC:
  2877. {
  2878. static const char *s_szCompareStrings[ 7 ] =
  2879. {
  2880. "__INVALID__",
  2881. ">",
  2882. "==",
  2883. ">=",
  2884. "<",
  2885. "!=",
  2886. "<="
  2887. };
  2888. // Compare mode is encoded in instruction token
  2889. uint32 dwCompareMode = OpcodeSpecificData( dwToken );
  2890. Assert( ( dwCompareMode >= 1 ) && ( dwCompareMode <= 6 ) );
  2891. // Get left side of compare
  2892. dwToken = GetNextToken();
  2893. char szLeftSide[32];
  2894. PrintParameterToString( dwToken, SRC_REGISTER, szLeftSide, sizeof( szLeftSide ), false, NULL );
  2895. // Get right side of compare
  2896. dwToken = GetNextToken();
  2897. char szRightSide[32];
  2898. PrintParameterToString( dwToken, SRC_REGISTER, szRightSide, sizeof( szRightSide ), false, NULL );
  2899. PrintToBufWithIndents( *m_pBufALUCode, "if ( %s %s %s )\n", szLeftSide, s_szCompareStrings[dwCompareMode], szRightSide );
  2900. StrcatToALUCode( "{\n" );
  2901. m_NumIndentTabs++;
  2902. break;
  2903. }
  2904. case D3DSIO_IF:
  2905. dwToken = GetNextToken();
  2906. PrintParameterToString( dwToken, SRC_REGISTER, buff, sizeof( buff ), false, NULL );
  2907. PrintToBufWithIndents( *m_pBufALUCode, "if ( %s )\n", buff );
  2908. StrcatToALUCode( "{\n" );
  2909. m_NumIndentTabs++;
  2910. break;
  2911. case D3DSIO_ELSE:
  2912. m_NumIndentTabs--;
  2913. StrcatToALUCode( "}\n" );
  2914. StrcatToALUCode( "else\n" );
  2915. StrcatToALUCode( "{\n" );
  2916. m_NumIndentTabs++;
  2917. break;
  2918. case D3DSIO_ENDIF:
  2919. m_NumIndentTabs--;
  2920. StrcatToALUCode( "}\n" );
  2921. break;
  2922. case D3DSIO_REP:
  2923. dwToken = GetNextToken();
  2924. PrintParameterToString( dwToken, SRC_REGISTER, buff, sizeof( buff ), false, NULL );
  2925. // In practice, this is the only form of for loop that will appear in DX asm
  2926. PrintToBufWithIndents( *m_pBufALUCode, "for( int i=0; i < %s; i++ )\n", buff );
  2927. StrcatToALUCode( "{\n" );
  2928. m_nLoopDepth++;
  2929. // For now, we don't deal with loop nesting
  2930. // Easy enough to fix later with an array of loop names i, j, k etc
  2931. Assert( m_nLoopDepth <= 1 );
  2932. m_NumIndentTabs++;
  2933. break;
  2934. case D3DSIO_ENDREP:
  2935. m_nLoopDepth--;
  2936. m_NumIndentTabs--;
  2937. StrcatToALUCode( "}\n" );
  2938. break;
  2939. case D3DSIO_NRM:
  2940. Handle_NRM();
  2941. break;
  2942. case D3DSIO_MOVA:
  2943. Handle_UnaryOp( nInstruction );
  2944. break;
  2945. // Unary operations
  2946. case D3DSIO_MOV:
  2947. case D3DSIO_RCP:
  2948. case D3DSIO_RSQ:
  2949. case D3DSIO_EXP:
  2950. case D3DSIO_EXPP:
  2951. case D3DSIO_LOG:
  2952. case D3DSIO_LOGP:
  2953. case D3DSIO_FRC:
  2954. case D3DSIO_LIT:
  2955. case D3DSIO_ABS:
  2956. Handle_UnaryOp( nInstruction );
  2957. break;
  2958. // -- Binary ops -------------------------------------------------
  2959. case D3DSIO_TEXM3x3SPEC:
  2960. case D3DSIO_M4x4:
  2961. case D3DSIO_M4x3:
  2962. case D3DSIO_M3x4:
  2963. case D3DSIO_M3x3:
  2964. case D3DSIO_M3x2:
  2965. case D3DSIO_CALLNZ:
  2966. case D3DSIO_SETP:
  2967. TranslationError();
  2968. break;
  2969. case D3DSIO_BREAKC:
  2970. Handle_BREAKC( dwToken );
  2971. break;
  2972. // Binary Operations
  2973. case D3DSIO_ADD:
  2974. case D3DSIO_SUB:
  2975. case D3DSIO_MUL:
  2976. case D3DSIO_DP3:
  2977. case D3DSIO_DP4:
  2978. case D3DSIO_MIN:
  2979. case D3DSIO_MAX:
  2980. case D3DSIO_DST:
  2981. case D3DSIO_SLT:
  2982. case D3DSIO_SGE:
  2983. case D3DSIO_CRS:
  2984. case D3DSIO_POW:
  2985. HandleBinaryOp_GLSL( nInstruction );
  2986. break;
  2987. // -- Ternary ops -------------------------------------------------
  2988. case D3DSIO_DP2ADD:
  2989. Handle_DP2ADD();
  2990. break;
  2991. case D3DSIO_LRP:
  2992. Handle_LRP( nInstruction );
  2993. break;
  2994. case D3DSIO_SGN:
  2995. Assert( m_bVertexShader );
  2996. TranslationError(); // TODO emulate with SLT etc
  2997. break;
  2998. case D3DSIO_CND:
  2999. TranslationError();
  3000. break;
  3001. case D3DSIO_CMP:
  3002. Handle_CMP();
  3003. break;
  3004. case D3DSIO_SINCOS:
  3005. Handle_SINCOS();
  3006. break;
  3007. case D3DSIO_MAD:
  3008. Handle_MAD( nInstruction );
  3009. break;
  3010. // -- Quaternary op ------------------------------------------------
  3011. case D3DSIO_TEXLDD:
  3012. Handle_TexLDD( nInstruction );
  3013. break;
  3014. // -- Special cases: texcoord vs texcrd and tex vs texld -----------
  3015. case D3DSIO_TEXCOORD:
  3016. Handle_TexCoord();
  3017. break;
  3018. case D3DSIO_TEX:
  3019. Handle_TEX( dwToken, false );
  3020. break;
  3021. case D3DSIO_TEXLDL:
  3022. Handle_TEX( nInstruction, true );
  3023. break;
  3024. case D3DSIO_DCL:
  3025. Handle_DCL();
  3026. break;
  3027. case D3DSIO_DEFB:
  3028. case D3DSIO_DEFI:
  3029. Handle_DEFIB( nInstruction );
  3030. break;
  3031. case D3DSIO_DEF:
  3032. Handle_DEF();
  3033. break;
  3034. case D3DSIO_COMMENT:
  3035. // Using OpcodeSpecificData() can fail here since the comments can be longer than 0xff dwords
  3036. nNumTokensToSkip = ( dwToken & 0x0fff0000 ) >> 16;
  3037. SkipTokens( nNumTokensToSkip );
  3038. break;
  3039. case D3DSIO_END:
  3040. break;
  3041. }
  3042. if ( m_bSpew )
  3043. {
  3044. int aluCodeLength1 = V_strlen( (char *) m_pBufALUCode->Base() );
  3045. if ( aluCodeLength1 != aluCodeLength0 )
  3046. {
  3047. // code was emitted
  3048. printf( "\n > %s", ((char *)m_pBufALUCode->Base()) + aluCodeLength0 );
  3049. aluCodeLength0 = aluCodeLength1;
  3050. }
  3051. }
  3052. }
  3053. // Note that this constant packing expects .wzyx swizzles in case we ever use the SINCOS code in a ps_2_x shader
  3054. //
  3055. // The Microsoft documentation on this is all kinds of broken and, strangely, these numbers don't even
  3056. // match the D3DSINCOSCONST1 and D3DSINCOSCONST2 constants used by the D3D assembly sincos instruction...
  3057. if ( m_bNeedsSinCosDeclarations )
  3058. {
  3059. PrintIndentation( (char*)m_pBufParamCode->Base(), m_pBufParamCode->Size() );
  3060. StrcatToParamCode( "vec4 scA = vec4( -1.55009923e-6, -2.17013894e-5, 0.00260416674, 0.00026041668 );\n" );
  3061. PrintIndentation( (char*)m_pBufParamCode->Base(), m_pBufParamCode->Size() );
  3062. StrcatToParamCode( "vec4 scB = vec4( -0.020833334, -0.125, 1.0, 0.5 );\n" );
  3063. }
  3064. // Stick in the sampler mask in hex
  3065. PrintToBuf( *m_pBufHeaderCode, "%sSAMPLERMASK-%x\n", "//", m_dwSamplerUsageMask );
  3066. uint nSamplerTypes = 0;
  3067. for ( int i = 0; i < 16; i++ )
  3068. {
  3069. Assert( m_dwSamplerTypes[i] < 4);
  3070. nSamplerTypes |= ( m_dwSamplerTypes[i] << ( i * 2 ) );
  3071. }
  3072. PrintToBuf( *m_pBufHeaderCode, "%sSAMPLERTYPES-%x\n", "//", nSamplerTypes );
  3073. // fragData outputs referenced
  3074. uint nFragDataMask = 0;
  3075. for ( int i = 0; i < 4; i++ )
  3076. {
  3077. nFragDataMask |= m_bOutputColorRegister[ i ] ? ( 1 << i ) : 0;
  3078. }
  3079. PrintToBuf( *m_pBufHeaderCode, "%sFRAGDATAMASK-%x\n", "//", nFragDataMask );
  3080. // Uniforms
  3081. PrintToBuf( *m_pBufHeaderCode, "//HIGHWATER-%d\n", m_nHighestRegister + 1 );
  3082. if ( ( m_bVertexShader ) && ( m_bGenerateBoneUniformBuffer ) )
  3083. {
  3084. PrintToBuf( *m_pBufHeaderCode, "//HIGHWATERBONE-%i\n", m_nHighestBoneRegister + 1 );
  3085. }
  3086. PrintToBuf( *m_pBufHeaderCode, "\nuniform vec4 %s[%d];\n", m_bVertexShader ? "vc" : "pc", m_nHighestRegister + 1 );
  3087. if ( ( m_nHighestBoneRegister >= 0 ) && ( m_bVertexShader ) && ( m_bGenerateBoneUniformBuffer ) )
  3088. {
  3089. PrintToBuf( *m_pBufHeaderCode, "\nuniform vec4 %s[%d];\n", "vcbones", m_nHighestBoneRegister + 1 );
  3090. }
  3091. if ( m_bVertexShader )
  3092. {
  3093. PrintToBuf( *m_pBufHeaderCode, "\nuniform vec4 vcscreen;\n" );
  3094. }
  3095. for( int i=0; i<32; i++ )
  3096. {
  3097. if ( ( m_dwConstIntUsageMask & ( 0x00000001 << i ) ) &&
  3098. ( !( m_dwDefConstIntUsageMask & ( 0x00000001 << i ) ) )
  3099. )
  3100. {
  3101. PrintToBuf( *m_pBufHeaderCode, "uniform int i%d ;\n", i );
  3102. }
  3103. }
  3104. for( int i=0; i<32; i++ )
  3105. {
  3106. if ( m_dwDefConstIntUsageMask & ( 0x00000001 << i ) )
  3107. {
  3108. PrintToBuf( *m_pBufHeaderCode, "const int i%d = %i;\n", i, m_dwDefConstIntIterCount[i] );
  3109. }
  3110. }
  3111. for( int i=0; i<32; i++ )
  3112. {
  3113. if ( m_dwConstBoolUsageMask & ( 0x00000001 << i ) )
  3114. {
  3115. PrintToBuf( *m_pBufHeaderCode, m_bVertexShader ? "uniform bool b%d;\n" : "uniform bool fb%d;\n", i );
  3116. }
  3117. }
  3118. // Control bit for sRGB Write suffix
  3119. if ( m_bGenerateSRGBWriteSuffix )
  3120. {
  3121. // R500 Hookup
  3122. // Set this guy to 1 when the sRGBWrite state is true, otherwise 0
  3123. StrcatToHeaderCode( "uniform float flSRGBWrite;\n" );
  3124. }
  3125. PrintToBuf( *m_pBufHeaderCode, "\n" );
  3126. // Write samplers
  3127. WriteGLSLSamplerDefinitions();
  3128. if ( m_bUsesDSTInstruction )
  3129. {
  3130. PrintToBuf( *m_pBufHeaderCode, "vec4 dst(vec4 src0,vec4 src1) { return vec4(1.0f,src0.y*src1.y,src0.z,src1.w); }\n" );
  3131. }
  3132. if ( m_bDeclareAddressReg )
  3133. {
  3134. if ( !m_bGenerateBoneUniformBuffer )
  3135. {
  3136. m_nHighestRegister = DXABSTRACT_VS_PARAM_SLOTS - 1;
  3137. }
  3138. PrintIndentation( (char*)m_pBufParamCode->Base(), m_pBufParamCode->Size() );
  3139. StrcatToParamCode( "vec4 va_r;\n" );
  3140. }
  3141. char *pTempVarStr = "TEMP";
  3142. pTempVarStr = "vec4";
  3143. // Declare temps in Param code buffer
  3144. for( int i=0; i<32; i++ )
  3145. {
  3146. if ( m_dwTempUsageMask & ( 0x00000001 << i ) )
  3147. {
  3148. PrintIndentation( (char*)m_pBufParamCode->Base(), m_pBufParamCode->Size() );
  3149. PrintToBuf( *m_pBufParamCode, "%s r%d;\n", pTempVarStr, i );
  3150. }
  3151. }
  3152. if ( m_bVertexShader && (m_bDoUserClipPlanes || m_bDoFixupZ || m_bDoFixupY ) )
  3153. {
  3154. PrintIndentation( (char*)m_pBufParamCode->Base(), m_pBufParamCode->Size() );
  3155. StrcatToParamCode( "vec4 vTempPos;\n" );
  3156. }
  3157. if ( ( m_bVertexShader ) && ( m_dwMajorVersion == 3 ) )
  3158. {
  3159. for ( int i = 0; i < 32; i++ )
  3160. {
  3161. if ( m_dwTexCoordOutMask & ( 1 << i ) )
  3162. {
  3163. PrintIndentation( (char*)m_pBufParamCode->Base(), m_pBufParamCode->Size() );
  3164. char buf[256];
  3165. V_snprintf( buf, sizeof( buf ), "vec4 oTempT%i = vec4( 0, 0, 0, 0 );\n", i );
  3166. StrcatToParamCode( buf );
  3167. }
  3168. }
  3169. }
  3170. if ( m_bNeedsSinCosDeclarations )
  3171. {
  3172. StrcatToParamCode( "vec3 vSinCosTmp;\n" ); // declare temp used by GLSL sin and cos intrinsics
  3173. }
  3174. // Optional temps needed to emulate d2add instruction in DX pixel shaders
  3175. if ( m_bNeedsD2AddTemp )
  3176. {
  3177. PrintToBuf( *m_pBufParamCode, "%s DP2A0;\n%s DP2A1;\n", pTempVarStr, pTempVarStr );
  3178. }
  3179. // Optional temp needed to emulate lerp instruction in DX vertex shaders
  3180. if ( m_bNeedsLerpTemp )
  3181. {
  3182. PrintToBuf( *m_pBufParamCode, "%s LRP_TEMP;\n", pTempVarStr );
  3183. }
  3184. // Optional temp needed to emulate NRM instruction in DX shaders
  3185. if ( m_bNeedsNRMTemp )
  3186. {
  3187. PrintToBuf( *m_pBufParamCode, "%s NRM_TEMP;\n", pTempVarStr );
  3188. }
  3189. if ( m_bDeclareVSOPos && m_bVertexShader )
  3190. {
  3191. if ( m_bDoUserClipPlanes )
  3192. {
  3193. StrcatToALUCode( "gl_ClipVertex = vTempPos;\n" ); // if user clip is enabled, jam clip space position into gl_ClipVertex
  3194. }
  3195. if ( m_bDoFixupZ || m_bDoFixupY )
  3196. {
  3197. // TODO: insert clip distance computation something like this:
  3198. //
  3199. // StrcatToALUCode( "DP4 oCLP[0].x, oPos, vc[215]; \n" );
  3200. //
  3201. if ( m_bDoFixupZ )
  3202. {
  3203. StrcatToALUCode( "vTempPos.z = vTempPos.z * vc[0].z - vTempPos.w; // z' = (2*z)-w\n" );
  3204. }
  3205. if ( m_bDoFixupY )
  3206. {
  3207. // append instructions to flip Y over
  3208. // new Y = -(old Y)
  3209. StrcatToALUCode( "vTempPos.y = -vTempPos.y; // y' = -y \n" );
  3210. }
  3211. // Apply half pixel offset (0.5f pixel offset D3D) to output vertices to account for the pixel center difference between D3D9 and OpenGL.
  3212. // This is the actual work in the shader. This works out to be 0.5 pixels wide because clip space is 2 units wide (-1, 1).
  3213. StrcatToALUCode( "vTempPos.xy += vcscreen.xy * vTempPos.w;\n" );
  3214. StrcatToALUCode( "gl_Position = vTempPos;\n" );
  3215. }
  3216. else
  3217. {
  3218. StrcatToParamCode( "OUTPUT oPos = result.position;\n" );
  3219. // TODO: insert clip distance computation something like this:
  3220. //
  3221. // StrcatToALUCode( "DP4 oCLP[0].x, oPos, c[215]; \n" );
  3222. //
  3223. }
  3224. }
  3225. if ( m_bVertexShader )
  3226. {
  3227. if ( m_dwMajorVersion == 3 )
  3228. {
  3229. WriteGLSLOutputVariableAssignments();
  3230. }
  3231. else
  3232. {
  3233. for ( int i=0; i<32; i++ )
  3234. {
  3235. char outTexCoordBuff[64];
  3236. // Don't declare a varying for the output that is mapped to the position output
  3237. if ( i != m_nVSPositionOutput )
  3238. {
  3239. if ( m_dwTexCoordOutMask & ( 0x00000001 << i ) )
  3240. {
  3241. if ( m_nCentroidMask & ( 0x00000001 << i ) )
  3242. {
  3243. V_snprintf( outTexCoordBuff, sizeof( outTexCoordBuff ), "centroid varying vec4 oT%d;\n", i ); // centroid varying
  3244. StrcatToHeaderCode( outTexCoordBuff );
  3245. }
  3246. else
  3247. {
  3248. V_snprintf( outTexCoordBuff, sizeof( outTexCoordBuff ), "varying vec4 oT%d;\n", i );
  3249. StrcatToHeaderCode( outTexCoordBuff );
  3250. }
  3251. }
  3252. }
  3253. }
  3254. }
  3255. }
  3256. else
  3257. {
  3258. if ( m_dwMajorVersion == 3 )
  3259. {
  3260. WriteGLSLInputVariableAssignments();
  3261. }
  3262. }
  3263. // do some annotation at the end of the attrib block
  3264. {
  3265. char temp[1000];
  3266. if ( m_bVertexShader )
  3267. {
  3268. // write attrib map into the text starting at pAttribMapStart - two hex digits per attrib
  3269. for( int i=0; i<16; i++ )
  3270. {
  3271. if ( m_dwAttribMap[i] != 0xFFFFFFFF )
  3272. {
  3273. V_snprintf( temp, sizeof(temp), "%02X", m_dwAttribMap[i] );
  3274. memcpy( pAttribMapStart + (i*3), temp, 2 );
  3275. }
  3276. }
  3277. }
  3278. PrintIndentation( (char*)m_pBufAttribCode->Base(), m_pBufAttribCode->Size() );
  3279. // This used to write out a translation counter into the shader as a comment. However, the order that shaders get in here
  3280. // is non-deterministic between runs, and the change in this comment would cause shaders to appear different to the GL disk cache,
  3281. // significantly increasing app load time.
  3282. // Other code looks for trans#%d, so we can't just remove it. Instead, output it as 0.
  3283. V_snprintf( temp, sizeof(temp), "%s trans#%d label:%s\n", "//", 0, debugLabel ? debugLabel : "none" );
  3284. StrcatToAttribCode( temp );
  3285. }
  3286. // If we actually sample from a shadow depth sampler, we need to declare the shadow option at the top
  3287. if ( m_bDeclareShadowOption )
  3288. {
  3289. StrcatToHeaderCode( "OPTION ARB_fragment_program_shadow;\n" );
  3290. }
  3291. StrcatToHeaderCode( "\nvoid main()\n{\n" );
  3292. if ( m_bUsedAtomicTempVar )
  3293. {
  3294. PrintToBufWithIndents( *m_pBufHeaderCode, "vec4 %s;\n\n", g_pAtomicTempVarName );
  3295. }
  3296. // sRGB Write suffix
  3297. if ( m_bGenerateSRGBWriteSuffix )
  3298. {
  3299. StrcatToALUCode( "vec3 sRGBFragData;\n" );
  3300. StrcatToALUCode( "sRGBFragData.xyz = log( gl_FragData[0].xyz );\n" );
  3301. StrcatToALUCode( "sRGBFragData.xyz = sRGBFragData.xyz * vec3( 0.454545f, 0.454545f, 0.454545f );\n" );
  3302. StrcatToALUCode( "sRGBFragData.xyz = exp( sRGBFragData.xyz );\n" );
  3303. StrcatToALUCode( "gl_FragData[0].xyz = mix( gl_FragData[0].xyz, sRGBFragData, flSRGBWrite );\n" );
  3304. }
  3305. strcat_s( (char*)m_pBufALUCode->Base(), m_pBufALUCode->Size(), "}\n" );
  3306. // Put all of the strings together for final program ( pHeaderCode + pAttribCode + pParamCode + pALUCode )
  3307. StrcatToHeaderCode( (char*)m_pBufAttribCode->Base() );
  3308. StrcatToHeaderCode( (char*)m_pBufParamCode->Base() );
  3309. StrcatToHeaderCode( (char*)m_pBufALUCode->Base() );
  3310. // Cleanup - don't touch m_pBufHeaderCode, as it is managed by the caller
  3311. delete m_pBufAttribCode;
  3312. delete m_pBufParamCode;
  3313. delete m_pBufALUCode;
  3314. m_pBufAttribCode = m_pBufParamCode = m_pBufALUCode = NULL;
  3315. if ( m_bSpew )
  3316. {
  3317. printf("\n************* translation complete\n\n " );
  3318. }
  3319. return DISASM_OK;
  3320. }