Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

2161 lines
75 KiB

  1. /*==========================================================================
  2. *
  3. * Copyright (C) 1999 Microsoft Corporation. All Rights Reserved.
  4. *
  5. * File: vvm.cpp
  6. * Content: Virtual Vertex Machine implementation
  7. *
  8. * History:
  9. * 6/16/00
  10. * Added LOGP, EXPP, NM3
  11. * RCP, RSQ, LOG, LOGP, EXP, EXPP take input value from W instead of X
  12. * 7/11/00
  13. * Removed NM3 macro
  14. *
  15. *
  16. ***************************************************************************/
  17. #include "pch.cpp"
  18. #pragma hdrstop
  19. #include <stdio.h>
  20. #include "vvm.h"
  21. #include "d3dexcept.hpp"
  22. #include "float.h"
  23. const DWORD __MAX_CODE_SIZE = 4096;
  24. //-----------------------------------------------------------------------------
  25. HRESULT ComputeShaderCodeSize(
  26. CONST DWORD* pCode,
  27. DWORD* pdwCodeOnlySize,
  28. DWORD* pdwCodeAndCommentSize,
  29. DWORD* pdwNumConstDefs)
  30. {
  31. // set this now for error return
  32. *pdwCodeOnlySize = 0;
  33. *pdwCodeAndCommentSize = 0;
  34. DWORD dwNumConstDefs = 0;
  35. DWORD dwCodeOnlySize = 0;
  36. DWORD dwCodeAndCommentSize = 0;
  37. CONST DWORD* pToken = pCode;
  38. DWORD Version = *pToken++; dwCodeOnlySize++; dwCodeAndCommentSize++;
  39. if ( (((Version >> 16) != 0xFFFF) && (Version >> 16) != 0xFFFE) ||
  40. ((Version & 0xFFFF) == 0x0))
  41. {
  42. D3D_ERR("invalid version token");
  43. return D3DERR_INVALIDCALL;
  44. }
  45. // very basic parse to find number of instructions
  46. while ( ((*pToken) != 0x0000FFFF) && (dwCodeOnlySize <= __MAX_CODE_SIZE) )
  47. {
  48. if (IsInstructionToken(*pToken))
  49. {
  50. DWORD opCode = (*pToken) & D3DSI_OPCODE_MASK;
  51. if ( opCode == D3DSIO_COMMENT )
  52. {
  53. UINT DWordSize = ((*pToken)&D3DSI_COMMENTSIZE_MASK)>>D3DSI_COMMENTSIZE_SHIFT;
  54. dwCodeAndCommentSize += (1+DWordSize); // instruction token + comment
  55. pToken += (1+DWordSize);
  56. }
  57. else if (opCode == D3DSIO_DEF )
  58. {
  59. pToken += 6;
  60. dwCodeOnlySize += 6;
  61. dwCodeAndCommentSize += 6;
  62. dwNumConstDefs++;
  63. }
  64. else
  65. {
  66. pToken++; dwCodeOnlySize++; dwCodeAndCommentSize++;
  67. }
  68. }
  69. else
  70. {
  71. pToken++; dwCodeOnlySize++; dwCodeAndCommentSize++;
  72. }
  73. }
  74. dwCodeOnlySize++; dwCodeAndCommentSize++; // for END token
  75. if (dwCodeOnlySize > __MAX_CODE_SIZE)
  76. {
  77. D3D_ERR("Shader code size is too big. Possibly, missing D3DVS_END()");
  78. return D3DERR_INVALIDCALL;
  79. }
  80. *pdwCodeOnlySize = 4*dwCodeOnlySize;
  81. *pdwCodeAndCommentSize = 4*dwCodeAndCommentSize;
  82. if( pdwNumConstDefs )
  83. *pdwNumConstDefs = dwNumConstDefs;
  84. return S_OK;
  85. }
  86. //-----------------------------------------------------------------------------
  // Value used to stand in for negative infinity: the most negative finite
  // float (-FLT_MAX), not an IEEE infinity.
  float MINUS_INFINITY()
  {
      const float mostNegativeFinite = -FLT_MAX;
      return mostNegativeFinite;
  }
  // Value used to stand in for positive infinity: the largest finite float
  // (FLT_MAX), not an IEEE infinity.
  float PLUS_INFINITY()
  {
      const float largestFinite = FLT_MAX;
      return largestFinite;
  }
  95. //-----------------------------------------------------------------------------
  96. // Returns instruction size, based on the op-code
  97. //
  98. UINT CVertexVM::GetNumSrcOperands(UINT opcode)
  99. {
  100. // returns number of source operands + opcode + destination
  101. switch (opcode)
  102. {
  103. case D3DSIO_MOV : return 1;
  104. case D3DSIO_ADD : return 2;
  105. case D3DSIO_MAD : return 3;
  106. case D3DSIO_MUL : return 2;
  107. case D3DSIO_RCP : return 1;
  108. case D3DSIO_RSQ : return 1;
  109. case D3DSIO_DP3 : return 2;
  110. case D3DSIO_DP4 : return 2;
  111. case D3DSIO_MIN : return 2;
  112. case D3DSIO_MAX : return 2;
  113. case D3DSIO_SLT : return 2;
  114. case D3DSIO_SGE : return 2;
  115. case D3DSIO_EXP : return 1;
  116. case D3DSIO_LOG : return 1;
  117. case D3DSIO_EXPP: return 1;
  118. case D3DSIO_LOGP: return 1;
  119. case D3DSIO_LIT : return 1;
  120. case D3DSIO_DST : return 2;
  121. case D3DSIO_FRC : return 1;
  122. case D3DSIO_M4x4: return 2;
  123. case D3DSIO_M4x3: return 2;
  124. case D3DSIO_M3x4: return 2;
  125. case D3DSIO_M3x3: return 2;
  126. case D3DSIO_M3x2: return 2;
  127. case D3DSIO_NOP: return 0;
  128. default:
  129. PrintInstCount();
  130. D3D_THROW_FAIL("Illegal instruction");
  131. }
  132. return 0;
  133. }
  134. //-----------------------------------------------------------------------------
  135. // Returns a bit field to say which source register components are used to
  136. // produce the output components.
  137. // 4 bits are used per each output component:
  138. // 0-3 output component X
  139. // 4-7 output component Y
  140. // 8-11 output component Z
  141. // 12-15 output component W
  142. // Each of the four bits is used to say if this source component is used to
  143. // produce the output component:
  144. // bit 0 - X, bit 1 - Y, bit 2 - Z, bit 3 - W.
  145. //
  146. // SourceIndex - sequential index of the source operand
  147. //
  148. UINT CVertexVM::GetRegisterUsage(UINT opcode, UINT SourceIndex)
  149. {
  150. switch (opcode)
  151. {
  152. case D3DSIO_MOV : return 1 | (2 << 4) | (4 << 8) | (8 << 12);
  153. case D3DSIO_ADD : return 1 | (2 << 4) | (4 << 8) | (8 << 12);
  154. case D3DSIO_MAD : return 1 | (2 << 4) | (4 << 8) | (8 << 12);
  155. case D3DSIO_MUL : return 1 | (2 << 4) | (4 << 8) | (8 << 12);
  156. case D3DSIO_RCP : return 8 | (8 << 4) | (8 << 8) | (8 << 12);
  157. case D3DSIO_RSQ : return 8 | (8 << 4) | (8 << 8) | (8 << 12);
  158. case D3DSIO_DP3 : return 7 | (7 << 4) | (7 << 8) | (7 << 12);
  159. case D3DSIO_DP4 : return 0xF | (0xF << 4) | (0xF << 8) | (0xF << 12);
  160. case D3DSIO_MIN : return 1 | (2 << 4) | (4 << 8) | (8 << 12);
  161. case D3DSIO_MAX : return 1 | (2 << 4) | (4 << 8) | (8 << 12);
  162. case D3DSIO_SLT : return 1 | (2 << 4) | (4 << 8) | (8 << 12);
  163. case D3DSIO_SGE : return 1 | (2 << 4) | (4 << 8) | (8 << 12);
  164. case D3DSIO_EXP : return 8 | (8 << 4) | (8 << 8) | (8 << 12);
  165. case D3DSIO_LOG : return 8 | (8 << 4) | (8 << 8) | (8 << 12);
  166. case D3DSIO_EXPP: return 8 | (8 << 4) | (8 << 8);
  167. case D3DSIO_LOGP: return 8 | (8 << 4) | (8 << 8);
  168. case D3DSIO_LIT : return (1 << 4) | ((1 | 2 | 8) << 8);
  169. case D3DSIO_DST :
  170. if (SourceIndex == 0)
  171. return (2 << 4) | (4 << 8);
  172. else
  173. return (2 << 4) | (8 << 12);
  174. case D3DSIO_FRC : return 1 | (2 << 4) | (4 << 8) | (8 << 12);
  175. case D3DSIO_M4x4: return 0xF | (0xF << 4) | (0xF << 8) | (0xF << 12);
  176. case D3DSIO_M4x3: return 0xF | (0xF << 4) | (0xF << 8);
  177. case D3DSIO_M3x4: return 7 | (7 << 4) | (7 << 8) | (7 << 12);
  178. case D3DSIO_M3x3: return 7 | (7 << 4) | (7 << 8);
  179. case D3DSIO_M3x2: return 7 | (7 << 4);
  180. case D3DSIO_NOP: return 0;
  181. default:
  182. PrintInstCount();
  183. D3D_THROW_FAIL("Illegal instruction");
  184. }
  185. return 0;
  186. }
  187. //-----------------------------------------------------------------------------
  188. // Returns instruction size in DWORDs, based on the op-code
  189. //
  190. UINT CVertexVM::GetInstructionLength(DWORD inst)
  191. {
  192. // returns number of source operands + opcode + destination
  193. DWORD opcode = D3DSI_GETOPCODE(inst);
  194. if (opcode == D3DSIO_NOP)
  195. return 1;
  196. else if (opcode == D3DSIO_COMMENT)
  197. return ((inst & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT) + 1;
  198. else
  199. return GetNumSrcOperands(opcode) + 2;
  200. }
  201. #if DBG
  202. //-----------------------------------------------------------------------------
  203. // VertexShaderInstDisAsm - Generates human-readable character string for a
  204. // single vertex shader instruction. String interface is similar to _snprintf.
  205. //-----------------------------------------------------------------------------
  206. static int VertexShaderInstDisAsm(
  207. char* pStrRet, int StrSizeRet, DWORD* pShader, DWORD Flags )
  208. {
  209. DWORD* pToken = pShader;
  210. // stage in local string, then copy
  211. char pStr[256] = "";
  212. #define _ADDSTR( _Str ) { _snprintf( pStr, 256, "%s" _Str , pStr ); }
  213. #define _ADDSTRP( _Str, _Param ) { _snprintf( pStr, 256, "%s" _Str , pStr, _Param ); }
  214. DWORD Inst = *pToken++;
  215. DWORD Opcode = (Inst & D3DSI_OPCODE_MASK);
  216. switch (Opcode)
  217. {
  218. case D3DSIO_NOP: _ADDSTR("NOP"); break;
  219. case D3DSIO_MOV: _ADDSTR("MOV"); break;
  220. case D3DSIO_ADD: _ADDSTR("ADD"); break;
  221. case D3DSIO_MAD: _ADDSTR("MAD"); break;
  222. case D3DSIO_MUL: _ADDSTR("MUL"); break;
  223. case D3DSIO_RCP: _ADDSTR("RCP"); break;
  224. case D3DSIO_RSQ: _ADDSTR("RSQ"); break;
  225. case D3DSIO_DP3: _ADDSTR("DP3"); break;
  226. case D3DSIO_DP4: _ADDSTR("DP4"); break;
  227. case D3DSIO_MIN: _ADDSTR("MIN"); break;
  228. case D3DSIO_MAX: _ADDSTR("MAX"); break;
  229. case D3DSIO_SLT: _ADDSTR("SLT"); break;
  230. case D3DSIO_SGE: _ADDSTR("SGE"); break;
  231. case D3DSIO_EXP: _ADDSTR("EXP"); break;
  232. case D3DSIO_LOG: _ADDSTR("LOG"); break;
  233. case D3DSIO_EXPP:_ADDSTR("EXPP"); break;
  234. case D3DSIO_LOGP:_ADDSTR("LOGP"); break;
  235. case D3DSIO_LIT: _ADDSTR("LIT"); break;
  236. case D3DSIO_DST: _ADDSTR("DST"); break;
  237. case D3DSIO_COMMENT: _ADDSTR("COMMENT"); break;
  238. default : _ADDSTR("???"); break;
  239. }
  240. if (*pToken & (1L<<31))
  241. {
  242. DWORD DstParam = *pToken++;
  243. switch (DstParam & D3DSP_REGTYPE_MASK)
  244. {
  245. case D3DSPR_TEMP : _ADDSTRP(" T%d", (DstParam & D3DSP_REGNUM_MASK) ); break;
  246. case D3DSPR_ADDR : _ADDSTR(" Addr"); break;
  247. case D3DSPR_RASTOUT : _ADDSTRP(" R%d", (DstParam & D3DSP_REGNUM_MASK) ); break;
  248. case D3DSPR_ATTROUT : _ADDSTRP(" A%d", (DstParam & D3DSP_REGNUM_MASK) ); break;
  249. case D3DSPR_TEXCRDOUT: _ADDSTRP(" T%d", (DstParam & D3DSP_REGNUM_MASK) ); break;
  250. }
  251. if (*pToken & (1L<<31)) _ADDSTR(" ");
  252. while (*pToken & (1L<<31))
  253. {
  254. DWORD SrcParam = *pToken++;
  255. switch (SrcParam & D3DSP_REGTYPE_MASK)
  256. {
  257. case D3DSPR_TEMP : _ADDSTRP(" T%d", (SrcParam & D3DSP_REGNUM_MASK) ); break;
  258. case D3DSPR_INPUT : _ADDSTRP(" I%d", (SrcParam & D3DSP_REGNUM_MASK) ); break;
  259. case D3DSPR_CONST : _ADDSTRP(" C%d", (SrcParam & D3DSP_REGNUM_MASK) ); break;
  260. }
  261. if (*pToken & (1L<<31)) _ADDSTR(",");
  262. }
  263. }
  264. return _snprintf( pStrRet, StrSizeRet, "%s", pStr );
  265. }
  266. //-----------------------------------------------------------------------------
  267. typedef struct _VShaderInst
  268. {
  269. DWORD m_Tokens[32];
  270. char m_String[128];
  271. DWORD* m_pComment;
  272. DWORD m_cdwComment;
  273. } VShaderInst;
  274. #endif // DBG
  275. //-----------------------------------------------------------------------------
  276. class CVShaderCodeI: public CVShaderCode
  277. {
  278. public:
  279. CVShaderCodeI()
  280. {
  281. m_pdwCode = NULL;
  282. m_InstCount = 0;
  283. #if DBG
  284. m_pInst = NULL;
  285. #endif
  286. }
  287. ~CVShaderCodeI()
  288. {
  289. delete m_pdwCode;
  290. #if DBG
  291. if (m_pInst) delete m_pInst;
  292. #endif
  293. }
  294. DWORD* m_pdwCode; // Pointer to the original code
  295. DWORD m_dwSize; // Size of the code in DWORDs
  296. DWORD m_InstCount;
  297. #if DBG
  298. VShaderInst* m_pInst;
  299. #endif
  300. DWORD InstCount( void ) { return m_InstCount; }
  301. DWORD* InstTokens( DWORD Inst );
  302. char* InstDisasm( DWORD Inst );
  303. DWORD* InstComment( DWORD Inst );
  304. DWORD InstCommentSize( DWORD Inst );
  305. };
  306. //-----------------------------------------------------------------------------
  307. DWORD* CVShaderCodeI::InstTokens( DWORD Inst )
  308. {
  309. #if DBG
  310. if ( Inst >= m_InstCount ) return NULL;
  311. return m_pInst[Inst].m_Tokens;
  312. #else
  313. return NULL;
  314. #endif
  315. }
  316. //-----------------------------------------------------------------------------
  317. char* CVShaderCodeI::InstDisasm( DWORD Inst )
  318. {
  319. #if DBG
  320. if ( Inst >= m_InstCount ) return NULL;
  321. return m_pInst[Inst].m_String;
  322. #else
  323. return NULL;
  324. #endif
  325. }
  326. //-----------------------------------------------------------------------------
  327. DWORD* CVShaderCodeI::InstComment( DWORD Inst )
  328. {
  329. #if DBG
  330. if ( Inst >= m_InstCount ) return NULL;
  331. return m_pInst[Inst].m_pComment;
  332. #else
  333. return NULL;
  334. #endif
  335. }
  336. //-----------------------------------------------------------------------------
  337. DWORD CVShaderCodeI::InstCommentSize( DWORD Inst )
  338. {
  339. #if DBG
  340. if ( Inst >= m_InstCount ) return NULL;
  341. return m_pInst[Inst].m_cdwComment;
  342. #else
  343. return NULL;
  344. #endif
  345. }
  346. //-----------------------------------------------------------------------------
  347. // Vertex Virtual Machine object implementation
  348. //
  349. //-----------------------------------------------------------------------------
  350. CVertexVM::CVertexVM()
  351. {
  352. m_pCurrentShader = NULL;
  353. m_CurInstIndex = 0;
  354. #if DBG
  355. for (UINT i=0; i < D3DVS_CONSTREG_MAX_V1_1; i++)
  356. m_c_initialized[i] = FALSE;
  357. #endif
  358. }
  359. //-----------------------------------------------------------------------------
  360. CVertexVM::~CVertexVM()
  361. {
  362. }
  363. //-----------------------------------------------------------------------------
  364. void CVertexVM::Init(UINT MaxVertexShaderConst)
  365. {
  366. m_MaxVertexShaderConst = max(MaxVertexShaderConst, D3DVS_CONSTREG_MAX_V1_1);
  367. m_reg.m_c = new VVM_WORD[m_MaxVertexShaderConst];
  368. if (m_reg.m_c == NULL)
  369. D3D_THROW_FAIL("Not enough memory to allocate vertex shader constant array");
  370. }
  371. //-----------------------------------------------------------------------------
  372. // Returns addres of the first vertex of the element
  373. //
  374. VVM_WORD * CVertexVM::GetDataAddr(DWORD dwRegType, DWORD dwElementIndex)
  375. {
  376. switch (dwRegType)
  377. {
  378. case D3DSPR_TEMP : return &m_reg.m_r[dwElementIndex][0];
  379. case D3DSPR_INPUT : return &m_reg.m_v[dwElementIndex][0];
  380. case D3DSPR_CONST : return &m_reg.m_c[dwElementIndex];
  381. case D3DSPR_ADDR : return &m_reg.m_a[dwElementIndex][0];
  382. case D3DSPR_RASTOUT : return &m_reg.m_output[dwElementIndex][0];
  383. case D3DSPR_ATTROUT : return &m_reg.m_color[dwElementIndex][0];
  384. case D3DSPR_TEXCRDOUT : return &m_reg.m_texture[dwElementIndex][0];
  385. default:
  386. D3D_THROW(D3DERR_INVALIDCALL, "Invalid register type");
  387. }
  388. return NULL;
  389. }
  390. //-----------------------------------------------------------------------------
  391. // Sets data of the first vertex pf the register
  392. //
  393. HRESULT CVertexVM::SetData(DWORD dwMemType, DWORD dwStart, DWORD dwCount,
  394. LPVOID pBuffer)
  395. {
  396. try
  397. {
  398. VVM_WORD* p = this->GetDataAddr(dwMemType, dwStart);
  399. if (dwMemType == D3DSPR_CONST)
  400. {
  401. #if DBG
  402. if ((dwStart + dwCount) > m_MaxVertexShaderConst)
  403. {
  404. D3D_THROW_FAIL("Attemt to write outside constant register array");
  405. }
  406. // We only can set initialized flag for software constant registers
  407. if (dwStart < D3DVS_CONSTREG_MAX_V1_1)
  408. {
  409. BOOL* p = &m_c_initialized[dwStart];
  410. UINT count = dwCount;
  411. if ((dwStart + dwCount) >= D3DVS_CONSTREG_MAX_V1_1)
  412. {
  413. count = D3DVS_CONSTREG_MAX_V1_1 - dwStart;
  414. }
  415. for (UINT i = 0; i < count; i++)
  416. {
  417. p[i] = TRUE;
  418. }
  419. }
  420. #endif
  421. UINT size = dwCount * sizeof(VVM_WORD);
  422. memcpy(p, pBuffer, size);
  423. }
  424. else
  425. {
  426. // Set only the first element of the register batch
  427. for (UINT i=0; i < dwCount; i++)
  428. {
  429. p[i * VVMVERTEXBATCH] = ((VVM_WORD*)pBuffer)[i];
  430. }
  431. }
  432. }
  433. D3D_CATCH;
  434. return D3D_OK;
  435. }
  436. //-----------------------------------------------------------------------------
  437. HRESULT CVertexVM::GetData(DWORD dwMemType, DWORD dwStart, DWORD dwCount,
  438. LPVOID pBuffer)
  439. {
  440. try
  441. {
  442. VVM_WORD* p = this->GetDataAddr(dwMemType, dwStart);
  443. if (dwMemType == D3DSPR_CONST)
  444. {
  445. memcpy(pBuffer, p, dwCount * sizeof(VVM_WORD));
  446. }
  447. else
  448. {
  449. // Set only the first element of the register batch
  450. for (UINT i=0; i < dwCount; i++)
  451. {
  452. ((VVM_WORD*)pBuffer)[i] = p[i * VVMVERTEXBATCH];
  453. }
  454. }
  455. }
  456. D3D_CATCH;
  457. return D3D_OK;
  458. }
  459. //-----------------------------------------------------------------------------
  460. // - allocates memory for the shader
  461. // - validates shader code
  462. // - computes output FVF and vertex elements offsets
  463. //
  464. void CVertexVM::ValidateShader(CVShaderCodeI* shader, DWORD* orgShader)
  465. {
  466. // shader will be already stripped of comments upon reaching here if stripping
  467. // is necessary, so always use CodeAndComment size
  468. DWORD dwCodeOnlySize;
  469. DWORD dwCodeAndCommentSize;
  470. HRESULT hr = ComputeShaderCodeSize(orgShader, &dwCodeOnlySize,
  471. &dwCodeAndCommentSize, NULL);
  472. if (hr != S_OK)
  473. D3D_THROW(hr, "");
  474. // Initialize shader header and allocate memory for the shader code
  475. shader->m_dwSize = dwCodeAndCommentSize >> 2; // Size in DWORDs
  476. shader->m_pdwCode = new DWORD[shader->m_dwSize];
  477. if (shader->m_pdwCode == NULL)
  478. {
  479. D3D_THROW_FAIL("Cannot allocate memory for shader code");
  480. }
  481. memcpy(shader->m_pdwCode, orgShader, dwCodeAndCommentSize);
  482. // Based on the what output registers are modified, we compute the
  483. // corresponding FVF id. The id will be used for memory allocation
  484. // of the output buffer and will be passed to the rasterizer
  485. DWORD dwOutFVF = 0;
  486. DWORD nTexCoord = 0; // Number of output texture coordinates
  487. // For each texture register stores the combined write mask.
  488. // Used to find how many floats are written to each texture coordinates
  489. DWORD TextureWritten[8];
  490. memset(TextureWritten, 0, sizeof(TextureWritten));
  491. m_pdwCurToken = shader->m_pdwCode;
  492. DWORD* pEnd = shader->m_pdwCode + shader->m_dwSize;
  493. shader->m_dwOutRegs = 0;
  494. shader->m_InstCount = 0;
  495. m_CurInstIndex = 0;
  496. if ((*m_pdwCurToken != D3DVS_VERSION(1, 1)) &&
  497. (*m_pdwCurToken != D3DVS_VERSION(1, 0)) )
  498. {
  499. D3D_THROW_FAIL("Invalid vertex shader code version");
  500. }
  501. m_pdwCurToken++;
  502. while (m_pdwCurToken < pEnd && *m_pdwCurToken != D3DVS_END())
  503. {
  504. DWORD * pdwNextToken = m_pdwCurToken;
  505. DWORD dwInst = *m_pdwCurToken;
  506. if (!IsInstructionToken(dwInst))
  507. {
  508. PrintInstCount();
  509. D3D_THROW_FAIL("Intruction token has 31 bit set");
  510. }
  511. DWORD dwOpCode = D3DSI_GETOPCODE(dwInst);
  512. m_pdwCurToken++;
  513. switch (dwOpCode)
  514. {
  515. case D3DSIO_COMMENT:
  516. case D3DSIO_NOP : ; break;
  517. case D3DSIO_MOV :
  518. case D3DSIO_ADD :
  519. case D3DSIO_MAD :
  520. case D3DSIO_MUL :
  521. case D3DSIO_RCP :
  522. case D3DSIO_RSQ :
  523. case D3DSIO_DP3 :
  524. case D3DSIO_DP4 :
  525. case D3DSIO_MIN :
  526. case D3DSIO_MAX :
  527. case D3DSIO_SLT :
  528. case D3DSIO_SGE :
  529. case D3DSIO_EXP :
  530. case D3DSIO_LOG :
  531. case D3DSIO_EXPP :
  532. case D3DSIO_LOGP :
  533. case D3DSIO_LIT :
  534. case D3DSIO_DST :
  535. case D3DSIO_FRC :
  536. case D3DSIO_M4x4 :
  537. case D3DSIO_M4x3 :
  538. case D3DSIO_M3x4 :
  539. case D3DSIO_M3x3 :
  540. case D3DSIO_M3x2 :
  541. {
  542. // Find out if output register are modified by the command and
  543. // update the output FVF
  544. DWORD dwOffset;
  545. EvalDestination();
  546. VVM_WORD* m_pOutRegister = NULL;
  547. if ((m_pDest - m_dwOffset * VVMVERTEXBATCH) != m_reg.m_r[0])
  548. {
  549. dwOffset = m_dwOffset;
  550. m_pOutRegister = m_pDest - m_dwOffset * VVMVERTEXBATCH;
  551. if (m_pOutRegister == m_reg.m_output[0])
  552. {
  553. if (dwOffset == D3DSRO_POSITION)
  554. {
  555. dwOutFVF |= D3DFVF_XYZRHW;
  556. shader->m_dwOutRegs |= CPSGPShader_POSITION;
  557. }
  558. else
  559. if (dwOffset == D3DSRO_FOG)
  560. {
  561. dwOutFVF |= D3DFVF_FOG;
  562. shader->m_dwOutRegs |= CPSGPShader_FOG;
  563. }
  564. else
  565. if (dwOffset == D3DSRO_POINT_SIZE)
  566. {
  567. dwOutFVF |= D3DFVF_PSIZE;
  568. shader->m_dwOutRegs |= CPSGPShader_PSIZE;
  569. }
  570. }
  571. else
  572. if (m_pOutRegister == m_reg.m_color[0])
  573. if (dwOffset == 0)
  574. {
  575. dwOutFVF |= D3DFVF_DIFFUSE;
  576. shader->m_dwOutRegs |= CPSGPShader_DIFFUSE;
  577. }
  578. else
  579. {
  580. dwOutFVF |= D3DFVF_SPECULAR;
  581. shader->m_dwOutRegs |= CPSGPShader_SPECULAR;
  582. }
  583. else
  584. if (m_pOutRegister == m_reg.m_texture[0])
  585. {
  586. if (TextureWritten[dwOffset] == 0)
  587. {
  588. nTexCoord++;
  589. }
  590. TextureWritten[dwOffset] |= m_WriteMask;
  591. }
  592. else
  593. if (m_pOutRegister == m_reg.m_a[0])
  594. {
  595. }
  596. else
  597. {
  598. PrintInstCount();
  599. D3D_THROW_FAIL("Invalid output register offset");
  600. }
  601. }
  602. }
  603. break;
  604. default:
  605. {
  606. PrintInstCount();
  607. D3D_THROW_FAIL("Invalid shader opcode");
  608. }
  609. }
  610. m_pdwCurToken = pdwNextToken + GetInstructionLength(dwInst);
  611. shader->m_InstCount++;
  612. if (dwOpCode != D3DSIO_COMMENT)
  613. {
  614. m_CurInstIndex++;
  615. if (m_CurInstIndex > D3DVS_MAXINSTRUCTIONCOUNT_V1_1)
  616. {
  617. D3D_THROW_FAIL("Too many instructions in the shader");
  618. }
  619. }
  620. }
  621. #ifdef DBG
  622. // compute per-instruction stuff for shader
  623. if (shader->m_InstCount)
  624. {
  625. shader->m_pInst = new VShaderInst[shader->m_InstCount];
  626. if (shader->m_pInst == NULL)
  627. {
  628. D3D_THROW_FAIL("Cannot allocate memory for shader instructions");
  629. }
  630. memset( shader->m_pInst, 0, sizeof(VShaderInst)*shader->m_InstCount );
  631. DWORD dwCurInst = 0;
  632. // Remove version
  633. m_pdwCurToken = shader->m_pdwCode + 1;
  634. pEnd = shader->m_pdwCode + shader->m_dwSize;
  635. while( m_pdwCurToken < pEnd && *m_pdwCurToken != D3DVS_END())
  636. {
  637. UINT ilength = GetInstructionLength(*m_pdwCurToken);
  638. DWORD dwOpCode = D3DSI_GETOPCODE(*m_pdwCurToken);
  639. if (dwOpCode == D3DSIO_COMMENT)
  640. {
  641. shader->m_pInst[dwCurInst].m_Tokens[0] = *m_pdwCurToken;
  642. shader->m_pInst[dwCurInst].m_pComment = (m_pdwCurToken+1);
  643. shader->m_pInst[dwCurInst].m_cdwComment = ilength - 1;
  644. }
  645. else
  646. {
  647. memcpy( shader->m_pInst[dwCurInst].m_Tokens, m_pdwCurToken,
  648. 4*ilength );
  649. VertexShaderInstDisAsm( shader->m_pInst[dwCurInst].m_String,
  650. sizeof( shader->m_pInst[dwCurInst].m_String ) /
  651. sizeof( shader->m_pInst[dwCurInst].m_String[ 0 ] ),
  652. shader->m_pInst[dwCurInst].m_Tokens, 0x0 );
  653. }
  654. m_pdwCurToken += ilength;
  655. dwCurInst++;
  656. }
  657. }
  658. #endif
  659. dwOutFVF |= nTexCoord << D3DFVF_TEXCOUNT_SHIFT;
  660. // Compute output vertex offsets and size
  661. shader->m_dwOutVerSize = 4 * sizeof(float); // X, Y, Z, RHW
  662. shader->m_nOutTexCoord = nTexCoord;
  663. DWORD dwOffset = 4 * sizeof(float); // Current offset in the output vertex
  664. if ((dwOutFVF & D3DFVF_XYZRHW) == 0)
  665. {
  666. D3D_THROW_FAIL("Position is not written by shader");
  667. }
  668. shader->m_dwPointSizeOffset = dwOffset;
  669. if (dwOutFVF & D3DFVF_PSIZE)
  670. {
  671. dwOffset += 4;
  672. shader->m_dwOutVerSize += 4;
  673. }
  674. shader->m_dwDiffuseOffset = dwOffset;
  675. if (dwOutFVF & D3DFVF_DIFFUSE)
  676. {
  677. shader->m_dwOutVerSize += 4;
  678. dwOffset += 4;
  679. }
  680. shader->m_dwSpecularOffset = dwOffset;
  681. if (dwOutFVF & D3DFVF_SPECULAR)
  682. {
  683. dwOffset += 4;
  684. shader->m_dwOutVerSize += 4;
  685. }
  686. shader->m_dwFogOffset = dwOffset;
  687. if (dwOutFVF & D3DFVF_FOG)
  688. {
  689. dwOffset += 4;
  690. shader->m_dwOutVerSize += 4;
  691. }
  692. // Initialize texture coordinates
  693. shader->m_dwTextureOffset = dwOffset;
  694. if (nTexCoord)
  695. {
  696. for (DWORD i = 0; i < nTexCoord; i++)
  697. {
  698. DWORD n; // Size of texture coordinates
  699. if (TextureWritten[i] == 0)
  700. {
  701. D3D_THROW_FAIL("Texture coordinates are not continuous");
  702. }
  703. switch (TextureWritten[i])
  704. {
  705. case D3DSP_WRITEMASK_ALL:
  706. dwOutFVF |= D3DFVF_TEXCOORDSIZE4(i);
  707. n = 4 * sizeof(float);
  708. break;
  709. case D3DSP_WRITEMASK_0 | D3DSP_WRITEMASK_1 | D3DSP_WRITEMASK_2:
  710. dwOutFVF |= D3DFVF_TEXCOORDSIZE3(i);
  711. n = 3 * sizeof(float);
  712. break;
  713. case D3DSP_WRITEMASK_0 | D3DSP_WRITEMASK_1:
  714. dwOutFVF |= D3DFVF_TEXCOORDSIZE2(i);
  715. n = 2 * sizeof(float);
  716. break;
  717. case D3DSP_WRITEMASK_0:
  718. dwOutFVF |= D3DFVF_TEXCOORDSIZE1(i);
  719. n = 1 * sizeof(float);
  720. break;
  721. default:
  722. D3D_THROW_FAIL("Invalid write mask for texture register");
  723. }
  724. shader->m_dwOutVerSize += n;
  725. shader->m_dwOutTexCoordSize[i] = n;
  726. dwOffset += n;
  727. }
  728. }
  729. shader->m_dwOutFVF = dwOutFVF;
  730. }
  731. //-----------------------------------------------------------------------------
  732. CVShaderCode* CVertexVM::CreateShader(CVElement* pElements, DWORD dwNumElements,
  733. DWORD* pCode)
  734. {
  735. CVShaderCodeI* pShaderCode = NULL;
  736. try
  737. {
  738. pShaderCode = new CVShaderCodeI();
  739. if (pShaderCode == NULL)
  740. {
  741. D3D_THROW(E_OUTOFMEMORY, "Cannot allocate memory");
  742. }
  743. ValidateShader(pShaderCode, pCode);
  744. return pShaderCode;
  745. }
  746. catch (HRESULT e)
  747. {
  748. delete pShaderCode;
  749. D3D_ERR("Error in shader code creation");
  750. return NULL;
  751. }
  752. }
  753. //-----------------------------------------------------------------------------
  754. HRESULT CVertexVM::SetActiveShader(CVShaderCode* pCode)
  755. {
  756. m_pCurrentShader = (CVShaderCodeI*)pCode;
  757. return D3D_OK;
  758. }
  759. //-----------------------------------------------------------------------------
  760. // - parses destination token
  761. // - computes m_pDest, m_WrideMask, m_dwOffset for the destination
  762. // - current token pointer is andvanced to the next token
  763. //
  764. void CVertexVM::EvalDestination()
  765. {
  766. DWORD dwCurToken = *m_pdwCurToken;
  767. DWORD dwRegType = D3DSI_GETREGTYPE(dwCurToken);
  768. m_dwOffset = D3DSI_GETREGNUM(dwCurToken);
  769. m_WriteMask = D3DSI_GETWRITEMASK(dwCurToken);
  770. switch (dwRegType)
  771. {
  772. case D3DSPR_TEMP:
  773. m_pDest = m_reg.m_r[0];
  774. break;
  775. case D3DSPR_RASTOUT:
  776. m_pDest = m_reg.m_output[0];
  777. break;
  778. case D3DSPR_ATTROUT:
  779. m_pDest = m_reg.m_color[0];
  780. break;
  781. case D3DSPR_TEXCRDOUT:
  782. m_pDest = m_reg.m_texture[0];
  783. break;
  784. case D3DSPR_ADDR:
  785. m_pDest = m_reg.m_a[0];
  786. break;
  787. default:
  788. PrintInstCount();
  789. D3D_THROW_FAIL("Invalid register for destination");
  790. }
  791. m_pdwCurToken++;
  792. m_pDest += m_dwOffset * VVMVERTEXBATCH;
  793. }
  794. //---------------------------------------------------------------------
  795. void CVertexVM::PrintInstCount()
  796. {
  797. D3D_ERR("Error in instruction number: %d", m_CurInstIndex + 1);
  798. }
  799. //---------------------------------------------------------------------
  800. // Computes m_Source[index] and advances m_pdwCurToken
  801. //
  802. void CVertexVM::EvalSource(DWORD index)
  803. {
  804. const DWORD dwCurToken = *m_pdwCurToken;
  805. const DWORD dwRegType = D3DSI_GETREGTYPE(dwCurToken);
  806. const DWORD dwOffset = D3DSI_GETREGNUM(dwCurToken);
  807. DWORD swizzle = D3DVS_GETSWIZZLE(dwCurToken);
  808. VVM_WORD *src;
  809. VVM_WORD *outsrc = m_Source[index];
  810. if (dwRegType == D3DSPR_CONST)
  811. {
  812. D3DVS_ADDRESSMODE_TYPE am;
  813. am = (D3DVS_ADDRESSMODE_TYPE)D3DVS_GETADDRESSMODE(dwCurToken);
  814. int offset = (int)dwOffset;
  815. if (am == D3DVS_ADDRMODE_RELATIVE)
  816. {
  817. for (UINT i=0; i < m_count; i++)
  818. {
  819. int relOffset = *(int*)&m_reg.m_a[0][i].x;
  820. offset = (int)dwOffset + relOffset;
  821. #if DBG
  822. if (offset < 0 || offset >= D3DVS_CONSTREG_MAX_V1_1)
  823. {
  824. PrintInstCount();
  825. D3D_THROW_FAIL("Constant register index is out of bounds");
  826. }
  827. if (!m_c_initialized[offset])
  828. {
  829. PrintInstCount();
  830. D3D_ERR("Attempt to read from uninitialized constant register %d", offset);
  831. D3D_THROW_FAIL("");
  832. }
  833. #endif
  834. src = &m_reg.m_c[offset];
  835. if (swizzle == D3DVS_NOSWIZZLE)
  836. *outsrc = *src;
  837. else
  838. {
  839. // Where to take X
  840. const DWORD dwSrcX = D3DVS_GETSWIZZLECOMP(dwCurToken, 0);
  841. // Where to take Y
  842. const DWORD dwSrcY = D3DVS_GETSWIZZLECOMP(dwCurToken, 1);
  843. // Where to take Z
  844. const DWORD dwSrcZ = D3DVS_GETSWIZZLECOMP(dwCurToken, 2);
  845. // Where to take W
  846. const DWORD dwSrcW = D3DVS_GETSWIZZLECOMP(dwCurToken, 3);
  847. outsrc->x = ((float*)src)[dwSrcX];
  848. outsrc->y = ((float*)src)[dwSrcY];
  849. outsrc->z = ((float*)src)[dwSrcZ];
  850. outsrc->w = ((float*)src)[dwSrcW];
  851. }
  852. outsrc++;
  853. }
  854. }
  855. else
  856. {
  857. #if DBG
  858. if (!m_c_initialized[offset])
  859. {
  860. PrintInstCount();
  861. D3D_ERR("Attempt to read from uninitialized constant register %d", offset);
  862. D3D_THROW_FAIL("");
  863. }
  864. #endif
  865. src = &m_reg.m_c[offset];
  866. if (swizzle == D3DVS_NOSWIZZLE)
  867. {
  868. for (UINT i=0; i < m_count; i++)
  869. {
  870. outsrc[i] = *src;
  871. }
  872. }
  873. else
  874. {
  875. // Where to take X
  876. const DWORD dwSrcX = D3DVS_GETSWIZZLECOMP(dwCurToken, 0);
  877. // Where to take Y
  878. const DWORD dwSrcY = D3DVS_GETSWIZZLECOMP(dwCurToken, 1);
  879. // Where to take Z
  880. const DWORD dwSrcZ = D3DVS_GETSWIZZLECOMP(dwCurToken, 2);
  881. // Where to take W
  882. const DWORD dwSrcW = D3DVS_GETSWIZZLECOMP(dwCurToken, 3);
  883. VVM_WORD v;
  884. v.x = ((float*)src)[dwSrcX];
  885. v.y = ((float*)src)[dwSrcY];
  886. v.z = ((float*)src)[dwSrcZ];
  887. v.w = ((float*)src)[dwSrcW];
  888. for (UINT i=0; i < m_count; i++)
  889. {
  890. outsrc[i] = v;
  891. }
  892. }
  893. }
  894. }
  895. else
  896. {
  897. src = this->GetDataAddr(dwRegType, dwOffset);
  898. if (swizzle == D3DVS_NOSWIZZLE)
  899. memcpy(outsrc, src, m_count * sizeof(VVM_WORD));
  900. else
  901. {
  902. // Where to take X
  903. const DWORD dwSrcX = D3DVS_GETSWIZZLECOMP(dwCurToken, 0);
  904. // Where to take Y
  905. const DWORD dwSrcY = D3DVS_GETSWIZZLECOMP(dwCurToken, 1);
  906. // Where to take Z
  907. const DWORD dwSrcZ = D3DVS_GETSWIZZLECOMP(dwCurToken, 2);
  908. // Where to take W
  909. const DWORD dwSrcW = D3DVS_GETSWIZZLECOMP(dwCurToken, 3);
  910. for (UINT i=0; i < m_count; i++)
  911. {
  912. outsrc->x = ((float*)src)[dwSrcX];
  913. outsrc->y = ((float*)src)[dwSrcY];
  914. outsrc->z = ((float*)src)[dwSrcZ];
  915. outsrc->w = ((float*)src)[dwSrcW];
  916. outsrc++;
  917. src++;
  918. }
  919. }
  920. }
  921. if (D3DVS_GETSRCMODIFIER(dwCurToken) == D3DSPSM_NEG)
  922. {
  923. VVM_WORD *outsrc = m_Source[index];
  924. for (UINT i=0; i < m_count; i++)
  925. {
  926. outsrc->x = -outsrc->x;
  927. outsrc->y = -outsrc->y;
  928. outsrc->z = -outsrc->z;
  929. outsrc->w = -outsrc->w;
  930. outsrc++;
  931. }
  932. }
  933. m_pdwCurToken++;
  934. }
  935. //---------------------------------------------------------------------
  936. // Computes source operands and advances m_pdwCurToken
  937. //
  938. // Parameters:
  939. // index - index of the first source operand
  940. // count - number of source operands
  941. //
  942. void CVertexVM::EvalSource(DWORD index, DWORD count)
  943. {
  944. const DWORD dwCurToken = *m_pdwCurToken;
  945. const DWORD dwRegType = D3DSI_GETREGTYPE(dwCurToken);
  946. const DWORD dwOffset = D3DSI_GETREGNUM(dwCurToken);
  947. DWORD swizzle = D3DVS_GETSWIZZLE(dwCurToken);
  948. VVM_WORD *src;
  949. VVM_WORD *outsrc = m_Source[index];
  950. if (dwRegType == D3DSPR_CONST)
  951. {
  952. D3DVS_ADDRESSMODE_TYPE am;
  953. am = (D3DVS_ADDRESSMODE_TYPE)D3DVS_GETADDRESSMODE(dwCurToken);
  954. int offset = (int)dwOffset;
  955. if (am == D3DVS_ADDRMODE_RELATIVE)
  956. {
  957. for (UINT j=0; j < count; j++)
  958. {
  959. VVM_WORD *outsrc = m_Source[index + j];
  960. for (UINT i=0; i < m_count; i++)
  961. {
  962. int relOffset = *(int*)&m_reg.m_a[0][i].x;
  963. offset = (int)dwOffset + relOffset;
  964. #if DBG
  965. if (offset < 0 || offset >= D3DVS_CONSTREG_MAX_V1_1)
  966. {
  967. PrintInstCount();
  968. D3D_THROW_FAIL("Constant register index is out of bounds");
  969. }
  970. if (!m_c_initialized[offset])
  971. {
  972. PrintInstCount();
  973. D3D_ERR("Attempt to read from uninitialized constant register %d", offset);
  974. D3D_THROW_FAIL("");
  975. }
  976. #endif // DBG
  977. src = &m_reg.m_c[offset] + j;
  978. *outsrc = *src;
  979. outsrc++;
  980. }
  981. }
  982. }
  983. else
  984. {
  985. #if DBG
  986. for (UINT i = 0; i < count; i++)
  987. {
  988. if (!m_c_initialized[offset + i])
  989. {
  990. PrintInstCount();
  991. D3D_ERR("Attempt to read from uninitialized constant register %d", i);
  992. D3D_THROW_FAIL("");
  993. }
  994. }
  995. #endif
  996. src = &m_reg.m_c[offset];
  997. for (UINT j=0; j < count; j++)
  998. {
  999. for (UINT i=0; i < m_count; i++)
  1000. {
  1001. outsrc[i] = *src;
  1002. }
  1003. src++;
  1004. outsrc += VVMVERTEXBATCH;
  1005. }
  1006. }
  1007. }
  1008. else
  1009. {
  1010. src = this->GetDataAddr(dwRegType, dwOffset);
  1011. UINT size = m_count * sizeof(VVM_WORD);
  1012. for (UINT i=0; i < count; i++)
  1013. {
  1014. memcpy(outsrc, src, size);
  1015. outsrc += VVMVERTEXBATCH;
  1016. src += VVMVERTEXBATCH;
  1017. }
  1018. }
  1019. m_pdwCurToken++;
  1020. }
  1021. //-----------------------------------------------------------------------------
  1022. void CVertexVM::InstMov()
  1023. {
  1024. EvalDestination();
  1025. EvalSource(0);
  1026. if (m_pDest == m_reg.m_a[0])
  1027. {
  1028. for (UINT i=0; i < m_count; i++)
  1029. {
  1030. float p = (float)floor(m_Source[0][i].x);
  1031. *(int*)&m_pDest[i].x = FTOI(p);
  1032. }
  1033. }
  1034. else
  1035. {
  1036. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1037. {
  1038. memcpy(m_pDest, m_Source[0], m_BatchSize);
  1039. }
  1040. else
  1041. {
  1042. for (UINT i=0; i < m_count; i++)
  1043. {
  1044. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1045. m_pDest[i].x = m_Source[0][i].x;
  1046. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1047. m_pDest[i].y = m_Source[0][i].y;
  1048. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1049. m_pDest[i].z = m_Source[0][i].z;
  1050. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1051. m_pDest[i].w = m_Source[0][i].w;
  1052. }
  1053. }
  1054. }
  1055. }
  1056. //-----------------------------------------------------------------------------
  1057. void CVertexVM::InstAdd()
  1058. {
  1059. EvalDestination();
  1060. EvalSource(0);
  1061. EvalSource(1);
  1062. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1063. {
  1064. for (UINT i=0; i < m_count; i++)
  1065. {
  1066. m_pDest[i].x = m_Source[0][i].x + m_Source[1][i].x;
  1067. m_pDest[i].y = m_Source[0][i].y + m_Source[1][i].y;
  1068. m_pDest[i].z = m_Source[0][i].z + m_Source[1][i].z;
  1069. m_pDest[i].w = m_Source[0][i].w + m_Source[1][i].w;
  1070. }
  1071. }
  1072. else
  1073. {
  1074. for (UINT i=0; i < m_count; i++)
  1075. {
  1076. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1077. m_pDest[i].x = m_Source[0][i].x + m_Source[1][i].x;
  1078. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1079. m_pDest[i].y = m_Source[0][i].y + m_Source[1][i].y;
  1080. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1081. m_pDest[i].z = m_Source[0][i].z + m_Source[1][i].z;
  1082. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1083. m_pDest[i].w = m_Source[0][i].w + m_Source[1][i].w;
  1084. }
  1085. }
  1086. }
  1087. //-----------------------------------------------------------------------------
  1088. void CVertexVM::InstMad()
  1089. {
  1090. EvalDestination();
  1091. EvalSource(0);
  1092. EvalSource(1);
  1093. EvalSource(2);
  1094. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1095. {
  1096. for (UINT i=0; i < m_count; i++)
  1097. {
  1098. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x + m_Source[2][i].x;
  1099. m_pDest[i].y = m_Source[0][i].y * m_Source[1][i].y + m_Source[2][i].y;
  1100. m_pDest[i].z = m_Source[0][i].z * m_Source[1][i].z + m_Source[2][i].z;
  1101. m_pDest[i].w = m_Source[0][i].w * m_Source[1][i].w + m_Source[2][i].w;
  1102. }
  1103. }
  1104. else
  1105. {
  1106. for (UINT i=0; i < m_count; i++)
  1107. {
  1108. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1109. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x + m_Source[2][i].x;
  1110. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1111. m_pDest[i].y = m_Source[0][i].y * m_Source[1][i].y + m_Source[2][i].y;
  1112. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1113. m_pDest[i].z = m_Source[0][i].z * m_Source[1][i].z + m_Source[2][i].z;
  1114. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1115. m_pDest[i].w = m_Source[0][i].w * m_Source[1][i].w + m_Source[2][i].w;
  1116. }
  1117. }
  1118. }
  1119. //-----------------------------------------------------------------------------
  1120. void CVertexVM::InstMul()
  1121. {
  1122. EvalDestination();
  1123. EvalSource(0);
  1124. EvalSource(1);
  1125. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1126. {
  1127. for (UINT i=0; i < m_count; i++)
  1128. {
  1129. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x;
  1130. m_pDest[i].y = m_Source[0][i].y * m_Source[1][i].y;
  1131. m_pDest[i].z = m_Source[0][i].z * m_Source[1][i].z;
  1132. m_pDest[i].w = m_Source[0][i].w * m_Source[1][i].w;
  1133. }
  1134. }
  1135. else
  1136. {
  1137. for (UINT i=0; i < m_count; i++)
  1138. {
  1139. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1140. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x;
  1141. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1142. m_pDest[i].y = m_Source[0][i].y * m_Source[1][i].y;
  1143. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1144. m_pDest[i].z = m_Source[0][i].z * m_Source[1][i].z;
  1145. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1146. m_pDest[i].w = m_Source[0][i].w * m_Source[1][i].w;
  1147. }
  1148. }
  1149. }
  1150. //-----------------------------------------------------------------------------
  1151. void CVertexVM::InstDP3()
  1152. {
  1153. EvalDestination();
  1154. EvalSource(0);
  1155. EvalSource(1);
  1156. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1157. {
  1158. for (UINT i=0; i < m_count; i++)
  1159. {
  1160. m_pDest[i].x =
  1161. m_pDest[i].y =
  1162. m_pDest[i].z =
  1163. m_pDest[i].w = m_Source[0][i].x * m_Source[1][i].x +
  1164. m_Source[0][i].y * m_Source[1][i].y +
  1165. m_Source[0][i].z * m_Source[1][i].z;
  1166. }
  1167. }
  1168. else
  1169. {
  1170. for (UINT i=0; i < m_count; i++)
  1171. {
  1172. float v = m_Source[0][i].x * m_Source[1][i].x +
  1173. m_Source[0][i].y * m_Source[1][i].y +
  1174. m_Source[0][i].z * m_Source[1][i].z;
  1175. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1176. m_pDest[i].x = v;
  1177. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1178. m_pDest[i].y = v;
  1179. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1180. m_pDest[i].z = v;
  1181. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1182. m_pDest[i].w = v;
  1183. }
  1184. }
  1185. }
  1186. //-----------------------------------------------------------------------------
  1187. void CVertexVM::InstDP4()
  1188. {
  1189. EvalDestination();
  1190. EvalSource(0);
  1191. EvalSource(1);
  1192. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1193. {
  1194. for (UINT i=0; i < m_count; i++)
  1195. {
  1196. m_pDest[i].x =
  1197. m_pDest[i].y =
  1198. m_pDest[i].z =
  1199. m_pDest[i].w = m_Source[0][i].x * m_Source[1][i].x +
  1200. m_Source[0][i].y * m_Source[1][i].y +
  1201. m_Source[0][i].z * m_Source[1][i].z +
  1202. m_Source[0][i].w * m_Source[1][i].w;
  1203. }
  1204. }
  1205. else
  1206. {
  1207. for (UINT i=0; i < m_count; i++)
  1208. {
  1209. float v = m_Source[0][i].x * m_Source[1][i].x +
  1210. m_Source[0][i].y * m_Source[1][i].y +
  1211. m_Source[0][i].z * m_Source[1][i].z +
  1212. m_Source[0][i].w * m_Source[1][i].w;
  1213. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1214. m_pDest[i].x = v;
  1215. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1216. m_pDest[i].y = v;
  1217. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1218. m_pDest[i].z = v;
  1219. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1220. m_pDest[i].w = v;
  1221. }
  1222. }
  1223. }
  1224. //-----------------------------------------------------------------------------
  1225. void CVertexVM::InstRcp()
  1226. {
  1227. EvalDestination();
  1228. EvalSource(0);
  1229. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1230. {
  1231. for (UINT i=0; i < m_count; i++)
  1232. {
  1233. float v = m_Source[0][i].w;
  1234. if (v == 1.0f)
  1235. {
  1236. // Must be exactly 1.0
  1237. m_pDest[i].x =
  1238. m_pDest[i].y =
  1239. m_pDest[i].z =
  1240. m_pDest[i].w = 1.0f;
  1241. }
  1242. else
  1243. if (v == 0)
  1244. {
  1245. m_pDest[i].x =
  1246. m_pDest[i].y =
  1247. m_pDest[i].z =
  1248. m_pDest[i].w = PLUS_INFINITY();
  1249. }
  1250. else
  1251. {
  1252. m_pDest[i].x =
  1253. m_pDest[i].y =
  1254. m_pDest[i].z =
  1255. m_pDest[i].w = 1.0f/v;
  1256. }
  1257. }
  1258. }
  1259. else
  1260. {
  1261. for (UINT i=0; i < m_count; i++)
  1262. {
  1263. float v = m_Source[0][i].w;
  1264. if (FLOAT_EQZ(v))
  1265. v = PLUS_INFINITY();
  1266. else
  1267. if (v != 1.0f)
  1268. v = 1.0f/v;
  1269. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1270. m_pDest[i].x = v;
  1271. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1272. m_pDest[i].y = v;
  1273. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1274. m_pDest[i].z = v;
  1275. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1276. m_pDest[i].w = v;
  1277. }
  1278. }
  1279. }
  1280. //-----------------------------------------------------------------------------
  1281. void CVertexVM::InstRsq()
  1282. {
  1283. EvalDestination();
  1284. EvalSource(0);
  1285. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1286. {
  1287. for (UINT i=0; i < m_count; i++)
  1288. {
  1289. float v = ABSF(m_Source[0][i].w);
  1290. if (v == 1.0f)
  1291. {
  1292. m_pDest[i].x =
  1293. m_pDest[i].y =
  1294. m_pDest[i].z =
  1295. m_pDest[i].w = 1.0f;
  1296. }
  1297. else
  1298. if (v == 0)
  1299. {
  1300. m_pDest[i].x =
  1301. m_pDest[i].y =
  1302. m_pDest[i].z =
  1303. m_pDest[i].w = PLUS_INFINITY();
  1304. }
  1305. else
  1306. {
  1307. v = (float)(1.0f / sqrt(v));
  1308. m_pDest[i].x =
  1309. m_pDest[i].y =
  1310. m_pDest[i].z =
  1311. m_pDest[i].w = v;
  1312. }
  1313. }
  1314. }
  1315. else
  1316. {
  1317. for (UINT i=0; i < m_count; i++)
  1318. {
  1319. float v = ABSF(m_Source[0][i].w);
  1320. if (FLOAT_EQZ(v))
  1321. v = PLUS_INFINITY();
  1322. else
  1323. if (FLOAT_CMP_PONE(v, !=))
  1324. v = (float)(1.0f / sqrt(v));
  1325. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1326. m_pDest[i].x = v;
  1327. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1328. m_pDest[i].y = v;
  1329. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1330. m_pDest[i].z = v;
  1331. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1332. m_pDest[i].w = v;
  1333. }
  1334. }
  1335. }
  1336. //-----------------------------------------------------------------------------
  1337. void CVertexVM::InstSlt()
  1338. {
  1339. EvalDestination();
  1340. EvalSource(0);
  1341. EvalSource(1);
  1342. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1343. {
  1344. for (UINT i=0; i < m_count; i++)
  1345. {
  1346. m_pDest[i].x = (m_Source[0][i].x < m_Source[1][i].x) ? 1.0f : 0.0f;
  1347. m_pDest[i].y = (m_Source[0][i].y < m_Source[1][i].y) ? 1.0f : 0.0f;
  1348. m_pDest[i].z = (m_Source[0][i].z < m_Source[1][i].z) ? 1.0f : 0.0f;
  1349. m_pDest[i].w = (m_Source[0][i].w < m_Source[1][i].w) ? 1.0f : 0.0f;
  1350. }
  1351. }
  1352. else
  1353. {
  1354. for (UINT i=0; i < m_count; i++)
  1355. {
  1356. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1357. m_pDest[i].x = (m_Source[0][i].x < m_Source[1][i].x) ? 1.0f : 0.0f;
  1358. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1359. m_pDest[i].y = (m_Source[0][i].y < m_Source[1][i].y) ? 1.0f : 0.0f;
  1360. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1361. m_pDest[i].z = (m_Source[0][i].z < m_Source[1][i].z) ? 1.0f : 0.0f;
  1362. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1363. m_pDest[i].w = (m_Source[0][i].w < m_Source[1][i].w) ? 1.0f : 0.0f;
  1364. }
  1365. }
  1366. }
  1367. //-----------------------------------------------------------------------------
  1368. void CVertexVM::InstSge()
  1369. {
  1370. EvalDestination();
  1371. EvalSource(0);
  1372. EvalSource(1);
  1373. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1374. {
  1375. for (UINT i=0; i < m_count; i++)
  1376. {
  1377. m_pDest[i].x = (m_Source[0][i].x >= m_Source[1][i].x) ? 1.0f : 0.0f;
  1378. m_pDest[i].y = (m_Source[0][i].y >= m_Source[1][i].y) ? 1.0f : 0.0f;
  1379. m_pDest[i].z = (m_Source[0][i].z >= m_Source[1][i].z) ? 1.0f : 0.0f;
  1380. m_pDest[i].w = (m_Source[0][i].w >= m_Source[1][i].w) ? 1.0f : 0.0f;
  1381. }
  1382. }
  1383. else
  1384. {
  1385. for (UINT i=0; i < m_count; i++)
  1386. {
  1387. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1388. m_pDest[i].x = (m_Source[0][i].x >= m_Source[1][i].x) ? 1.0f : 0.0f;
  1389. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1390. m_pDest[i].y = (m_Source[0][i].y >= m_Source[1][i].y) ? 1.0f : 0.0f;
  1391. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1392. m_pDest[i].z = (m_Source[0][i].z >= m_Source[1][i].z) ? 1.0f : 0.0f;
  1393. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1394. m_pDest[i].w = (m_Source[0][i].w >= m_Source[1][i].w) ? 1.0f : 0.0f;
  1395. }
  1396. }
  1397. }
  1398. //-----------------------------------------------------------------------------
  1399. void CVertexVM::InstMin()
  1400. {
  1401. EvalDestination();
  1402. EvalSource(0);
  1403. EvalSource(1);
  1404. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1405. {
  1406. for (UINT i=0; i < m_count; i++)
  1407. {
  1408. m_pDest[i].x=(m_Source[0][i].x < m_Source[1][i].x) ? m_Source[0][i].x : m_Source[1][i].x;
  1409. m_pDest[i].y=(m_Source[0][i].y < m_Source[1][i].y) ? m_Source[0][i].y : m_Source[1][i].y;
  1410. m_pDest[i].z=(m_Source[0][i].z < m_Source[1][i].z) ? m_Source[0][i].z : m_Source[1][i].z;
  1411. m_pDest[i].w=(m_Source[0][i].w < m_Source[1][i].w) ? m_Source[0][i].w : m_Source[1][i].w;
  1412. }
  1413. }
  1414. else
  1415. {
  1416. for (UINT i=0; i < m_count; i++)
  1417. {
  1418. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1419. m_pDest[i].x=(m_Source[0][i].x < m_Source[1][i].x) ? m_Source[0][i].x : m_Source[1][i].x;
  1420. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1421. m_pDest[i].y=(m_Source[0][i].y < m_Source[1][i].y) ? m_Source[0][i].y : m_Source[1][i].y;
  1422. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1423. m_pDest[i].z=(m_Source[0][i].z < m_Source[1][i].z) ? m_Source[0][i].z : m_Source[1][i].z;
  1424. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1425. m_pDest[i].w=(m_Source[0][i].w < m_Source[1][i].w) ? m_Source[0][i].w : m_Source[1][i].w;
  1426. }
  1427. }
  1428. }
  1429. //-----------------------------------------------------------------------------
  1430. void CVertexVM::InstMax()
  1431. {
  1432. EvalDestination();
  1433. EvalSource(0);
  1434. EvalSource(1);
  1435. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1436. {
  1437. for (UINT i=0; i < m_count; i++)
  1438. {
  1439. m_pDest[i].x=(m_Source[0][i].x >= m_Source[1][i].x) ? m_Source[0][i].x : m_Source[1][i].x;
  1440. m_pDest[i].y=(m_Source[0][i].y >= m_Source[1][i].y) ? m_Source[0][i].y : m_Source[1][i].y;
  1441. m_pDest[i].z=(m_Source[0][i].z >= m_Source[1][i].z) ? m_Source[0][i].z : m_Source[1][i].z;
  1442. m_pDest[i].w=(m_Source[0][i].w >= m_Source[1][i].w) ? m_Source[0][i].w : m_Source[1][i].w;
  1443. }
  1444. }
  1445. else
  1446. {
  1447. for (UINT i=0; i < m_count; i++)
  1448. {
  1449. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1450. m_pDest[i].x=(m_Source[0][i].x >= m_Source[1][i].x) ? m_Source[0][i].x : m_Source[1][i].x;
  1451. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1452. m_pDest[i].y=(m_Source[0][i].y >= m_Source[1][i].y) ? m_Source[0][i].y : m_Source[1][i].y;
  1453. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1454. m_pDest[i].z=(m_Source[0][i].z >= m_Source[1][i].z) ? m_Source[0][i].z : m_Source[1][i].z;
  1455. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1456. m_pDest[i].w=(m_Source[0][i].w >= m_Source[1][i].w) ? m_Source[0][i].w : m_Source[1][i].w;
  1457. }
  1458. }
  1459. }
  1460. //-----------------------------------------------------------------------------
  1461. // Approximation 2**x
  1462. //
  1463. float ExpApprox(float x)
  1464. {
  1465. float tmp = (float)pow(2, x);
  1466. // Artificially reduce precision
  1467. DWORD tmpd = *(DWORD*)&tmp & 0xFFFFFF00;
  1468. return *(float*)&tmpd;
  1469. }
  1470. //-----------------------------------------------------------------------------
  1471. // Approximation Log2(x)
  1472. //
  1473. const float LOG2 = (float)(1.0f/log(2));
  1474. float LogApprox(float x)
  1475. {
  1476. float tmp = (float)(log(x) * LOG2);
  1477. // Artificially reduce precision
  1478. DWORD tmpd = *(DWORD*)&tmp & 0xFFFFFF00;
  1479. return *(float*)&tmpd;
  1480. }
  1481. //-----------------------------------------------------------------------------
  1482. // Full precision EXP
  1483. //
  1484. void CVertexVM::InstExp()
  1485. {
  1486. EvalDestination();
  1487. EvalSource(0);
  1488. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1489. {
  1490. for (UINT i=0; i < m_count; i++)
  1491. {
  1492. float v = (float)pow(2, m_Source[0][i].w);
  1493. m_pDest[i].x = v;
  1494. m_pDest[i].y = v;
  1495. m_pDest[i].z = v;
  1496. m_pDest[i].w = v;
  1497. }
  1498. }
  1499. else
  1500. {
  1501. for (UINT i=0; i < m_count; i++)
  1502. {
  1503. float v = (float)pow(2, m_Source[0][i].w);
  1504. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1505. m_pDest[i].x = v;
  1506. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1507. m_pDest[i].y = v;
  1508. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1509. m_pDest[i].z = v;
  1510. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1511. m_pDest[i].w = v;
  1512. }
  1513. }
  1514. }
  1515. //-----------------------------------------------------------------------------
  1516. // Low precision EXP
  1517. //
  1518. void CVertexVM::InstExpP()
  1519. {
  1520. EvalDestination();
  1521. EvalSource(0);
  1522. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1523. {
  1524. for (UINT i=0; i < m_count; i++)
  1525. {
  1526. float w = m_Source[0][i].w; // Input value
  1527. float v = (float)floor(w);
  1528. m_pDest[i].x = (float)pow(2, v);
  1529. m_pDest[i].y = w - v;
  1530. m_pDest[i].z = ExpApprox(w);
  1531. m_pDest[i].w = 1;
  1532. }
  1533. }
  1534. else
  1535. {
  1536. for (UINT i=0; i < m_count; i++)
  1537. {
  1538. float w = m_Source[0][i].w; // Input value
  1539. float v = (float)floor(w);
  1540. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1541. m_pDest[i].x = (float)pow(2, v);
  1542. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1543. m_pDest[i].y = w - v;
  1544. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1545. m_pDest[i].z = ExpApprox(w);
  1546. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1547. m_pDest[i].w = 1;
  1548. }
  1549. }
  1550. }
  1551. //-----------------------------------------------------------------------------
  1552. // Full precision LOG
  1553. //
  1554. void CVertexVM::InstLog()
  1555. {
  1556. EvalDestination();
  1557. EvalSource(0);
  1558. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1559. {
  1560. for (UINT i=0; i < m_count; i++)
  1561. {
  1562. float v = ABSF(m_Source[0][i].w);
  1563. if (v != 0)
  1564. {
  1565. m_pDest[i].x =
  1566. m_pDest[i].y =
  1567. m_pDest[i].z =
  1568. m_pDest[i].w = (float)(log(v) * LOG2);
  1569. }
  1570. else
  1571. {
  1572. m_pDest[i].x =
  1573. m_pDest[i].y =
  1574. m_pDest[i].z =
  1575. m_pDest[i].w = MINUS_INFINITY();
  1576. }
  1577. }
  1578. }
  1579. else
  1580. {
  1581. for (UINT i=0; i < m_count; i++)
  1582. {
  1583. float v = ABSF(m_Source[0][i].w);
  1584. if (v != 0)
  1585. {
  1586. v = (float)(log(v) * LOG2);
  1587. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1588. m_pDest[i].x = v;
  1589. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1590. m_pDest[i].y = v;
  1591. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1592. m_pDest[i].z = v;
  1593. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1594. m_pDest[i].w = v;
  1595. }
  1596. else
  1597. {
  1598. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1599. m_pDest[i].x = MINUS_INFINITY();
  1600. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1601. m_pDest[i].y = MINUS_INFINITY();
  1602. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1603. m_pDest[i].z = MINUS_INFINITY();
  1604. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1605. m_pDest[i].w = MINUS_INFINITY();
  1606. }
  1607. }
  1608. }
  1609. }
  1610. //-----------------------------------------------------------------------------
  1611. // Low precision LOG
  1612. //
  1613. void CVertexVM::InstLogP()
  1614. {
  1615. EvalDestination();
  1616. EvalSource(0);
  1617. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1618. {
  1619. for (UINT i=0; i < m_count; i++)
  1620. {
  1621. float v = ABSF(m_Source[0][i].w);
  1622. if (v != 0)
  1623. {
  1624. // -128.0 <= exponent < 127.0
  1625. int p = (int)(*(DWORD*)&v >> 23) - 127;
  1626. m_pDest[i].x = (float)p;
  1627. // 1.0 <= mantissa < 2.0
  1628. p = (*(DWORD*)&v & 0x7FFFFF) | 0x3F800000;
  1629. m_pDest[i].y = *(float*)&p;
  1630. m_pDest[i].z = LogApprox(v);
  1631. m_pDest[i].w = 1.0f;
  1632. }
  1633. else
  1634. {
  1635. m_pDest[i].x = MINUS_INFINITY();
  1636. m_pDest[i].y = 1.0f;
  1637. m_pDest[i].z = MINUS_INFINITY();
  1638. m_pDest[i].w = 1.0f;
  1639. }
  1640. }
  1641. }
  1642. else
  1643. {
  1644. for (UINT i=0; i < m_count; i++)
  1645. {
  1646. float v = ABSF(m_Source[0][i].w);
  1647. if (v != 0)
  1648. {
  1649. // -128.0 <= exponent < 127.0
  1650. int p = (int)(*(DWORD*)&v >> 23) - 127;
  1651. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1652. m_pDest[i].x = (float)p;
  1653. // 1.0 <= mantissa < 2.0
  1654. p = (*(DWORD*)&v & 0x7FFFFF) | 0x3F800000;
  1655. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1656. m_pDest[i].y = *(float*)&p;
  1657. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1658. m_pDest[i].z = LogApprox(v);
  1659. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1660. m_pDest[i].w = 1.0f;
  1661. }
  1662. else
  1663. {
  1664. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1665. m_pDest[i].x = MINUS_INFINITY();
  1666. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1667. m_pDest[i].y = 1.0f;
  1668. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1669. m_pDest[i].z = MINUS_INFINITY();
  1670. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1671. m_pDest[i].w = 1.0f;
  1672. }
  1673. }
  1674. }
  1675. }
  1676. //-----------------------------------------------------------------------------
  1677. void CVertexVM::InstFrc()
  1678. {
  1679. EvalDestination();
  1680. EvalSource(0);
  1681. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1682. {
  1683. for (UINT i=0; i < m_count; i++)
  1684. {
  1685. m_pDest[i].x = m_Source[0][i].x - (float)floor(m_Source[0][i].x);
  1686. m_pDest[i].y = m_Source[0][i].y - (float)floor(m_Source[0][i].y);
  1687. m_pDest[i].z = m_Source[0][i].z - (float)floor(m_Source[0][i].z);
  1688. m_pDest[i].w = m_Source[0][i].w - (float)floor(m_Source[0][i].w);
  1689. }
  1690. }
  1691. else
  1692. {
  1693. for (UINT i=0; i < m_count; i++)
  1694. {
  1695. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1696. m_pDest[i].x = m_Source[0][i].x - (float)floor(m_Source[0][i].x);
  1697. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1698. m_pDest[i].y = m_Source[0][i].y - (float)floor(m_Source[0][i].y);
  1699. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1700. m_pDest[i].z = m_Source[0][i].z - (float)floor(m_Source[0][i].z);
  1701. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1702. m_pDest[i].w = m_Source[0][i].w - (float)floor(m_Source[0][i].w);
  1703. }
  1704. }
  1705. }
  1706. //-----------------------------------------------------------------------------
  1707. void CVertexVM::InstLit()
  1708. {
  1709. EvalDestination();
  1710. EvalSource(0);
  1711. for (UINT i=0; i < m_count; i++)
  1712. {
  1713. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1714. m_pDest[i].x = 1;
  1715. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1716. m_pDest[i].y = 0;
  1717. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1718. m_pDest[i].z = 0;
  1719. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1720. m_pDest[i].w = 1;
  1721. float power = m_Source[0][i].w;
  1722. const float MAXPOWER = 127.9961f;
  1723. if (power < -MAXPOWER)
  1724. power = -MAXPOWER; // Fits into 8.8 fixed point format
  1725. else
  1726. if (power > MAXPOWER)
  1727. power = MAXPOWER; // Fits into 8.8 fixed point format
  1728. if (m_Source[0][i].x > 0)
  1729. {
  1730. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1731. m_pDest[i].y = m_Source[0][i].x;
  1732. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1733. if (m_Source[0][i].y > 0)
  1734. {
  1735. // Allowed approximation is EXP(power * LOG(m_Source[0].y))
  1736. m_pDest[i].z = (float)(pow(m_Source[0][i].y, power));
  1737. }
  1738. }
  1739. }
  1740. }
  1741. //-----------------------------------------------------------------------------
  1742. void CVertexVM::InstDst()
  1743. {
  1744. EvalDestination();
  1745. EvalSource(0);
  1746. EvalSource(1);
  1747. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1748. {
  1749. for (UINT i=0; i < m_count; i++)
  1750. {
  1751. m_pDest[i].x = 1;
  1752. m_pDest[i].y = m_Source[0][i].y * m_Source[1][i].y;
  1753. m_pDest[i].z = m_Source[0][i].z;
  1754. m_pDest[i].w = m_Source[1][i].w;
  1755. }
  1756. }
  1757. else
  1758. {
  1759. for (UINT i=0; i < m_count; i++)
  1760. {
  1761. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1762. m_pDest[i].x = 1;
  1763. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1764. m_pDest[i].y = m_Source[0][i].y * m_Source[1][i].y;
  1765. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1766. m_pDest[i].z = m_Source[0][i].z;
  1767. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1768. m_pDest[i].w = m_Source[1][i].w;
  1769. }
  1770. }
  1771. }
  1772. //-----------------------------------------------------------------------------
  1773. void CVertexVM::InstM4x4()
  1774. {
  1775. EvalDestination();
  1776. EvalSource(0);
  1777. EvalSource(1, 4);
  1778. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1779. {
  1780. for (UINT i=0; i < m_count; i++)
  1781. {
  1782. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
  1783. m_Source[0][i].y * m_Source[1][i].y +
  1784. m_Source[0][i].z * m_Source[1][i].z +
  1785. m_Source[0][i].w * m_Source[1][i].w;
  1786. m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
  1787. m_Source[0][i].y * m_Source[2][i].y +
  1788. m_Source[0][i].z * m_Source[2][i].z +
  1789. m_Source[0][i].w * m_Source[2][i].w;
  1790. m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
  1791. m_Source[0][i].y * m_Source[3][i].y +
  1792. m_Source[0][i].z * m_Source[3][i].z +
  1793. m_Source[0][i].w * m_Source[3][i].w;
  1794. m_pDest[i].w = m_Source[0][i].x * m_Source[4][i].x +
  1795. m_Source[0][i].y * m_Source[4][i].y +
  1796. m_Source[0][i].z * m_Source[4][i].z +
  1797. m_Source[0][i].w * m_Source[4][i].w;
  1798. }
  1799. }
  1800. else
  1801. {
  1802. for (UINT i=0; i < m_count; i++)
  1803. {
  1804. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1805. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
  1806. m_Source[0][i].y * m_Source[1][i].y +
  1807. m_Source[0][i].z * m_Source[1][i].z +
  1808. m_Source[0][i].w * m_Source[1][i].w;
  1809. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1810. m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
  1811. m_Source[0][i].y * m_Source[2][i].y +
  1812. m_Source[0][i].z * m_Source[2][i].z +
  1813. m_Source[0][i].w * m_Source[2][i].w;
  1814. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1815. m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
  1816. m_Source[0][i].y * m_Source[3][i].y +
  1817. m_Source[0][i].z * m_Source[3][i].z +
  1818. m_Source[0][i].w * m_Source[3][i].w;
  1819. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1820. m_pDest[i].w = m_Source[0][i].x * m_Source[4][i].x +
  1821. m_Source[0][i].y * m_Source[4][i].y +
  1822. m_Source[0][i].z * m_Source[4][i].z +
  1823. m_Source[0][i].w * m_Source[4][i].w;
  1824. }
  1825. }
  1826. }
  1827. //-----------------------------------------------------------------------------
  1828. void CVertexVM::InstM4x3()
  1829. {
  1830. EvalDestination();
  1831. EvalSource(0);
  1832. EvalSource(1, 3);
  1833. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1834. {
  1835. for (UINT i=0; i < m_count; i++)
  1836. {
  1837. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
  1838. m_Source[0][i].y * m_Source[1][i].y +
  1839. m_Source[0][i].z * m_Source[1][i].z +
  1840. m_Source[0][i].w * m_Source[1][i].w;
  1841. m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
  1842. m_Source[0][i].y * m_Source[2][i].y +
  1843. m_Source[0][i].z * m_Source[2][i].z +
  1844. m_Source[0][i].w * m_Source[2][i].w;
  1845. m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
  1846. m_Source[0][i].y * m_Source[3][i].y +
  1847. m_Source[0][i].z * m_Source[3][i].z +
  1848. m_Source[0][i].w * m_Source[3][i].w;
  1849. }
  1850. }
  1851. else
  1852. {
  1853. for (UINT i=0; i < m_count; i++)
  1854. {
  1855. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1856. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
  1857. m_Source[0][i].y * m_Source[1][i].y +
  1858. m_Source[0][i].z * m_Source[1][i].z +
  1859. m_Source[0][i].w * m_Source[1][i].w;
  1860. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1861. m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
  1862. m_Source[0][i].y * m_Source[2][i].y +
  1863. m_Source[0][i].z * m_Source[2][i].z +
  1864. m_Source[0][i].w * m_Source[2][i].w;
  1865. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1866. m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
  1867. m_Source[0][i].y * m_Source[3][i].y +
  1868. m_Source[0][i].z * m_Source[3][i].z +
  1869. m_Source[0][i].w * m_Source[3][i].w;
  1870. }
  1871. }
  1872. }
  1873. //-----------------------------------------------------------------------------
  1874. void CVertexVM::InstM3x4()
  1875. {
  1876. EvalDestination();
  1877. EvalSource(0);
  1878. EvalSource(1, 4);
  1879. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1880. {
  1881. for (UINT i=0; i < m_count; i++)
  1882. {
  1883. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
  1884. m_Source[0][i].y * m_Source[1][i].y +
  1885. m_Source[0][i].z * m_Source[1][i].z;
  1886. m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
  1887. m_Source[0][i].y * m_Source[2][i].y +
  1888. m_Source[0][i].z * m_Source[2][i].z;
  1889. m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
  1890. m_Source[0][i].y * m_Source[3][i].y +
  1891. m_Source[0][i].z * m_Source[3][i].z;
  1892. m_pDest[i].w = m_Source[0][i].x * m_Source[4][i].x +
  1893. m_Source[0][i].y * m_Source[4][i].y +
  1894. m_Source[0][i].z * m_Source[4][i].z;
  1895. }
  1896. }
  1897. else
  1898. {
  1899. for (UINT i=0; i < m_count; i++)
  1900. {
  1901. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1902. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
  1903. m_Source[0][i].y * m_Source[1][i].y +
  1904. m_Source[0][i].z * m_Source[1][i].z;
  1905. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1906. m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
  1907. m_Source[0][i].y * m_Source[2][i].y +
  1908. m_Source[0][i].z * m_Source[2][i].z;
  1909. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1910. m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
  1911. m_Source[0][i].y * m_Source[3][i].y +
  1912. m_Source[0][i].z * m_Source[3][i].z;
  1913. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1914. m_pDest[i].w = m_Source[0][i].x * m_Source[4][i].x +
  1915. m_Source[0][i].y * m_Source[4][i].y +
  1916. m_Source[0][i].z * m_Source[4][i].z;
  1917. }
  1918. }
  1919. }
  1920. //-----------------------------------------------------------------------------
  1921. void CVertexVM::InstM3x3()
  1922. {
  1923. EvalDestination();
  1924. EvalSource(0);
  1925. EvalSource(1, 3);
  1926. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1927. {
  1928. for (UINT i=0; i < m_count; i++)
  1929. {
  1930. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
  1931. m_Source[0][i].y * m_Source[1][i].y +
  1932. m_Source[0][i].z * m_Source[1][i].z;
  1933. m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
  1934. m_Source[0][i].y * m_Source[2][i].y +
  1935. m_Source[0][i].z * m_Source[2][i].z;
  1936. m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
  1937. m_Source[0][i].y * m_Source[3][i].y +
  1938. m_Source[0][i].z * m_Source[3][i].z;
  1939. }
  1940. }
  1941. else
  1942. {
  1943. for (UINT i=0; i < m_count; i++)
  1944. {
  1945. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1946. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
  1947. m_Source[0][i].y * m_Source[1][i].y +
  1948. m_Source[0][i].z * m_Source[1][i].z;
  1949. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1950. m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
  1951. m_Source[0][i].y * m_Source[2][i].y +
  1952. m_Source[0][i].z * m_Source[2][i].z;
  1953. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1954. m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
  1955. m_Source[0][i].y * m_Source[3][i].y +
  1956. m_Source[0][i].z * m_Source[3][i].z;
  1957. }
  1958. }
  1959. }
  1960. //-----------------------------------------------------------------------------
  1961. void CVertexVM::InstM3x2()
  1962. {
  1963. EvalDestination();
  1964. EvalSource(0);
  1965. EvalSource(1, 2);
  1966. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1967. {
  1968. for (UINT i=0; i < m_count; i++)
  1969. {
  1970. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
  1971. m_Source[0][i].y * m_Source[1][i].y +
  1972. m_Source[0][i].z * m_Source[1][i].z;
  1973. m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
  1974. m_Source[0][i].y * m_Source[2][i].y +
  1975. m_Source[0][i].z * m_Source[2][i].z;
  1976. }
  1977. }
  1978. else
  1979. {
  1980. for (UINT i=0; i < m_count; i++)
  1981. {
  1982. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1983. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
  1984. m_Source[0][i].y * m_Source[1][i].y +
  1985. m_Source[0][i].z * m_Source[1][i].z;
  1986. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1987. m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
  1988. m_Source[0][i].y * m_Source[2][i].y +
  1989. m_Source[0][i].z * m_Source[2][i].z;
  1990. }
  1991. }
  1992. }
  1993. //-----------------------------------------------------------------------------
  1994. HRESULT CVertexVM::ExecuteShader(LPD3DFE_PROCESSVERTICES pv, UINT vertexCount)
  1995. {
  1996. if (m_pCurrentShader == NULL)
  1997. {
  1998. D3D_ERR("No current shader set in the Virtual Shader Machine");
  1999. return D3DERR_INVALIDCALL;
  2000. }
  2001. try
  2002. {
  2003. m_count = vertexCount;
  2004. m_BatchSize = vertexCount * sizeof(VVM_WORD);
  2005. // Skip version
  2006. m_pdwCurToken = m_pCurrentShader->m_pdwCode + 1;
  2007. DWORD* pEnd = m_pCurrentShader->m_pdwCode + m_pCurrentShader->m_dwSize;
  2008. pEnd -= 1;
  2009. m_CurInstIndex = 0;
  2010. // Initialize position register
  2011. for (UINT i=0; i < m_count; i++)
  2012. {
  2013. m_reg.m_output[0][i].x = 0;
  2014. m_reg.m_output[0][i].y = 0;
  2015. m_reg.m_output[0][i].z = 0;
  2016. m_reg.m_output[0][i].w = 1;
  2017. }
  2018. while (m_pdwCurToken < pEnd)
  2019. {
  2020. DWORD dwInst = *m_pdwCurToken;
  2021. DWORD dwOpCode = D3DSI_GETOPCODE(dwInst);
  2022. m_pdwCurToken++;
  2023. switch (dwOpCode)
  2024. {
  2025. case D3DSIO_COMMENT: m_pdwCurToken += ((GetInstructionLength(dwInst))-1); break;
  2026. case D3DSIO_NOP : ; break;
  2027. case D3DSIO_MOV : InstMov(); break;
  2028. case D3DSIO_ADD : InstAdd(); break;
  2029. case D3DSIO_MAD : InstMad(); break;
  2030. case D3DSIO_MUL : InstMul(); break;
  2031. case D3DSIO_RCP : InstRcp(); break;
  2032. case D3DSIO_RSQ : InstRsq(); break;
  2033. case D3DSIO_DP3 : InstDP3(); break;
  2034. case D3DSIO_DP4 : InstDP4(); break;
  2035. case D3DSIO_MIN : InstMin(); break;
  2036. case D3DSIO_MAX : InstMax(); break;
  2037. case D3DSIO_SLT : InstSlt(); break;
  2038. case D3DSIO_SGE : InstSge(); break;
  2039. case D3DSIO_EXP : InstExp(); break;
  2040. case D3DSIO_LOG : InstLog(); break;
  2041. case D3DSIO_EXPP : InstExpP(); break;
  2042. case D3DSIO_LOGP : InstLogP(); break;
  2043. case D3DSIO_LIT : InstLit(); break;
  2044. case D3DSIO_DST : InstDst(); break;
  2045. case D3DSIO_FRC : InstFrc(); break;
  2046. case D3DSIO_M4x4 : InstM4x4(); break;
  2047. case D3DSIO_M4x3 : InstM4x3(); break;
  2048. case D3DSIO_M3x4 : InstM3x4(); break;
  2049. case D3DSIO_M3x3 : InstM3x3(); break;
  2050. case D3DSIO_M3x2 : InstM3x2(); break;
  2051. default:
  2052. {
  2053. PrintInstCount();
  2054. D3D_THROW_FAIL("Invalid shader opcode");
  2055. }
  2056. }
  2057. if (dwOpCode != D3DSIO_COMMENT)
  2058. m_CurInstIndex++;
  2059. }
  2060. m_CurInstIndex = 0;
  2061. }
  2062. D3D_CATCH;
  2063. return D3D_OK;
  2064. }
  2065. //-----------------------------------------------------------------------------
  2066. HRESULT CVertexVM::GetDataPointer(DWORD dwMemType, VVM_WORD ** pData)
  2067. {
  2068. try
  2069. {
  2070. *pData = this->GetDataAddr(dwMemType, 0);
  2071. }
  2072. catch (HRESULT e)
  2073. {
  2074. *pData = NULL;
  2075. return D3DERR_INVALIDCALL;
  2076. }
  2077. return D3D_OK;
  2078. }
  2079. //---------------------------------------------------------------------
  2080. VVM_REGISTERS* CVertexVM::GetRegisters()
  2081. {
  2082. return &m_reg;
  2083. }