Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

2168 lines
73 KiB

  1. /*==========================================================================
  2. *
  3. * Copyright (C) 1999 Microsoft Corporation. All Rights Reserved.
  4. *
  5. * File: vvm.cpp
  6. * Content: Virtual Vertex Machine implementation
  7. *
  8. * History:
  9. * 6/16/00
  10. * Added LOGP, EXPP, NM3
  11. * RCP, RSQ, LOG, LOGP, EXP, EXPP take input value from W instead of X
  12. * 7/11/00
  13. * Removed NM3 macro
  14. *
  15. *
  16. ***************************************************************************/
  17. #include "pch.cpp"
  18. #pragma hdrstop
  19. #include <stdio.h>
  20. #include "vvm.h"
  21. #include "d3dexcept.hpp"
  22. #include "float.h"
  23. #if DBG
  24. #include "rtdmon.hpp"
  25. #endif
  26. const DWORD __MAX_CODE_SIZE = 4096;
  27. //-----------------------------------------------------------------------------
  28. HRESULT ComputeShaderCodeSize(
  29. CONST DWORD* pCode,
  30. DWORD* pdwCodeOnlySize,
  31. DWORD* pdwCodeAndCommentSize,
  32. DWORD* pdwNumConstDefs)
  33. {
  34. // set this now for error return
  35. *pdwCodeOnlySize = 0;
  36. *pdwCodeAndCommentSize = 0;
  37. DWORD dwNumConstDefs = 0;
  38. DWORD dwCodeOnlySize = 0;
  39. DWORD dwCodeAndCommentSize = 0;
  40. CONST DWORD* pToken = pCode;
  41. DWORD Version = *pToken++; dwCodeOnlySize++; dwCodeAndCommentSize++;
  42. if ( (((Version >> 16) != 0xFFFF) && (Version >> 16) != 0xFFFE) ||
  43. ((Version & 0xFFFF) == 0x0))
  44. {
  45. D3D_ERR("invalid version token");
  46. return D3DERR_INVALIDCALL;
  47. }
  48. // very basic parse to find number of instructions
  49. while ( ((*pToken) != 0x0000FFFF) && (dwCodeOnlySize <= __MAX_CODE_SIZE) )
  50. {
  51. if (IsInstructionToken(*pToken))
  52. {
  53. DWORD opCode = (*pToken) & D3DSI_OPCODE_MASK;
  54. if ( opCode == D3DSIO_COMMENT )
  55. {
  56. UINT DWordSize = ((*pToken)&D3DSI_COMMENTSIZE_MASK)>>D3DSI_COMMENTSIZE_SHIFT;
  57. dwCodeAndCommentSize += (1+DWordSize); // instruction token + comment
  58. pToken += (1+DWordSize);
  59. }
  60. else if (opCode == D3DSIO_DEF )
  61. {
  62. pToken += 6;
  63. dwCodeOnlySize += 6;
  64. dwCodeAndCommentSize += 6;
  65. dwNumConstDefs++;
  66. }
  67. else
  68. {
  69. pToken++; dwCodeOnlySize++; dwCodeAndCommentSize++;
  70. }
  71. }
  72. else
  73. {
  74. pToken++; dwCodeOnlySize++; dwCodeAndCommentSize++;
  75. }
  76. }
  77. dwCodeOnlySize++; dwCodeAndCommentSize++; // for END token
  78. if (dwCodeOnlySize > __MAX_CODE_SIZE)
  79. {
  80. D3D_ERR("Shader code size is too big. Possibly, missing D3DVS_END()");
  81. return D3DERR_INVALIDCALL;
  82. }
  83. *pdwCodeOnlySize = 4*dwCodeOnlySize;
  84. *pdwCodeAndCommentSize = 4*dwCodeAndCommentSize;
  85. if( pdwNumConstDefs )
  86. *pdwNumConstDefs = dwNumConstDefs;
  87. return S_OK;
  88. }
  89. //-----------------------------------------------------------------------------
  90. float MINUS_INFINITY()
  91. {
  92. return -FLT_MAX;
  93. }
  94. float PLUS_INFINITY()
  95. {
  96. return FLT_MAX;
  97. }
  98. //-----------------------------------------------------------------------------
  99. // Returns instruction size, based on the op-code
  100. //
  101. UINT CVertexVM::GetNumSrcOperands(UINT opcode)
  102. {
  103. // returns number of source operands + opcode + destination
  104. switch (opcode)
  105. {
  106. case D3DSIO_MOV : return 1;
  107. case D3DSIO_ADD : return 2;
  108. case D3DSIO_MAD : return 3;
  109. case D3DSIO_MUL : return 2;
  110. case D3DSIO_RCP : return 1;
  111. case D3DSIO_RSQ : return 1;
  112. case D3DSIO_DP3 : return 2;
  113. case D3DSIO_DP4 : return 2;
  114. case D3DSIO_MIN : return 2;
  115. case D3DSIO_MAX : return 2;
  116. case D3DSIO_SLT : return 2;
  117. case D3DSIO_SGE : return 2;
  118. case D3DSIO_EXP : return 1;
  119. case D3DSIO_LOG : return 1;
  120. case D3DSIO_EXPP: return 1;
  121. case D3DSIO_LOGP: return 1;
  122. case D3DSIO_LIT : return 1;
  123. case D3DSIO_DST : return 2;
  124. case D3DSIO_FRC : return 1;
  125. case D3DSIO_M4x4: return 2;
  126. case D3DSIO_M4x3: return 2;
  127. case D3DSIO_M3x4: return 2;
  128. case D3DSIO_M3x3: return 2;
  129. case D3DSIO_M3x2: return 2;
  130. case D3DSIO_NOP: return 0;
  131. default:
  132. PrintInstCount();
  133. D3D_THROW_FAIL("Illegal instruction");
  134. }
  135. return 0;
  136. }
  137. //-----------------------------------------------------------------------------
  138. // Returns a bit field to say which source register components are used to
  139. // produce the output components.
  140. // 4 bits are used per each output component:
  141. // 0-3 output component X
  142. // 4-7 output component Y
  143. // 8-11 output component Z
  144. // 12-15 output component W
  145. // Each of the four bits is used to say if this source component is used to
  146. // produce the output component:
  147. // bit 0 - X, bit 1 - Y, bit 2 - Z, bit 3 - W.
  148. //
  149. // SourceIndex - sequential index of the source operand
  150. //
  151. UINT CVertexVM::GetRegisterUsage(UINT opcode, UINT SourceIndex)
  152. {
  153. switch (opcode)
  154. {
  155. case D3DSIO_MOV : return 1 | (2 << 4) | (4 << 8) | (8 << 12);
  156. case D3DSIO_ADD : return 1 | (2 << 4) | (4 << 8) | (8 << 12);
  157. case D3DSIO_MAD : return 1 | (2 << 4) | (4 << 8) | (8 << 12);
  158. case D3DSIO_MUL : return 1 | (2 << 4) | (4 << 8) | (8 << 12);
  159. case D3DSIO_RCP : return 8 | (8 << 4) | (8 << 8) | (8 << 12);
  160. case D3DSIO_RSQ : return 8 | (8 << 4) | (8 << 8) | (8 << 12);
  161. case D3DSIO_DP3 : return 7 | (7 << 4) | (7 << 8) | (7 << 12);
  162. case D3DSIO_DP4 : return 0xF | (0xF << 4) | (0xF << 8) | (0xF << 12);
  163. case D3DSIO_MIN : return 1 | (2 << 4) | (4 << 8) | (8 << 12);
  164. case D3DSIO_MAX : return 1 | (2 << 4) | (4 << 8) | (8 << 12);
  165. case D3DSIO_SLT : return 1 | (2 << 4) | (4 << 8) | (8 << 12);
  166. case D3DSIO_SGE : return 1 | (2 << 4) | (4 << 8) | (8 << 12);
  167. case D3DSIO_EXP : return 8 | (8 << 4) | (8 << 8) | (8 << 12);
  168. case D3DSIO_LOG : return 8 | (8 << 4) | (8 << 8) | (8 << 12);
  169. case D3DSIO_EXPP: return 8 | (8 << 4) | (8 << 8);
  170. case D3DSIO_LOGP: return 8 | (8 << 4) | (8 << 8);
  171. case D3DSIO_LIT : return (1 << 4) | ((1 | 2 | 8) << 8);
  172. case D3DSIO_DST :
  173. if (SourceIndex == 0)
  174. return (2 << 4) | (4 << 8);
  175. else
  176. return (2 << 4) | (8 << 12);
  177. case D3DSIO_FRC : return 1 | (2 << 4) | (4 << 8) | (8 << 12);
  178. case D3DSIO_M4x4: return 0xF | (0xF << 4) | (0xF << 8) | (0xF << 12);
  179. case D3DSIO_M4x3: return 0xF | (0xF << 4) | (0xF << 8);
  180. case D3DSIO_M3x4: return 7 | (7 << 4) | (7 << 8) | (7 << 12);
  181. case D3DSIO_M3x3: return 7 | (7 << 4) | (7 << 8);
  182. case D3DSIO_M3x2: return 7 | (7 << 4);
  183. case D3DSIO_NOP: return 0;
  184. default:
  185. PrintInstCount();
  186. D3D_THROW_FAIL("Illegal instruction");
  187. }
  188. return 0;
  189. }
  190. //-----------------------------------------------------------------------------
  191. // Returns instruction size in DWORDs, based on the op-code
  192. //
  193. UINT CVertexVM::GetInstructionLength(DWORD inst)
  194. {
  195. // returns number of source operands + opcode + destination
  196. DWORD opcode = D3DSI_GETOPCODE(inst);
  197. if (opcode == D3DSIO_NOP)
  198. return 1;
  199. else if (opcode == D3DSIO_COMMENT)
  200. return ((inst & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT) + 1;
  201. else
  202. return GetNumSrcOperands(opcode) + 2;
  203. }
  204. //-----------------------------------------------------------------------------
  205. // VertexShaderInstDisAsm - Generates human-readable character string for a
  206. // single vertex shader instruction. String interface is similar to _snprintf.
  207. //-----------------------------------------------------------------------------
  208. static int VertexShaderInstDisAsm(
  209. char* pStrRet, int StrSizeRet, DWORD* pShader, DWORD Flags )
  210. {
  211. DWORD* pToken = pShader;
  212. // stage in local string, then copy
  213. char pStr[256] = "";
  214. #define _ADDSTR( _Str ) { _snprintf( pStr, 256, "%s" _Str , pStr ); }
  215. #define _ADDSTRP( _Str, _Param ) { _snprintf( pStr, 256, "%s" _Str , pStr, _Param ); }
  216. DWORD Inst = *pToken++;
  217. DWORD Opcode = (Inst & D3DSI_OPCODE_MASK);
  218. switch (Opcode)
  219. {
  220. case D3DSIO_NOP: _ADDSTR("NOP"); break;
  221. case D3DSIO_MOV: _ADDSTR("MOV"); break;
  222. case D3DSIO_ADD: _ADDSTR("ADD"); break;
  223. case D3DSIO_MAD: _ADDSTR("MAD"); break;
  224. case D3DSIO_MUL: _ADDSTR("MUL"); break;
  225. case D3DSIO_RCP: _ADDSTR("RCP"); break;
  226. case D3DSIO_RSQ: _ADDSTR("RSQ"); break;
  227. case D3DSIO_DP3: _ADDSTR("DP3"); break;
  228. case D3DSIO_DP4: _ADDSTR("DP4"); break;
  229. case D3DSIO_MIN: _ADDSTR("MIN"); break;
  230. case D3DSIO_MAX: _ADDSTR("MAX"); break;
  231. case D3DSIO_SLT: _ADDSTR("SLT"); break;
  232. case D3DSIO_SGE: _ADDSTR("SGE"); break;
  233. case D3DSIO_EXP: _ADDSTR("EXP"); break;
  234. case D3DSIO_LOG: _ADDSTR("LOG"); break;
  235. case D3DSIO_EXPP:_ADDSTR("EXPP"); break;
  236. case D3DSIO_LOGP:_ADDSTR("LOGP"); break;
  237. case D3DSIO_LIT: _ADDSTR("LIT"); break;
  238. case D3DSIO_DST: _ADDSTR("DST"); break;
  239. case D3DSIO_COMMENT: _ADDSTR("COMMENT"); break;
  240. default : _ADDSTR("???"); break;
  241. }
  242. if (*pToken & (1L<<31))
  243. {
  244. DWORD DstParam = *pToken++;
  245. switch (DstParam & D3DSP_REGTYPE_MASK)
  246. {
  247. case D3DSPR_TEMP : _ADDSTRP(" T%d", (DstParam & D3DSP_REGNUM_MASK) ); break;
  248. case D3DSPR_ADDR : _ADDSTR(" Addr"); break;
  249. case D3DSPR_RASTOUT : _ADDSTRP(" R%d", (DstParam & D3DSP_REGNUM_MASK) ); break;
  250. case D3DSPR_ATTROUT : _ADDSTRP(" A%d", (DstParam & D3DSP_REGNUM_MASK) ); break;
  251. case D3DSPR_TEXCRDOUT: _ADDSTRP(" T%d", (DstParam & D3DSP_REGNUM_MASK) ); break;
  252. }
  253. if (*pToken & (1L<<31)) _ADDSTR(" ");
  254. while (*pToken & (1L<<31))
  255. {
  256. DWORD SrcParam = *pToken++;
  257. switch (SrcParam & D3DSP_REGTYPE_MASK)
  258. {
  259. case D3DSPR_TEMP : _ADDSTRP(" T%d", (SrcParam & D3DSP_REGNUM_MASK) ); break;
  260. case D3DSPR_INPUT : _ADDSTRP(" I%d", (SrcParam & D3DSP_REGNUM_MASK) ); break;
  261. case D3DSPR_CONST : _ADDSTRP(" C%d", (SrcParam & D3DSP_REGNUM_MASK) ); break;
  262. }
  263. if (*pToken & (1L<<31)) _ADDSTR(",");
  264. }
  265. }
  266. return _snprintf( pStrRet, StrSizeRet, "%s", pStr );
  267. }
  268. //-----------------------------------------------------------------------------
  269. #if DBG
  270. typedef struct _VShaderInst
  271. {
  272. DWORD m_Tokens[D3DDM_MAX_VSINSTDWORD];
  273. char m_String[D3DDM_MAX_VSINSTSTRING];
  274. DWORD* m_pComment;
  275. DWORD m_cdwComment;
  276. } VShaderInst;
  277. #endif
  278. //-----------------------------------------------------------------------------
  279. class CVShaderCodeI: public CVShaderCode
  280. {
  281. public:
  282. CVShaderCodeI()
  283. {
  284. m_pdwCode = NULL;
  285. m_InstCount = 0;
  286. #if DBG
  287. m_pInst = NULL;
  288. #endif
  289. }
  290. ~CVShaderCodeI()
  291. {
  292. delete m_pdwCode;
  293. #if DBG
  294. if (m_pInst) delete m_pInst;
  295. #endif
  296. }
  297. DWORD* m_pdwCode; // Pointer to the original code
  298. DWORD m_dwSize; // Size of the code in DWORDs
  299. DWORD m_InstCount;
  300. #if DBG
  301. VShaderInst* m_pInst;
  302. #endif
  303. DWORD InstCount( void ) { return m_InstCount; }
  304. DWORD* InstTokens( DWORD Inst );
  305. char* InstDisasm( DWORD Inst );
  306. DWORD* InstComment( DWORD Inst );
  307. DWORD InstCommentSize( DWORD Inst );
  308. };
  309. //-----------------------------------------------------------------------------
  310. DWORD* CVShaderCodeI::InstTokens( DWORD Inst )
  311. {
  312. #if DBG
  313. if ( Inst >= m_InstCount ) return NULL;
  314. return m_pInst[Inst].m_Tokens;
  315. #else
  316. return NULL;
  317. #endif
  318. }
  319. //-----------------------------------------------------------------------------
  320. char* CVShaderCodeI::InstDisasm( DWORD Inst )
  321. {
  322. #if DBG
  323. if ( Inst >= m_InstCount ) return NULL;
  324. return m_pInst[Inst].m_String;
  325. #else
  326. return NULL;
  327. #endif
  328. }
  329. //-----------------------------------------------------------------------------
  330. DWORD* CVShaderCodeI::InstComment( DWORD Inst )
  331. {
  332. #if DBG
  333. if ( Inst >= m_InstCount ) return NULL;
  334. return m_pInst[Inst].m_pComment;
  335. #else
  336. return NULL;
  337. #endif
  338. }
  339. //-----------------------------------------------------------------------------
  340. DWORD CVShaderCodeI::InstCommentSize( DWORD Inst )
  341. {
  342. #if DBG
  343. if ( Inst >= m_InstCount ) return NULL;
  344. return m_pInst[Inst].m_cdwComment;
  345. #else
  346. return NULL;
  347. #endif
  348. }
  349. //-----------------------------------------------------------------------------
  350. // Vertex Virtual Machine object implementation
  351. //
  352. //-----------------------------------------------------------------------------
  353. CVertexVM::CVertexVM()
  354. {
  355. m_pCurrentShader = NULL;
  356. m_CurInstIndex = 0;
  357. #if DBG
  358. for (UINT i=0; i < D3DVS_CONSTREG_MAX_V1_1; i++)
  359. m_c_initialized[i] = FALSE;
  360. #endif
  361. }
  362. //-----------------------------------------------------------------------------
  363. CVertexVM::~CVertexVM()
  364. {
  365. }
  366. //-----------------------------------------------------------------------------
  367. void CVertexVM::Init(UINT MaxVertexShaderConst)
  368. {
  369. m_MaxVertexShaderConst = max(MaxVertexShaderConst, D3DVS_CONSTREG_MAX_V1_1);
  370. m_reg.m_c = new VVM_WORD[m_MaxVertexShaderConst];
  371. if (m_reg.m_c == NULL)
  372. D3D_THROW_FAIL("Not enough memory to allocate vertex shader constant array");
  373. }
  374. //-----------------------------------------------------------------------------
  375. // Returns addres of the first vertex of the element
  376. //
  377. VVM_WORD * CVertexVM::GetDataAddr(DWORD dwRegType, DWORD dwElementIndex)
  378. {
  379. switch (dwRegType)
  380. {
  381. case D3DSPR_TEMP : return &m_reg.m_r[dwElementIndex][0];
  382. case D3DSPR_INPUT : return &m_reg.m_v[dwElementIndex][0];
  383. case D3DSPR_CONST : return &m_reg.m_c[dwElementIndex];
  384. case D3DSPR_ADDR : return &m_reg.m_a[dwElementIndex][0];
  385. case D3DSPR_RASTOUT : return &m_reg.m_output[dwElementIndex][0];
  386. case D3DSPR_ATTROUT : return &m_reg.m_color[dwElementIndex][0];
  387. case D3DSPR_TEXCRDOUT : return &m_reg.m_texture[dwElementIndex][0];
  388. default:
  389. D3D_THROW(D3DERR_INVALIDCALL, "Invalid register type");
  390. }
  391. return NULL;
  392. }
  393. //-----------------------------------------------------------------------------
  394. // Sets data of the first vertex pf the register
  395. //
  396. HRESULT CVertexVM::SetData(DWORD dwMemType, DWORD dwStart, DWORD dwCount,
  397. LPVOID pBuffer)
  398. {
  399. try
  400. {
  401. VVM_WORD* p = this->GetDataAddr(dwMemType, dwStart);
  402. if (dwMemType == D3DSPR_CONST)
  403. {
  404. #if DBG
  405. if ((dwStart + dwCount) > m_MaxVertexShaderConst)
  406. {
  407. D3D_THROW_FAIL("Attemt to write outside constant register array");
  408. }
  409. // We only can set initialized flag for software constant registers
  410. if (dwStart < D3DVS_CONSTREG_MAX_V1_1)
  411. {
  412. BOOL* p = &m_c_initialized[dwStart];
  413. UINT count = dwCount;
  414. if ((dwStart + dwCount) >= D3DVS_CONSTREG_MAX_V1_1)
  415. {
  416. count = D3DVS_CONSTREG_MAX_V1_1 - dwStart;
  417. }
  418. for (UINT i = 0; i < count; i++)
  419. {
  420. p[i] = TRUE;
  421. }
  422. }
  423. #endif
  424. UINT size = dwCount * sizeof(VVM_WORD);
  425. memcpy(p, pBuffer, size);
  426. }
  427. else
  428. {
  429. // Set only the first element of the register batch
  430. for (UINT i=0; i < dwCount; i++)
  431. {
  432. p[i * VVMVERTEXBATCH] = ((VVM_WORD*)pBuffer)[i];
  433. }
  434. }
  435. }
  436. D3D_CATCH;
  437. return D3D_OK;
  438. }
  439. //-----------------------------------------------------------------------------
  440. HRESULT CVertexVM::GetData(DWORD dwMemType, DWORD dwStart, DWORD dwCount,
  441. LPVOID pBuffer)
  442. {
  443. try
  444. {
  445. VVM_WORD* p = this->GetDataAddr(dwMemType, dwStart);
  446. if (dwMemType == D3DSPR_CONST)
  447. {
  448. memcpy(pBuffer, p, dwCount * sizeof(VVM_WORD));
  449. }
  450. else
  451. {
  452. // Set only the first element of the register batch
  453. for (UINT i=0; i < dwCount; i++)
  454. {
  455. ((VVM_WORD*)pBuffer)[i] = p[i * VVMVERTEXBATCH];
  456. }
  457. }
  458. }
  459. D3D_CATCH;
  460. return D3D_OK;
  461. }
  462. //-----------------------------------------------------------------------------
  463. // - allocates memory for the shader
  464. // - validates shader code
  465. // - computes output FVF and vertex elements offsets
  466. //
  467. void CVertexVM::ValidateShader(CVShaderCodeI* shader, DWORD* orgShader)
  468. {
  469. // shader will be already stripped of comments upon reaching here if stripping
  470. // is necessary, so always use CodeAndComment size
  471. DWORD dwCodeOnlySize;
  472. DWORD dwCodeAndCommentSize;
  473. HRESULT hr = ComputeShaderCodeSize(orgShader, &dwCodeOnlySize,
  474. &dwCodeAndCommentSize, NULL);
  475. if (hr != S_OK)
  476. D3D_THROW(hr, "");
  477. // Initialize shader header and allocate memory for the shader code
  478. shader->m_dwSize = dwCodeAndCommentSize >> 2; // Size in DWORDs
  479. shader->m_pdwCode = new DWORD[shader->m_dwSize];
  480. if (shader->m_pdwCode == NULL)
  481. {
  482. D3D_THROW_FAIL("Cannot allocate memory for shader code");
  483. }
  484. memcpy(shader->m_pdwCode, orgShader, dwCodeAndCommentSize);
  485. // Based on the what output registers are modified, we compute the
  486. // corresponding FVF id. The id will be used for memory allocation
  487. // of the output buffer and will be passed to the rasterizer
  488. DWORD dwOutFVF = 0;
  489. DWORD nTexCoord = 0; // Number of output texture coordinates
  490. // For each texture register stores the combined write mask.
  491. // Used to find how many floats are written to each texture coordinates
  492. DWORD TextureWritten[8];
  493. memset(TextureWritten, 0, sizeof(TextureWritten));
  494. m_pdwCurToken = shader->m_pdwCode;
  495. DWORD* pEnd = shader->m_pdwCode + shader->m_dwSize;
  496. shader->m_dwOutRegs = 0;
  497. shader->m_InstCount = 0;
  498. m_CurInstIndex = 0;
  499. if ((*m_pdwCurToken != D3DVS_VERSION(1, 1)) &&
  500. (*m_pdwCurToken != D3DVS_VERSION(1, 0)) )
  501. {
  502. D3D_THROW_FAIL("Invalid vertex shader code version");
  503. }
  504. m_pdwCurToken++;
  505. while (m_pdwCurToken < pEnd && *m_pdwCurToken != D3DVS_END())
  506. {
  507. DWORD * pdwNextToken = m_pdwCurToken;
  508. DWORD dwInst = *m_pdwCurToken;
  509. if (!IsInstructionToken(dwInst))
  510. {
  511. PrintInstCount();
  512. D3D_THROW_FAIL("Intruction token has 31 bit set");
  513. }
  514. DWORD dwOpCode = D3DSI_GETOPCODE(dwInst);
  515. m_pdwCurToken++;
  516. switch (dwOpCode)
  517. {
  518. case D3DSIO_COMMENT:
  519. case D3DSIO_NOP : ; break;
  520. case D3DSIO_MOV :
  521. case D3DSIO_ADD :
  522. case D3DSIO_MAD :
  523. case D3DSIO_MUL :
  524. case D3DSIO_RCP :
  525. case D3DSIO_RSQ :
  526. case D3DSIO_DP3 :
  527. case D3DSIO_DP4 :
  528. case D3DSIO_MIN :
  529. case D3DSIO_MAX :
  530. case D3DSIO_SLT :
  531. case D3DSIO_SGE :
  532. case D3DSIO_EXP :
  533. case D3DSIO_LOG :
  534. case D3DSIO_EXPP :
  535. case D3DSIO_LOGP :
  536. case D3DSIO_LIT :
  537. case D3DSIO_DST :
  538. case D3DSIO_FRC :
  539. case D3DSIO_M4x4 :
  540. case D3DSIO_M4x3 :
  541. case D3DSIO_M3x4 :
  542. case D3DSIO_M3x3 :
  543. case D3DSIO_M3x2 :
  544. {
  545. // Find out if output register are modified by the command and
  546. // update the output FVF
  547. DWORD dwOffset;
  548. EvalDestination();
  549. VVM_WORD* m_pOutRegister = NULL;
  550. if ((m_pDest - m_dwOffset * VVMVERTEXBATCH) != m_reg.m_r[0])
  551. {
  552. dwOffset = m_dwOffset;
  553. m_pOutRegister = m_pDest - m_dwOffset * VVMVERTEXBATCH;
  554. if (m_pOutRegister == m_reg.m_output[0])
  555. {
  556. if (dwOffset == D3DSRO_POSITION)
  557. {
  558. dwOutFVF |= D3DFVF_XYZRHW;
  559. shader->m_dwOutRegs |= CPSGPShader_POSITION;
  560. }
  561. else
  562. if (dwOffset == D3DSRO_FOG)
  563. {
  564. dwOutFVF |= D3DFVF_FOG;
  565. shader->m_dwOutRegs |= CPSGPShader_FOG;
  566. }
  567. else
  568. if (dwOffset == D3DSRO_POINT_SIZE)
  569. {
  570. dwOutFVF |= D3DFVF_PSIZE;
  571. shader->m_dwOutRegs |= CPSGPShader_PSIZE;
  572. }
  573. }
  574. else
  575. if (m_pOutRegister == m_reg.m_color[0])
  576. if (dwOffset == 0)
  577. {
  578. dwOutFVF |= D3DFVF_DIFFUSE;
  579. shader->m_dwOutRegs |= CPSGPShader_DIFFUSE;
  580. }
  581. else
  582. {
  583. dwOutFVF |= D3DFVF_SPECULAR;
  584. shader->m_dwOutRegs |= CPSGPShader_SPECULAR;
  585. }
  586. else
  587. if (m_pOutRegister == m_reg.m_texture[0])
  588. {
  589. if (TextureWritten[dwOffset] == 0)
  590. {
  591. nTexCoord++;
  592. }
  593. TextureWritten[dwOffset] |= m_WriteMask;
  594. }
  595. else
  596. if (m_pOutRegister == m_reg.m_a[0])
  597. {
  598. }
  599. else
  600. {
  601. PrintInstCount();
  602. D3D_THROW_FAIL("Invalid output register offset");
  603. }
  604. }
  605. }
  606. break;
  607. default:
  608. {
  609. PrintInstCount();
  610. D3D_THROW_FAIL("Invalid shader opcode");
  611. }
  612. }
  613. m_pdwCurToken = pdwNextToken + GetInstructionLength(dwInst);
  614. shader->m_InstCount++;
  615. if (dwOpCode != D3DSIO_COMMENT)
  616. {
  617. m_CurInstIndex++;
  618. if (m_CurInstIndex > D3DVS_MAXINSTRUCTIONCOUNT_V1_1)
  619. {
  620. D3D_THROW_FAIL("Too many instructions in the shader");
  621. }
  622. }
  623. }
  624. #ifdef DBG
  625. // compute per-instruction stuff for shader
  626. if (shader->m_InstCount)
  627. {
  628. shader->m_pInst = new VShaderInst[shader->m_InstCount];
  629. if (shader->m_pInst == NULL)
  630. {
  631. D3D_THROW_FAIL("Cannot allocate memory for shader instructions");
  632. }
  633. memset( shader->m_pInst, 0, sizeof(VShaderInst)*shader->m_InstCount );
  634. DWORD dwCurInst = 0;
  635. // Remove version
  636. m_pdwCurToken = shader->m_pdwCode + 1;
  637. pEnd = shader->m_pdwCode + shader->m_dwSize;
  638. while( m_pdwCurToken < pEnd && *m_pdwCurToken != D3DVS_END())
  639. {
  640. UINT ilength = GetInstructionLength(*m_pdwCurToken);
  641. DWORD dwOpCode = D3DSI_GETOPCODE(*m_pdwCurToken);
  642. if (dwOpCode == D3DSIO_COMMENT)
  643. {
  644. shader->m_pInst[dwCurInst].m_Tokens[0] = *m_pdwCurToken;
  645. shader->m_pInst[dwCurInst].m_pComment = (m_pdwCurToken+1);
  646. shader->m_pInst[dwCurInst].m_cdwComment = ilength - 1;
  647. }
  648. else
  649. {
  650. memcpy( shader->m_pInst[dwCurInst].m_Tokens, m_pdwCurToken,
  651. 4*ilength );
  652. VertexShaderInstDisAsm( shader->m_pInst[dwCurInst].m_String,
  653. D3DDM_MAX_VSINSTSTRING, shader->m_pInst[dwCurInst].m_Tokens, 0x0 );
  654. }
  655. m_pdwCurToken += ilength;
  656. dwCurInst++;
  657. }
  658. }
  659. #endif
  660. dwOutFVF |= nTexCoord << D3DFVF_TEXCOUNT_SHIFT;
  661. // Compute output vertex offsets and size
  662. shader->m_dwOutVerSize = 4 * sizeof(float); // X, Y, Z, RHW
  663. shader->m_nOutTexCoord = nTexCoord;
  664. DWORD dwOffset = 4 * sizeof(float); // Current offset in the output vertex
  665. if ((dwOutFVF & D3DFVF_XYZRHW) == 0)
  666. {
  667. D3D_THROW_FAIL("Position is not written by shader");
  668. }
  669. shader->m_dwPointSizeOffset = dwOffset;
  670. if (dwOutFVF & D3DFVF_PSIZE)
  671. {
  672. dwOffset += 4;
  673. shader->m_dwOutVerSize += 4;
  674. }
  675. shader->m_dwDiffuseOffset = dwOffset;
  676. if (dwOutFVF & D3DFVF_DIFFUSE)
  677. {
  678. shader->m_dwOutVerSize += 4;
  679. dwOffset += 4;
  680. }
  681. shader->m_dwSpecularOffset = dwOffset;
  682. if (dwOutFVF & D3DFVF_SPECULAR)
  683. {
  684. dwOffset += 4;
  685. shader->m_dwOutVerSize += 4;
  686. }
  687. shader->m_dwFogOffset = dwOffset;
  688. if (dwOutFVF & D3DFVF_FOG)
  689. {
  690. dwOffset += 4;
  691. shader->m_dwOutVerSize += 4;
  692. }
  693. // Initialize texture coordinates
  694. shader->m_dwTextureOffset = dwOffset;
  695. if (nTexCoord)
  696. {
  697. for (DWORD i = 0; i < nTexCoord; i++)
  698. {
  699. DWORD n; // Size of texture coordinates
  700. if (TextureWritten[i] == 0)
  701. {
  702. D3D_THROW_FAIL("Texture coordinates are not continuous");
  703. }
  704. switch (TextureWritten[i])
  705. {
  706. case D3DSP_WRITEMASK_ALL:
  707. dwOutFVF |= D3DFVF_TEXCOORDSIZE4(i);
  708. n = 4 * sizeof(float);
  709. break;
  710. case D3DSP_WRITEMASK_0 | D3DSP_WRITEMASK_1 | D3DSP_WRITEMASK_2:
  711. dwOutFVF |= D3DFVF_TEXCOORDSIZE3(i);
  712. n = 3 * sizeof(float);
  713. break;
  714. case D3DSP_WRITEMASK_0 | D3DSP_WRITEMASK_1:
  715. dwOutFVF |= D3DFVF_TEXCOORDSIZE2(i);
  716. n = 2 * sizeof(float);
  717. break;
  718. case D3DSP_WRITEMASK_0:
  719. dwOutFVF |= D3DFVF_TEXCOORDSIZE1(i);
  720. n = 1 * sizeof(float);
  721. break;
  722. default:
  723. D3D_THROW_FAIL("Invalid write mask for texture register");
  724. }
  725. shader->m_dwOutVerSize += n;
  726. shader->m_dwOutTexCoordSize[i] = n;
  727. dwOffset += n;
  728. }
  729. }
  730. shader->m_dwOutFVF = dwOutFVF;
  731. }
  732. //-----------------------------------------------------------------------------
  733. CVShaderCode* CVertexVM::CreateShader(CVElement* pElements, DWORD dwNumElements,
  734. DWORD* pCode)
  735. {
  736. CVShaderCodeI* pShaderCode = NULL;
  737. try
  738. {
  739. pShaderCode = new CVShaderCodeI();
  740. if (pShaderCode == NULL)
  741. {
  742. D3D_THROW(E_OUTOFMEMORY, "Cannot allocate memory");
  743. }
  744. ValidateShader(pShaderCode, pCode);
  745. return pShaderCode;
  746. }
  747. catch (HRESULT e)
  748. {
  749. delete pShaderCode;
  750. D3D_ERR("Error in shader code creation");
  751. return NULL;
  752. }
  753. }
  754. //-----------------------------------------------------------------------------
  755. HRESULT CVertexVM::SetActiveShader(CVShaderCode* pCode)
  756. {
  757. m_pCurrentShader = (CVShaderCodeI*)pCode;
  758. return D3D_OK;
  759. }
  760. //-----------------------------------------------------------------------------
  761. // - parses destination token
  762. // - computes m_pDest, m_WrideMask, m_dwOffset for the destination
  763. // - current token pointer is andvanced to the next token
  764. //
  765. void CVertexVM::EvalDestination()
  766. {
  767. DWORD dwCurToken = *m_pdwCurToken;
  768. DWORD dwRegType = D3DSI_GETREGTYPE(dwCurToken);
  769. m_dwOffset = D3DSI_GETREGNUM(dwCurToken);
  770. m_WriteMask = D3DSI_GETWRITEMASK(dwCurToken);
  771. switch (dwRegType)
  772. {
  773. case D3DSPR_TEMP:
  774. m_pDest = m_reg.m_r[0];
  775. break;
  776. case D3DSPR_RASTOUT:
  777. m_pDest = m_reg.m_output[0];
  778. break;
  779. case D3DSPR_ATTROUT:
  780. m_pDest = m_reg.m_color[0];
  781. break;
  782. case D3DSPR_TEXCRDOUT:
  783. m_pDest = m_reg.m_texture[0];
  784. break;
  785. case D3DSPR_ADDR:
  786. m_pDest = m_reg.m_a[0];
  787. break;
  788. default:
  789. PrintInstCount();
  790. D3D_THROW_FAIL("Invalid register for destination");
  791. }
  792. m_pdwCurToken++;
  793. m_pDest += m_dwOffset * VVMVERTEXBATCH;
  794. }
  795. //---------------------------------------------------------------------
  796. void CVertexVM::PrintInstCount()
  797. {
  798. D3D_ERR("Error in instruction number: %d", m_CurInstIndex + 1);
  799. }
  800. //---------------------------------------------------------------------
  801. // Computes m_Source[index] and advances m_pdwCurToken
  802. //
  803. void CVertexVM::EvalSource(DWORD index)
  804. {
  805. const DWORD dwCurToken = *m_pdwCurToken;
  806. const DWORD dwRegType = D3DSI_GETREGTYPE(dwCurToken);
  807. const DWORD dwOffset = D3DSI_GETREGNUM(dwCurToken);
  808. DWORD swizzle = D3DVS_GETSWIZZLE(dwCurToken);
  809. VVM_WORD *src;
  810. VVM_WORD *outsrc = m_Source[index];
  811. if (dwRegType == D3DSPR_CONST)
  812. {
  813. D3DVS_ADDRESSMODE_TYPE am;
  814. am = (D3DVS_ADDRESSMODE_TYPE)D3DVS_GETADDRESSMODE(dwCurToken);
  815. int offset = (int)dwOffset;
  816. if (am == D3DVS_ADDRMODE_RELATIVE)
  817. {
  818. for (UINT i=0; i < m_count; i++)
  819. {
  820. int relOffset = *(int*)&m_reg.m_a[0][i].x;
  821. offset = (int)dwOffset + relOffset;
  822. #if DBG
  823. if (offset < 0 || offset >= D3DVS_CONSTREG_MAX_V1_1)
  824. {
  825. PrintInstCount();
  826. D3D_THROW_FAIL("Constant register index is out of bounds");
  827. }
  828. if (!m_c_initialized[offset])
  829. {
  830. PrintInstCount();
  831. D3D_ERR("Attempt to read from uninitialized constant register %d", offset);
  832. D3D_THROW_FAIL("");
  833. }
  834. #endif
  835. src = &m_reg.m_c[offset];
  836. if (swizzle == D3DVS_NOSWIZZLE)
  837. *outsrc = *src;
  838. else
  839. {
  840. // Where to take X
  841. const DWORD dwSrcX = D3DVS_GETSWIZZLECOMP(dwCurToken, 0);
  842. // Where to take Y
  843. const DWORD dwSrcY = D3DVS_GETSWIZZLECOMP(dwCurToken, 1);
  844. // Where to take Z
  845. const DWORD dwSrcZ = D3DVS_GETSWIZZLECOMP(dwCurToken, 2);
  846. // Where to take W
  847. const DWORD dwSrcW = D3DVS_GETSWIZZLECOMP(dwCurToken, 3);
  848. outsrc->x = ((float*)src)[dwSrcX];
  849. outsrc->y = ((float*)src)[dwSrcY];
  850. outsrc->z = ((float*)src)[dwSrcZ];
  851. outsrc->w = ((float*)src)[dwSrcW];
  852. }
  853. outsrc++;
  854. }
  855. }
  856. else
  857. {
  858. #if DBG
  859. if (!m_c_initialized[offset])
  860. {
  861. PrintInstCount();
  862. D3D_ERR("Attempt to read from uninitialized constant register %d", offset);
  863. D3D_THROW_FAIL("");
  864. }
  865. #endif
  866. src = &m_reg.m_c[offset];
  867. if (swizzle == D3DVS_NOSWIZZLE)
  868. {
  869. for (UINT i=0; i < m_count; i++)
  870. {
  871. outsrc[i] = *src;
  872. }
  873. }
  874. else
  875. {
  876. // Where to take X
  877. const DWORD dwSrcX = D3DVS_GETSWIZZLECOMP(dwCurToken, 0);
  878. // Where to take Y
  879. const DWORD dwSrcY = D3DVS_GETSWIZZLECOMP(dwCurToken, 1);
  880. // Where to take Z
  881. const DWORD dwSrcZ = D3DVS_GETSWIZZLECOMP(dwCurToken, 2);
  882. // Where to take W
  883. const DWORD dwSrcW = D3DVS_GETSWIZZLECOMP(dwCurToken, 3);
  884. VVM_WORD v;
  885. v.x = ((float*)src)[dwSrcX];
  886. v.y = ((float*)src)[dwSrcY];
  887. v.z = ((float*)src)[dwSrcZ];
  888. v.w = ((float*)src)[dwSrcW];
  889. for (UINT i=0; i < m_count; i++)
  890. {
  891. outsrc[i] = v;
  892. }
  893. }
  894. }
  895. }
  896. else
  897. {
  898. src = this->GetDataAddr(dwRegType, dwOffset);
  899. if (swizzle == D3DVS_NOSWIZZLE)
  900. memcpy(outsrc, src, m_count * sizeof(VVM_WORD));
  901. else
  902. {
  903. // Where to take X
  904. const DWORD dwSrcX = D3DVS_GETSWIZZLECOMP(dwCurToken, 0);
  905. // Where to take Y
  906. const DWORD dwSrcY = D3DVS_GETSWIZZLECOMP(dwCurToken, 1);
  907. // Where to take Z
  908. const DWORD dwSrcZ = D3DVS_GETSWIZZLECOMP(dwCurToken, 2);
  909. // Where to take W
  910. const DWORD dwSrcW = D3DVS_GETSWIZZLECOMP(dwCurToken, 3);
  911. for (UINT i=0; i < m_count; i++)
  912. {
  913. outsrc->x = ((float*)src)[dwSrcX];
  914. outsrc->y = ((float*)src)[dwSrcY];
  915. outsrc->z = ((float*)src)[dwSrcZ];
  916. outsrc->w = ((float*)src)[dwSrcW];
  917. outsrc++;
  918. src++;
  919. }
  920. }
  921. }
  922. if (D3DVS_GETSRCMODIFIER(dwCurToken) == D3DSPSM_NEG)
  923. {
  924. VVM_WORD *outsrc = m_Source[index];
  925. for (UINT i=0; i < m_count; i++)
  926. {
  927. outsrc->x = -outsrc->x;
  928. outsrc->y = -outsrc->y;
  929. outsrc->z = -outsrc->z;
  930. outsrc->w = -outsrc->w;
  931. outsrc++;
  932. }
  933. }
  934. m_pdwCurToken++;
  935. }
  936. //---------------------------------------------------------------------
  937. // Computes source operands and advances m_pdwCurToken
  938. //
  939. // Parameters:
  940. // index - index of the first source operand
  941. // count - number of source operands
  942. //
  943. void CVertexVM::EvalSource(DWORD index, DWORD count)
  944. {
  945. const DWORD dwCurToken = *m_pdwCurToken;
  946. const DWORD dwRegType = D3DSI_GETREGTYPE(dwCurToken);
  947. const DWORD dwOffset = D3DSI_GETREGNUM(dwCurToken);
  948. DWORD swizzle = D3DVS_GETSWIZZLE(dwCurToken);
  949. VVM_WORD *src;
  950. VVM_WORD *outsrc = m_Source[index];
  951. if (dwRegType == D3DSPR_CONST)
  952. {
  953. D3DVS_ADDRESSMODE_TYPE am;
  954. am = (D3DVS_ADDRESSMODE_TYPE)D3DVS_GETADDRESSMODE(dwCurToken);
  955. int offset = (int)dwOffset;
  956. if (am == D3DVS_ADDRMODE_RELATIVE)
  957. {
  958. for (UINT j=0; j < count; j++)
  959. {
  960. VVM_WORD *outsrc = m_Source[index + j];
  961. for (UINT i=0; i < m_count; i++)
  962. {
  963. int relOffset = *(int*)&m_reg.m_a[0][i].x;
  964. offset = (int)dwOffset + relOffset;
  965. #if DBG
  966. if (offset < 0 || offset >= D3DVS_CONSTREG_MAX_V1_1)
  967. {
  968. PrintInstCount();
  969. D3D_THROW_FAIL("Constant register index is out of bounds");
  970. }
  971. if (!m_c_initialized[offset])
  972. {
  973. PrintInstCount();
  974. D3D_ERR("Attempt to read from uninitialized constant register %d", offset);
  975. D3D_THROW_FAIL("");
  976. }
  977. #endif // DBG
  978. src = &m_reg.m_c[offset] + j;
  979. *outsrc = *src;
  980. outsrc++;
  981. }
  982. }
  983. }
  984. else
  985. {
  986. #if DBG
  987. for (UINT i = 0; i < count; i++)
  988. {
  989. if (!m_c_initialized[offset + i])
  990. {
  991. PrintInstCount();
  992. D3D_ERR("Attempt to read from uninitialized constant register %d", i);
  993. D3D_THROW_FAIL("");
  994. }
  995. }
  996. #endif
  997. src = &m_reg.m_c[offset];
  998. for (UINT j=0; j < count; j++)
  999. {
  1000. for (UINT i=0; i < m_count; i++)
  1001. {
  1002. outsrc[i] = *src;
  1003. }
  1004. src++;
  1005. outsrc += VVMVERTEXBATCH;
  1006. }
  1007. }
  1008. }
  1009. else
  1010. {
  1011. src = this->GetDataAddr(dwRegType, dwOffset);
  1012. UINT size = m_count * sizeof(VVM_WORD);
  1013. for (UINT i=0; i < count; i++)
  1014. {
  1015. memcpy(outsrc, src, size);
  1016. outsrc += VVMVERTEXBATCH;
  1017. src += VVMVERTEXBATCH;
  1018. }
  1019. }
  1020. m_pdwCurToken++;
  1021. }
  1022. //-----------------------------------------------------------------------------
  1023. void CVertexVM::InstMov()
  1024. {
  1025. EvalDestination();
  1026. EvalSource(0);
  1027. if (m_pDest == m_reg.m_a[0])
  1028. {
  1029. for (UINT i=0; i < m_count; i++)
  1030. {
  1031. float p = (float)floor(m_Source[0][i].x);
  1032. *(int*)&m_pDest[i].x = FTOI(p);
  1033. }
  1034. }
  1035. else
  1036. {
  1037. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1038. {
  1039. memcpy(m_pDest, m_Source[0], m_BatchSize);
  1040. }
  1041. else
  1042. {
  1043. for (UINT i=0; i < m_count; i++)
  1044. {
  1045. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1046. m_pDest[i].x = m_Source[0][i].x;
  1047. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1048. m_pDest[i].y = m_Source[0][i].y;
  1049. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1050. m_pDest[i].z = m_Source[0][i].z;
  1051. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1052. m_pDest[i].w = m_Source[0][i].w;
  1053. }
  1054. }
  1055. }
  1056. }
  1057. //-----------------------------------------------------------------------------
  1058. void CVertexVM::InstAdd()
  1059. {
  1060. EvalDestination();
  1061. EvalSource(0);
  1062. EvalSource(1);
  1063. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1064. {
  1065. for (UINT i=0; i < m_count; i++)
  1066. {
  1067. m_pDest[i].x = m_Source[0][i].x + m_Source[1][i].x;
  1068. m_pDest[i].y = m_Source[0][i].y + m_Source[1][i].y;
  1069. m_pDest[i].z = m_Source[0][i].z + m_Source[1][i].z;
  1070. m_pDest[i].w = m_Source[0][i].w + m_Source[1][i].w;
  1071. }
  1072. }
  1073. else
  1074. {
  1075. for (UINT i=0; i < m_count; i++)
  1076. {
  1077. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1078. m_pDest[i].x = m_Source[0][i].x + m_Source[1][i].x;
  1079. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1080. m_pDest[i].y = m_Source[0][i].y + m_Source[1][i].y;
  1081. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1082. m_pDest[i].z = m_Source[0][i].z + m_Source[1][i].z;
  1083. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1084. m_pDest[i].w = m_Source[0][i].w + m_Source[1][i].w;
  1085. }
  1086. }
  1087. }
  1088. //-----------------------------------------------------------------------------
  1089. void CVertexVM::InstMad()
  1090. {
  1091. EvalDestination();
  1092. EvalSource(0);
  1093. EvalSource(1);
  1094. EvalSource(2);
  1095. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1096. {
  1097. for (UINT i=0; i < m_count; i++)
  1098. {
  1099. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x + m_Source[2][i].x;
  1100. m_pDest[i].y = m_Source[0][i].y * m_Source[1][i].y + m_Source[2][i].y;
  1101. m_pDest[i].z = m_Source[0][i].z * m_Source[1][i].z + m_Source[2][i].z;
  1102. m_pDest[i].w = m_Source[0][i].w * m_Source[1][i].w + m_Source[2][i].w;
  1103. }
  1104. }
  1105. else
  1106. {
  1107. for (UINT i=0; i < m_count; i++)
  1108. {
  1109. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1110. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x + m_Source[2][i].x;
  1111. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1112. m_pDest[i].y = m_Source[0][i].y * m_Source[1][i].y + m_Source[2][i].y;
  1113. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1114. m_pDest[i].z = m_Source[0][i].z * m_Source[1][i].z + m_Source[2][i].z;
  1115. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1116. m_pDest[i].w = m_Source[0][i].w * m_Source[1][i].w + m_Source[2][i].w;
  1117. }
  1118. }
  1119. }
  1120. //-----------------------------------------------------------------------------
  1121. void CVertexVM::InstMul()
  1122. {
  1123. EvalDestination();
  1124. EvalSource(0);
  1125. EvalSource(1);
  1126. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1127. {
  1128. for (UINT i=0; i < m_count; i++)
  1129. {
  1130. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x;
  1131. m_pDest[i].y = m_Source[0][i].y * m_Source[1][i].y;
  1132. m_pDest[i].z = m_Source[0][i].z * m_Source[1][i].z;
  1133. m_pDest[i].w = m_Source[0][i].w * m_Source[1][i].w;
  1134. }
  1135. }
  1136. else
  1137. {
  1138. for (UINT i=0; i < m_count; i++)
  1139. {
  1140. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1141. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x;
  1142. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1143. m_pDest[i].y = m_Source[0][i].y * m_Source[1][i].y;
  1144. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1145. m_pDest[i].z = m_Source[0][i].z * m_Source[1][i].z;
  1146. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1147. m_pDest[i].w = m_Source[0][i].w * m_Source[1][i].w;
  1148. }
  1149. }
  1150. }
  1151. //-----------------------------------------------------------------------------
  1152. void CVertexVM::InstDP3()
  1153. {
  1154. EvalDestination();
  1155. EvalSource(0);
  1156. EvalSource(1);
  1157. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1158. {
  1159. for (UINT i=0; i < m_count; i++)
  1160. {
  1161. m_pDest[i].x =
  1162. m_pDest[i].y =
  1163. m_pDest[i].z =
  1164. m_pDest[i].w = m_Source[0][i].x * m_Source[1][i].x +
  1165. m_Source[0][i].y * m_Source[1][i].y +
  1166. m_Source[0][i].z * m_Source[1][i].z;
  1167. }
  1168. }
  1169. else
  1170. {
  1171. for (UINT i=0; i < m_count; i++)
  1172. {
  1173. float v = m_Source[0][i].x * m_Source[1][i].x +
  1174. m_Source[0][i].y * m_Source[1][i].y +
  1175. m_Source[0][i].z * m_Source[1][i].z;
  1176. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1177. m_pDest[i].x = v;
  1178. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1179. m_pDest[i].y = v;
  1180. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1181. m_pDest[i].z = v;
  1182. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1183. m_pDest[i].w = v;
  1184. }
  1185. }
  1186. }
  1187. //-----------------------------------------------------------------------------
  1188. void CVertexVM::InstDP4()
  1189. {
  1190. EvalDestination();
  1191. EvalSource(0);
  1192. EvalSource(1);
  1193. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1194. {
  1195. for (UINT i=0; i < m_count; i++)
  1196. {
  1197. m_pDest[i].x =
  1198. m_pDest[i].y =
  1199. m_pDest[i].z =
  1200. m_pDest[i].w = m_Source[0][i].x * m_Source[1][i].x +
  1201. m_Source[0][i].y * m_Source[1][i].y +
  1202. m_Source[0][i].z * m_Source[1][i].z +
  1203. m_Source[0][i].w * m_Source[1][i].w;
  1204. }
  1205. }
  1206. else
  1207. {
  1208. for (UINT i=0; i < m_count; i++)
  1209. {
  1210. float v = m_Source[0][i].x * m_Source[1][i].x +
  1211. m_Source[0][i].y * m_Source[1][i].y +
  1212. m_Source[0][i].z * m_Source[1][i].z +
  1213. m_Source[0][i].w * m_Source[1][i].w;
  1214. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1215. m_pDest[i].x = v;
  1216. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1217. m_pDest[i].y = v;
  1218. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1219. m_pDest[i].z = v;
  1220. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1221. m_pDest[i].w = v;
  1222. }
  1223. }
  1224. }
  1225. //-----------------------------------------------------------------------------
  1226. void CVertexVM::InstRcp()
  1227. {
  1228. EvalDestination();
  1229. EvalSource(0);
  1230. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1231. {
  1232. for (UINT i=0; i < m_count; i++)
  1233. {
  1234. float v = m_Source[0][i].w;
  1235. if (v == 1.0f)
  1236. {
  1237. // Must be exactly 1.0
  1238. m_pDest[i].x =
  1239. m_pDest[i].y =
  1240. m_pDest[i].z =
  1241. m_pDest[i].w = 1.0f;
  1242. }
  1243. else
  1244. if (v == 0)
  1245. {
  1246. m_pDest[i].x =
  1247. m_pDest[i].y =
  1248. m_pDest[i].z =
  1249. m_pDest[i].w = PLUS_INFINITY();
  1250. }
  1251. else
  1252. {
  1253. m_pDest[i].x =
  1254. m_pDest[i].y =
  1255. m_pDest[i].z =
  1256. m_pDest[i].w = 1.0f/v;
  1257. }
  1258. }
  1259. }
  1260. else
  1261. {
  1262. for (UINT i=0; i < m_count; i++)
  1263. {
  1264. float v = m_Source[0][i].w;
  1265. if (FLOAT_EQZ(v))
  1266. v = PLUS_INFINITY();
  1267. else
  1268. if (v != 1.0f)
  1269. v = 1.0f/v;
  1270. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1271. m_pDest[i].x = v;
  1272. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1273. m_pDest[i].y = v;
  1274. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1275. m_pDest[i].z = v;
  1276. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1277. m_pDest[i].w = v;
  1278. }
  1279. }
  1280. }
  1281. //-----------------------------------------------------------------------------
  1282. void CVertexVM::InstRsq()
  1283. {
  1284. EvalDestination();
  1285. EvalSource(0);
  1286. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1287. {
  1288. for (UINT i=0; i < m_count; i++)
  1289. {
  1290. float v = ABSF(m_Source[0][i].w);
  1291. if (v == 1.0f)
  1292. {
  1293. m_pDest[i].x =
  1294. m_pDest[i].y =
  1295. m_pDest[i].z =
  1296. m_pDest[i].w = 1.0f;
  1297. }
  1298. else
  1299. if (v == 0)
  1300. {
  1301. m_pDest[i].x =
  1302. m_pDest[i].y =
  1303. m_pDest[i].z =
  1304. m_pDest[i].w = PLUS_INFINITY();
  1305. }
  1306. else
  1307. {
  1308. v = (float)(1.0f / sqrt(v));
  1309. m_pDest[i].x =
  1310. m_pDest[i].y =
  1311. m_pDest[i].z =
  1312. m_pDest[i].w = v;
  1313. }
  1314. }
  1315. }
  1316. else
  1317. {
  1318. for (UINT i=0; i < m_count; i++)
  1319. {
  1320. float v = ABSF(m_Source[0][i].w);
  1321. if (FLOAT_EQZ(v))
  1322. v = PLUS_INFINITY();
  1323. else
  1324. if (FLOAT_CMP_PONE(v, !=))
  1325. v = (float)(1.0f / sqrt(v));
  1326. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1327. m_pDest[i].x = v;
  1328. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1329. m_pDest[i].y = v;
  1330. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1331. m_pDest[i].z = v;
  1332. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1333. m_pDest[i].w = v;
  1334. }
  1335. }
  1336. }
  1337. //-----------------------------------------------------------------------------
  1338. void CVertexVM::InstSlt()
  1339. {
  1340. EvalDestination();
  1341. EvalSource(0);
  1342. EvalSource(1);
  1343. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1344. {
  1345. for (UINT i=0; i < m_count; i++)
  1346. {
  1347. m_pDest[i].x = (m_Source[0][i].x < m_Source[1][i].x) ? 1.0f : 0.0f;
  1348. m_pDest[i].y = (m_Source[0][i].y < m_Source[1][i].y) ? 1.0f : 0.0f;
  1349. m_pDest[i].z = (m_Source[0][i].z < m_Source[1][i].z) ? 1.0f : 0.0f;
  1350. m_pDest[i].w = (m_Source[0][i].w < m_Source[1][i].w) ? 1.0f : 0.0f;
  1351. }
  1352. }
  1353. else
  1354. {
  1355. for (UINT i=0; i < m_count; i++)
  1356. {
  1357. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1358. m_pDest[i].x = (m_Source[0][i].x < m_Source[1][i].x) ? 1.0f : 0.0f;
  1359. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1360. m_pDest[i].y = (m_Source[0][i].y < m_Source[1][i].y) ? 1.0f : 0.0f;
  1361. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1362. m_pDest[i].z = (m_Source[0][i].z < m_Source[1][i].z) ? 1.0f : 0.0f;
  1363. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1364. m_pDest[i].w = (m_Source[0][i].w < m_Source[1][i].w) ? 1.0f : 0.0f;
  1365. }
  1366. }
  1367. }
  1368. //-----------------------------------------------------------------------------
  1369. void CVertexVM::InstSge()
  1370. {
  1371. EvalDestination();
  1372. EvalSource(0);
  1373. EvalSource(1);
  1374. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1375. {
  1376. for (UINT i=0; i < m_count; i++)
  1377. {
  1378. m_pDest[i].x = (m_Source[0][i].x >= m_Source[1][i].x) ? 1.0f : 0.0f;
  1379. m_pDest[i].y = (m_Source[0][i].y >= m_Source[1][i].y) ? 1.0f : 0.0f;
  1380. m_pDest[i].z = (m_Source[0][i].z >= m_Source[1][i].z) ? 1.0f : 0.0f;
  1381. m_pDest[i].w = (m_Source[0][i].w >= m_Source[1][i].w) ? 1.0f : 0.0f;
  1382. }
  1383. }
  1384. else
  1385. {
  1386. for (UINT i=0; i < m_count; i++)
  1387. {
  1388. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1389. m_pDest[i].x = (m_Source[0][i].x >= m_Source[1][i].x) ? 1.0f : 0.0f;
  1390. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1391. m_pDest[i].y = (m_Source[0][i].y >= m_Source[1][i].y) ? 1.0f : 0.0f;
  1392. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1393. m_pDest[i].z = (m_Source[0][i].z >= m_Source[1][i].z) ? 1.0f : 0.0f;
  1394. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1395. m_pDest[i].w = (m_Source[0][i].w >= m_Source[1][i].w) ? 1.0f : 0.0f;
  1396. }
  1397. }
  1398. }
  1399. //-----------------------------------------------------------------------------
  1400. void CVertexVM::InstMin()
  1401. {
  1402. EvalDestination();
  1403. EvalSource(0);
  1404. EvalSource(1);
  1405. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1406. {
  1407. for (UINT i=0; i < m_count; i++)
  1408. {
  1409. m_pDest[i].x=(m_Source[0][i].x < m_Source[1][i].x) ? m_Source[0][i].x : m_Source[1][i].x;
  1410. m_pDest[i].y=(m_Source[0][i].y < m_Source[1][i].y) ? m_Source[0][i].y : m_Source[1][i].y;
  1411. m_pDest[i].z=(m_Source[0][i].z < m_Source[1][i].z) ? m_Source[0][i].z : m_Source[1][i].z;
  1412. m_pDest[i].w=(m_Source[0][i].w < m_Source[1][i].w) ? m_Source[0][i].w : m_Source[1][i].w;
  1413. }
  1414. }
  1415. else
  1416. {
  1417. for (UINT i=0; i < m_count; i++)
  1418. {
  1419. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1420. m_pDest[i].x=(m_Source[0][i].x < m_Source[1][i].x) ? m_Source[0][i].x : m_Source[1][i].x;
  1421. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1422. m_pDest[i].y=(m_Source[0][i].y < m_Source[1][i].y) ? m_Source[0][i].y : m_Source[1][i].y;
  1423. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1424. m_pDest[i].z=(m_Source[0][i].z < m_Source[1][i].z) ? m_Source[0][i].z : m_Source[1][i].z;
  1425. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1426. m_pDest[i].w=(m_Source[0][i].w < m_Source[1][i].w) ? m_Source[0][i].w : m_Source[1][i].w;
  1427. }
  1428. }
  1429. }
  1430. //-----------------------------------------------------------------------------
  1431. void CVertexVM::InstMax()
  1432. {
  1433. EvalDestination();
  1434. EvalSource(0);
  1435. EvalSource(1);
  1436. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1437. {
  1438. for (UINT i=0; i < m_count; i++)
  1439. {
  1440. m_pDest[i].x=(m_Source[0][i].x >= m_Source[1][i].x) ? m_Source[0][i].x : m_Source[1][i].x;
  1441. m_pDest[i].y=(m_Source[0][i].y >= m_Source[1][i].y) ? m_Source[0][i].y : m_Source[1][i].y;
  1442. m_pDest[i].z=(m_Source[0][i].z >= m_Source[1][i].z) ? m_Source[0][i].z : m_Source[1][i].z;
  1443. m_pDest[i].w=(m_Source[0][i].w >= m_Source[1][i].w) ? m_Source[0][i].w : m_Source[1][i].w;
  1444. }
  1445. }
  1446. else
  1447. {
  1448. for (UINT i=0; i < m_count; i++)
  1449. {
  1450. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1451. m_pDest[i].x=(m_Source[0][i].x >= m_Source[1][i].x) ? m_Source[0][i].x : m_Source[1][i].x;
  1452. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1453. m_pDest[i].y=(m_Source[0][i].y >= m_Source[1][i].y) ? m_Source[0][i].y : m_Source[1][i].y;
  1454. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1455. m_pDest[i].z=(m_Source[0][i].z >= m_Source[1][i].z) ? m_Source[0][i].z : m_Source[1][i].z;
  1456. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1457. m_pDest[i].w=(m_Source[0][i].w >= m_Source[1][i].w) ? m_Source[0][i].w : m_Source[1][i].w;
  1458. }
  1459. }
  1460. }
  1461. //-----------------------------------------------------------------------------
  1462. // Approximation 2**x
  1463. //
  1464. float ExpApprox(float x)
  1465. {
  1466. float tmp = (float)pow(2, x);
  1467. // Artificially reduce precision
  1468. DWORD tmpd = *(DWORD*)&tmp & 0xFFFFFF00;
  1469. return *(float*)&tmpd;
  1470. }
  1471. //-----------------------------------------------------------------------------
  1472. // Approximation Log2(x)
  1473. //
  1474. const float LOG2 = (float)(1.0f/log(2));
  1475. float LogApprox(float x)
  1476. {
  1477. float tmp = (float)(log(x) * LOG2);
  1478. // Artificially reduce precision
  1479. DWORD tmpd = *(DWORD*)&tmp & 0xFFFFFF00;
  1480. return *(float*)&tmpd;
  1481. }
  1482. //-----------------------------------------------------------------------------
  1483. // Full precision EXP
  1484. //
  1485. void CVertexVM::InstExp()
  1486. {
  1487. EvalDestination();
  1488. EvalSource(0);
  1489. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1490. {
  1491. for (UINT i=0; i < m_count; i++)
  1492. {
  1493. float v = (float)pow(2, m_Source[0][i].w);
  1494. m_pDest[i].x = v;
  1495. m_pDest[i].y = v;
  1496. m_pDest[i].z = v;
  1497. m_pDest[i].w = v;
  1498. }
  1499. }
  1500. else
  1501. {
  1502. for (UINT i=0; i < m_count; i++)
  1503. {
  1504. float v = (float)pow(2, m_Source[0][i].w);
  1505. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1506. m_pDest[i].x = v;
  1507. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1508. m_pDest[i].y = v;
  1509. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1510. m_pDest[i].z = v;
  1511. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1512. m_pDest[i].w = v;
  1513. }
  1514. }
  1515. }
  1516. //-----------------------------------------------------------------------------
  1517. // Low precision EXP
  1518. //
  1519. void CVertexVM::InstExpP()
  1520. {
  1521. EvalDestination();
  1522. EvalSource(0);
  1523. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1524. {
  1525. for (UINT i=0; i < m_count; i++)
  1526. {
  1527. float w = m_Source[0][i].w; // Input value
  1528. float v = (float)floor(w);
  1529. m_pDest[i].x = (float)pow(2, v);
  1530. m_pDest[i].y = w - v;
  1531. m_pDest[i].z = ExpApprox(w);
  1532. m_pDest[i].w = 1;
  1533. }
  1534. }
  1535. else
  1536. {
  1537. for (UINT i=0; i < m_count; i++)
  1538. {
  1539. float w = m_Source[0][i].w; // Input value
  1540. float v = (float)floor(w);
  1541. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1542. m_pDest[i].x = (float)pow(2, v);
  1543. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1544. m_pDest[i].y = w - v;
  1545. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1546. m_pDest[i].z = ExpApprox(w);
  1547. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1548. m_pDest[i].w = 1;
  1549. }
  1550. }
  1551. }
  1552. //-----------------------------------------------------------------------------
  1553. // Full precision LOG
  1554. //
  1555. void CVertexVM::InstLog()
  1556. {
  1557. EvalDestination();
  1558. EvalSource(0);
  1559. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1560. {
  1561. for (UINT i=0; i < m_count; i++)
  1562. {
  1563. float v = ABSF(m_Source[0][i].w);
  1564. if (v != 0)
  1565. {
  1566. m_pDest[i].x =
  1567. m_pDest[i].y =
  1568. m_pDest[i].z =
  1569. m_pDest[i].w = (float)(log(v) * LOG2);
  1570. }
  1571. else
  1572. {
  1573. m_pDest[i].x =
  1574. m_pDest[i].y =
  1575. m_pDest[i].z =
  1576. m_pDest[i].w = MINUS_INFINITY();
  1577. }
  1578. }
  1579. }
  1580. else
  1581. {
  1582. for (UINT i=0; i < m_count; i++)
  1583. {
  1584. float v = ABSF(m_Source[0][i].w);
  1585. if (v != 0)
  1586. {
  1587. v = (float)(log(v) * LOG2);
  1588. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1589. m_pDest[i].x = v;
  1590. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1591. m_pDest[i].y = v;
  1592. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1593. m_pDest[i].z = v;
  1594. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1595. m_pDest[i].w = v;
  1596. }
  1597. else
  1598. {
  1599. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1600. m_pDest[i].x = MINUS_INFINITY();
  1601. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1602. m_pDest[i].y = MINUS_INFINITY();
  1603. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1604. m_pDest[i].z = MINUS_INFINITY();
  1605. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1606. m_pDest[i].w = MINUS_INFINITY();
  1607. }
  1608. }
  1609. }
  1610. }
  1611. //-----------------------------------------------------------------------------
  1612. // Low precision LOG
  1613. //
  1614. void CVertexVM::InstLogP()
  1615. {
  1616. EvalDestination();
  1617. EvalSource(0);
  1618. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1619. {
  1620. for (UINT i=0; i < m_count; i++)
  1621. {
  1622. float v = ABSF(m_Source[0][i].w);
  1623. if (v != 0)
  1624. {
  1625. // -128.0 <= exponent < 127.0
  1626. int p = (int)(*(DWORD*)&v >> 23) - 127;
  1627. m_pDest[i].x = (float)p;
  1628. // 1.0 <= mantissa < 2.0
  1629. p = (*(DWORD*)&v & 0x7FFFFF) | 0x3F800000;
  1630. m_pDest[i].y = *(float*)&p;
  1631. m_pDest[i].z = LogApprox(v);
  1632. m_pDest[i].w = 1.0f;
  1633. }
  1634. else
  1635. {
  1636. m_pDest[i].x = MINUS_INFINITY();
  1637. m_pDest[i].y = 1.0f;
  1638. m_pDest[i].z = MINUS_INFINITY();
  1639. m_pDest[i].w = 1.0f;
  1640. }
  1641. }
  1642. }
  1643. else
  1644. {
  1645. for (UINT i=0; i < m_count; i++)
  1646. {
  1647. float v = ABSF(m_Source[0][i].w);
  1648. if (v != 0)
  1649. {
  1650. // -128.0 <= exponent < 127.0
  1651. int p = (int)(*(DWORD*)&v >> 23) - 127;
  1652. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1653. m_pDest[i].x = (float)p;
  1654. // 1.0 <= mantissa < 2.0
  1655. p = (*(DWORD*)&v & 0x7FFFFF) | 0x3F800000;
  1656. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1657. m_pDest[i].y = *(float*)&p;
  1658. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1659. m_pDest[i].z = LogApprox(v);
  1660. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1661. m_pDest[i].w = 1.0f;
  1662. }
  1663. else
  1664. {
  1665. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1666. m_pDest[i].x = MINUS_INFINITY();
  1667. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1668. m_pDest[i].y = 1.0f;
  1669. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1670. m_pDest[i].z = MINUS_INFINITY();
  1671. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1672. m_pDest[i].w = 1.0f;
  1673. }
  1674. }
  1675. }
  1676. }
  1677. //-----------------------------------------------------------------------------
  1678. void CVertexVM::InstFrc()
  1679. {
  1680. EvalDestination();
  1681. EvalSource(0);
  1682. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1683. {
  1684. for (UINT i=0; i < m_count; i++)
  1685. {
  1686. m_pDest[i].x = m_Source[0][i].x - (float)floor(m_Source[0][i].x);
  1687. m_pDest[i].y = m_Source[0][i].y - (float)floor(m_Source[0][i].y);
  1688. m_pDest[i].z = m_Source[0][i].z - (float)floor(m_Source[0][i].z);
  1689. m_pDest[i].w = m_Source[0][i].w - (float)floor(m_Source[0][i].w);
  1690. }
  1691. }
  1692. else
  1693. {
  1694. for (UINT i=0; i < m_count; i++)
  1695. {
  1696. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1697. m_pDest[i].x = m_Source[0][i].x - (float)floor(m_Source[0][i].x);
  1698. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1699. m_pDest[i].y = m_Source[0][i].y - (float)floor(m_Source[0][i].y);
  1700. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1701. m_pDest[i].z = m_Source[0][i].z - (float)floor(m_Source[0][i].z);
  1702. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1703. m_pDest[i].w = m_Source[0][i].w - (float)floor(m_Source[0][i].w);
  1704. }
  1705. }
  1706. }
  1707. //-----------------------------------------------------------------------------
  1708. void CVertexVM::InstLit()
  1709. {
  1710. EvalDestination();
  1711. EvalSource(0);
  1712. for (UINT i=0; i < m_count; i++)
  1713. {
  1714. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1715. m_pDest[i].x = 1;
  1716. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1717. m_pDest[i].y = 0;
  1718. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1719. m_pDest[i].z = 0;
  1720. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1721. m_pDest[i].w = 1;
  1722. float power = m_Source[0][i].w;
  1723. const float MAXPOWER = 127.9961f;
  1724. if (power < -MAXPOWER)
  1725. power = -MAXPOWER; // Fits into 8.8 fixed point format
  1726. else
  1727. if (power > MAXPOWER)
  1728. power = MAXPOWER; // Fits into 8.8 fixed point format
  1729. if (m_Source[0][i].x > 0)
  1730. {
  1731. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1732. m_pDest[i].y = m_Source[0][i].x;
  1733. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1734. if (m_Source[0][i].y > 0)
  1735. {
  1736. // Allowed approximation is EXP(power * LOG(m_Source[0].y))
  1737. m_pDest[i].z = (float)(pow(m_Source[0][i].y, power));
  1738. }
  1739. }
  1740. }
  1741. }
  1742. //-----------------------------------------------------------------------------
  1743. void CVertexVM::InstDst()
  1744. {
  1745. EvalDestination();
  1746. EvalSource(0);
  1747. EvalSource(1);
  1748. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1749. {
  1750. for (UINT i=0; i < m_count; i++)
  1751. {
  1752. m_pDest[i].x = 1;
  1753. m_pDest[i].y = m_Source[0][i].y * m_Source[1][i].y;
  1754. m_pDest[i].z = m_Source[0][i].z;
  1755. m_pDest[i].w = m_Source[1][i].w;
  1756. }
  1757. }
  1758. else
  1759. {
  1760. for (UINT i=0; i < m_count; i++)
  1761. {
  1762. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1763. m_pDest[i].x = 1;
  1764. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1765. m_pDest[i].y = m_Source[0][i].y * m_Source[1][i].y;
  1766. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1767. m_pDest[i].z = m_Source[0][i].z;
  1768. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1769. m_pDest[i].w = m_Source[1][i].w;
  1770. }
  1771. }
  1772. }
  1773. //-----------------------------------------------------------------------------
  1774. void CVertexVM::InstM4x4()
  1775. {
  1776. EvalDestination();
  1777. EvalSource(0);
  1778. EvalSource(1, 4);
  1779. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1780. {
  1781. for (UINT i=0; i < m_count; i++)
  1782. {
  1783. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
  1784. m_Source[0][i].y * m_Source[1][i].y +
  1785. m_Source[0][i].z * m_Source[1][i].z +
  1786. m_Source[0][i].w * m_Source[1][i].w;
  1787. m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
  1788. m_Source[0][i].y * m_Source[2][i].y +
  1789. m_Source[0][i].z * m_Source[2][i].z +
  1790. m_Source[0][i].w * m_Source[2][i].w;
  1791. m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
  1792. m_Source[0][i].y * m_Source[3][i].y +
  1793. m_Source[0][i].z * m_Source[3][i].z +
  1794. m_Source[0][i].w * m_Source[3][i].w;
  1795. m_pDest[i].w = m_Source[0][i].x * m_Source[4][i].x +
  1796. m_Source[0][i].y * m_Source[4][i].y +
  1797. m_Source[0][i].z * m_Source[4][i].z +
  1798. m_Source[0][i].w * m_Source[4][i].w;
  1799. }
  1800. }
  1801. else
  1802. {
  1803. for (UINT i=0; i < m_count; i++)
  1804. {
  1805. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1806. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
  1807. m_Source[0][i].y * m_Source[1][i].y +
  1808. m_Source[0][i].z * m_Source[1][i].z +
  1809. m_Source[0][i].w * m_Source[1][i].w;
  1810. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1811. m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
  1812. m_Source[0][i].y * m_Source[2][i].y +
  1813. m_Source[0][i].z * m_Source[2][i].z +
  1814. m_Source[0][i].w * m_Source[2][i].w;
  1815. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1816. m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
  1817. m_Source[0][i].y * m_Source[3][i].y +
  1818. m_Source[0][i].z * m_Source[3][i].z +
  1819. m_Source[0][i].w * m_Source[3][i].w;
  1820. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1821. m_pDest[i].w = m_Source[0][i].x * m_Source[4][i].x +
  1822. m_Source[0][i].y * m_Source[4][i].y +
  1823. m_Source[0][i].z * m_Source[4][i].z +
  1824. m_Source[0][i].w * m_Source[4][i].w;
  1825. }
  1826. }
  1827. }
  1828. //-----------------------------------------------------------------------------
  1829. void CVertexVM::InstM4x3()
  1830. {
  1831. EvalDestination();
  1832. EvalSource(0);
  1833. EvalSource(1, 3);
  1834. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1835. {
  1836. for (UINT i=0; i < m_count; i++)
  1837. {
  1838. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
  1839. m_Source[0][i].y * m_Source[1][i].y +
  1840. m_Source[0][i].z * m_Source[1][i].z +
  1841. m_Source[0][i].w * m_Source[1][i].w;
  1842. m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
  1843. m_Source[0][i].y * m_Source[2][i].y +
  1844. m_Source[0][i].z * m_Source[2][i].z +
  1845. m_Source[0][i].w * m_Source[2][i].w;
  1846. m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
  1847. m_Source[0][i].y * m_Source[3][i].y +
  1848. m_Source[0][i].z * m_Source[3][i].z +
  1849. m_Source[0][i].w * m_Source[3][i].w;
  1850. }
  1851. }
  1852. else
  1853. {
  1854. for (UINT i=0; i < m_count; i++)
  1855. {
  1856. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1857. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
  1858. m_Source[0][i].y * m_Source[1][i].y +
  1859. m_Source[0][i].z * m_Source[1][i].z +
  1860. m_Source[0][i].w * m_Source[1][i].w;
  1861. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1862. m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
  1863. m_Source[0][i].y * m_Source[2][i].y +
  1864. m_Source[0][i].z * m_Source[2][i].z +
  1865. m_Source[0][i].w * m_Source[2][i].w;
  1866. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1867. m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
  1868. m_Source[0][i].y * m_Source[3][i].y +
  1869. m_Source[0][i].z * m_Source[3][i].z +
  1870. m_Source[0][i].w * m_Source[3][i].w;
  1871. }
  1872. }
  1873. }
  1874. //-----------------------------------------------------------------------------
  1875. void CVertexVM::InstM3x4()
  1876. {
  1877. EvalDestination();
  1878. EvalSource(0);
  1879. EvalSource(1, 4);
  1880. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1881. {
  1882. for (UINT i=0; i < m_count; i++)
  1883. {
  1884. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
  1885. m_Source[0][i].y * m_Source[1][i].y +
  1886. m_Source[0][i].z * m_Source[1][i].z;
  1887. m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
  1888. m_Source[0][i].y * m_Source[2][i].y +
  1889. m_Source[0][i].z * m_Source[2][i].z;
  1890. m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
  1891. m_Source[0][i].y * m_Source[3][i].y +
  1892. m_Source[0][i].z * m_Source[3][i].z;
  1893. m_pDest[i].w = m_Source[0][i].x * m_Source[4][i].x +
  1894. m_Source[0][i].y * m_Source[4][i].y +
  1895. m_Source[0][i].z * m_Source[4][i].z;
  1896. }
  1897. }
  1898. else
  1899. {
  1900. for (UINT i=0; i < m_count; i++)
  1901. {
  1902. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1903. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
  1904. m_Source[0][i].y * m_Source[1][i].y +
  1905. m_Source[0][i].z * m_Source[1][i].z;
  1906. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1907. m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
  1908. m_Source[0][i].y * m_Source[2][i].y +
  1909. m_Source[0][i].z * m_Source[2][i].z;
  1910. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1911. m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
  1912. m_Source[0][i].y * m_Source[3][i].y +
  1913. m_Source[0][i].z * m_Source[3][i].z;
  1914. if (m_WriteMask & D3DSP_WRITEMASK_3)
  1915. m_pDest[i].w = m_Source[0][i].x * m_Source[4][i].x +
  1916. m_Source[0][i].y * m_Source[4][i].y +
  1917. m_Source[0][i].z * m_Source[4][i].z;
  1918. }
  1919. }
  1920. }
  1921. //-----------------------------------------------------------------------------
  1922. void CVertexVM::InstM3x3()
  1923. {
  1924. EvalDestination();
  1925. EvalSource(0);
  1926. EvalSource(1, 3);
  1927. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1928. {
  1929. for (UINT i=0; i < m_count; i++)
  1930. {
  1931. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
  1932. m_Source[0][i].y * m_Source[1][i].y +
  1933. m_Source[0][i].z * m_Source[1][i].z;
  1934. m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
  1935. m_Source[0][i].y * m_Source[2][i].y +
  1936. m_Source[0][i].z * m_Source[2][i].z;
  1937. m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
  1938. m_Source[0][i].y * m_Source[3][i].y +
  1939. m_Source[0][i].z * m_Source[3][i].z;
  1940. }
  1941. }
  1942. else
  1943. {
  1944. for (UINT i=0; i < m_count; i++)
  1945. {
  1946. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1947. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
  1948. m_Source[0][i].y * m_Source[1][i].y +
  1949. m_Source[0][i].z * m_Source[1][i].z;
  1950. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1951. m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
  1952. m_Source[0][i].y * m_Source[2][i].y +
  1953. m_Source[0][i].z * m_Source[2][i].z;
  1954. if (m_WriteMask & D3DSP_WRITEMASK_2)
  1955. m_pDest[i].z = m_Source[0][i].x * m_Source[3][i].x +
  1956. m_Source[0][i].y * m_Source[3][i].y +
  1957. m_Source[0][i].z * m_Source[3][i].z;
  1958. }
  1959. }
  1960. }
  1961. //-----------------------------------------------------------------------------
  1962. void CVertexVM::InstM3x2()
  1963. {
  1964. EvalDestination();
  1965. EvalSource(0);
  1966. EvalSource(1, 2);
  1967. if (m_WriteMask == D3DSP_WRITEMASK_ALL)
  1968. {
  1969. for (UINT i=0; i < m_count; i++)
  1970. {
  1971. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
  1972. m_Source[0][i].y * m_Source[1][i].y +
  1973. m_Source[0][i].z * m_Source[1][i].z;
  1974. m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
  1975. m_Source[0][i].y * m_Source[2][i].y +
  1976. m_Source[0][i].z * m_Source[2][i].z;
  1977. }
  1978. }
  1979. else
  1980. {
  1981. for (UINT i=0; i < m_count; i++)
  1982. {
  1983. if (m_WriteMask & D3DSP_WRITEMASK_0)
  1984. m_pDest[i].x = m_Source[0][i].x * m_Source[1][i].x +
  1985. m_Source[0][i].y * m_Source[1][i].y +
  1986. m_Source[0][i].z * m_Source[1][i].z;
  1987. if (m_WriteMask & D3DSP_WRITEMASK_1)
  1988. m_pDest[i].y = m_Source[0][i].x * m_Source[2][i].x +
  1989. m_Source[0][i].y * m_Source[2][i].y +
  1990. m_Source[0][i].z * m_Source[2][i].z;
  1991. }
  1992. }
  1993. }
  1994. //-----------------------------------------------------------------------------
  1995. HRESULT CVertexVM::ExecuteShader(LPD3DFE_PROCESSVERTICES pv, UINT vertexCount)
  1996. {
  1997. if (m_pCurrentShader == NULL)
  1998. {
  1999. D3D_ERR("No current shader set in the Virtual Shader Machine");
  2000. return D3DERR_INVALIDCALL;
  2001. }
  2002. try
  2003. {
  2004. m_count = vertexCount;
  2005. m_BatchSize = vertexCount * sizeof(VVM_WORD);
  2006. // Skip version
  2007. m_pdwCurToken = m_pCurrentShader->m_pdwCode + 1;
  2008. DWORD* pEnd = m_pCurrentShader->m_pdwCode + m_pCurrentShader->m_dwSize;
  2009. pEnd -= 1;
  2010. m_CurInstIndex = 0;
  2011. // Initialize position register
  2012. for (UINT i=0; i < m_count; i++)
  2013. {
  2014. m_reg.m_output[0][i].x = 0;
  2015. m_reg.m_output[0][i].y = 0;
  2016. m_reg.m_output[0][i].z = 0;
  2017. m_reg.m_output[0][i].w = 1;
  2018. }
  2019. while (m_pdwCurToken < pEnd)
  2020. {
  2021. DWORD dwInst = *m_pdwCurToken;
  2022. DWORD dwOpCode = D3DSI_GETOPCODE(dwInst);
  2023. m_pdwCurToken++;
  2024. switch (dwOpCode)
  2025. {
  2026. case D3DSIO_COMMENT: m_pdwCurToken += ((GetInstructionLength(dwInst))-1); break;
  2027. case D3DSIO_NOP : ; break;
  2028. case D3DSIO_MOV : InstMov(); break;
  2029. case D3DSIO_ADD : InstAdd(); break;
  2030. case D3DSIO_MAD : InstMad(); break;
  2031. case D3DSIO_MUL : InstMul(); break;
  2032. case D3DSIO_RCP : InstRcp(); break;
  2033. case D3DSIO_RSQ : InstRsq(); break;
  2034. case D3DSIO_DP3 : InstDP3(); break;
  2035. case D3DSIO_DP4 : InstDP4(); break;
  2036. case D3DSIO_MIN : InstMin(); break;
  2037. case D3DSIO_MAX : InstMax(); break;
  2038. case D3DSIO_SLT : InstSlt(); break;
  2039. case D3DSIO_SGE : InstSge(); break;
  2040. case D3DSIO_EXP : InstExp(); break;
  2041. case D3DSIO_LOG : InstLog(); break;
  2042. case D3DSIO_EXPP : InstExpP(); break;
  2043. case D3DSIO_LOGP : InstLogP(); break;
  2044. case D3DSIO_LIT : InstLit(); break;
  2045. case D3DSIO_DST : InstDst(); break;
  2046. case D3DSIO_FRC : InstFrc(); break;
  2047. case D3DSIO_M4x4 : InstM4x4(); break;
  2048. case D3DSIO_M4x3 : InstM4x3(); break;
  2049. case D3DSIO_M3x4 : InstM3x4(); break;
  2050. case D3DSIO_M3x3 : InstM3x3(); break;
  2051. case D3DSIO_M3x2 : InstM3x2(); break;
  2052. default:
  2053. {
  2054. PrintInstCount();
  2055. D3D_THROW_FAIL("Invalid shader opcode");
  2056. }
  2057. }
  2058. #ifndef PSGPDLL
  2059. #if DBG
  2060. if (pv->pDbgMon) pv->pDbgMon->NextEvent(D3DDM_EVENT_VERTEXSHADERINST);
  2061. #endif
  2062. #endif // PSGPDLL
  2063. if (dwOpCode != D3DSIO_COMMENT)
  2064. m_CurInstIndex++;
  2065. }
  2066. m_CurInstIndex = 0;
  2067. }
  2068. D3D_CATCH;
  2069. return D3D_OK;
  2070. }
  2071. //-----------------------------------------------------------------------------
  2072. HRESULT CVertexVM::GetDataPointer(DWORD dwMemType, VVM_WORD ** pData)
  2073. {
  2074. try
  2075. {
  2076. *pData = this->GetDataAddr(dwMemType, 0);
  2077. }
  2078. catch (HRESULT e)
  2079. {
  2080. *pData = NULL;
  2081. return D3DERR_INVALIDCALL;
  2082. }
  2083. return D3D_OK;
  2084. }
  2085. //---------------------------------------------------------------------
  2086. VVM_REGISTERS* CVertexVM::GetRegisters()
  2087. {
  2088. return &m_reg;
  2089. }