Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1439 lines
44 KiB

  1. /*==========================================================================
  2. *
  3. * Copyright (C) 1999 Microsoft Corporation. All Rights Reserved.
  4. *
  5. * File: vvm.cpp
  6. * Content: Virtual Vertex Machine implementation
  7. *
  8. *
  9. ***************************************************************************/
  10. #include "pch.cpp"
  11. #pragma hdrstop
  12. float MINUS_MAX()
  13. {
  14. DWORD v = 0xFF7FFFFF;
  15. return *(float*)&v;
  16. }
  17. float PLUS_MAX()
  18. {
  19. DWORD v = 0x7F7FFFFF;
  20. return *(float*)&v;
  21. }
  22. //-----------------------------------------------------------------------------
  23. // Returns instruction size, based on the op-code
  24. //
  25. UINT GetInstructionLength(DWORD inst)
  26. {
  27. DWORD opcode = D3DSI_GETOPCODE( inst );
  28. // returns number of source operands + length of opcode and the destination
  29. switch (opcode)
  30. {
  31. case D3DSIO_MOV : return 1 + 2;
  32. case D3DSIO_ADD : return 2 + 2;
  33. case D3DSIO_MAD : return 3 + 2;
  34. case D3DSIO_MUL : return 2 + 2;
  35. case D3DSIO_RCP : return 1 + 2;
  36. case D3DSIO_RSQ : return 1 + 2;
  37. case D3DSIO_DP3 : return 2 + 2;
  38. case D3DSIO_DP4 : return 2 + 2;
  39. case D3DSIO_MIN : return 2 + 2;
  40. case D3DSIO_MAX : return 2 + 2;
  41. case D3DSIO_SLT : return 2 + 2;
  42. case D3DSIO_SGE : return 2 + 2;
  43. case D3DSIO_EXP : return 1 + 2;
  44. case D3DSIO_LOG : return 1 + 2;
  45. case D3DSIO_EXPP: return 1 + 2;
  46. case D3DSIO_LOGP: return 1 + 2;
  47. case D3DSIO_LIT : return 1 + 2;
  48. case D3DSIO_DST : return 2 + 2;
  49. case D3DSIO_FRC : return 1 + 2;
  50. case D3DSIO_M4x4: return 2 + 2;
  51. case D3DSIO_M4x3: return 2 + 2;
  52. case D3DSIO_M3x4: return 2 + 2;
  53. case D3DSIO_M3x3: return 2 + 2;
  54. case D3DSIO_M3x2: return 2 + 2;
  55. case D3DSIO_NOP : return 1;
  56. default: return 1;
  57. case D3DSIO_COMMENT: return 1 + ((inst & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT);
  58. }
  59. }
  #if 0
  //-----------------------------------------------------------------------------
  // Compiled out. Stores the address of element 0 of the register file selected
  // by dwMemType into *pData. If GetDataAddr throws CD3DException, *pData is
  // set to NULL and DDERR_INVALIDPARAMS is returned.
  //-----------------------------------------------------------------------------
  HRESULT CVertexVM::GetDataPointer(DWORD dwMemType, RDVECTOR4 ** pData)
  {
      HRESULT hr = D3D_OK;
      try
      {
          *pData = this->GetDataAddr(dwMemType, 0);
      }
      catch (CD3DException e)
      {
          *pData = NULL;
          hr = DDERR_INVALIDPARAMS;
      }
      return hr;
  }
  #endif
  76. ///////////////////////////////////////////////////////////////////////////////
  77. //
  78. // RefVM implementation.
  79. //
  80. ///////////////////////////////////////////////////////////////////////////////
  81. //-----------------------------------------------------------------------------
  82. // Vertex Virtual Machine Opcode implementations
  83. //-----------------------------------------------------------------------------
  84. void
  85. RefVM::WriteResult()
  86. {
  87. if( m_WriteMask == D3DSP_WRITEMASK_ALL)
  88. {
  89. *m_pDest = m_TmpReg;
  90. }
  91. else
  92. {
  93. if( m_WriteMask & D3DSP_WRITEMASK_0)
  94. m_pDest->x = m_TmpReg.x;
  95. if( m_WriteMask & D3DSP_WRITEMASK_1)
  96. m_pDest->y = m_TmpReg.y;
  97. if( m_WriteMask & D3DSP_WRITEMASK_2)
  98. m_pDest->z = m_TmpReg.z;
  99. if( m_WriteMask & D3DSP_WRITEMASK_3)
  100. m_pDest->w = m_TmpReg.w;
  101. }
  102. }
  103. //-----------------------------------------------------------------------------
  104. void
  105. RefVM::InstMov()
  106. {
  107. SetDestReg();
  108. SetSrcReg(0);
  109. if( m_pDest == m_reg.m_a )
  110. {
  111. float p = (float)floor(m_Source[0].x);
  112. *(int*)&m_pDest->x = FTOI(p);
  113. }
  114. else
  115. {
  116. m_TmpReg = m_Source[0];
  117. WriteResult();
  118. }
  119. }
  120. //-----------------------------------------------------------------------------
  121. void
  122. RefVM::InstAdd()
  123. {
  124. SetDestReg();
  125. SetSrcReg(0);
  126. SetSrcReg(1);
  127. m_TmpReg.x = m_Source[0].x + m_Source[1].x;
  128. m_TmpReg.y = m_Source[0].y + m_Source[1].y;
  129. m_TmpReg.z = m_Source[0].z + m_Source[1].z;
  130. m_TmpReg.w = m_Source[0].w + m_Source[1].w;
  131. WriteResult();
  132. }
  133. //-----------------------------------------------------------------------------
  134. void
  135. RefVM::InstMad()
  136. {
  137. SetDestReg();
  138. SetSrcReg(0);
  139. SetSrcReg(1);
  140. SetSrcReg(2);
  141. m_TmpReg.x = m_Source[0].x * m_Source[1].x + m_Source[2].x;
  142. m_TmpReg.y = m_Source[0].y * m_Source[1].y + m_Source[2].y;
  143. m_TmpReg.z = m_Source[0].z * m_Source[1].z + m_Source[2].z;
  144. m_TmpReg.w = m_Source[0].w * m_Source[1].w + m_Source[2].w;
  145. WriteResult();
  146. }
  147. //-----------------------------------------------------------------------------
  148. void
  149. RefVM::InstMul()
  150. {
  151. SetDestReg();
  152. SetSrcReg(0);
  153. SetSrcReg(1);
  154. m_TmpReg.x = m_Source[0].x * m_Source[1].x;
  155. m_TmpReg.y = m_Source[0].y * m_Source[1].y;
  156. m_TmpReg.z = m_Source[0].z * m_Source[1].z;
  157. m_TmpReg.w = m_Source[0].w * m_Source[1].w;
  158. WriteResult();
  159. }
  160. //-----------------------------------------------------------------------------
  161. void
  162. RefVM::InstRcp()
  163. {
  164. SetDestReg();
  165. SetSrcReg(0);
  166. if( m_Source[0].w == 1.0f )
  167. {
  168. // Must be exactly 1.0
  169. m_TmpReg.x = m_TmpReg.y = m_TmpReg.z = m_TmpReg.w = 1.0f;
  170. }
  171. else if( m_Source[0].w == 0 )
  172. {
  173. m_TmpReg.x = m_TmpReg.y = m_TmpReg.z = m_TmpReg.w = PLUS_MAX();
  174. }
  175. else
  176. {
  177. m_TmpReg.x = m_TmpReg.y = m_TmpReg.z = m_TmpReg.w = 1.0f/m_Source[0].w;
  178. }
  179. WriteResult();
  180. }
  181. //-----------------------------------------------------------------------------
  182. void
  183. RefVM::InstRsq()
  184. {
  185. SetDestReg();
  186. SetSrcReg(0);
  187. float v = ABSF(m_Source[0].w);
  188. if( v == 1.0f )
  189. {
  190. m_TmpReg.x = m_TmpReg.y = m_TmpReg.z = m_TmpReg.w = 1.0f;
  191. }
  192. else if( v == 0 )
  193. {
  194. m_TmpReg.x = m_TmpReg.y = m_TmpReg.z = m_TmpReg.w = PLUS_MAX();
  195. }
  196. else
  197. {
  198. v = (float)(1.0f / sqrt(v));
  199. m_TmpReg.x = m_TmpReg.y = m_TmpReg.z = m_TmpReg.w = v;
  200. }
  201. WriteResult();
  202. }
  203. //-----------------------------------------------------------------------------
  204. void
  205. RefVM::InstDP3()
  206. {
  207. SetDestReg();
  208. SetSrcReg(0);
  209. SetSrcReg(1);
  210. m_TmpReg.x =
  211. m_TmpReg.y =
  212. m_TmpReg.z =
  213. m_TmpReg.w = m_Source[0].x * m_Source[1].x +
  214. m_Source[0].y * m_Source[1].y +
  215. m_Source[0].z * m_Source[1].z;
  216. WriteResult();
  217. }
  218. //-----------------------------------------------------------------------------
  219. void
  220. RefVM::InstDP4()
  221. {
  222. SetDestReg();
  223. SetSrcReg(0);
  224. SetSrcReg(1);
  225. m_TmpReg.x =
  226. m_TmpReg.y =
  227. m_TmpReg.z =
  228. m_TmpReg.w = m_Source[0].x * m_Source[1].x +
  229. m_Source[0].y * m_Source[1].y +
  230. m_Source[0].z * m_Source[1].z +
  231. m_Source[0].w * m_Source[1].w;
  232. WriteResult();
  233. }
  234. //-----------------------------------------------------------------------------
  235. void
  236. RefVM::InstSlt()
  237. {
  238. SetDestReg();
  239. SetSrcReg(0);
  240. SetSrcReg(1);
  241. m_TmpReg.x = (m_Source[0].x < m_Source[1].x) ? 1.0f : 0.0f;
  242. m_TmpReg.y = (m_Source[0].y < m_Source[1].y) ? 1.0f : 0.0f;
  243. m_TmpReg.z = (m_Source[0].z < m_Source[1].z) ? 1.0f : 0.0f;
  244. m_TmpReg.w = (m_Source[0].w < m_Source[1].w) ? 1.0f : 0.0f;
  245. WriteResult();
  246. }
  247. //-----------------------------------------------------------------------------
  248. void
  249. RefVM::InstSge()
  250. {
  251. SetDestReg();
  252. SetSrcReg(0);
  253. SetSrcReg(1);
  254. m_TmpReg.x = (m_Source[0].x >= m_Source[1].x) ? 1.0f : 0.0f;
  255. m_TmpReg.y = (m_Source[0].y >= m_Source[1].y) ? 1.0f : 0.0f;
  256. m_TmpReg.z = (m_Source[0].z >= m_Source[1].z) ? 1.0f : 0.0f;
  257. m_TmpReg.w = (m_Source[0].w >= m_Source[1].w) ? 1.0f : 0.0f;
  258. WriteResult();
  259. }
  260. //-----------------------------------------------------------------------------
  261. void
  262. RefVM::InstMin()
  263. {
  264. SetDestReg();
  265. SetSrcReg(0);
  266. SetSrcReg(1);
  267. m_TmpReg.x=(m_Source[0].x < m_Source[1].x) ? m_Source[0].x : m_Source[1].x;
  268. m_TmpReg.y=(m_Source[0].y < m_Source[1].y) ? m_Source[0].y : m_Source[1].y;
  269. m_TmpReg.z=(m_Source[0].z < m_Source[1].z) ? m_Source[0].z : m_Source[1].z;
  270. m_TmpReg.w=(m_Source[0].w < m_Source[1].w) ? m_Source[0].w : m_Source[1].w;
  271. WriteResult();
  272. }
  273. //-----------------------------------------------------------------------------
  274. void
  275. RefVM::InstMax()
  276. {
  277. SetDestReg();
  278. SetSrcReg(0);
  279. SetSrcReg(1);
  280. m_TmpReg.x=(m_Source[0].x >= m_Source[1].x) ? m_Source[0].x : m_Source[1].x;
  281. m_TmpReg.y=(m_Source[0].y >= m_Source[1].y) ? m_Source[0].y : m_Source[1].y;
  282. m_TmpReg.z=(m_Source[0].z >= m_Source[1].z) ? m_Source[0].z : m_Source[1].z;
  283. m_TmpReg.w=(m_Source[0].w >= m_Source[1].w) ? m_Source[0].w : m_Source[1].w;
  284. WriteResult();
  285. }
  286. //-----------------------------------------------------------------------------
  287. void
  288. RefVM::InstExp()
  289. {
  290. SetDestReg();
  291. SetSrcReg(0);
  292. float v = m_Source[0].w;
  293. m_TmpReg.x = m_TmpReg.y = m_TmpReg.z = m_TmpReg.w = (float)pow(2, v);
  294. WriteResult();
  295. }
  296. //-----------------------------------------------------------------------------
  297. void
  298. RefVM::InstExpP()
  299. {
  300. SetDestReg();
  301. SetSrcReg(0);
  302. float w = m_Source[0].w;
  303. float v = (float)floor(m_Source[0].w);
  304. m_TmpReg.x = (float)pow(2, v);
  305. m_TmpReg.y = w - v;
  306. // Reduced precision exponent
  307. float tmp = (float)pow(2, w);
  308. DWORD tmpd = *(DWORD*)&tmp & 0xffffff00;
  309. m_TmpReg.z = *(float*)&tmpd;
  310. m_TmpReg.w = 1;
  311. WriteResult();
  312. }
  313. //-----------------------------------------------------------------------------
  314. void
  315. RefVM::InstLog()
  316. {
  317. SetDestReg();
  318. SetSrcReg(0);
  319. float v = ABSF(m_Source[0].w);
  320. if (v != 0)
  321. {
  322. m_TmpReg.x = m_TmpReg.y = m_TmpReg.z = m_TmpReg.w =
  323. (float)(log(v)/log(2));
  324. }
  325. else
  326. {
  327. m_TmpReg.x = m_TmpReg.y = m_TmpReg.z = m_TmpReg.w = MINUS_MAX();
  328. }
  329. WriteResult();
  330. }
  331. //-----------------------------------------------------------------------------
  332. void
  333. RefVM::InstLogP()
  334. {
  335. SetDestReg();
  336. SetSrcReg(0);
  337. float v = ABSF(m_Source[0].w);
  338. if (v != 0)
  339. {
  340. int p = (int)(*(DWORD*)&v >> 23) - 127;
  341. m_TmpReg.x = (float)p; // exponent
  342. p = (*(DWORD*)&v & 0x7FFFFF) | 0x3f800000;
  343. m_TmpReg.y = *(float*)&p;// mantissa;
  344. float tmp = (float)(log(v)/log(2));
  345. DWORD tmpd = *(DWORD*)&tmp & 0xffffff00;
  346. m_TmpReg.z = *(float*)&tmpd;
  347. m_TmpReg.w = 1;
  348. }
  349. else
  350. {
  351. m_TmpReg.x = MINUS_MAX();
  352. m_TmpReg.y = 1.0f;
  353. m_TmpReg.z = MINUS_MAX();
  354. m_TmpReg.w = 1.0f;
  355. }
  356. WriteResult();
  357. }
  358. //-----------------------------------------------------------------------------
  359. void
  360. RefVM::InstLit()
  361. {
  362. SetDestReg();
  363. SetSrcReg(0);
  364. m_TmpReg.x = 1;
  365. m_TmpReg.y = 0;
  366. m_TmpReg.z = 0;
  367. m_TmpReg.w = 1;
  368. float power = m_Source[0].w;
  369. const float MAXPOWER = 127.9961f;
  370. if (power < -MAXPOWER)
  371. power = -MAXPOWER; // Fits into 8.8 fixed point format
  372. else
  373. if (power > MAXPOWER)
  374. power = MAXPOWER; // Fits into 8.8 fixed point format
  375. if (m_Source[0].x > 0)
  376. {
  377. m_TmpReg.y = m_Source[0].x;
  378. if (m_Source[0].y > 0)
  379. {
  380. // Allowed approximation is EXP(power * LOG(m_Source[0].y))
  381. m_TmpReg.z = (float)(pow(m_Source[0].y, power));
  382. }
  383. }
  384. WriteResult();
  385. }
  386. //-----------------------------------------------------------------------------
  387. void
  388. RefVM::InstFrc()
  389. {
  390. SetDestReg();
  391. SetSrcReg(0);
  392. m_TmpReg.x = m_Source[0].x - (float)floor(m_Source[0].x);
  393. m_TmpReg.y = m_Source[0].y - (float)floor(m_Source[0].y);
  394. m_TmpReg.z = m_Source[0].z - (float)floor(m_Source[0].z);
  395. m_TmpReg.w = m_Source[0].w - (float)floor(m_Source[0].w);
  396. WriteResult();
  397. }
  398. //-----------------------------------------------------------------------------
  399. void
  400. RefVM::InstDst()
  401. {
  402. SetDestReg();
  403. SetSrcReg(0);
  404. SetSrcReg(1);
  405. m_TmpReg.x = 1;
  406. m_TmpReg.y = m_Source[0].y * m_Source[1].y;
  407. m_TmpReg.z = m_Source[0].z;
  408. m_TmpReg.w = m_Source[1].w;
  409. WriteResult();
  410. }
  411. //-----------------------------------------------------------------------------
  412. void
  413. RefVM::InstM4x4()
  414. {
  415. SetDestReg();
  416. SetSrcReg(0);
  417. SetSrcReg(1, 4);
  418. m_TmpReg.x = m_Source[0].x * m_Source[1].x +
  419. m_Source[0].y * m_Source[1].y +
  420. m_Source[0].z * m_Source[1].z +
  421. m_Source[0].w * m_Source[1].w;
  422. m_TmpReg.y = m_Source[0].x * m_Source[2].x +
  423. m_Source[0].y * m_Source[2].y +
  424. m_Source[0].z * m_Source[2].z +
  425. m_Source[0].w * m_Source[2].w;
  426. m_TmpReg.z = m_Source[0].x * m_Source[3].x +
  427. m_Source[0].y * m_Source[3].y +
  428. m_Source[0].z * m_Source[3].z +
  429. m_Source[0].w * m_Source[3].w;
  430. m_TmpReg.w = m_Source[0].x * m_Source[4].x +
  431. m_Source[0].y * m_Source[4].y +
  432. m_Source[0].z * m_Source[4].z +
  433. m_Source[0].w * m_Source[4].w;
  434. WriteResult();
  435. }
  436. //-----------------------------------------------------------------------------
  437. void
  438. RefVM::InstM4x3()
  439. {
  440. SetDestReg();
  441. SetSrcReg(0);
  442. SetSrcReg(1, 3);
  443. m_TmpReg.x = m_Source[0].x * m_Source[1].x +
  444. m_Source[0].y * m_Source[1].y +
  445. m_Source[0].z * m_Source[1].z +
  446. m_Source[0].w * m_Source[1].w;
  447. m_TmpReg.y = m_Source[0].x * m_Source[2].x +
  448. m_Source[0].y * m_Source[2].y +
  449. m_Source[0].z * m_Source[2].z +
  450. m_Source[0].w * m_Source[2].w;
  451. m_TmpReg.z = m_Source[0].x * m_Source[3].x +
  452. m_Source[0].y * m_Source[3].y +
  453. m_Source[0].z * m_Source[3].z +
  454. m_Source[0].w * m_Source[3].w;
  455. WriteResult();
  456. }
  457. //-----------------------------------------------------------------------------
  458. void
  459. RefVM::InstM3x4()
  460. {
  461. SetDestReg();
  462. SetSrcReg(0);
  463. SetSrcReg(1, 4);
  464. m_TmpReg.x = m_Source[0].x * m_Source[1].x +
  465. m_Source[0].y * m_Source[1].y +
  466. m_Source[0].z * m_Source[1].z;
  467. m_TmpReg.y = m_Source[0].x * m_Source[2].x +
  468. m_Source[0].y * m_Source[2].y +
  469. m_Source[0].z * m_Source[2].z;
  470. m_TmpReg.z = m_Source[0].x * m_Source[3].x +
  471. m_Source[0].y * m_Source[3].y +
  472. m_Source[0].z * m_Source[3].z;
  473. m_TmpReg.w = m_Source[0].x * m_Source[4].x +
  474. m_Source[0].y * m_Source[4].y +
  475. m_Source[0].z * m_Source[4].z;
  476. WriteResult();
  477. }
  478. //-----------------------------------------------------------------------------
  479. void RefVM::InstM3x3()
  480. {
  481. SetDestReg();
  482. SetSrcReg(0);
  483. SetSrcReg(1, 3);
  484. m_TmpReg.x = m_Source[0].x * m_Source[1].x +
  485. m_Source[0].y * m_Source[1].y +
  486. m_Source[0].z * m_Source[1].z;
  487. m_TmpReg.y = m_Source[0].x * m_Source[2].x +
  488. m_Source[0].y * m_Source[2].y +
  489. m_Source[0].z * m_Source[2].z;
  490. m_TmpReg.z = m_Source[0].x * m_Source[3].x +
  491. m_Source[0].y * m_Source[3].y +
  492. m_Source[0].z * m_Source[3].z;
  493. WriteResult();
  494. }
  495. //-----------------------------------------------------------------------------
  496. void RefVM::InstM3x2()
  497. {
  498. SetDestReg();
  499. SetSrcReg(0);
  500. SetSrcReg(1, 2);
  501. m_TmpReg.x = m_Source[0].x * m_Source[1].x +
  502. m_Source[0].y * m_Source[1].y +
  503. m_Source[0].z * m_Source[1].z;
  504. m_TmpReg.y = m_Source[0].x * m_Source[2].x +
  505. m_Source[0].y * m_Source[2].y +
  506. m_Source[0].z * m_Source[2].z;
  507. WriteResult();
  508. }
  509. //-----------------------------------------------------------------------------
  510. // RefVM::SetData
  511. // Save data into the specified registers.
  512. //-----------------------------------------------------------------------------
  513. HRESULT
  514. RefVM::SetData( DWORD dwMemType, DWORD dwStart, DWORD dwCount,
  515. LPVOID pBuffer )
  516. {
  517. memcpy( GetDataAddr( dwMemType, dwStart ), pBuffer,
  518. dwCount * sizeof(RDVECTOR4) );
  519. return D3D_OK;
  520. }
  521. //-----------------------------------------------------------------------------
  522. // RefVM::GetData
  523. // Fetch data from the specified registers.
  524. //-----------------------------------------------------------------------------
  525. HRESULT
  526. RefVM::GetData( DWORD dwMemType, DWORD dwStart, DWORD dwCount,
  527. LPVOID pBuffer )
  528. {
  529. memcpy( pBuffer, GetDataAddr( dwMemType, dwStart ),
  530. dwCount * sizeof(RDVECTOR4) );
  531. return D3D_OK;
  532. }
  533. //-----------------------------------------------------------------------------
  534. // RefVM::SetDestReg
  535. // - parses destination token
  536. // - computes m_pDest, m_WrideMask, m_dwOffset for the destination
  537. // - current token pointer is andvanced to the next token
  538. //-----------------------------------------------------------------------------
  539. #undef RET_ERR
  540. #define RET_ERR( a ) \
  541. { \
  542. DPFERR( a ); \
  543. return E_FAIL; \
  544. }
  545. HRESULT
  546. RefVM::SetDestReg()
  547. {
  548. DWORD dwCurToken = *m_pCurToken;
  549. DWORD dwRegType = D3DSI_GETREGTYPE(dwCurToken);
  550. m_dwRegOffset = D3DSI_GETREGNUM(dwCurToken);
  551. m_WriteMask = D3DSI_GETWRITEMASK(dwCurToken);
  552. switch( dwRegType )
  553. {
  554. case D3DSPR_TEMP:
  555. m_pDest = m_reg.m_t;
  556. break;
  557. case D3DSPR_RASTOUT:
  558. m_pDest = m_reg.m_out;
  559. break;
  560. case D3DSPR_ATTROUT:
  561. m_pDest = m_reg.m_col;
  562. break;
  563. case D3DSPR_TEXCRDOUT:
  564. m_pDest = m_reg.m_tex;
  565. break;
  566. case D3DSPR_ADDR:
  567. m_pDest = m_reg.m_a;
  568. break;
  569. default:
  570. RET_ERR( "Invalid register for destination" );
  571. }
  572. m_pCurToken++;
  573. m_pDest += m_dwRegOffset;
  574. return S_OK;
  575. }
  576. //-----------------------------------------------------------------------------
  577. // RefVM::SetSrcReg
  578. // Computes m_Source[index] and advances m_pCurToken
  579. //-----------------------------------------------------------------------------
  580. HRESULT
  581. RefVM::SetSrcReg( DWORD index )
  582. {
  583. const DWORD dwCurToken = *m_pCurToken;
  584. const DWORD dwRegType = D3DSI_GETREGTYPE( dwCurToken );
  585. const DWORD dwOffset = D3DSI_GETREGNUM( dwCurToken );
  586. RDVECTOR4 *src = NULL;
  587. if( dwRegType == D3DSPR_CONST )
  588. {
  589. D3DVS_ADDRESSMODE_TYPE am;
  590. am = (D3DVS_ADDRESSMODE_TYPE)D3DVS_GETADDRESSMODE( dwCurToken );
  591. int offset = (int)dwOffset;
  592. if( am == D3DVS_ADDRMODE_RELATIVE )
  593. {
  594. int relOffset = *(int*)&m_reg.m_a[0].x;
  595. offset += relOffset;
  596. if( offset < 0 || offset >= RD_MAX_NUMCONSTREG )
  597. RET_ERR( "Constant register index is out of bounds" );
  598. }
  599. src = &m_reg.m_c[offset];
  600. }
  601. else
  602. src = this->GetDataAddr(dwRegType, dwOffset);
  603. _ASSERT( src != NULL, "src is NULL" );
  604. RDVECTOR4 *outsrc = &m_Source[index];
  605. DWORD swizzle = D3DVS_GETSWIZZLE(dwCurToken);
  606. if( swizzle == D3DVS_NOSWIZZLE )
  607. *outsrc = *src;
  608. else
  609. {
  610. // Where to take X
  611. const DWORD dwSrcX = D3DVS_GETSWIZZLECOMP(dwCurToken, 0);
  612. // Where to take Y
  613. const DWORD dwSrcY = D3DVS_GETSWIZZLECOMP(dwCurToken, 1);
  614. // Where to take Z
  615. const DWORD dwSrcZ = D3DVS_GETSWIZZLECOMP(dwCurToken, 2);
  616. // Where to take W
  617. const DWORD dwSrcW = D3DVS_GETSWIZZLECOMP(dwCurToken, 3);
  618. outsrc->x = ((float*)src)[dwSrcX];
  619. outsrc->y = ((float*)src)[dwSrcY];
  620. outsrc->z = ((float*)src)[dwSrcZ];
  621. outsrc->w = ((float*)src)[dwSrcW];
  622. }
  623. if( D3DVS_GETSRCMODIFIER( dwCurToken ) == D3DSPSM_NEG)
  624. {
  625. outsrc->x = -outsrc->x;
  626. outsrc->y = -outsrc->y;
  627. outsrc->z = -outsrc->z;
  628. outsrc->w = -outsrc->w;
  629. }
  630. m_pCurToken++;
  631. return S_OK;
  632. }
  633. //-----------------------------------------------------------------------------
  634. // RefVM::SetSrcReg
  635. // Computes m_Source[index] and advances m_pCurToken
  636. //-----------------------------------------------------------------------------
  637. HRESULT
  638. RefVM::SetSrcReg( DWORD index, DWORD count )
  639. {
  640. const DWORD dwCurToken = *m_pCurToken;
  641. const DWORD dwRegType = D3DSI_GETREGTYPE(dwCurToken);
  642. const DWORD dwOffset = D3DSI_GETREGNUM(dwCurToken);
  643. RDVECTOR4 *src;
  644. if (dwRegType == D3DSPR_CONST)
  645. {
  646. D3DVS_ADDRESSMODE_TYPE am;
  647. am = (D3DVS_ADDRESSMODE_TYPE)D3DVS_GETADDRESSMODE(dwCurToken);
  648. int offset = (int)dwOffset;
  649. if (am == D3DVS_ADDRMODE_RELATIVE)
  650. {
  651. int relOffset = *(int*)&m_reg.m_a[0].x;
  652. offset += relOffset;
  653. if (offset < 0 || offset >= RD_MAX_NUMCONSTREG)
  654. RET_ERR( "Constant register index is out of bounds" );
  655. }
  656. src = &m_reg.m_c[offset];
  657. }
  658. else
  659. {
  660. if (dwOffset >= RD_MAX_NUMCONSTREG)
  661. RET_ERR( "Constant register index is out of bounds" );
  662. src = this->GetDataAddr(dwRegType, dwOffset);
  663. }
  664. RDVECTOR4 *outsrc = &m_Source[index];
  665. DWORD swizzle = D3DVS_GETSWIZZLE(dwCurToken);
  666. // Where to take X
  667. const DWORD dwSrcX = D3DVS_GETSWIZZLECOMP(dwCurToken, 0);
  668. // Where to take Y
  669. const DWORD dwSrcY = D3DVS_GETSWIZZLECOMP(dwCurToken, 1);
  670. // Where to take Z
  671. const DWORD dwSrcZ = D3DVS_GETSWIZZLECOMP(dwCurToken, 2);
  672. // Where to take W
  673. const DWORD dwSrcW = D3DVS_GETSWIZZLECOMP(dwCurToken, 3);
  674. for (UINT i=0; i < count; i++)
  675. {
  676. if (swizzle == D3DVS_NOSWIZZLE)
  677. *outsrc = *src;
  678. else
  679. {
  680. outsrc->x = ((float*)src)[dwSrcX];
  681. outsrc->y = ((float*)src)[dwSrcY];
  682. outsrc->z = ((float*)src)[dwSrcZ];
  683. outsrc->w = ((float*)src)[dwSrcW];
  684. }
  685. if (D3DVS_GETSRCMODIFIER(dwCurToken) == D3DSPSM_NEG)
  686. {
  687. outsrc->x = -outsrc->x;
  688. outsrc->y = -outsrc->y;
  689. outsrc->z = -outsrc->z;
  690. outsrc->w = -outsrc->w;
  691. }
  692. outsrc++;
  693. src++;
  694. }
  695. m_pCurToken++;
  696. return S_OK;
  697. }
  698. //---------------------------------------------------------------------
  699. // RefVM::GetDataAddr
  700. // Parses binary shader representation, compiles is and returns
  701. // compiled object
  702. //---------------------------------------------------------------------
  703. RDVECTOR4*
  704. RefVM::GetDataAddr(DWORD dwRegType, DWORD dwElementIndex)
  705. {
  706. RDVECTOR4* src;
  707. switch( dwRegType )
  708. {
  709. case D3DSPR_TEMP : src = m_reg.m_t; break;
  710. case D3DSPR_INPUT : src = m_reg.m_i; break;
  711. case D3DSPR_CONST : src = m_reg.m_c; break;
  712. case D3DSPR_ADDR : src = m_reg.m_a; break;
  713. case D3DSPR_RASTOUT : src = m_reg.m_out; break;
  714. case D3DSPR_ATTROUT : src = m_reg.m_col; break;
  715. case D3DSPR_TEXCRDOUT : src = m_reg.m_tex; break;
  716. default:
  717. return NULL;
  718. }
  719. return &src[dwElementIndex];
  720. }
  721. //---------------------------------------------------------------------
  722. // RefVM::ExecuteShader()
  723. // Executes the shader once per vertex.
  724. //---------------------------------------------------------------------
  725. HRESULT
  726. RefVM::ExecuteShader(RefDev *pRD)
  727. {
  728. if( m_pCurrentShaderCode == NULL )
  729. {
  730. RET_ERR( "No current shader set in the Virtual Shader Machine" );
  731. }
  732. m_pCurToken = m_pCurrentShaderCode->m_pRawBits;
  733. DWORD* pEnd = m_pCurToken + m_pCurrentShaderCode->m_dwSize;
  734. m_pCurToken++;
  735. m_CurInstIndex = 0;
  736. while( m_pCurToken < pEnd )
  737. {
  738. if( *m_pCurToken == D3DVS_END() ) break;
  739. DWORD dwInst = *m_pCurToken;
  740. DWORD dwOpCode = D3DSI_GETOPCODE( dwInst );
  741. m_pCurToken++;
  742. switch( dwOpCode )
  743. {
  744. case D3DSIO_COMMENT: m_pCurToken += (GetInstructionLength( dwInst ) - 1);
  745. case D3DSIO_NOP : ; break;
  746. case D3DSIO_MOV : InstMov(); break;
  747. case D3DSIO_ADD : InstAdd(); break;
  748. case D3DSIO_MAD : InstMad(); break;
  749. case D3DSIO_MUL : InstMul(); break;
  750. case D3DSIO_RCP : InstRcp(); break;
  751. case D3DSIO_RSQ : InstRsq(); break;
  752. case D3DSIO_DP3 : InstDP3(); break;
  753. case D3DSIO_DP4 : InstDP4(); break;
  754. case D3DSIO_MIN : InstMin(); break;
  755. case D3DSIO_MAX : InstMax(); break;
  756. case D3DSIO_SLT : InstSlt(); break;
  757. case D3DSIO_SGE : InstSge(); break;
  758. case D3DSIO_EXPP : InstExpP(); break;
  759. case D3DSIO_LOGP : InstLogP(); break;
  760. case D3DSIO_EXP : InstExp(); break;
  761. case D3DSIO_LOG : InstLog(); break;
  762. case D3DSIO_LIT : InstLit(); break;
  763. case D3DSIO_DST : InstDst(); break;
  764. case D3DSIO_FRC : InstFrc(); break;
  765. case D3DSIO_M4x4 : InstM4x4(); break;
  766. case D3DSIO_M4x3 : InstM4x3(); break;
  767. case D3DSIO_M3x4 : InstM3x4(); break;
  768. case D3DSIO_M3x3 : InstM3x3(); break;
  769. case D3DSIO_M3x2 : InstM3x2(); break;
  770. default:
  771. RET_ERR( "Invalid shader opcode" );
  772. }
  773. if (pRD->m_pDbgMon) pRD->m_pDbgMon->NextEvent( D3DDM_EVENT_VERTEXSHADERINST );
  774. if( dwOpCode != D3DSIO_COMMENT ) m_CurInstIndex++;
  775. }
  776. m_CurInstIndex = 0;
  777. return D3D_OK;
  778. }
  779. //-----------------------------------------------------------------------------
  780. // VertexShaderInstDisAsm - Generates human-readable character string for a
  781. // single vertex shader instruction. String interface is similar to _snprintf.
  782. //-----------------------------------------------------------------------------
  783. static int VertexShaderInstDisAsm(
  784. char* pStrRet, int StrSizeRet, DWORD* pShader, DWORD Flags )
  785. {
  786. DWORD* pToken = pShader;
  787. // stage in local string, then copy
  788. char pStr[256] = "";
  789. #define _ADDSTR( _Str ) { _snprintf( pStr, 256, "%s" _Str , pStr ); }
  790. #define _ADDSTRP( _Str, _Param ) { _snprintf( pStr, 256, "%s" _Str , pStr, _Param ); }
  791. DWORD Inst = *pToken++;
  792. DWORD Opcode = (Inst & D3DSI_OPCODE_MASK);
  793. switch (Opcode)
  794. {
  795. case D3DSIO_NOP: _ADDSTR("NOP"); break;
  796. case D3DSIO_MOV: _ADDSTR("MOV"); break;
  797. case D3DSIO_ADD: _ADDSTR("ADD"); break;
  798. case D3DSIO_MAD: _ADDSTR("MAD"); break;
  799. case D3DSIO_MUL: _ADDSTR("MUL"); break;
  800. case D3DSIO_RCP: _ADDSTR("RCP"); break;
  801. case D3DSIO_RSQ: _ADDSTR("RSQ"); break;
  802. case D3DSIO_DP3: _ADDSTR("DP3"); break;
  803. case D3DSIO_DP4: _ADDSTR("DP4"); break;
  804. case D3DSIO_MIN: _ADDSTR("MIN"); break;
  805. case D3DSIO_MAX: _ADDSTR("MAX"); break;
  806. case D3DSIO_SLT: _ADDSTR("SLT"); break;
  807. case D3DSIO_SGE: _ADDSTR("SGE"); break;
  808. case D3DSIO_EXP: _ADDSTR("EXP"); break;
  809. case D3DSIO_LOG: _ADDSTR("LOG"); break;
  810. case D3DSIO_EXPP: _ADDSTR("EXPP"); break;
  811. case D3DSIO_LOGP: _ADDSTR("LOGP"); break;
  812. case D3DSIO_LIT: _ADDSTR("LIT"); break;
  813. case D3DSIO_DST: _ADDSTR("DST"); break;
  814. default : _ADDSTR("???"); break;
  815. }
  816. if (*pToken & (1L<<31))
  817. {
  818. DWORD DstParam = *pToken++;
  819. switch (DstParam & D3DSP_REGTYPE_MASK)
  820. {
  821. case D3DSPR_TEMP : _ADDSTRP(" T%d", (DstParam & D3DSP_REGNUM_MASK) ); break;
  822. case D3DSPR_ADDR : _ADDSTR(" Addr"); break;
  823. case D3DSPR_RASTOUT : _ADDSTRP(" R%d", (DstParam & D3DSP_REGNUM_MASK) ); break;
  824. case D3DSPR_ATTROUT : _ADDSTRP(" A%d", (DstParam & D3DSP_REGNUM_MASK) ); break;
  825. case D3DSPR_TEXCRDOUT: _ADDSTRP(" T%d", (DstParam & D3DSP_REGNUM_MASK) ); break;
  826. }
  827. if (*pToken & (1L<<31)) _ADDSTR(" ");
  828. while (*pToken & (1L<<31))
  829. {
  830. DWORD SrcParam = *pToken++;
  831. switch (SrcParam & D3DSP_REGTYPE_MASK)
  832. {
  833. case D3DSPR_TEMP : _ADDSTRP(" T%d", (SrcParam & D3DSP_REGNUM_MASK) ); break;
  834. case D3DSPR_INPUT : _ADDSTRP(" I%d", (SrcParam & D3DSP_REGNUM_MASK) ); break;
  835. case D3DSPR_CONST : _ADDSTRP(" C%d", (SrcParam & D3DSP_REGNUM_MASK) ); break;
  836. }
  837. if (*pToken & (1L<<31)) _ADDSTR(",");
  838. }
  839. }
  840. return _snprintf( pStrRet, StrSizeRet, "%s", pStr );
  841. }
  842. //---------------------------------------------------------------------
  843. // RefVM::CompileCode
  844. // Parses binary shader representation, compiles is and returns
  845. // compiled object
  846. //---------------------------------------------------------------------
  847. #undef RET_ERR
  848. #define RET_ERR( a ) \
  849. { \
  850. DPFERR( a ); \
  851. delete pShaderCode; \
  852. return NULL; \
  853. }
  854. RDVShaderCode*
  855. RefVM::CompileCode( DWORD dwSize, LPDWORD pBits )
  856. {
  857. RDVShaderCode* pShaderCode = new RDVShaderCode();
  858. if( pShaderCode == NULL )
  859. RET_ERR( "Out of memory allocating ShaderCode" );
  860. pShaderCode->m_dwSize = dwSize >> 2; // #DWORDs
  861. pShaderCode->m_pRawBits = new DWORD[pShaderCode->m_dwSize];
  862. if( pShaderCode->m_pRawBits == NULL )
  863. RET_ERR( "Out of memory allocating RawBits" );
  864. memcpy( pShaderCode->m_pRawBits, (LPBYTE)pBits, dwSize );
  865. // Based on the what output registers are modified, we compute the
  866. // corresponding FVF id. The id will be used for memory allocation
  867. // of the output buffer and will be passed to the rasterizer
  868. UINT64 qwOutFVF = 0;
  869. DWORD nTexCoord = 0; // Number of output texture coordinates
  870. LPDWORD pEnd = NULL;
  871. // For each texture register stores the combined write mask.
  872. // Used to find how many floats are written to each texture coordinates
  873. DWORD TextureWritten[8];
  874. memset( TextureWritten, 0, sizeof(TextureWritten) );
  875. m_pCurToken = pShaderCode->m_pRawBits;
  876. pEnd = m_pCurToken + pShaderCode->m_dwSize;
  877. m_pCurToken++; // Skip the version number
  878. pShaderCode->m_InstCount = 0;
  879. while( m_pCurToken < pEnd )
  880. {
  881. if( *m_pCurToken == D3DVS_END() ) break;
  882. DWORD* pNextToken = m_pCurToken;
  883. DWORD dwInst = *m_pCurToken;
  884. DWORD dwOpCode = D3DSI_GETOPCODE(dwInst);
  885. if( *m_pCurToken == D3DVS_END() ) break;
  886. m_pCurToken++;
  887. switch( dwOpCode )
  888. {
  889. case D3DSIO_COMMENT:
  890. case D3DSIO_NOP : ; break;
  891. case D3DSIO_MOV :
  892. case D3DSIO_ADD :
  893. case D3DSIO_MAD :
  894. case D3DSIO_MUL :
  895. case D3DSIO_RCP :
  896. case D3DSIO_RSQ :
  897. case D3DSIO_DP3 :
  898. case D3DSIO_DP4 :
  899. case D3DSIO_MIN :
  900. case D3DSIO_MAX :
  901. case D3DSIO_SLT :
  902. case D3DSIO_SGE :
  903. case D3DSIO_EXP :
  904. case D3DSIO_LOG :
  905. case D3DSIO_EXPP :
  906. case D3DSIO_LOGP :
  907. case D3DSIO_LIT :
  908. case D3DSIO_DST :
  909. case D3DSIO_FRC :
  910. case D3DSIO_M4x4 :
  911. case D3DSIO_M4x3 :
  912. case D3DSIO_M3x4 :
  913. case D3DSIO_M3x3 :
  914. case D3DSIO_M3x2 :
  915. {
  916. // Find out if output register are modified by the command and
  917. // update the output FVF
  918. DWORD dwOffset;
  919. if( FAILED( SetDestReg() ) )
  920. RET_ERR( "Invalid shader opcode" );
  921. RDVECTOR4* m_pOutRegister = NULL;
  922. if( m_pDest - m_dwRegOffset != m_reg.m_t )
  923. {
  924. dwOffset = m_dwRegOffset;
  925. m_pOutRegister = m_pDest - m_dwRegOffset;
  926. }
  927. else
  928. break; // Output register is not modified
  929. if( m_pOutRegister == m_reg.m_out )
  930. {
  931. if (dwOffset == D3DSRO_POSITION)
  932. {
  933. qwOutFVF |= D3DFVF_XYZRHW;
  934. }
  935. else if (dwOffset == D3DSRO_FOG)
  936. {
  937. qwOutFVF |= D3DFVFP_FOG;
  938. }
  939. else if (dwOffset == D3DSRO_POINT_SIZE)
  940. {
  941. qwOutFVF |= D3DFVF_PSIZE;
  942. }
  943. }
  944. else if( m_pOutRegister == m_reg.m_col )
  945. {
  946. if( dwOffset == 0 )
  947. {
  948. qwOutFVF |= D3DFVF_DIFFUSE;
  949. }
  950. else
  951. {
  952. qwOutFVF |= D3DFVF_SPECULAR;
  953. }
  954. }
  955. else if( m_pOutRegister == m_reg.m_tex )
  956. {
  957. if( TextureWritten[dwOffset] == 0 )
  958. {
  959. nTexCoord++;
  960. }
  961. TextureWritten[dwOffset] |= m_WriteMask;
  962. }
  963. else if( m_pOutRegister != m_reg.m_a )
  964. RET_ERR( "Invalid output register offset" );
  965. }
  966. break;
  967. default:
  968. RET_ERR( "Invalid shader opcode" );
  969. }
  970. pShaderCode->m_InstCount++;
  971. m_pCurToken = pNextToken + GetInstructionLength(dwInst);
  972. }
  973. // allocate and set instruction array
  974. if (pShaderCode->m_InstCount)
  975. {
  976. pShaderCode->m_pInst = new RDVShaderInst[pShaderCode->m_InstCount];
  977. if( pShaderCode->m_pInst == NULL )
  978. RET_ERR( "Out of memory allocating Instructions" );
  979. memset( pShaderCode->m_pInst, 0,
  980. sizeof(RDVShaderInst)*pShaderCode->m_InstCount );
  981. DWORD dwCurInst = 0;
  982. m_pCurToken = pShaderCode->m_pRawBits;
  983. pEnd = m_pCurToken + pShaderCode->m_dwSize;
  984. m_pCurToken++;
  985. while( m_pCurToken < pEnd )
  986. {
  987. DWORD dwInst = *m_pCurToken;
  988. DWORD dwOpCode = D3DSI_GETOPCODE( dwInst );
  989. if( *m_pCurToken == D3DVS_END() ) break;
  990. UINT ilength = GetInstructionLength( dwInst );
  991. if (dwOpCode == D3DSIO_COMMENT)
  992. {
  993. pShaderCode->m_pInst[dwCurInst].m_Tokens[0] = dwInst;
  994. pShaderCode->m_pInst[dwCurInst].m_pComment = (m_pCurToken+1);
  995. pShaderCode->m_pInst[dwCurInst].m_CommentSize = ilength - 1;
  996. }
  997. else
  998. {
  999. memcpy( pShaderCode->m_pInst[dwCurInst].m_Tokens, m_pCurToken,
  1000. 4*ilength );
  1001. VertexShaderInstDisAsm( pShaderCode->m_pInst[dwCurInst].m_String,
  1002. RD_MAX_SHADERINSTSTRING, pShaderCode->m_pInst[dwCurInst].m_Tokens, 0x0 );
  1003. }
  1004. m_pCurToken += ilength;
  1005. dwCurInst++;
  1006. }
  1007. }
  1008. qwOutFVF |= nTexCoord << D3DFVF_TEXCOUNT_SHIFT;
  1009. if( nTexCoord )
  1010. {
  1011. for( DWORD i = 0; i < nTexCoord; i++ )
  1012. {
  1013. if( TextureWritten[i] == 0 )
  1014. RET_ERR( "Texture coordinates are not continuous" );
  1015. switch( TextureWritten[i] )
  1016. {
  1017. case D3DSP_WRITEMASK_ALL:
  1018. qwOutFVF |= D3DFVF_TEXCOORDSIZE4(i);
  1019. break;
  1020. case D3DSP_WRITEMASK_0 | D3DSP_WRITEMASK_1 | D3DSP_WRITEMASK_2:
  1021. qwOutFVF |= D3DFVF_TEXCOORDSIZE3(i);
  1022. break;
  1023. case D3DSP_WRITEMASK_0 | D3DSP_WRITEMASK_1:
  1024. qwOutFVF |= D3DFVF_TEXCOORDSIZE2(i);
  1025. break;
  1026. case D3DSP_WRITEMASK_0:
  1027. qwOutFVF |= D3DFVF_TEXCOORDSIZE1(i);
  1028. break;
  1029. default:
  1030. RET_ERR( "Invalid write mask for texture register" );
  1031. }
  1032. }
  1033. }
  1034. pShaderCode->m_qwFVFOut = qwOutFVF;
  1035. return pShaderCode;
  1036. }
  1037. ///////////////////////////////////////////////////////////////////////////////
  1038. //
  1039. // RefDev implementation.
  1040. //
  1041. ///////////////////////////////////////////////////////////////////////////////
  1042. //---------------------------------------------------------------------
  1043. // RefDev::ProcessPrimitiveVVM()
  1044. // Processess and draw the current primitive using the VVM
  1045. //---------------------------------------------------------------------
  1046. HRESULT
  1047. RefDev::ProcessPrimitiveVVM( D3DPRIMITIVETYPE primType,
  1048. DWORD dwStartVertex,
  1049. DWORD cVertices,
  1050. DWORD dwStartIndex,
  1051. DWORD cIndices )
  1052. {
  1053. HRESULT hr = S_OK;
  1054. RDCLIPCODE clipIntersection = ~0;
  1055. RDCLIPCODE clipUnion = 0;
  1056. // Save Prim Type for later use
  1057. m_primType = primType;
  1058. m_dwNumVertices = cVertices;
  1059. m_dwStartVertex = dwStartVertex;
  1060. m_dwNumIndices = cIndices;
  1061. m_dwStartIndex = dwStartIndex;
  1062. RDVDeclaration* pDecl = &(m_pCurrentVShader->m_Declaration);
  1063. RDVShaderCode* pCode = m_pCurrentVShader->m_pCode;
  1064. RDVVMREG* pRegisters = m_RefVM.GetRegisters();
  1065. // Output FVF that was computed at the compile time
  1066. m_qwFVFOut = pCode->m_qwFVFOut;
  1067. //
  1068. // Clipping information depends both on the output FVF computation
  1069. // and the other State, so do it here after both have been computed
  1070. //
  1071. HR_RET( UpdateClipper());
  1072. D3DVALUE scaleX = m_Clipper.scaleX;
  1073. D3DVALUE scaleY = m_Clipper.scaleY;
  1074. D3DVALUE scaleZ = m_Clipper.scaleZ;
  1075. D3DVALUE offsetX = m_Clipper.offsetX;
  1076. D3DVALUE offsetY = m_Clipper.offsetY;
  1077. D3DVALUE offsetZ = m_Clipper.offsetZ;
  1078. //
  1079. // Grow buffers to the requisite size
  1080. //
  1081. // Grow TLVArray if required
  1082. if( FAILED( this->m_TLVArray.Grow( m_dwNumVertices ) ) )
  1083. {
  1084. DPFERR( "Could not grow TL vertex buffer" );
  1085. hr = DDERR_OUTOFMEMORY;
  1086. return hr;
  1087. }
  1088. //
  1089. // Process Vertices
  1090. //
  1091. for( DWORD i = 0; i < m_dwNumVertices; i++ )
  1092. {
  1093. RDVertex& Vout = m_TLVArray[i];
  1094. Vout.SetFVF( pCode->m_qwFVFOut | D3DFVFP_CLIP );
  1095. // Copy vertex elements to the input vertex registers
  1096. for( DWORD j = 0; j < pDecl->m_dwNumElements; j++ )
  1097. {
  1098. RDVElement& ve = pDecl->m_VertexElements[j];
  1099. RDVStream* pStream = &m_VStream[ve.m_dwStreamIndex];
  1100. LPBYTE pData = (LPBYTE)pStream->m_pData + ve.m_dwOffset +
  1101. pStream->m_dwStride * (m_dwStartVertex + i);
  1102. RDVECTOR4* pReg = m_RefVM.GetDataAddr( D3DSPR_INPUT,
  1103. ve.m_dwRegister );
  1104. ve.m_pfnCopy( pData, pReg );
  1105. }
  1106. if (m_pDbgMon) m_pDbgMon->NextEvent( D3DDM_EVENT_VERTEX );
  1107. // Execute the shader
  1108. m_RefVM.ExecuteShader(this);
  1109. // Get the result from the output VVM registers
  1110. float x, y, z, w, inv_w_clip = 0.0f;
  1111. w = pRegisters->m_out[D3DSRO_POSITION].w;
  1112. z = pRegisters->m_out[D3DSRO_POSITION].z;
  1113. // Make clipping rules 0 < x < w; 0 < y < w
  1114. x = (pRegisters->m_out[D3DSRO_POSITION].x + w) * 0.5f;
  1115. y = (pRegisters->m_out[D3DSRO_POSITION].y + w) * 0.5f;
  1116. // Save the clip coordinates
  1117. Vout.m_clip_x = x;
  1118. Vout.m_clip_y = y;
  1119. Vout.m_clip_z = z;
  1120. Vout.m_clip_w = w;
  1121. //
  1122. // Compute clip codes if needed
  1123. //
  1124. if( GetRS()[D3DRENDERSTATE_CLIPPING] )
  1125. {
  1126. RDCLIPCODE clip = m_Clipper.ComputeClipCodes(
  1127. &clipIntersection, &clipUnion, x, y, z, w);
  1128. if( clip == 0 )
  1129. {
  1130. Vout.m_clip = 0;
  1131. inv_w_clip = 1.0f/w;
  1132. }
  1133. else
  1134. {
  1135. if( m_Clipper.UseGuardBand() )
  1136. {
  1137. if( (clip & ~RDCLIP_INGUARDBAND) == 0 )
  1138. {
  1139. // If vertex is inside the guardband we have to compute
  1140. // screen coordinates
  1141. inv_w_clip = 1.0f/w;
  1142. Vout.m_clip = (RDCLIPCODE)clip;
  1143. goto l_DoScreenCoord;
  1144. }
  1145. }
  1146. Vout.m_clip = (RDCLIPCODE)clip;
  1147. // If vertex is outside the frustum we can not compute screen
  1148. // coordinates, hence store the clip coordinates
  1149. #if 0
  1150. Vout.m_pos.x = x;
  1151. Vout.m_pos.y = y;
  1152. Vout.m_pos.z = z;
  1153. Vout.m_rhw = w;
  1154. #endif
  1155. goto l_DoLighting;
  1156. }
  1157. }
  1158. else
  1159. {
  1160. // We have to check this only for DONOTCLIP case, because otherwise
  1161. // the vertex with "we = 0" will be clipped and screen coordinates
  1162. // will not be computed
  1163. // "clip" is not zero, if "we" is zero.
  1164. if( !FLOAT_EQZ(w) )
  1165. inv_w_clip = D3DVAL(1)/w;
  1166. else
  1167. inv_w_clip = __HUGE_PWR2;
  1168. }
  1169. l_DoScreenCoord:
  1170. Vout.m_pos.x = x * inv_w_clip * scaleX + offsetX;
  1171. Vout.m_pos.y = y * inv_w_clip * scaleY + offsetY;
  1172. Vout.m_pos.z = z * inv_w_clip * scaleZ + offsetZ;
  1173. Vout.m_rhw = inv_w_clip;
  1174. l_DoLighting:
  1175. if( m_qwFVFOut & D3DFVF_DIFFUSE )
  1176. {
  1177. // Clamp the colors before copying.
  1178. if( FLOAT_LTZ(pRegisters->m_col[0].a) )
  1179. pRegisters->m_col[0].a = 0.0f;
  1180. else if( FLOAT_CMP_PONE(pRegisters->m_col[0].a, >) )
  1181. pRegisters->m_col[0].a = 1.0f;
  1182. if( FLOAT_LTZ(pRegisters->m_col[0].r) )
  1183. pRegisters->m_col[0].r = 0.0f;
  1184. else if( FLOAT_CMP_PONE(pRegisters->m_col[0].r, >) )
  1185. pRegisters->m_col[0].r = 1.0f;
  1186. if( FLOAT_LTZ(pRegisters->m_col[0].g) )
  1187. pRegisters->m_col[0].g = 0.0f;
  1188. else if( FLOAT_CMP_PONE(pRegisters->m_col[0].g, >) )
  1189. pRegisters->m_col[0].g = 1.0f;
  1190. if( FLOAT_LTZ(pRegisters->m_col[0].b) )
  1191. pRegisters->m_col[0].b = 0.0f;
  1192. else if( FLOAT_CMP_PONE(pRegisters->m_col[0].b, >) )
  1193. pRegisters->m_col[0].b = 1.0f;
  1194. memcpy( &Vout.m_diffuse,&(pRegisters->m_col[0]),
  1195. sizeof(RDVECTOR4) );
  1196. }
  1197. if( m_qwFVFOut & D3DFVF_SPECULAR )
  1198. {
  1199. if( FLOAT_LTZ(pRegisters->m_col[1].a) )
  1200. pRegisters->m_col[1].a = 0.0f;
  1201. else if( FLOAT_CMP_PONE(pRegisters->m_col[1].a, >) )
  1202. pRegisters->m_col[1].a = 1.0f;
  1203. if( FLOAT_LTZ(pRegisters->m_col[1].r) )
  1204. pRegisters->m_col[1].r = 0.0f;
  1205. else if( FLOAT_CMP_PONE(pRegisters->m_col[1].r, >) )
  1206. pRegisters->m_col[1].r = 1.0f;
  1207. if( FLOAT_LTZ(pRegisters->m_col[1].g) )
  1208. pRegisters->m_col[1].g = 0.0f;
  1209. else if( FLOAT_CMP_PONE(pRegisters->m_col[1].g, >) )
  1210. pRegisters->m_col[1].g = 1.0f;
  1211. if( FLOAT_LTZ(pRegisters->m_col[1].b) )
  1212. pRegisters->m_col[1].b = 0.0f;
  1213. else if( FLOAT_CMP_PONE(pRegisters->m_col[1].b, >) )
  1214. pRegisters->m_col[1].b = 1.0f;
  1215. memcpy( &Vout.m_specular,&(pRegisters->m_col[1]),
  1216. sizeof(RDVECTOR4) );
  1217. }
  1218. if( m_qwFVFOut & D3DFVFP_FOG )
  1219. {
  1220. if( FLOAT_LTZ(pRegisters->m_out[D3DSRO_FOG].x) )
  1221. pRegisters->m_out[D3DSRO_FOG].x = 0.0f;
  1222. if( FLOAT_CMP_PONE(pRegisters->m_out[D3DSRO_FOG].x, >) )
  1223. pRegisters->m_out[D3DSRO_FOG].x = 1.0f;
  1224. Vout.m_fog = pRegisters->m_out[D3DSRO_FOG].x;
  1225. }
  1226. // Copy the textures over
  1227. if( m_qwFVFOut & D3DFVF_PSIZE )
  1228. {
  1229. Vout.m_pointsize = pRegisters->m_out[D3DSRO_POINT_SIZE].x;
  1230. }
  1231. // Copy the textures over
  1232. {
  1233. DWORD i, j;
  1234. DWORD numTex = FVF_TEXCOORD_NUMBER(m_qwFVFOut);
  1235. for( i = 0; i < numTex; i++ )
  1236. {
  1237. DWORD n = GetTexCoordDim( m_qwFVFOut, i );
  1238. // DWORD n = (DWORD)(m_dwTexCoordSizeArray[i] >> 2);
  1239. float *pCoordDest = (float *)&Vout.m_tex[i];
  1240. float *pCoordSrc = (float *)&pRegisters->m_tex[i];
  1241. for( j = 0; j < n; j++ )
  1242. {
  1243. pCoordDest[j] = pCoordSrc[j];
  1244. }
  1245. }
  1246. }
  1247. }
  1248. if( GetRS()[D3DRENDERSTATE_CLIPPING] )
  1249. {
  1250. m_Clipper.m_clipIntersection = clipIntersection;
  1251. m_Clipper.m_clipUnion = clipUnion;
  1252. }
  1253. else
  1254. {
  1255. m_Clipper.m_clipIntersection = 0;
  1256. m_Clipper.m_clipUnion = 0;
  1257. }
  1258. //
  1259. // Clip and Draw the primitives
  1260. //
  1261. if( m_dwNumIndices )
  1262. {
  1263. if( !NeedClipping((m_Clipper.UseGuardBand()), m_Clipper.m_clipUnion) )
  1264. {
  1265. if( m_IndexStream.m_dwStride == 4 )
  1266. hr = DrawOneIndexedPrimitive(
  1267. m_TLVArray,
  1268. 0,
  1269. (LPDWORD)m_IndexStream.m_pData,
  1270. m_dwStartIndex,
  1271. m_dwNumIndices,
  1272. m_primType );
  1273. else
  1274. hr = DrawOneIndexedPrimitive(
  1275. m_TLVArray,
  1276. 0,
  1277. (LPWORD)m_IndexStream.m_pData,
  1278. m_dwStartIndex,
  1279. m_dwNumIndices,
  1280. m_primType );
  1281. }
  1282. else
  1283. {
  1284. if( m_IndexStream.m_dwStride == 4 )
  1285. hr = m_Clipper.DrawOneIndexedPrimitive(
  1286. m_TLVArray,
  1287. 0,
  1288. (LPDWORD)m_IndexStream.m_pData,
  1289. m_dwStartIndex,
  1290. m_dwNumIndices,
  1291. m_primType );
  1292. else
  1293. hr = m_Clipper.DrawOneIndexedPrimitive(
  1294. m_TLVArray,
  1295. 0,
  1296. (LPWORD)m_IndexStream.m_pData,
  1297. m_dwStartIndex,
  1298. m_dwNumIndices,
  1299. m_primType );
  1300. }
  1301. }
  1302. else
  1303. {
  1304. if( !NeedClipping((m_Clipper.UseGuardBand()), m_Clipper.m_clipUnion) )
  1305. {
  1306. hr = DrawOnePrimitive(
  1307. m_TLVArray,
  1308. 0,
  1309. m_primType,
  1310. m_dwNumVertices );
  1311. }
  1312. else
  1313. {
  1314. hr = m_Clipper.DrawOnePrimitive(
  1315. m_TLVArray,
  1316. 0,
  1317. m_primType,
  1318. m_dwNumVertices );
  1319. }
  1320. }
  1321. return hr;
  1322. }