Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1681 lines
84 KiB

  1. ///////////////////////////////////////////////////////////////////////////////
  2. // Copyright (C) Microsoft Corporation, 2000.
  3. //
  4. // pixshade.cpp
  5. //
  6. // Direct3D Reference Device - Pixel Shader
  7. //
  8. ///////////////////////////////////////////////////////////////////////////////
  9. #include "pch.cpp"
  10. #pragma hdrstop
  11. //-----------------------------------------------------------------------------
  12. RDPShader::RDPShader(void)
  13. {
  14. m_pRD = NULL;
  15. m_pCode = NULL;
  16. m_CodeSize = 0;
  17. m_cActiveTextureStages = 0;
  18. m_ReferencedTexCoordMask = 0;
  19. m_cInst = 0;
  20. m_pInst = NULL;
  21. m_cConstDefs = 0;
  22. m_pConstDefs = NULL;
  23. }
  24. //-----------------------------------------------------------------------------
  25. RDPShader::~RDPShader()
  26. {
  27. if (NULL != m_pCode) delete[] m_pCode;
  28. if (NULL != m_pInst) delete[] m_pInst;
  29. if (NULL != m_pConstDefs) delete[] m_pConstDefs;
  30. }
  31. #define _DWordCount() (pToken - pCode)
  32. #define _RegisterNeedsToBeInitializedWithTexcoords(Reg) (*pReferencedTexCoordMask)|=(1<<Reg);
  33. //-----------------------------------------------------------------------------
  34. //
  35. // UpdateReferencedTexCoords
  36. //
  37. // Called for each instruction while parsing a pixel shader whose version is later than ps.1.3 (e.g. ps.1.4).
  38. // Updates pReferencedTexCoordMask (bitfield) to represent
  39. // which texture coordinate sets are actually used by the shader.
  40. // This is used to eliminate unnecessary attribute setup/sampling during
  41. // primitive rasterization.
  42. //
  43. //-----------------------------------------------------------------------------
  44. void UpdateReferencedTexCoords(PixelShaderInstruction* pInst,
  45. DWORD* pReferencedTexCoordMask )
  46. {
  47. switch( pInst->Opcode & D3DSI_OPCODE_MASK )
  48. {
  49. case D3DSIO_TEX:
  50. case D3DSIO_TEXCOORD:
  51. case D3DSIO_TEXDEPTH:
  52. {
  53. for( UINT i = 0; i < 3; i++ )
  54. {
  55. UINT RegNum = pInst->SrcParam[i] & 0xFF;
  56. if( D3DSPR_TEXTURE == (pInst->SrcParam[i] & D3DSP_REGTYPE_MASK) )
  57. _RegisterNeedsToBeInitializedWithTexcoords(RegNum);
  58. }
  59. }
  60. break;
  61. case D3DSIO_TEXKILL: // treat dest param as source
  62. {
  63. UINT RegNum = pInst->DstParam & 0xFF;
  64. if( D3DSPR_TEXTURE == (pInst->DstParam & D3DSP_REGTYPE_MASK) )
  65. _RegisterNeedsToBeInitializedWithTexcoords(RegNum);
  66. }
  67. break;
  68. }
  69. }
  70. void CalculateSourceReadMasks(PixelShaderInstruction* pInst, BYTE* pSourceReadMasks, BOOL bAfterSwizzle, DWORD dwVersion)
  71. {
  72. UINT i, j;
  73. DWORD Opcode = pInst->Opcode & D3DSI_OPCODE_MASK;
  74. BYTE ComponentMask[4]= {RDPS_COMPONENTMASK_0, RDPS_COMPONENTMASK_1, RDPS_COMPONENTMASK_2, RDPS_COMPONENTMASK_3};
  75. for( i = 0; i < pInst->SrcParamCount; i++ )
  76. {
  77. BYTE NeededComponents;
  78. BYTE ReadComponents = 0;
  79. switch( Opcode )
  80. {
  81. case D3DSIO_TEX: // only in ps.1.4 does texld have source parameter
  82. if( D3DPS_VERSION(1,4) == dwVersion )
  83. {
  84. // for ps.1.4, texld has a source parameter
  85. NeededComponents = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 | RDPS_COMPONENTMASK_2;
  86. }
  87. else // versions < ps.1.4 don't have a src param on tex, so we shouldn't get here. But maybe in ps.2.0...
  88. {
  89. NeededComponents = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 | RDPS_COMPONENTMASK_2 | RDPS_COMPONENTMASK_3;
  90. }
  91. break;
  92. case D3DSIO_TEXCOORD:
  93. if( D3DPS_VERSION(1,4) == dwVersion )
  94. {
  95. // for ps.1.4, texcrd has a source parameter
  96. NeededComponents = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 | RDPS_COMPONENTMASK_2;
  97. }
  98. else // versions < ps.1.4 don't have a src param on texcoord, so we shouldn't get here. But maybe in ps.2.0...
  99. {
  100. NeededComponents = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 | RDPS_COMPONENTMASK_2 | RDPS_COMPONENTMASK_3;
  101. }
  102. break;
  103. case D3DSIO_TEXBEM:
  104. case D3DSIO_TEXBEML:
  105. NeededComponents = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1;
  106. break;
  107. case D3DSIO_DP3:
  108. NeededComponents = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 | RDPS_COMPONENTMASK_2;
  109. break;
  110. case D3DSIO_DP4:
  111. NeededComponents = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 | RDPS_COMPONENTMASK_2 | RDPS_COMPONENTMASK_3;
  112. break;
  113. case D3DSIO_BEM: // ps.1.4
  114. NeededComponents = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1;
  115. break;
  116. default:
  117. // standard component-wise instruction,
  118. // OR an op we know reads .rgba and we also know it will be validated to .rgba writemask
  119. NeededComponents = (pInst->DstParam & D3DSP_WRITEMASK_ALL) >> RDPS_COMPONENTMASK_SHIFT;
  120. break;
  121. }
  122. if( bAfterSwizzle )
  123. {
  124. pSourceReadMasks[i] = NeededComponents;
  125. }
  126. else
  127. {
  128. // Figure out which components of this source parameter are read (taking into account swizzle)
  129. for(j = 0; j < 4; j++)
  130. {
  131. if( NeededComponents & ComponentMask[j] )
  132. ReadComponents |= ComponentMask[((pInst->SrcParam[i] & D3DSP_SWIZZLE_MASK) >> (D3DVS_SWIZZLE_SHIFT + 2*j)) & 0x3];
  133. }
  134. pSourceReadMasks[i] = ReadComponents;
  135. }
  136. }
  137. }
  138. void RDPSRegister::Set(RDPS_REGISTER_TYPE RegType, UINT RegNum, RefRast* pRast)
  139. {
  140. m_RegType = RegType;
  141. m_RegNum = RegNum;
  142. UINT MaxRegNum = 0;
  143. switch( RegType )
  144. {
  145. case RDPSREG_INPUT:
  146. MaxRegNum = RDPS_MAX_NUMINPUTREG - 1;
  147. m_pReg = pRast->m_InputReg[RegNum];
  148. break;
  149. case RDPSREG_TEMP:
  150. MaxRegNum = RDPS_MAX_NUMTEMPREG - 1;
  151. m_pReg = pRast->m_TempReg[RegNum];
  152. break;
  153. case RDPSREG_CONST:
  154. MaxRegNum = RDPS_MAX_NUMCONSTREG - 1;
  155. m_pReg = pRast->m_ConstReg[RegNum];
  156. break;
  157. case RDPSREG_TEXTURE:
  158. MaxRegNum = RDPS_MAX_NUMTEXTUREREG - 1;
  159. m_pReg = pRast->m_TextReg[RegNum];
  160. break;
  161. case RDPSREG_POSTMODSRC:
  162. MaxRegNum = RDPS_MAX_NUMPOSTMODSRCREG - 1;
  163. m_pReg = pRast->m_PostModSrcReg[RegNum];
  164. break;
  165. case RDPSREG_SCRATCH:
  166. MaxRegNum = RDPS_MAX_NUMSCRATCHREG - 1;
  167. m_pReg = pRast->m_ScratchReg[RegNum];
  168. break;
  169. case RDPSREG_QUEUEDWRITE:
  170. MaxRegNum = RDPS_MAX_NUMQUEUEDWRITEREG - 1;
  171. m_pReg = pRast->m_QueuedWriteReg[RegNum];
  172. break;
  173. case RDPSREG_ZERO:
  174. MaxRegNum = 0;
  175. m_pReg = pRast->m_ZeroReg;
  176. break;
  177. case RDPSREG_ONE:
  178. MaxRegNum = 0;
  179. m_pReg = pRast->m_OneReg;
  180. break;
  181. case RDPSREG_TWO:
  182. MaxRegNum = 0;
  183. m_pReg = pRast->m_TwoReg;
  184. break;
  185. default:
  186. m_pReg = NULL;
  187. _ASSERT(FALSE,"RDPSRegister::SetReg - Unknown register type.");
  188. break;
  189. }
  190. if( RegNum > MaxRegNum )
  191. {
  192. _ASSERT(FALSE,"RDPSRegister::SetReg - Register number too high.");
  193. }
  194. return;
  195. }
  196. //-----------------------------------------------------------------------------
  197. //
  198. // Initialize
  199. //
  200. // - Copies pixel shader token stream from DDI token stream.
  201. // - Counts the number of active texture stages for m_cActiveTextureStages.
  202. // - Translates shader into "RISC" instruction set to be executed
  203. // by refrast's shader VM
  204. //
  205. //-----------------------------------------------------------------------------
  206. HRESULT
  207. RDPShader::Initialize(
  208. RefDev* pRD, DWORD* pCode, DWORD ByteCodeSize, D3DCAPS8* pCaps )
  209. {
  210. m_pRD = pRD;
  211. m_CodeSize = ByteCodeSize/4; // bytecount -> dword count
  212. FLOAT fMin = -(pCaps->MaxPixelShaderValue);
  213. FLOAT fMax = (pCaps->MaxPixelShaderValue);
  214. // ------------------------------------------------------------------------
  215. //
  216. // First pass through shader to find the number of instructions,
  217. // figure out how many constants there are.
  218. //
  219. // ------------------------------------------------------------------------
  220. {
  221. DWORD* pToken = pCode;
  222. pToken++; // version token
  223. while (*pToken != D3DPS_END())
  224. {
  225. DWORD Inst = *pToken;
  226. if (*pToken++ & (1L<<31)) // instruction token
  227. {
  228. DPFERR("PixelShader Token #%d: instruction token error",_DWordCount());
  229. return E_FAIL;
  230. }
  231. if ( (Inst & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT )
  232. {
  233. pToken += (Inst & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
  234. m_cInst++;
  235. }
  236. else if( (Inst & D3DSI_OPCODE_MASK) == D3DSIO_DEF )
  237. {
  238. m_cConstDefs++;
  239. pToken += 5;
  240. }
  241. else
  242. {
  243. if (*pToken & (1L<<31)) pToken++; // destination param token
  244. while (*pToken & (1L<<31)) pToken++; // source param tokens
  245. m_cInst++;
  246. }
  247. if (_DWordCount() > (int)m_CodeSize)
  248. {
  249. DPFERR("PixelShader(%d tokens, %d expected): count error",_DWordCount(),m_CodeSize);
  250. return E_FAIL;
  251. }
  252. }
  253. pToken++; // step over END token
  254. if (_DWordCount() != (int)m_CodeSize)
  255. {
  256. DPFERR("PixelShader(%d tokens, %d expected): count error",_DWordCount(),m_CodeSize);
  257. return E_FAIL;
  258. }
  259. // make copy of original shader
  260. m_pCode = new DWORD[m_CodeSize];
  261. if (NULL == m_pCode)
  262. return E_OUTOFMEMORY;
  263. memcpy( m_pCode, pCode, ByteCodeSize );
  264. // allocate instruction array
  265. m_pInst = new PixelShaderInstruction[m_cInst];
  266. if (NULL == m_pInst)
  267. return E_OUTOFMEMORY;
  268. memset( m_pInst, 0x0, sizeof(PixelShaderInstruction)*m_cInst );
  269. m_pConstDefs = new ConstDef[m_cConstDefs];
  270. if (NULL == m_pConstDefs)
  271. return E_OUTOFMEMORY;
  272. }
  273. // ------------------------------------------------------------------------
  274. //
  275. // Second pass through shader to:
  276. // - produce a list of instructions, each one including opcodes,
  277. // comments, and disassembled text for access by shader debuggers.
  278. // - figure out the TSS # used (if any) by each instruction
  279. // - figure out the max texture stage # used
  280. // - figure out when the ref. pixel shader executor should
  281. // queue writes up and when to flush the queue, in order to
  282. // simulate co-issue.
  283. // - figure out which texture coordinate sets get used
  284. // - process constant DEF instructions into a list that can be
  285. // executed whenever SetPixelShader is done.
  286. //
  287. // ------------------------------------------------------------------------
  288. {
  289. DWORD* pToken = m_pCode;
  290. PixelShaderInstruction* pInst = m_pInst;
  291. PixelShaderInstruction* pPrevious_NonTrivial_Inst = NULL;
  292. pToken++; // skip over version
  293. BOOL bMinimizeReferencedTexCoords;
  294. if( (D3DPS_VERSION(1,3) >= *pCode) ||
  295. (D3DPS_VERSION(254,254) == *pCode ) )//legacy
  296. {
  297. bMinimizeReferencedTexCoords = FALSE;
  298. }
  299. else
  300. {
  301. bMinimizeReferencedTexCoords = TRUE;
  302. }
  303. UINT CurrConstDef = 0;
  304. while (*pToken != D3DPS_END())
  305. {
  306. switch( (*pToken) & D3DSI_OPCODE_MASK )
  307. {
  308. case D3DSIO_COMMENT:
  309. pInst->Opcode = *pToken;
  310. pInst->pComment = (pToken+1);
  311. pInst->CommentSize = ((*pToken) & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
  312. pToken += (pInst->CommentSize+1);
  313. pInst++;
  314. continue;
  315. case D3DSIO_DEF:
  316. {
  317. pToken++;
  318. m_pConstDefs[CurrConstDef].RegNum = (*pToken++) & D3DSP_REGNUM_MASK;
  319. // clamp constants on input to range of values in pixel shaders
  320. for( UINT i = 0; i < 4; i++ )
  321. {
  322. m_pConstDefs[CurrConstDef].f[i] = MAX( fMin, MIN( fMax, *(FLOAT*)pToken));
  323. pToken++;
  324. }
  325. CurrConstDef++;
  326. continue;
  327. }
  328. case D3DSIO_NOP:
  329. // get disasm string
  330. #if DBG
  331. PixelShaderInstDisAsm( pInst->Text, 128, pToken, 0x0 );
  332. #else // !DBG
  333. pInst->Text[ 0 ] = '\0';
  334. #endif // !DBG
  335. pInst->Opcode = *pToken++;
  336. pInst++;
  337. continue;
  338. }
  339. // get disasm string
  340. #if DBG
  341. PixelShaderInstDisAsm( pInst->Text, 128, pToken, 0x0 );
  342. #else // !DBG
  343. pInst->Text[ 0 ] = '\0';
  344. #endif // !DBG
  345. // get next instruction and parameters
  346. pInst->Opcode = *pToken++;
  347. pInst->SrcParamCount = 0;
  348. if (*pToken & (1L<<31))
  349. {
  350. pInst->DstParam = *pToken++;
  351. }
  352. while (*pToken & (1L<<31))
  353. {
  354. pInst->SrcParam[pInst->SrcParamCount++] = *pToken++;
  355. }
  356. // process TEX ops
  357. //
  358. BOOL bLegacyTexOp = FALSE;
  359. switch (pInst->Opcode & D3DSI_OPCODE_MASK)
  360. {
  361. default: break;
  362. case D3DSIO_TEXBEM_LEGACY:
  363. case D3DSIO_TEXBEML_LEGACY:
  364. bLegacyTexOp = TRUE;
  365. // fall through
  366. case D3DSIO_TEXCOORD:
  367. case D3DSIO_TEXKILL:
  368. case D3DSIO_TEX:
  369. case D3DSIO_TEXBEM:
  370. case D3DSIO_TEXBEML:
  371. case D3DSIO_TEXREG2AR:
  372. case D3DSIO_TEXREG2GB:
  373. case D3DSIO_TEXM3x2PAD:
  374. case D3DSIO_TEXM3x2TEX:
  375. case D3DSIO_TEXM3x3PAD:
  376. case D3DSIO_TEXM3x3TEX:
  377. case D3DSIO_TEXM3x3SPEC:
  378. case D3DSIO_TEXM3x3VSPEC:
  379. case D3DSIO_TEXM3x2DEPTH:
  380. case D3DSIO_TEXDP3:
  381. case D3DSIO_TEXREG2RGB:
  382. case D3DSIO_TEXDEPTH:
  383. case D3DSIO_TEXDP3TEX:
  384. case D3DSIO_TEXM3x3:
  385. pInst->bTexOp = TRUE;
  386. break;
  387. }
  388. if (pInst->bTexOp)
  389. {
  390. // update stage count and assign ptr to TSS for this op
  391. if (bLegacyTexOp)
  392. {
  393. m_cActiveTextureStages =
  394. max(m_cActiveTextureStages,(pInst->DstParam&D3DSP_REGNUM_MASK)+1);
  395. pInst->uiTSSNum = (pInst->DstParam&D3DSP_REGNUM_MASK)-1;
  396. }
  397. else
  398. {
  399. UINT Stage;
  400. BOOL bStageUsed = TRUE;
  401. switch(pInst->Opcode & D3DSI_OPCODE_MASK)
  402. {
  403. case D3DSIO_TEXCOORD:
  404. case D3DSIO_TEXDEPTH:
  405. case D3DSIO_TEXKILL:
  406. if( bMinimizeReferencedTexCoords )
  407. {
  408. bStageUsed = FALSE;
  409. break;
  410. }
  411. // falling through
  412. case D3DSIO_TEX:
  413. default:
  414. Stage = pInst->DstParam&D3DSP_REGNUM_MASK;
  415. break;
  416. }
  417. if( bStageUsed )
  418. {
  419. m_cActiveTextureStages = max(m_cActiveTextureStages,Stage+1);
  420. pInst->uiTSSNum = Stage;
  421. }
  422. }
  423. }
  424. if( pPrevious_NonTrivial_Inst )
  425. {
  426. // Queue write of last instruction if the current instruction has the
  427. // COISSUE flag.
  428. if( pInst->Opcode & D3DSI_COISSUE )
  429. {
  430. pPrevious_NonTrivial_Inst->bQueueWrite = TRUE;
  431. }
  432. // Flush writes after the previous instruction if it had the COISSUE
  433. // flag and the current instruction doesn't have it.
  434. if( !(pInst->Opcode & D3DSI_COISSUE) && (pPrevious_NonTrivial_Inst->Opcode & D3DSI_COISSUE) )
  435. {
  436. pPrevious_NonTrivial_Inst->bFlushQueue = TRUE;
  437. }
  438. }
  439. pPrevious_NonTrivial_Inst = pInst;
  440. if( bMinimizeReferencedTexCoords )
  441. {
  442. UpdateReferencedTexCoords(pInst, &m_ReferencedTexCoordMask);
  443. }
  444. pInst++;
  445. }
  446. if( !bMinimizeReferencedTexCoords )
  447. {
  448. m_ReferencedTexCoordMask = (1<<m_cActiveTextureStages) - 1;
  449. }
  450. }
  451. // ------------------------------------------------------------------------
  452. //
  453. // Third pass through the shader (through the list of instructions made
  454. // in the last pass) to translate instructions into a more basic ("RISC")
  455. // instruction set for the refrast executor.
  456. //
  457. // ------------------------------------------------------------------------
  458. {
  459. #define _Set(RegType, RegNum) Set(RegType,RegNum,pRast)
  460. #define _NewPSInst(__INST) \
  461. { \
  462. RDPSOffset = pRDPSInst - pRDPSInstBuffer + LastRDPSInstSize; \
  463. m_RDPSInstBuffer.SetGrowSize(MAX(512,RDPSOffset)); \
  464. if( FAILED(m_RDPSInstBuffer.Grow(RDPSOffset + sizeof(__INST##_PARAMS)))) \
  465. {return E_OUTOFMEMORY;} \
  466. pRDPSInstBuffer = &m_RDPSInstBuffer[0]; \
  467. pRDPSInst = pRDPSInstBuffer + RDPSOffset; \
  468. ((__INST##_PARAMS UNALIGNED64*)pRDPSInst)->Inst = __INST; \
  469. LastRDPSInstSize = sizeof(__INST##_PARAMS); \
  470. }
  471. #define _InstParam(__INST) (*(__INST##_PARAMS UNALIGNED64*)pRDPSInst)
  472. #define _NoteInstructionEvent _NewPSInst(RDPSINST_NEXTD3DPSINST); \
  473. _InstParam(RDPSINST_NEXTD3DPSINST).pInst = pInst;
  474. #define _EnterQuadPixelLoop if(!bInQuadPixelLoop) \
  475. { \
  476. _NewPSInst(RDPSINST_QUADLOOPBEGIN); \
  477. RDPSLoopOffset = RDPSOffset + sizeof(RDPSINST_QUADLOOPBEGIN_PARAMS); \
  478. bInQuadPixelLoop = TRUE; \
  479. }
  480. #define _LeaveQuadPixelLoop if(bInQuadPixelLoop) \
  481. { \
  482. _NewPSInst(RDPSINST_QUADLOOPEND); \
  483. _InstParam(RDPSINST_QUADLOOPEND).JumpBackByOffset = \
  484. RDPSOffset - RDPSLoopOffset;\
  485. bInQuadPixelLoop = FALSE; \
  486. }
  487. #define _EmitDstMod(__dstReg,__mask) _NewPSInst(RDPSINST_DSTMOD); \
  488. _InstParam(RDPSINST_DSTMOD).DstReg = __dstReg; \
  489. _InstParam(RDPSINST_DSTMOD).WriteMask = __mask; \
  490. _InstParam(RDPSINST_DSTMOD).fScale = DstScale; \
  491. _InstParam(RDPSINST_DSTMOD).fRangeMin = DstRange[0]; \
  492. _InstParam(RDPSINST_DSTMOD).fRangeMax = DstRange[1];
  493. // The macro _EmitProj emits instructions to do the following:
  494. // - Put reciprocal of source (x,y,z,w) component __COMPONENT (ex. w) into the same component of scratch register 0 (for the w example: the 4th component)
  495. // - Replicate reciprocal to rgb components of scratch register 0 (w example yields: 1/w,1/w,1/w, <--1/w)
  496. // - Multiply source register by scratch register (x/w,y/w,z/w,1) and put the result into the dest register.
  497. #define _EmitProj(__DESTTYPE,__DESTNUM,__SRCTYPE,__SRCNUM,__COMPONENT) \
  498. _NewPSInst(RDPSINST_RCP); \
  499. _InstParam(RDPSINST_RCP).DstReg._Set(RDPSREG_SCRATCH,0); \
  500. _InstParam(RDPSINST_RCP).SrcReg0._Set(__SRCTYPE,__SRCNUM); \
  501. _InstParam(RDPSINST_RCP).bSrcReg0_Negate = FALSE; \
  502. _InstParam(RDPSINST_RCP).WriteMask = __COMPONENT; \
  503. \
  504. _NewPSInst(RDPSINST_SWIZZLE); \
  505. _InstParam(RDPSINST_SWIZZLE).DstReg._Set(RDPSREG_SCRATCH,0); \
  506. _InstParam(RDPSINST_SWIZZLE).SrcReg0._Set(RDPSREG_SCRATCH,0); \
  507. _InstParam(RDPSINST_SWIZZLE).WriteMask = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 \
  508. | RDPS_COMPONENTMASK_2 | RDPS_COMPONENTMASK_3; \
  509. _InstParam(RDPSINST_SWIZZLE).Swizzle = \
  510. (RDPS_COMPONENTMASK_0 == __COMPONENT) ? RDPS_REPLICATERED : \
  511. (RDPS_COMPONENTMASK_1 == __COMPONENT) ? RDPS_REPLICATEGREEN : \
  512. (RDPS_COMPONENTMASK_2 == __COMPONENT) ? RDPS_REPLICATEBLUE : RDPS_REPLICATEALPHA; \
  513. \
  514. _NewPSInst(RDPSINST_MUL); \
  515. _InstParam(RDPSINST_MUL).DstReg._Set(__DESTTYPE,__DESTNUM); \
  516. _InstParam(RDPSINST_MUL).SrcReg0._Set(RDPSREG_SCRATCH,0); \
  517. _InstParam(RDPSINST_MUL).SrcReg1._Set(__SRCTYPE,__SRCNUM); \
  518. _InstParam(RDPSINST_MUL).bSrcReg0_Negate = FALSE; \
  519. _InstParam(RDPSINST_MUL).bSrcReg1_Negate = FALSE; \
  520. _InstParam(RDPSINST_MUL).WriteMask = \
  521. (RDPS_COMPONENTMASK_0 == __COMPONENT) ? RDPS_COMPONENTMASK_0 : \
  522. (RDPS_COMPONENTMASK_1 == __COMPONENT) ? RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 : \
  523. (RDPS_COMPONENTMASK_2 == __COMPONENT) ? RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 | \
  524. RDPS_COMPONENTMASK_2 : RDPS_COMPONENTMASK_ALL;
  525. BYTE ComponentSwizzle[4] = {RDPS_REPLICATERED, RDPS_REPLICATEGREEN, RDPS_REPLICATEBLUE, RDPS_REPLICATEALPHA};
  526. BYTE ComponentMask[4] = {RDPS_COMPONENTMASK_0, RDPS_COMPONENTMASK_1, RDPS_COMPONENTMASK_2, RDPS_COMPONENTMASK_3};
  527. int QueueIndex = -1; // current queue location (for staging results when simulating coissue)
  528. UINT i;
  529. BOOL bInQuadPixelLoop = FALSE;
  530. RefRast* pRast = &m_pRD->m_Rast;
  531. RDPSRegister ZeroReg; ZeroReg._Set(RDPSREG_ZERO,0);
  532. RDPSRegister OneReg; OneReg._Set(RDPSREG_ONE,0);
  533. RDPSRegister TwoReg; TwoReg._Set(RDPSREG_TWO,0);
  534. // destination parameter controls
  535. RDPSRegister DstReg;
  536. FLOAT DstScale; // Result Shift Scale - +/- 2**n only
  537. FLOAT DstRange[2]; // clamp dest to this range
  538. BYTE DstWriteMask; // per-component write mask
  539. PRGBAVEC pDstReg; // address of dest register
  540. // source parameter controls
  541. RDPSRegister SrcReg[3];
  542. BYTE* pRDPSInstBuffer = NULL;
  543. BYTE* pRDPSInst = pRDPSInstBuffer;
  544. size_t RDPSOffset, RDPSLoopOffset;
  545. size_t LastRDPSInstSize = 0;
  546. DWORD Version = *m_pCode;
  547. for (UINT CurrentPSInst=0; CurrentPSInst < m_cInst; CurrentPSInst++)
  548. {
  549. PixelShaderInstruction* pInst = m_pInst + CurrentPSInst;
  550. DWORD Opcode = pInst->Opcode & D3DSI_OPCODE_MASK;
  551. DWORD SrcSwizzle[3];
  552. BYTE SourceReadMasks[3];
  553. BYTE SourceReadMasksAfterSwizzle[3];
  554. BOOL bForceNeg1To1Clamp[3] = {FALSE, FALSE, FALSE};
  555. BOOL bEmitQueueWrite = FALSE;
  556. RDPSRegister QueuedWriteDstReg;
  557. BYTE QueuedWriteDstWriteMask;
  558. BYTE ProjComponent[3] = {0,0,0};
  559. BOOL bEmitProj[3] = {FALSE, FALSE, FALSE};
  560. BOOL bProjOnEval[3] = {FALSE, FALSE, FALSE};
  561. BOOL bEmitSrcMod[3] = {FALSE, FALSE, FALSE};
  562. BOOL bEmitSwizzle[3] = {FALSE, FALSE, FALSE};
  563. BOOL bSrcNegate[3] = {FALSE, FALSE, FALSE};
  564. BOOL bSrcBias[3] = {FALSE, FALSE, FALSE};
  565. BOOL bSrcTimes2[3] = {FALSE, FALSE, FALSE};
  566. BOOL bSrcComplement[3] = {FALSE, FALSE, FALSE};
  567. switch( Opcode )
  568. {
  569. continue;
  570. case D3DSIO_DEF:
  571. // nothing to do -> DEF has already been processed out and is not an true instruction
  572. continue;
  573. case D3DSIO_COMMENT:
  574. continue;
  575. case D3DSIO_PHASE:
  576. case D3DSIO_NOP:
  577. #if DBG
  578. _NoteInstructionEvent
  579. #endif
  580. continue;
  581. }
  582. #if DBG
  583. _NoteInstructionEvent
  584. #endif
  585. // do some preliminary setup for this instruction
  586. UINT RegNum = pInst->DstParam & D3DSP_REGNUM_MASK;
  587. switch (pInst->DstParam & D3DSP_REGTYPE_MASK)
  588. {
  589. case D3DSPR_TEXTURE:
  590. DstReg._Set(RDPSREG_TEXTURE, RegNum); break;
  591. case D3DSPR_TEMP:
  592. DstReg._Set(RDPSREG_TEMP, RegNum); break;
  593. default:
  594. _ASSERT( FALSE, "RDPShader::Initialize - Unexpected destination register type." );
  595. break;
  596. }
  597. DstWriteMask = (pInst->DstParam & D3DSP_WRITEMASK_ALL) >> RDPS_COMPONENTMASK_SHIFT;
  598. if( pInst->bQueueWrite )
  599. {
  600. QueueIndex++;
  601. QueuedWriteDstReg = DstReg;
  602. QueuedWriteDstWriteMask = DstWriteMask;
  603. DstReg._Set(RDPSREG_QUEUEDWRITE,QueueIndex);
  604. _ASSERT(QueueIndex <= RDPS_MAX_NUMQUEUEDWRITEREG, "Too many queued writes in pixelshader (improperly handled co-issue)." );
  605. bEmitQueueWrite = TRUE;
  606. }
  607. CalculateSourceReadMasks(pInst, SourceReadMasks, FALSE,Version);
  608. CalculateSourceReadMasks(pInst, SourceReadMasksAfterSwizzle, TRUE,Version);
  609. for (i=0; i < pInst->SrcParamCount; i++)
  610. {
  611. RegNum = pInst->SrcParam[i]&D3DSP_REGNUM_MASK;
  612. switch (pInst->SrcParam[i] & D3DSP_REGTYPE_MASK)
  613. {
  614. case D3DSPR_TEMP:
  615. SrcReg[i]._Set(RDPSREG_TEMP, RegNum); break;
  616. case D3DSPR_TEXTURE:
  617. SrcReg[i]._Set(RDPSREG_TEXTURE, RegNum); break;
  618. case D3DSPR_INPUT:
  619. SrcReg[i]._Set(RDPSREG_INPUT, RegNum); break;
  620. case D3DSPR_CONST:
  621. SrcReg[i]._Set(RDPSREG_CONST, RegNum);
  622. // Force a [-1,1] clamp after applying modifier (for constants only)
  623. // This overrides the the standard [-MaxPixelShaderValue,MaxPixelShaderValue] clamp.
  624. // An IHV that supports MaxPixelShaderValue > 1 forgot to do this for constants.
  625. bForceNeg1To1Clamp[i] = TRUE;
  626. break;
  627. default:
  628. _ASSERT( FALSE, "RDPShader::Initialize - Unexpected source register type." );
  629. break;
  630. }
  631. if( (D3DSPSM_DZ == (pInst->SrcParam[i] & D3DSP_SRCMOD_MASK)) ||
  632. (D3DSPSM_DW == (pInst->SrcParam[i] & D3DSP_SRCMOD_MASK)) )
  633. {
  634. if( D3DSPSM_DZ == (pInst->SrcParam[i] & D3DSP_SRCMOD_MASK))
  635. {
  636. ProjComponent[i] = RDPS_COMPONENTMASK_2;
  637. }
  638. else // _DW
  639. {
  640. if( D3DPS_VERSION(1,4) == Version )
  641. ProjComponent[i] = RDPS_COMPONENTMASK_2;
  642. else
  643. ProjComponent[i] = RDPS_COMPONENTMASK_3;
  644. }
  645. if( D3DSPR_TEXTURE == (pInst->SrcParam[i] & D3DSP_REGTYPE_MASK ) ) // t# register being used to represent evaluated texcoord.
  646. {
  647. bProjOnEval[i] = TRUE;
  648. }
  649. else
  650. bEmitProj[i] = TRUE;
  651. }
  652. else
  653. {
  654. bEmitSrcMod[i] = TRUE;
  655. switch (pInst->SrcParam[i] & D3DSP_SRCMOD_MASK)
  656. {
  657. default:
  658. case D3DSPSM_NONE:
  659. if( !bForceNeg1To1Clamp[i] )
  660. bEmitSrcMod[i] = FALSE;
  661. break;
  662. case D3DSPSM_NEG:
  663. bSrcNegate[i] = TRUE; // negate is not part of source modifier
  664. if( !bForceNeg1To1Clamp[i] )
  665. bEmitSrcMod[i] = FALSE;
  666. break;
  667. case D3DSPSM_BIAS:
  668. bSrcBias[i] = TRUE;
  669. break;
  670. case D3DSPSM_BIASNEG:
  671. bSrcNegate[i] = TRUE;
  672. bSrcBias[i] = TRUE;
  673. break;
  674. case D3DSPSM_SIGN: // _bx2
  675. bSrcBias[i] = TRUE;
  676. bSrcTimes2[i] = TRUE;
  677. break;
  678. case D3DSPSM_SIGNNEG: // negative _bx2
  679. bSrcNegate[i] = TRUE; // negate is not part of source modifier
  680. bSrcBias[i] = TRUE;
  681. bSrcTimes2[i] = TRUE;
  682. break;
  683. case D3DSPSM_COMP:
  684. bSrcComplement[i] = TRUE;
  685. break;
  686. case D3DSPSM_X2:
  687. bSrcTimes2[i] = TRUE;
  688. break;
  689. case D3DSPSM_X2NEG:
  690. bSrcNegate[i] = TRUE; // negate is not part of source modifier
  691. bSrcTimes2[i] = TRUE;
  692. break;
  693. }
  694. _ASSERT(!(bSrcComplement[i] && (bSrcTimes2[i]||bSrcBias[i]||bSrcNegate[i])),"RDPShader::Initialize - Complement cannot be combined with other modifiers.");
  695. }
  696. SrcSwizzle[i] = (pInst->SrcParam[i] & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
  697. bEmitSwizzle[i] = (D3DSP_NOSWIZZLE != (pInst->SrcParam[i] & D3DSP_SWIZZLE_MASK));
  698. }
  699. // set clamp values
  700. switch (pInst->DstParam & D3DSP_DSTMOD_MASK)
  701. {
  702. default:
  703. case D3DSPDM_NONE:
  704. if(pInst->bTexOp)
  705. {
  706. DstRange[0] = -FLT_MAX;
  707. DstRange[1] = FLT_MAX;
  708. }
  709. else
  710. {
  711. DstRange[0] = fMin;
  712. DstRange[1] = fMax;
  713. }
  714. break;
  715. case D3DSPDM_SATURATE:
  716. DstRange[0] = 0.F;
  717. DstRange[1] = 1.F;
  718. break;
  719. }
  720. UINT ShiftScale =
  721. (pInst->DstParam & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
  722. if (ShiftScale & 0x8)
  723. {
  724. ShiftScale = ((~ShiftScale)&0x7)+1; // negative magnitude
  725. DstScale = 1.f/(FLOAT)(1<<ShiftScale);
  726. }
  727. else
  728. {
  729. DstScale = (FLOAT)(1<<ShiftScale);
  730. }
  731. // finished preliminary setup, now start emitting ops...
  732. _EnterQuadPixelLoop
  733. if( bEmitQueueWrite )
  734. {
  735. _NewPSInst(RDPSINST_QUEUEWRITE);
  736. _InstParam(RDPSINST_QUEUEWRITE).DstReg = QueuedWriteDstReg;
  737. _InstParam(RDPSINST_QUEUEWRITE).WriteMask = QueuedWriteDstWriteMask;
  738. }
  739. for (i=0; i < pInst->SrcParamCount; i++)
  740. {
  741. if( bEmitProj[i] )
  742. {
  743. _EmitProj(RDPSREG_POSTMODSRC,i,SrcReg[i].GetRegType(),SrcReg[i].GetRegNum(),ProjComponent[i]);
  744. SrcReg[i]._Set(RDPSREG_POSTMODSRC,i);
  745. }
  746. if( bEmitSrcMod[i] )
  747. {
  748. _NewPSInst(RDPSINST_SRCMOD);
  749. _InstParam(RDPSINST_SRCMOD).DstReg._Set(RDPSREG_POSTMODSRC,i);
  750. _InstParam(RDPSINST_SRCMOD).SrcReg0 = SrcReg[i];
  751. _InstParam(RDPSINST_SRCMOD).WriteMask = SourceReadMasks[i];
  752. _InstParam(RDPSINST_SRCMOD).bBias = bSrcBias[i];
  753. _InstParam(RDPSINST_SRCMOD).bTimes2 = bSrcTimes2[i];
  754. _InstParam(RDPSINST_SRCMOD).bComplement = bSrcComplement[i];
  755. _InstParam(RDPSINST_SRCMOD).fRangeMin = bForceNeg1To1Clamp[i] ? -1.0f : fMin;
  756. _InstParam(RDPSINST_SRCMOD).fRangeMax = bForceNeg1To1Clamp[i] ? 1.0f : fMax;
  757. SrcReg[i]._Set(RDPSREG_POSTMODSRC,i);
  758. }
  759. if( bEmitSwizzle[i] && !bProjOnEval[i] )
  760. {
  761. _NewPSInst(RDPSINST_SWIZZLE);
  762. _InstParam(RDPSINST_SWIZZLE).DstReg._Set(RDPSREG_POSTMODSRC,i);
  763. _InstParam(RDPSINST_SWIZZLE).SrcReg0 = SrcReg[i];
  764. _InstParam(RDPSINST_SWIZZLE).WriteMask = SourceReadMasksAfterSwizzle[i];
  765. _InstParam(RDPSINST_SWIZZLE).Swizzle = SrcSwizzle[i];
  766. SrcReg[i]._Set(RDPSREG_POSTMODSRC,i);
  767. }
  768. }
  769. switch(Opcode)
  770. {
  771. case D3DSIO_TEXCOORD:
  772. case D3DSIO_TEXKILL:
  773. {
  774. if( !( (D3DSIO_TEXKILL == Opcode) &&
  775. (D3DSPR_TEMP == (pInst->DstParam & D3DSP_REGTYPE_MASK))
  776. )
  777. )
  778. {
  779. UINT CoordSet = pInst->SrcParam[0] ? (pInst->SrcParam[0] & D3DSP_REGNUM_MASK) :
  780. (pInst->DstParam & D3DSP_REGNUM_MASK);
  781. RDPSRegister CoordReg;
  782. if(bProjOnEval[0])
  783. CoordReg._Set(RDPSREG_POSTMODSRC,0);
  784. else
  785. CoordReg = DstReg;
  786. // For TEXCOORD, clamp 0. to 1 only there is no source parameter (ps.1.0, ps.1.1)
  787. // For TEXKILL, never clamp
  788. // NOTE: the TEXCOORD clamp is a temporary limitation for DX8 shader models
  789. BOOL bTexCoordClamp = ((D3DSIO_TEXCOORD == Opcode) && (!pInst->SrcParam[0])) ? TRUE : FALSE;
  790. _NewPSInst(RDPSINST_EVAL);
  791. _InstParam(RDPSINST_EVAL).DstReg = CoordReg;
  792. _InstParam(RDPSINST_EVAL).uiCoordSet = CoordSet;
  793. _InstParam(RDPSINST_EVAL).bIgnoreD3DTTFF_PROJECTED = TRUE; // projection disabled (unless _p modifier used -> _EmitProj below)
  794. _InstParam(RDPSINST_EVAL).bClamp = bTexCoordClamp;
  795. if( bProjOnEval[0] )
  796. {
  797. if( bEmitSwizzle[0] )
  798. {
  799. _NewPSInst(RDPSINST_SWIZZLE);
  800. _InstParam(RDPSINST_SWIZZLE).DstReg = DstReg;
  801. _InstParam(RDPSINST_SWIZZLE).SrcReg0 = CoordReg;
  802. _InstParam(RDPSINST_SWIZZLE).WriteMask = SourceReadMasksAfterSwizzle[0];
  803. _InstParam(RDPSINST_SWIZZLE).Swizzle = SrcSwizzle[0];
  804. }
  805. _EmitProj(DstReg.GetRegType(),DstReg.GetRegNum(),DstReg.GetRegType(),DstReg.GetRegNum(),ProjComponent[0]);
  806. }
  807. // check version (first DWORD of code token stream), and always
  808. // set 4th component to 1.0 for ps.1.3 or earlier
  809. if ( D3DPS_VERSION(1,3) >= Version )
  810. {
  811. _NewPSInst(RDPSINST_MOV);
  812. _InstParam(RDPSINST_MOV).DstReg = DstReg;
  813. _InstParam(RDPSINST_MOV).SrcReg0 = OneReg; // 1.0f
  814. _InstParam(RDPSINST_MOV).bSrcReg0_Negate = FALSE;
  815. _InstParam(RDPSINST_MOV).WriteMask = RDPS_COMPONENTMASK_3;
  816. }
  817. }
  818. _EmitDstMod(DstReg,DstWriteMask)
  819. if( D3DSIO_TEXKILL == Opcode )
  820. {
  821. _NewPSInst(RDPSINST_KILL);
  822. _InstParam(RDPSINST_KILL).DstReg = DstReg;
  823. }
  824. }
  825. break;
  826. case D3DSIO_TEX:
  827. {
  828. RDPSRegister CoordReg;
  829. BOOL bDoSampleCoords = TRUE;
  830. UINT CoordSet = pInst->SrcParam[0] ? (pInst->SrcParam[0] & D3DSP_REGNUM_MASK) :
  831. (pInst->DstParam & D3DSP_REGNUM_MASK);
  832. if( pInst->SrcParam[0] )
  833. {
  834. CoordReg = SrcReg[0];
  835. if( D3DSPR_TEMP == (pInst->SrcParam[0] & D3DSP_REGTYPE_MASK) )
  836. bDoSampleCoords = FALSE;
  837. }
  838. else // no source param.
  839. {
  840. CoordReg._Set(RDPSREG_SCRATCH,0);
  841. }
  842. if( bDoSampleCoords )
  843. {
  844. _NewPSInst(RDPSINST_EVAL);
  845. _InstParam(RDPSINST_EVAL).DstReg = CoordReg;
  846. _InstParam(RDPSINST_EVAL).uiCoordSet = CoordSet;
  847. _InstParam(RDPSINST_EVAL).bIgnoreD3DTTFF_PROJECTED = bProjOnEval[0]; // if we have _p modifier, we do _EmitProj below
  848. _InstParam(RDPSINST_EVAL).bClamp = FALSE;
  849. }
  850. if( bProjOnEval[0] )
  851. {
  852. if( bEmitSwizzle[0] )
  853. {
  854. _NewPSInst(RDPSINST_SWIZZLE);
  855. _InstParam(RDPSINST_SWIZZLE).DstReg._Set(RDPSREG_POSTMODSRC,0);
  856. _InstParam(RDPSINST_SWIZZLE).SrcReg0 = CoordReg;
  857. _InstParam(RDPSINST_SWIZZLE).WriteMask = SourceReadMasksAfterSwizzle[0];
  858. _InstParam(RDPSINST_SWIZZLE).Swizzle = SrcSwizzle[0];
  859. CoordReg._Set(RDPSREG_POSTMODSRC,0);
  860. }
  861. _EmitProj(RDPSREG_POSTMODSRC,0,CoordReg.GetRegType(),CoordReg.GetRegNum(),ProjComponent[0]);
  862. CoordReg._Set(RDPSREG_POSTMODSRC,0);
  863. }
  864. _LeaveQuadPixelLoop
  865. PRGBAVEC pCoordReg = CoordReg.GetRegPtr();
  866. _NewPSInst(RDPSINST_TEXCOVERAGE);
  867. _InstParam(RDPSINST_TEXCOVERAGE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  868. _InstParam(RDPSINST_TEXCOVERAGE).pGradients = pRast->m_Gradients; // where to store gradients
  869. // data from which to compute gradients. i.e.: du/dx = DUDX_0 - DUDX_1
  870. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_0 = &pCoordReg[1][0]; // du/dx
  871. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_1 = &pCoordReg[0][0];
  872. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_0 = &pCoordReg[2][0]; // du/dy
  873. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_1 = &pCoordReg[0][0];
  874. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_0 = &pCoordReg[1][1]; // dv/dx
  875. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_1 = &pCoordReg[0][1];
  876. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_0 = &pCoordReg[2][1]; // dv/dy
  877. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_1 = &pCoordReg[0][1];
  878. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_0 = &pCoordReg[1][2]; // dw/dx
  879. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_1 = &pCoordReg[0][2];
  880. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_0 = &pCoordReg[2][2]; // dw/dy
  881. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_1 = &pCoordReg[0][2];
  882. _EnterQuadPixelLoop
  883. _NewPSInst(RDPSINST_SAMPLE);
  884. _InstParam(RDPSINST_SAMPLE).DstReg = DstReg;
  885. _InstParam(RDPSINST_SAMPLE).CoordReg = CoordReg;
  886. _InstParam(RDPSINST_SAMPLE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  887. _EmitDstMod(DstReg,DstWriteMask)
  888. }
  889. break;
  890. case D3DSIO_TEXDP3:
  891. case D3DSIO_TEXDP3TEX:
  892. {
  893. RDPSRegister CoordReg;
  894. CoordReg._Set(RDPSREG_SCRATCH,0);
  895. _NewPSInst(RDPSINST_EVAL);
  896. _InstParam(RDPSINST_EVAL).DstReg = CoordReg;
  897. _InstParam(RDPSINST_EVAL).uiCoordSet = pInst->DstParam & D3DSP_REGNUM_MASK;
  898. _InstParam(RDPSINST_EVAL).bIgnoreD3DTTFF_PROJECTED = TRUE; // no projection
  899. _InstParam(RDPSINST_EVAL).bClamp = FALSE;
  900. if( D3DSIO_TEXDP3 == Opcode )
  901. {
  902. _NewPSInst(RDPSINST_DP3);
  903. _InstParam(RDPSINST_DP3).DstReg = DstReg;
  904. _InstParam(RDPSINST_DP3).SrcReg0 = SrcReg[0];
  905. _InstParam(RDPSINST_DP3).SrcReg1 = CoordReg;
  906. _InstParam(RDPSINST_DP3).bSrcReg0_Negate = FALSE;
  907. _InstParam(RDPSINST_DP3).bSrcReg1_Negate = FALSE;
  908. _InstParam(RDPSINST_DP3).WriteMask = RDPS_COMPONENTMASK_ALL;
  909. _EmitDstMod(DstReg,DstWriteMask)
  910. }
  911. else // D3DSIO_TEXDP3TEX
  912. {
  913. _NewPSInst(RDPSINST_DP3);
  914. _InstParam(RDPSINST_DP3).DstReg = CoordReg;
  915. _InstParam(RDPSINST_DP3).SrcReg0 = SrcReg[0];
  916. _InstParam(RDPSINST_DP3).SrcReg1 = CoordReg;
  917. _InstParam(RDPSINST_DP3).bSrcReg0_Negate = FALSE;
  918. _InstParam(RDPSINST_DP3).bSrcReg1_Negate = FALSE;
  919. _InstParam(RDPSINST_DP3).WriteMask = RDPS_COMPONENTMASK_0;
  920. _NewPSInst(RDPSINST_MOV);
  921. _InstParam(RDPSINST_MOV).DstReg = CoordReg;
  922. _InstParam(RDPSINST_MOV).SrcReg0 = ZeroReg; // 0.0f
  923. _InstParam(RDPSINST_MOV).bSrcReg0_Negate = FALSE;
  924. _InstParam(RDPSINST_MOV).WriteMask = RDPS_COMPONENTMASK_1 | RDPS_COMPONENTMASK_2;
  925. _LeaveQuadPixelLoop
  926. PRGBAVEC pCoordReg = CoordReg.GetRegPtr();
  927. _NewPSInst(RDPSINST_TEXCOVERAGE);
  928. _InstParam(RDPSINST_TEXCOVERAGE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  929. _InstParam(RDPSINST_TEXCOVERAGE).pGradients = pRast->m_Gradients; // where to store gradients
  930. // data from which to compute gradients. i.e.: du/dx = DUDX_0 - DUDX_1
  931. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_0 = &pCoordReg[1][0]; // du/dx
  932. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_1 = &pCoordReg[0][0];
  933. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_0 = &pCoordReg[2][0]; // du/dy
  934. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_1 = &pCoordReg[0][0];
  935. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_0 = // dv/dx
  936. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_1 =
  937. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_0 = // dv/dy
  938. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_1 =
  939. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_0 = // dw/dx
  940. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_1 =
  941. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_0 = // dw/dy
  942. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_1 = &ZeroReg.GetRegPtr()[0][0]; // 0.0f
  943. _EnterQuadPixelLoop
  944. _NewPSInst(RDPSINST_SAMPLE);
  945. _InstParam(RDPSINST_SAMPLE).DstReg = DstReg;
  946. _InstParam(RDPSINST_SAMPLE).CoordReg = CoordReg;
  947. _InstParam(RDPSINST_SAMPLE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  948. _EmitDstMod(DstReg,DstWriteMask)
  949. }
  950. }
  951. break;
  952. case D3DSIO_TEXREG2AR:
  953. case D3DSIO_TEXREG2GB:
  954. case D3DSIO_TEXREG2RGB:
  955. {
  956. UINT I0, I1;
  957. PRGBAVEC pSrcReg0 = SrcReg[0].GetRegPtr();
  958. switch( Opcode )
  959. {
  960. case D3DSIO_TEXREG2AR:
  961. I0 = 3;
  962. I1 = 0;
  963. break;
  964. case D3DSIO_TEXREG2GB:
  965. I0 = 1;
  966. I1 = 2;
  967. break;
  968. case D3DSIO_TEXREG2RGB:
  969. I0 = 0;
  970. I1 = 1;
  971. break;
  972. }
  973. _LeaveQuadPixelLoop
  974. _NewPSInst(RDPSINST_TEXCOVERAGE);
  975. _InstParam(RDPSINST_TEXCOVERAGE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  976. _InstParam(RDPSINST_TEXCOVERAGE).pGradients = pRast->m_Gradients; // where to store gradients
  977. // data from which to compute gradients. i.e.: du/dx = DUDX_0 - DUDX_1
  978. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_0 = &pSrcReg0[1][I0]; // du/dx
  979. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_1 = &pSrcReg0[0][I0];
  980. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_0 = &pSrcReg0[2][I0]; // du/dy
  981. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_1 = &pSrcReg0[0][I0];
  982. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_0 = &pSrcReg0[1][I1]; // dv/dx
  983. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_1 = &pSrcReg0[0][I1];
  984. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_0 = &pSrcReg0[2][I1]; // dv/dy
  985. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_1 = &pSrcReg0[0][I1];
  986. switch( Opcode )
  987. {
  988. case D3DSIO_TEXREG2AR:
  989. case D3DSIO_TEXREG2GB:
  990. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_0 = // dw/dx
  991. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_1 =
  992. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_0 = // dw/dy
  993. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_1 = &ZeroReg.GetRegPtr()[0][0]; // 0.0f
  994. break;
  995. case D3DSIO_TEXREG2RGB:
  996. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_0 = &pSrcReg0[1][2]; // dw/dx
  997. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_1 = &pSrcReg0[0][2];
  998. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_0 = &pSrcReg0[2][2]; // dw/dy
  999. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_1 = &pSrcReg0[0][2];
  1000. break;
  1001. }
  1002. _EnterQuadPixelLoop
  1003. RDPSRegister CoordReg;
  1004. CoordReg._Set(RDPSREG_SCRATCH,0);
  1005. _NewPSInst(RDPSINST_SWIZZLE);
  1006. _InstParam(RDPSINST_SWIZZLE).DstReg = CoordReg;
  1007. _InstParam(RDPSINST_SWIZZLE).SrcReg0 = SrcReg[0];
  1008. _InstParam(RDPSINST_SWIZZLE).WriteMask = RDPS_COMPONENTMASK_0;
  1009. _InstParam(RDPSINST_SWIZZLE).Swizzle = ComponentSwizzle[I0];
  1010. _NewPSInst(RDPSINST_SWIZZLE);
  1011. _InstParam(RDPSINST_SWIZZLE).DstReg = CoordReg;
  1012. _InstParam(RDPSINST_SWIZZLE).SrcReg0 = SrcReg[0];
  1013. _InstParam(RDPSINST_SWIZZLE).WriteMask = RDPS_COMPONENTMASK_1;
  1014. _InstParam(RDPSINST_SWIZZLE).Swizzle = ComponentSwizzle[I1];
  1015. _NewPSInst(RDPSINST_MOV);
  1016. _InstParam(RDPSINST_MOV).DstReg = CoordReg;
  1017. _InstParam(RDPSINST_MOV).SrcReg0 = (D3DSIO_TEXREG2RGB == Opcode ? SrcReg[0] : ZeroReg );
  1018. _InstParam(RDPSINST_MOV).bSrcReg0_Negate = FALSE;
  1019. _InstParam(RDPSINST_MOV).WriteMask = RDPS_COMPONENTMASK_2;
  1020. _NewPSInst(RDPSINST_SAMPLE);
  1021. _InstParam(RDPSINST_SAMPLE).DstReg = DstReg;
  1022. _InstParam(RDPSINST_SAMPLE).CoordReg = CoordReg;
  1023. _InstParam(RDPSINST_SAMPLE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  1024. _EmitDstMod(DstReg,DstWriteMask)
  1025. }
  1026. break;
  1027. case D3DSIO_TEXBEM:
  1028. case D3DSIO_TEXBEML:
  1029. case D3DSIO_TEXBEM_LEGACY: // refrast only -> used with legacy fixed function rasterizer
  1030. case D3DSIO_TEXBEML_LEGACY: // refrast only -> used with legacy fixed function rasterizer
  1031. {
  1032. BOOL bDoLuminance = ((D3DSIO_TEXBEML == Opcode) || (D3DSIO_TEXBEML_LEGACY == Opcode));
  1033. RDPSRegister CoordReg;
  1034. CoordReg._Set(RDPSREG_SCRATCH,0);
  1035. _NewPSInst(RDPSINST_EVAL);
  1036. _InstParam(RDPSINST_EVAL).DstReg = CoordReg;
  1037. _InstParam(RDPSINST_EVAL).uiCoordSet = pInst->DstParam & D3DSP_REGNUM_MASK;
  1038. _InstParam(RDPSINST_EVAL).bIgnoreD3DTTFF_PROJECTED = FALSE;
  1039. _InstParam(RDPSINST_EVAL).bClamp = FALSE;
  1040. _NewPSInst(RDPSINST_BEM);
  1041. _InstParam(RDPSINST_BEM).DstReg = CoordReg;
  1042. _InstParam(RDPSINST_BEM).SrcReg0 = CoordReg;
  1043. _InstParam(RDPSINST_BEM).SrcReg1 = SrcReg[0];
  1044. _InstParam(RDPSINST_BEM).bSrcReg0_Negate = FALSE;
  1045. _InstParam(RDPSINST_BEM).bSrcReg1_Negate = FALSE;
  1046. _InstParam(RDPSINST_BEM).WriteMask = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1;
  1047. _InstParam(RDPSINST_BEM).uiStage = pInst->uiTSSNum;
  1048. _EmitDstMod(CoordReg,RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1)
  1049. _LeaveQuadPixelLoop
  1050. PRGBAVEC pCoordReg = CoordReg.GetRegPtr();
  1051. _NewPSInst(RDPSINST_TEXCOVERAGE);
  1052. _InstParam(RDPSINST_TEXCOVERAGE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  1053. _InstParam(RDPSINST_TEXCOVERAGE).pGradients = pRast->m_Gradients; // where to store gradients
  1054. // data from which to compute gradients. i.e.: du/dx = DUDX_0 - DUDX_1
  1055. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_0 = &pCoordReg[1][0]; // du/dx
  1056. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_1 = &pCoordReg[0][0];
  1057. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_0 = &pCoordReg[2][0]; // du/dy
  1058. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_1 = &pCoordReg[0][0];
  1059. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_0 = &pCoordReg[1][1]; // dv/dx
  1060. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_1 = &pCoordReg[0][1];
  1061. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_0 = &pCoordReg[2][1]; // dv/dy
  1062. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_1 = &pCoordReg[0][1];
  1063. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_0 = // dw/dx
  1064. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_1 =
  1065. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_0 = // dw/dy
  1066. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_1 = &ZeroReg.GetRegPtr()[0][0]; // 0.0f
  1067. _EnterQuadPixelLoop
  1068. _NewPSInst(RDPSINST_SAMPLE);
  1069. _InstParam(RDPSINST_SAMPLE).DstReg = DstReg;
  1070. _InstParam(RDPSINST_SAMPLE).CoordReg = CoordReg;
  1071. _InstParam(RDPSINST_SAMPLE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  1072. if( bDoLuminance )
  1073. {
  1074. _NewPSInst(RDPSINST_LUMINANCE);
  1075. _InstParam(RDPSINST_LUMINANCE).DstReg = DstReg;
  1076. _InstParam(RDPSINST_LUMINANCE).SrcReg0 = DstReg;
  1077. _InstParam(RDPSINST_LUMINANCE).SrcReg1 = SrcReg[0];
  1078. _InstParam(RDPSINST_LUMINANCE).bSrcReg0_Negate = FALSE;
  1079. _InstParam(RDPSINST_LUMINANCE).bSrcReg1_Negate = FALSE;
  1080. _InstParam(RDPSINST_LUMINANCE).uiStage = pInst->uiTSSNum;
  1081. }
  1082. _EmitDstMod(DstReg,DstWriteMask)
  1083. }
  1084. break;
  1085. case D3DSIO_TEXDEPTH:
  1086. _NewPSInst(RDPSINST_DEPTH);
  1087. _InstParam(RDPSINST_DEPTH).DstReg = DstReg;
  1088. break;
  1089. case D3DSIO_TEXM3x2PAD:
  1090. {
  1091. RDPSRegister CoordReg;
  1092. CoordReg._Set(RDPSREG_SCRATCH,0);
  1093. // do dot product for first row of matrix multiply
  1094. // evaluate texture coordinate; projection disabled
  1095. _NewPSInst(RDPSINST_EVAL);
  1096. _InstParam(RDPSINST_EVAL).DstReg = CoordReg;
  1097. _InstParam(RDPSINST_EVAL).uiCoordSet = pInst->DstParam & D3DSP_REGNUM_MASK;
  1098. _InstParam(RDPSINST_EVAL).bIgnoreD3DTTFF_PROJECTED = TRUE; // no projection
  1099. _InstParam(RDPSINST_EVAL).bClamp = FALSE;
  1100. // do row of transform - tex coord * vector loaded from texture (on previous stage)
  1101. _NewPSInst(RDPSINST_DP3);
  1102. _InstParam(RDPSINST_DP3).DstReg._Set(DstReg.GetRegType(),DstReg.GetRegNum()+1);
  1103. _InstParam(RDPSINST_DP3).SrcReg0 = SrcReg[0];
  1104. _InstParam(RDPSINST_DP3).SrcReg1 = CoordReg;
  1105. _InstParam(RDPSINST_DP3).bSrcReg0_Negate = FALSE;
  1106. _InstParam(RDPSINST_DP3).bSrcReg1_Negate = FALSE;
  1107. _InstParam(RDPSINST_DP3).WriteMask = RDPS_COMPONENTMASK_0;
  1108. }
  1109. break;
  1110. case D3DSIO_TEXM3x3PAD:
  1111. {
  1112. BOOL bSecondPad = (D3DSIO_TEXM3x3PAD != ((pInst + 1)->Opcode & D3DSI_OPCODE_MASK));
  1113. BOOL bInVSPECSequence = (D3DSIO_TEXM3x3VSPEC == (((pInst + (bSecondPad?1:2))->Opcode) & D3DSI_OPCODE_MASK));
  1114. RDPSRegister CoordReg, EyeReg;
  1115. CoordReg._Set(RDPSREG_SCRATCH,0);
  1116. EyeReg._Set(RDPSREG_SCRATCH,1);
  1117. // do dot product for first or second row of matrix multiply (selected by bSecondPad)
  1118. // evaluate texture coordinate; projection disabled
  1119. _NewPSInst(RDPSINST_EVAL);
  1120. _InstParam(RDPSINST_EVAL).DstReg = CoordReg;
  1121. _InstParam(RDPSINST_EVAL).uiCoordSet = pInst->DstParam & D3DSP_REGNUM_MASK;
  1122. _InstParam(RDPSINST_EVAL).bIgnoreD3DTTFF_PROJECTED = TRUE; // no projection
  1123. _InstParam(RDPSINST_EVAL).bClamp = FALSE;
  1124. // do row of transform - tex coord * vector loaded from texture (on previous stage)
  1125. _NewPSInst(RDPSINST_DP3);
  1126. _InstParam(RDPSINST_DP3).DstReg._Set(DstReg.GetRegType(),DstReg.GetRegNum()+(bSecondPad?1:2));
  1127. _InstParam(RDPSINST_DP3).SrcReg0 = SrcReg[0];
  1128. _InstParam(RDPSINST_DP3).SrcReg1 = CoordReg;
  1129. _InstParam(RDPSINST_DP3).bSrcReg0_Negate = FALSE;
  1130. _InstParam(RDPSINST_DP3).bSrcReg1_Negate = FALSE;
  1131. _InstParam(RDPSINST_DP3).WriteMask = bSecondPad?RDPS_COMPONENTMASK_1:RDPS_COMPONENTMASK_0;
  1132. if(bInVSPECSequence)
  1133. {
  1134. // eye vector encoded in 4th element of texture coordinates
  1135. _NewPSInst(RDPSINST_SWIZZLE);
  1136. _InstParam(RDPSINST_SWIZZLE).DstReg = EyeReg;
  1137. _InstParam(RDPSINST_SWIZZLE).SrcReg0 = CoordReg;
  1138. _InstParam(RDPSINST_SWIZZLE).WriteMask = bSecondPad?RDPS_COMPONENTMASK_1:RDPS_COMPONENTMASK_0;
  1139. _InstParam(RDPSINST_SWIZZLE).Swizzle = RDPS_REPLICATEALPHA;
  1140. }
  1141. }
  1142. break;
  1143. case D3DSIO_TEXM3x2TEX:
  1144. case D3DSIO_TEXM3x3:
  1145. case D3DSIO_TEXM3x3TEX:
  1146. case D3DSIO_TEXM3x3SPEC:
  1147. case D3DSIO_TEXM3x3VSPEC:
  1148. case D3DSIO_TEXM3x2DEPTH:
  1149. {
  1150. BOOL bIs3D = (D3DSIO_TEXM3x2TEX != Opcode) && (D3DSIO_TEXM3x2DEPTH != Opcode);
  1151. RDPSRegister CoordReg, EyeReg;
  1152. CoordReg._Set(RDPSREG_SCRATCH,0);
  1153. EyeReg._Set(RDPSREG_SCRATCH,1);
  1154. // do dot product for last row of matrix multiply
  1155. // evaluate texture coordinate; projection disabled
  1156. _NewPSInst(RDPSINST_EVAL);
  1157. _InstParam(RDPSINST_EVAL).DstReg = CoordReg;
  1158. _InstParam(RDPSINST_EVAL).uiCoordSet = pInst->DstParam & D3DSP_REGNUM_MASK;
  1159. _InstParam(RDPSINST_EVAL).bIgnoreD3DTTFF_PROJECTED = TRUE; // no projection
  1160. _InstParam(RDPSINST_EVAL).bClamp = FALSE;
  1161. // do row of transform - tex coord * vector loaded from texture (on previous stage)
  1162. _NewPSInst(RDPSINST_DP3);
  1163. _InstParam(RDPSINST_DP3).DstReg = DstReg;
  1164. _InstParam(RDPSINST_DP3).SrcReg0 = SrcReg[0];
  1165. _InstParam(RDPSINST_DP3).SrcReg1 = CoordReg;
  1166. _InstParam(RDPSINST_DP3).bSrcReg0_Negate = FALSE;
  1167. _InstParam(RDPSINST_DP3).bSrcReg1_Negate = FALSE;
  1168. _InstParam(RDPSINST_DP3).WriteMask = bIs3D ? RDPS_COMPONENTMASK_2 : RDPS_COMPONENTMASK_1;
  1169. if(D3DSIO_TEXM3x3VSPEC == Opcode)
  1170. {
  1171. // eye vector encoded in 4th element of texture coordinates
  1172. _NewPSInst(RDPSINST_SWIZZLE);
  1173. _InstParam(RDPSINST_SWIZZLE).DstReg = EyeReg;
  1174. _InstParam(RDPSINST_SWIZZLE).SrcReg0 = CoordReg;
  1175. _InstParam(RDPSINST_SWIZZLE).WriteMask = RDPS_COMPONENTMASK_2;
  1176. _InstParam(RDPSINST_SWIZZLE).Swizzle = RDPS_REPLICATEALPHA;
  1177. }
  1178. // Now do stuff that depends on which TEXM3x* instruction this is...
  1179. if( D3DSIO_TEXM3x3 == Opcode )
  1180. {
  1181. _NewPSInst(RDPSINST_MOV);
  1182. _InstParam(RDPSINST_MOV).DstReg = DstReg;
  1183. _InstParam(RDPSINST_MOV).SrcReg0 = OneReg; // 1.0f
  1184. _InstParam(RDPSINST_MOV).bSrcReg0_Negate = FALSE;
  1185. _InstParam(RDPSINST_MOV).WriteMask = RDPS_COMPONENTMASK_3;
  1186. _EmitDstMod(DstReg,DstWriteMask)
  1187. }
  1188. else if ( (D3DSIO_TEXM3x2TEX == Opcode) ||
  1189. (D3DSIO_TEXM3x3TEX == Opcode) )
  1190. {
  1191. // do straight lookup with transformed tex coords - this
  1192. // vector is not normalized, but normalization is not necessary
  1193. // for a cubemap lookup
  1194. // compute gradients for diffuse lookup
  1195. _LeaveQuadPixelLoop
  1196. PRGBAVEC pDstReg = DstReg.GetRegPtr();
  1197. _NewPSInst(RDPSINST_TEXCOVERAGE);
  1198. _InstParam(RDPSINST_TEXCOVERAGE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  1199. _InstParam(RDPSINST_TEXCOVERAGE).pGradients = pRast->m_Gradients; // where to store gradients
  1200. // data from which to compute gradients. i.e.: du/dx = DUDX_0 - DUDX_1
  1201. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_0 = &pDstReg[1][0]; // du/dx
  1202. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_1 = &pDstReg[0][0];
  1203. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_0 = &pDstReg[2][0]; // du/dy
  1204. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_1 = &pDstReg[0][0];
  1205. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_0 = &pDstReg[1][1]; // dv/dx
  1206. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_1 = &pDstReg[0][1];
  1207. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_0 = &pDstReg[2][1]; // dv/dy
  1208. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_1 = &pDstReg[0][1];
  1209. if( bIs3D )
  1210. {
  1211. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_0 = &pDstReg[1][2]; // dw/dx
  1212. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_1 = &pDstReg[0][2];
  1213. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_0 = &pDstReg[2][2]; // dw/dy
  1214. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_1 = &pDstReg[0][2];
  1215. }
  1216. else
  1217. {
  1218. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_0 = // dw/dx
  1219. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_1 =
  1220. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_0 = // dw/dy
  1221. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_1 = &ZeroReg.GetRegPtr()[0][0]; // 0.0f
  1222. }
  1223. _EnterQuadPixelLoop
  1224. // do lookup
  1225. if( !bIs3D )
  1226. {
  1227. _NewPSInst(RDPSINST_MOV);
  1228. _InstParam(RDPSINST_MOV).DstReg = DstReg;
  1229. _InstParam(RDPSINST_MOV).SrcReg0 = ZeroReg; // 0.0f
  1230. _InstParam(RDPSINST_MOV).bSrcReg0_Negate = FALSE;
  1231. _InstParam(RDPSINST_MOV).WriteMask = RDPS_COMPONENTMASK_2;
  1232. }
  1233. _NewPSInst(RDPSINST_SAMPLE);
  1234. _InstParam(RDPSINST_SAMPLE).DstReg = DstReg;
  1235. _InstParam(RDPSINST_SAMPLE).CoordReg = DstReg;
  1236. _InstParam(RDPSINST_SAMPLE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  1237. _EmitDstMod(DstReg,DstWriteMask)
  1238. }
  1239. else if ( Opcode == D3DSIO_TEXM3x2DEPTH )
  1240. {
  1241. // Take resulting u,v values and compute u/v, which
  1242. // can be interpreted as z/w = perspective correct depth.
  1243. // Then perturb the z coord for the pixel.
  1244. _NewPSInst(RDPSINST_DEPTH);
  1245. _InstParam(RDPSINST_DEPTH).DstReg = DstReg;
  1246. }
  1247. else if ( (Opcode == D3DSIO_TEXM3x3SPEC) ||
  1248. (Opcode == D3DSIO_TEXM3x3VSPEC) )
  1249. {
  1250. RDPSRegister NdotE, NdotN, RCPNdotN, Scale, ReflReg;
  1251. NdotE._Set(RDPSREG_SCRATCH,2);
  1252. NdotN._Set(RDPSREG_SCRATCH,3);
  1253. RCPNdotN = NdotN; // reuse same register
  1254. Scale = NdotE; // reuse same register
  1255. ReflReg = CoordReg; // reuse same register
  1256. // compute reflection vector and do lookup - the normal needs
  1257. // to be normalized here, which is included in this expression
  1258. if (D3DSIO_TEXM3x3SPEC == Opcode)
  1259. {
  1260. // eye vector is constant register
  1261. EyeReg = SrcReg[1];
  1262. } // else (TEXM3x3VSPEC) -> eye is what was copied out of the 4th component of 3 texcoords
  1263. // Compute reflection vector: 2(NdotE/NdotN) * N - E ...
  1264. // Calculate NdotE
  1265. _NewPSInst(RDPSINST_DP3);
  1266. _InstParam(RDPSINST_DP3).DstReg = NdotE;
  1267. _InstParam(RDPSINST_DP3).SrcReg0 = DstReg; // N
  1268. _InstParam(RDPSINST_DP3).SrcReg1 = EyeReg; // E
  1269. _InstParam(RDPSINST_DP3).bSrcReg0_Negate = FALSE;
  1270. _InstParam(RDPSINST_DP3).bSrcReg1_Negate = FALSE;
  1271. _InstParam(RDPSINST_DP3).WriteMask = RDPS_COMPONENTMASK_3;
  1272. // Calculate NdotN
  1273. _NewPSInst(RDPSINST_DP3);
  1274. _InstParam(RDPSINST_DP3).DstReg = NdotN;
  1275. _InstParam(RDPSINST_DP3).SrcReg0 = DstReg; // N
  1276. _InstParam(RDPSINST_DP3).SrcReg1 = DstReg; // N
  1277. _InstParam(RDPSINST_DP3).bSrcReg0_Negate = FALSE;
  1278. _InstParam(RDPSINST_DP3).bSrcReg1_Negate = FALSE;
  1279. _InstParam(RDPSINST_DP3).WriteMask = RDPS_COMPONENTMASK_3;
  1280. // Calculate scale = 2(NdotE/NdotN):
  1281. // a) Calculate reciprocal of NdotN
  1282. _NewPSInst(RDPSINST_RCP);
  1283. _InstParam(RDPSINST_RCP).DstReg = RCPNdotN;
  1284. _InstParam(RDPSINST_RCP).SrcReg0 = NdotN;
  1285. _InstParam(RDPSINST_RCP).bSrcReg0_Negate = FALSE;
  1286. _InstParam(RDPSINST_RCP).WriteMask = RDPS_COMPONENTMASK_3;
  1287. // b) Multiply NdotE by reciprocal NdotN
  1288. _NewPSInst(RDPSINST_MUL);
  1289. _InstParam(RDPSINST_MUL).DstReg = Scale;
  1290. _InstParam(RDPSINST_MUL).SrcReg0 = NdotE;
  1291. _InstParam(RDPSINST_MUL).SrcReg1 = RCPNdotN;
  1292. _InstParam(RDPSINST_MUL).bSrcReg0_Negate = FALSE;
  1293. _InstParam(RDPSINST_MUL).bSrcReg1_Negate = FALSE;
  1294. _InstParam(RDPSINST_MUL).WriteMask = RDPS_COMPONENTMASK_3;
  1295. // c) Multiply by 2
  1296. _NewPSInst(RDPSINST_MUL);
  1297. _InstParam(RDPSINST_MUL).DstReg = Scale;
  1298. _InstParam(RDPSINST_MUL).SrcReg0 = Scale;
  1299. _InstParam(RDPSINST_MUL).SrcReg1 = TwoReg; // 2.0f
  1300. _InstParam(RDPSINST_MUL).bSrcReg0_Negate = FALSE;
  1301. _InstParam(RDPSINST_MUL).bSrcReg1_Negate = FALSE;
  1302. _InstParam(RDPSINST_MUL).WriteMask = RDPS_COMPONENTMASK_3;
  1303. // d) Replicate result to rgb
  1304. _NewPSInst(RDPSINST_SWIZZLE);
  1305. _InstParam(RDPSINST_SWIZZLE).DstReg = Scale;
  1306. _InstParam(RDPSINST_SWIZZLE).SrcReg0 = Scale;
  1307. _InstParam(RDPSINST_SWIZZLE).WriteMask = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 | RDPS_COMPONENTMASK_2;
  1308. _InstParam(RDPSINST_SWIZZLE).Swizzle = RDPS_REPLICATEALPHA;
  1309. // Calculate reflection = scale * N - E
  1310. _NewPSInst(RDPSINST_MUL);
  1311. _InstParam(RDPSINST_MUL).DstReg = ReflReg;
  1312. _InstParam(RDPSINST_MUL).SrcReg0 = Scale; // scale *
  1313. _InstParam(RDPSINST_MUL).SrcReg1 = DstReg; // N
  1314. _InstParam(RDPSINST_MUL).bSrcReg0_Negate = FALSE;
  1315. _InstParam(RDPSINST_MUL).bSrcReg1_Negate = FALSE;
  1316. _InstParam(RDPSINST_MUL).WriteMask = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 | RDPS_COMPONENTMASK_2;
  1317. _NewPSInst(RDPSINST_SUB);
  1318. _InstParam(RDPSINST_SUB).DstReg = ReflReg;
  1319. _InstParam(RDPSINST_SUB).SrcReg0 = ReflReg; // (scale * N) -
  1320. _InstParam(RDPSINST_SUB).SrcReg1 = EyeReg; // E
  1321. _InstParam(RDPSINST_SUB).bSrcReg0_Negate = FALSE;
  1322. _InstParam(RDPSINST_SUB).bSrcReg1_Negate = FALSE;
  1323. _InstParam(RDPSINST_SUB).WriteMask = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 | RDPS_COMPONENTMASK_2;
  1324. // compute gradients for reflection lookup
  1325. _LeaveQuadPixelLoop
  1326. PRGBAVEC pReflReg = ReflReg.GetRegPtr();
  1327. _NewPSInst(RDPSINST_TEXCOVERAGE);
  1328. _InstParam(RDPSINST_TEXCOVERAGE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  1329. _InstParam(RDPSINST_TEXCOVERAGE).pGradients = pRast->m_Gradients; // where to store gradients
  1330. // data from which to compute gradients. i.e.: du/dx = DUDX_0 - DUDX_1
  1331. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_0 = &pReflReg[1][0]; // du/dx
  1332. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_1 = &pReflReg[0][0];
  1333. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_0 = &pReflReg[2][0]; // du/dy
  1334. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_1 = &pReflReg[0][0];
  1335. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_0 = &pReflReg[1][1]; // dv/dx
  1336. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_1 = &pReflReg[0][1];
  1337. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_0 = &pReflReg[2][1]; // dv/dy
  1338. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_1 = &pReflReg[0][1];
  1339. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_0 = &pReflReg[1][2]; // dw/dx
  1340. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_1 = &pReflReg[0][2];
  1341. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_0 = &pReflReg[2][2]; // dw/dy
  1342. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_1 = &pReflReg[0][2];
  1343. _EnterQuadPixelLoop
  1344. // do lookup
  1345. _NewPSInst(RDPSINST_SAMPLE);
  1346. _InstParam(RDPSINST_SAMPLE).DstReg = DstReg;
  1347. _InstParam(RDPSINST_SAMPLE).CoordReg = ReflReg;
  1348. _InstParam(RDPSINST_SAMPLE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  1349. _EmitDstMod(DstReg,DstWriteMask)
  1350. }
  1351. }
  1352. break;
  1353. case D3DSIO_BEM:
  1354. _NewPSInst(RDPSINST_BEM);
  1355. _InstParam(RDPSINST_BEM).DstReg = DstReg;
  1356. _InstParam(RDPSINST_BEM).SrcReg0 = SrcReg[0];
  1357. _InstParam(RDPSINST_BEM).SrcReg1 = SrcReg[1];
  1358. _InstParam(RDPSINST_BEM).bSrcReg0_Negate = bSrcNegate[0];
  1359. _InstParam(RDPSINST_BEM).bSrcReg1_Negate = bSrcNegate[1];
  1360. _InstParam(RDPSINST_BEM).WriteMask = DstWriteMask;
  1361. _InstParam(RDPSINST_BEM).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  1362. _EmitDstMod(DstReg,DstWriteMask)
  1363. break;
  1364. case D3DSIO_MOV:
  1365. _NewPSInst(RDPSINST_MOV);
  1366. _InstParam(RDPSINST_MOV).DstReg = DstReg;
  1367. _InstParam(RDPSINST_MOV).SrcReg0 = SrcReg[0];
  1368. _InstParam(RDPSINST_MOV).bSrcReg0_Negate = bSrcNegate[0];
  1369. _InstParam(RDPSINST_MOV).WriteMask = DstWriteMask;
  1370. _EmitDstMod(DstReg,DstWriteMask)
  1371. break;
  1372. case D3DSIO_FRC:
  1373. _NewPSInst(RDPSINST_FRC);
  1374. _InstParam(RDPSINST_FRC).DstReg = DstReg;
  1375. _InstParam(RDPSINST_FRC).SrcReg0 = SrcReg[0];
  1376. _InstParam(RDPSINST_FRC).bSrcReg0_Negate = bSrcNegate[0];
  1377. _InstParam(RDPSINST_FRC).WriteMask = DstWriteMask;
  1378. _EmitDstMod(DstReg,DstWriteMask)
  1379. break;
  1380. case D3DSIO_ADD:
  1381. _NewPSInst(RDPSINST_ADD);
  1382. _InstParam(RDPSINST_ADD).DstReg = DstReg;
  1383. _InstParam(RDPSINST_ADD).SrcReg0 = SrcReg[0];
  1384. _InstParam(RDPSINST_ADD).SrcReg1 = SrcReg[1];
  1385. _InstParam(RDPSINST_ADD).bSrcReg0_Negate = bSrcNegate[0];
  1386. _InstParam(RDPSINST_ADD).bSrcReg1_Negate = bSrcNegate[1];
  1387. _InstParam(RDPSINST_ADD).WriteMask = DstWriteMask;
  1388. _EmitDstMod(DstReg,DstWriteMask)
  1389. break;
  1390. case D3DSIO_SUB:
  1391. _NewPSInst(RDPSINST_SUB);
  1392. _InstParam(RDPSINST_SUB).DstReg = DstReg;
  1393. _InstParam(RDPSINST_SUB).SrcReg0 = SrcReg[0];
  1394. _InstParam(RDPSINST_SUB).SrcReg1 = SrcReg[1];
  1395. _InstParam(RDPSINST_SUB).bSrcReg0_Negate = bSrcNegate[0];
  1396. _InstParam(RDPSINST_SUB).bSrcReg1_Negate = bSrcNegate[1];
  1397. _InstParam(RDPSINST_SUB).WriteMask = DstWriteMask;
  1398. _EmitDstMod(DstReg,DstWriteMask)
  1399. break;
  1400. case D3DSIO_MUL:
  1401. _NewPSInst(RDPSINST_MUL);
  1402. _InstParam(RDPSINST_MUL).DstReg = DstReg;
  1403. _InstParam(RDPSINST_MUL).SrcReg0 = SrcReg[0];
  1404. _InstParam(RDPSINST_MUL).SrcReg1 = SrcReg[1];
  1405. _InstParam(RDPSINST_MUL).bSrcReg0_Negate = bSrcNegate[0];
  1406. _InstParam(RDPSINST_MUL).bSrcReg1_Negate = bSrcNegate[1];
  1407. _InstParam(RDPSINST_MUL).WriteMask = DstWriteMask;
  1408. _EmitDstMod(DstReg,DstWriteMask)
  1409. break;
  1410. case D3DSIO_DP3:
  1411. _NewPSInst(RDPSINST_DP3);
  1412. _InstParam(RDPSINST_DP3).DstReg = DstReg;
  1413. _InstParam(RDPSINST_DP3).SrcReg0 = SrcReg[0];
  1414. _InstParam(RDPSINST_DP3).SrcReg1 = SrcReg[1];
  1415. _InstParam(RDPSINST_DP3).bSrcReg0_Negate = bSrcNegate[0];
  1416. _InstParam(RDPSINST_DP3).bSrcReg1_Negate = bSrcNegate[1];
  1417. _InstParam(RDPSINST_DP3).WriteMask = DstWriteMask;
  1418. _EmitDstMod(DstReg,DstWriteMask)
  1419. break;
  1420. case D3DSIO_DP4:
  1421. _NewPSInst(RDPSINST_DP4);
  1422. _InstParam(RDPSINST_DP4).DstReg = DstReg;
  1423. _InstParam(RDPSINST_DP4).SrcReg0 = SrcReg[0];
  1424. _InstParam(RDPSINST_DP4).SrcReg1 = SrcReg[1];
  1425. _InstParam(RDPSINST_DP4).bSrcReg0_Negate = bSrcNegate[0];
  1426. _InstParam(RDPSINST_DP4).bSrcReg1_Negate = bSrcNegate[1];
  1427. _InstParam(RDPSINST_DP4).WriteMask = DstWriteMask;
  1428. _EmitDstMod(DstReg,DstWriteMask)
  1429. break;
  1430. case D3DSIO_MAD:
  1431. _NewPSInst(RDPSINST_MAD);
  1432. _InstParam(RDPSINST_MAD).DstReg = DstReg;
  1433. _InstParam(RDPSINST_MAD).SrcReg0 = SrcReg[0];
  1434. _InstParam(RDPSINST_MAD).SrcReg1 = SrcReg[1];
  1435. _InstParam(RDPSINST_MAD).SrcReg2 = SrcReg[2];
  1436. _InstParam(RDPSINST_MAD).bSrcReg0_Negate = bSrcNegate[0];
  1437. _InstParam(RDPSINST_MAD).bSrcReg1_Negate = bSrcNegate[1];
  1438. _InstParam(RDPSINST_MAD).bSrcReg2_Negate = bSrcNegate[2];
  1439. _InstParam(RDPSINST_MAD).WriteMask = DstWriteMask;
  1440. _EmitDstMod(DstReg,DstWriteMask)
  1441. break;
  1442. case D3DSIO_LRP:
  1443. _NewPSInst(RDPSINST_LRP);
  1444. _InstParam(RDPSINST_LRP).DstReg = DstReg;
  1445. _InstParam(RDPSINST_LRP).SrcReg0 = SrcReg[0];
  1446. _InstParam(RDPSINST_LRP).SrcReg1 = SrcReg[1];
  1447. _InstParam(RDPSINST_LRP).SrcReg2 = SrcReg[2];
  1448. _InstParam(RDPSINST_LRP).bSrcReg0_Negate = bSrcNegate[0];
  1449. _InstParam(RDPSINST_LRP).bSrcReg1_Negate = bSrcNegate[1];
  1450. _InstParam(RDPSINST_LRP).bSrcReg2_Negate = bSrcNegate[2];
  1451. _InstParam(RDPSINST_LRP).WriteMask = DstWriteMask;
  1452. _EmitDstMod(DstReg,DstWriteMask)
  1453. break;
  1454. case D3DSIO_CND:
  1455. _NewPSInst(RDPSINST_CND);
  1456. _InstParam(RDPSINST_CND).DstReg = DstReg;
  1457. _InstParam(RDPSINST_CND).SrcReg0 = SrcReg[0];
  1458. _InstParam(RDPSINST_CND).SrcReg1 = SrcReg[1];
  1459. _InstParam(RDPSINST_CND).SrcReg2 = SrcReg[2];
  1460. _InstParam(RDPSINST_CND).bSrcReg0_Negate = bSrcNegate[0];
  1461. _InstParam(RDPSINST_CND).bSrcReg1_Negate = bSrcNegate[1];
  1462. _InstParam(RDPSINST_CND).bSrcReg2_Negate = bSrcNegate[2];
  1463. _InstParam(RDPSINST_CND).WriteMask = DstWriteMask;
  1464. _EmitDstMod(DstReg,DstWriteMask)
  1465. break;
  1466. case D3DSIO_CMP:
  1467. _NewPSInst(RDPSINST_CMP);
  1468. _InstParam(RDPSINST_CMP).DstReg = DstReg;
  1469. _InstParam(RDPSINST_CMP).SrcReg0 = SrcReg[0];
  1470. _InstParam(RDPSINST_CMP).SrcReg1 = SrcReg[1];
  1471. _InstParam(RDPSINST_CMP).SrcReg2 = SrcReg[2];
  1472. _InstParam(RDPSINST_CMP).bSrcReg0_Negate = bSrcNegate[0];
  1473. _InstParam(RDPSINST_CMP).bSrcReg1_Negate = bSrcNegate[1];
  1474. _InstParam(RDPSINST_CMP).bSrcReg2_Negate = bSrcNegate[2];
  1475. _InstParam(RDPSINST_CMP).WriteMask = DstWriteMask;
  1476. _EmitDstMod(DstReg,DstWriteMask)
  1477. break;
  1478. default:
  1479. break;
  1480. }
  1481. if( pInst->bFlushQueue )
  1482. {
  1483. _EnterQuadPixelLoop
  1484. _NewPSInst(RDPSINST_FLUSHQUEUE);
  1485. QueueIndex = -1;
  1486. }
  1487. #if DBG
  1488. _LeaveQuadPixelLoop
  1489. #endif
  1490. }
  1491. // Flush queue at end of shader if there is anything on it
  1492. if( -1 != QueueIndex )
  1493. {
  1494. _EnterQuadPixelLoop
  1495. _NewPSInst(RDPSINST_FLUSHQUEUE);
  1496. QueueIndex = -1;
  1497. }
  1498. _LeaveQuadPixelLoop
  1499. _NewPSInst(RDPSINST_END);
  1500. #if DBG
  1501. if( pRast->m_bDebugPrintTranslatedPixelShaderTokens )
  1502. RDPSDisAsm(pRDPSInstBuffer, m_pConstDefs, m_cConstDefs,pCaps->MaxPixelShaderValue, Version);
  1503. #endif
  1504. }
  1505. return S_OK;
  1506. }
  1507. ///////////////////////////////////////////////////////////////////////////////
  1508. // end