Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1673 lines
82 KiB

  1. ///////////////////////////////////////////////////////////////////////////////
  2. // Copyright (C) Microsoft Corporation, 2000.
  3. //
  4. // pixshade.cpp
  5. //
  6. // Direct3D Reference Device - Pixel Shader
  7. //
  8. ///////////////////////////////////////////////////////////////////////////////
  9. #include "pch.cpp"
  10. #pragma hdrstop
  11. //-----------------------------------------------------------------------------
  12. RDPShader::RDPShader(void)
  13. {
  14. m_pRD = NULL;
  15. m_pCode = NULL;
  16. m_CodeSize = 0;
  17. m_cActiveTextureStages = 0;
  18. m_ReferencedTexCoordMask = 0;
  19. m_cInst = 0;
  20. m_pInst = NULL;
  21. m_cConstDefs = 0;
  22. m_pConstDefs = NULL;
  23. }
  24. //-----------------------------------------------------------------------------
  25. RDPShader::~RDPShader()
  26. {
  27. if (NULL != m_pCode) delete[] m_pCode;
  28. if (NULL != m_pInst) delete[] m_pInst;
  29. if (NULL != m_pConstDefs) delete[] m_pConstDefs;
  30. }
  // Number of DWORD tokens between the start of the token stream and the
  // current parse position. Expects locals named 'pCode' and 'pToken' to be in
  // scope. The pointer difference is narrowed to int because the value is
  // compared against (int) counts and passed to "%d" format arguments.
  #define _DWordCount() ((int)(pToken - pCode))
  // Sets bit 'Reg' in the bitfield addressed by the local
  // 'pReferencedTexCoordMask'. Written as a single parenthesized expression
  // with no trailing semicolon so that it behaves like an ordinary statement
  // (safe inside if/else) and so that 'Reg' may be any expression.
  #define _RegisterNeedsToBeInitializedWithTexcoords(Reg) \
      ((*pReferencedTexCoordMask) |= (1 << (Reg)))
  33. //-----------------------------------------------------------------------------
  34. //
  35. // UpdateReferencedTexCoords
  36. //
  37. // Called for each instruction while parsing a 1.3 pixelshader.
  38. // Updates pReferencedTexCoordMask (bitfield) to represent
  39. // which texture coordinate sets are actually used by the shader.
  40. // This is used to eliminate unnecessary attribute setup/sampling during
  41. // primitive rasterization.
  42. //
  43. //-----------------------------------------------------------------------------
  44. void UpdateReferencedTexCoords(PixelShaderInstruction* pInst,
  45. DWORD* pReferencedTexCoordMask )
  46. {
  47. switch( pInst->Opcode & D3DSI_OPCODE_MASK )
  48. {
  49. case D3DSIO_TEX:
  50. case D3DSIO_TEXCOORD:
  51. case D3DSIO_TEXDEPTH:
  52. {
  53. for( UINT i = 0; i < 3; i++ )
  54. {
  55. UINT RegNum = pInst->SrcParam[i] & 0xFF;
  56. if( D3DSPR_TEXTURE == (pInst->SrcParam[i] & D3DSP_REGTYPE_MASK) )
  57. _RegisterNeedsToBeInitializedWithTexcoords(RegNum);
  58. }
  59. }
  60. break;
  61. case D3DSIO_TEXKILL: // treat dest param as source
  62. {
  63. UINT RegNum = pInst->DstParam & 0xFF;
  64. if( D3DSPR_TEXTURE == (pInst->DstParam & D3DSP_REGTYPE_MASK) )
  65. _RegisterNeedsToBeInitializedWithTexcoords(RegNum);
  66. }
  67. break;
  68. }
  69. }
  70. void CalculateSourceReadMasks(PixelShaderInstruction* pInst, BYTE* pSourceReadMasks, BOOL bAfterSwizzle, DWORD dwVersion)
  71. {
  72. UINT i, j;
  73. DWORD Opcode = pInst->Opcode & D3DSI_OPCODE_MASK;
  74. BYTE ComponentMask[4]= {RDPS_COMPONENTMASK_0, RDPS_COMPONENTMASK_1, RDPS_COMPONENTMASK_2, RDPS_COMPONENTMASK_3};
  75. for( i = 0; i < pInst->SrcParamCount; i++ )
  76. {
  77. BYTE NeededComponents;
  78. BYTE ReadComponents = 0;
  79. switch( Opcode )
  80. {
  81. case D3DSIO_TEX: // only in ps.1.4 does texld have source parameter
  82. if( D3DPS_VERSION(1,4) == dwVersion )
  83. {
  84. // for ps.1.4, texld has a source parameter
  85. NeededComponents = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 | RDPS_COMPONENTMASK_2;
  86. }
  87. else // versions < ps.1.4 don't have a src param on tex, so we shouldn't get here. But maybe in ps.2.0...
  88. {
  89. NeededComponents = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 | RDPS_COMPONENTMASK_2 | RDPS_COMPONENTMASK_3;
  90. }
  91. break;
  92. case D3DSIO_TEXCOORD:
  93. if( D3DPS_VERSION(1,4) == dwVersion )
  94. {
  95. // for ps.1.4, texcrd has a source parameter
  96. NeededComponents = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 | RDPS_COMPONENTMASK_2;
  97. }
  98. else // versions < ps.1.4 don't have a src param on texcoord, so we shouldn't get here. But maybe in ps.2.0...
  99. {
  100. NeededComponents = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 | RDPS_COMPONENTMASK_2 | RDPS_COMPONENTMASK_3;
  101. }
  102. break;
  103. case D3DSIO_TEXBEM:
  104. case D3DSIO_TEXBEML:
  105. NeededComponents = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1;
  106. break;
  107. case D3DSIO_DP3:
  108. NeededComponents = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 | RDPS_COMPONENTMASK_2;
  109. break;
  110. case D3DSIO_DP4:
  111. NeededComponents = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 | RDPS_COMPONENTMASK_2 | RDPS_COMPONENTMASK_3;
  112. break;
  113. case D3DSIO_BEM: // ps.1.4
  114. NeededComponents = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1;
  115. break;
  116. default:
  117. // standard component-wise instruction,
  118. // OR an op we know reads .rgba and we also know it will be validated to .rgba writemask
  119. NeededComponents = (pInst->DstParam & D3DSP_WRITEMASK_ALL) >> RDPS_COMPONENTMASK_SHIFT;
  120. break;
  121. }
  122. if( bAfterSwizzle )
  123. {
  124. pSourceReadMasks[i] = NeededComponents;
  125. }
  126. else
  127. {
  128. // Figure out which components of this source parameter are read (taking into account swizzle)
  129. for(j = 0; j < 4; j++)
  130. {
  131. if( NeededComponents & ComponentMask[j] )
  132. ReadComponents |= ComponentMask[((pInst->SrcParam[i] & D3DSP_SWIZZLE_MASK) >> (D3DVS_SWIZZLE_SHIFT + 2*j)) & 0x3];
  133. }
  134. pSourceReadMasks[i] = ReadComponents;
  135. }
  136. }
  137. }
  138. void RDPSRegister::Set(RDPS_REGISTER_TYPE RegType, UINT RegNum, RefRast* pRast)
  139. {
  140. m_RegType = RegType;
  141. m_RegNum = RegNum;
  142. UINT MaxRegNum = 0;
  143. switch( RegType )
  144. {
  145. case RDPSREG_INPUT:
  146. MaxRegNum = RDPS_MAX_NUMINPUTREG - 1;
  147. m_pReg = pRast->m_InputReg[RegNum];
  148. break;
  149. case RDPSREG_TEMP:
  150. MaxRegNum = RDPS_MAX_NUMTEMPREG - 1;
  151. m_pReg = pRast->m_TempReg[RegNum];
  152. break;
  153. case RDPSREG_CONST:
  154. MaxRegNum = RDPS_MAX_NUMCONSTREG - 1;
  155. m_pReg = pRast->m_ConstReg[RegNum];
  156. break;
  157. case RDPSREG_TEXTURE:
  158. MaxRegNum = RDPS_MAX_NUMTEXTUREREG - 1;
  159. m_pReg = pRast->m_TextReg[RegNum];
  160. break;
  161. case RDPSREG_POSTMODSRC:
  162. MaxRegNum = RDPS_MAX_NUMPOSTMODSRCREG - 1;
  163. m_pReg = pRast->m_PostModSrcReg[RegNum];
  164. break;
  165. case RDPSREG_SCRATCH:
  166. MaxRegNum = RDPS_MAX_NUMSCRATCHREG - 1;
  167. m_pReg = pRast->m_ScratchReg[RegNum];
  168. break;
  169. case RDPSREG_QUEUEDWRITE:
  170. MaxRegNum = RDPS_MAX_NUMQUEUEDWRITEREG - 1;
  171. m_pReg = pRast->m_QueuedWriteReg[RegNum];
  172. break;
  173. case RDPSREG_ZERO:
  174. MaxRegNum = 0;
  175. m_pReg = pRast->m_ZeroReg;
  176. break;
  177. case RDPSREG_ONE:
  178. MaxRegNum = 0;
  179. m_pReg = pRast->m_OneReg;
  180. break;
  181. case RDPSREG_TWO:
  182. MaxRegNum = 0;
  183. m_pReg = pRast->m_TwoReg;
  184. break;
  185. default:
  186. m_pReg = NULL;
  187. _ASSERT(FALSE,"RDPSRegister::SetReg - Unknown register type.");
  188. break;
  189. }
  190. if( RegNum > MaxRegNum )
  191. {
  192. _ASSERT(FALSE,"RDPSRegister::SetReg - Register number too high.");
  193. }
  194. return;
  195. }
  196. //-----------------------------------------------------------------------------
  197. //
  198. // Initialize
  199. //
  200. // - Copies pixel shader token stream from DDI token stream.
  201. // - Counts the number of active texture stages for m_cActiveTextureStages.
  202. // - Translates shader into "RISC" instruction set to be executed
  203. // by refrast's shader VM
  204. //
  205. //-----------------------------------------------------------------------------
  206. HRESULT
  207. RDPShader::Initialize(
  208. RefDev* pRD, DWORD* pCode, DWORD ByteCodeSize, D3DCAPS8* pCaps )
  209. {
  210. m_pRD = pRD;
  211. m_CodeSize = ByteCodeSize/4; // bytecount -> dword count
  212. FLOAT fMin = -(pCaps->MaxPixelShaderValue);
  213. FLOAT fMax = (pCaps->MaxPixelShaderValue);
  214. // ------------------------------------------------------------------------
  215. //
  216. // First pass through shader to find the number of instructions,
  217. // figure out how many constants there are.
  218. //
  219. // ------------------------------------------------------------------------
  220. {
  221. DWORD* pToken = pCode;
  222. pToken++; // version token
  223. while (*pToken != D3DPS_END())
  224. {
  225. DWORD Inst = *pToken;
  226. if (*pToken++ & (1L<<31)) // instruction token
  227. {
  228. DPFERR("PixelShader Token #%d: instruction token error",_DWordCount());
  229. return E_FAIL;
  230. }
  231. if ( (Inst & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT )
  232. {
  233. pToken += (Inst & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
  234. m_cInst++;
  235. }
  236. else if( (Inst & D3DSI_OPCODE_MASK) == D3DSIO_DEF )
  237. {
  238. m_cConstDefs++;
  239. pToken += 5;
  240. }
  241. else
  242. {
  243. if (*pToken & (1L<<31)) pToken++; // destination param token
  244. while (*pToken & (1L<<31)) pToken++; // source param tokens
  245. m_cInst++;
  246. }
  247. if (_DWordCount() > (int)m_CodeSize)
  248. {
  249. DPFERR("PixelShader(%d tokens, %d expected): count error",_DWordCount(),m_CodeSize);
  250. return E_FAIL;
  251. }
  252. }
  253. pToken++; // step over END token
  254. if (_DWordCount() != (int)m_CodeSize)
  255. {
  256. DPFERR("PixelShader(%d tokens, %d expected): count error",_DWordCount(),m_CodeSize);
  257. return E_FAIL;
  258. }
  259. // make copy of original shader
  260. m_pCode = new DWORD[m_CodeSize];
  261. if (NULL == m_pCode)
  262. return E_OUTOFMEMORY;
  263. memcpy( m_pCode, pCode, ByteCodeSize );
  264. // allocate instruction array
  265. m_pInst = new PixelShaderInstruction[m_cInst];
  266. if (NULL == m_pInst)
  267. return E_OUTOFMEMORY;
  268. memset( m_pInst, 0x0, sizeof(PixelShaderInstruction)*m_cInst );
  269. m_pConstDefs = new ConstDef[m_cConstDefs];
  270. if (NULL == m_pConstDefs)
  271. return E_OUTOFMEMORY;
  272. }
  273. // ------------------------------------------------------------------------
  274. //
  275. // Second pass through shader to:
  276. // - produce a list of instructions, each one including opcodes,
  277. // comments, and disassembled text for access by shader debuggers.
  278. // - figure out the TSS # used (if any) by each instruction
  279. // - figure out the max texture stage # used
  280. // - figure out when the ref. pixel shader executor should
  281. // queue writes up and when to flush the queue, in order to
  282. // simulate co-issue.
  283. // - figure out which texture coordinate sets get used
  284. // - process constant DEF instructions into a list that can be
  285. // executed whenever SetPixelShader is done.
  286. //
  287. // ------------------------------------------------------------------------
  288. {
  289. DWORD* pToken = m_pCode;
  290. PixelShaderInstruction* pInst = m_pInst;
  291. PixelShaderInstruction* pPrevious_NonTrivial_Inst = NULL;
  292. pToken++; // skip over version
  293. BOOL bMinimizeReferencedTexCoords;
  294. if( (D3DPS_VERSION(1,3) >= *pCode) ||
  295. (D3DPS_VERSION(254,254) == *pCode ) )//legacy
  296. {
  297. bMinimizeReferencedTexCoords = FALSE;
  298. }
  299. else
  300. {
  301. bMinimizeReferencedTexCoords = TRUE;
  302. }
  303. UINT CurrConstDef = 0;
  304. while (*pToken != D3DPS_END())
  305. {
  306. switch( (*pToken) & D3DSI_OPCODE_MASK )
  307. {
  308. case D3DSIO_COMMENT:
  309. pInst->Opcode = *pToken;
  310. pInst->pComment = (pToken+1);
  311. pInst->CommentSize = ((*pToken) & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
  312. pToken += (pInst->CommentSize+1);
  313. pInst++;
  314. continue;
  315. case D3DSIO_DEF:
  316. {
  317. pToken++;
  318. m_pConstDefs[CurrConstDef].RegNum = (*pToken++) & D3DSP_REGNUM_MASK;
  319. // clamp constants on input to range of values in pixel shaders
  320. for( UINT i = 0; i < 4; i++ )
  321. {
  322. m_pConstDefs[CurrConstDef].f[i] = MAX( fMin, MIN( fMax, *(FLOAT*)pToken));
  323. pToken++;
  324. }
  325. CurrConstDef++;
  326. continue;
  327. }
  328. case D3DSIO_NOP:
  329. // get disasm string
  330. PixelShaderInstDisAsm( pInst->Text, 128, pToken, 0x0 );
  331. pInst->Opcode = *pToken++;
  332. pInst++;
  333. continue;
  334. }
  335. // get disasm string
  336. PixelShaderInstDisAsm( pInst->Text, 128, pToken, 0x0 );
  337. // get next instruction and parameters
  338. pInst->Opcode = *pToken++;
  339. pInst->SrcParamCount = 0;
  340. if (*pToken & (1L<<31))
  341. {
  342. pInst->DstParam = *pToken++;
  343. }
  344. while (*pToken & (1L<<31))
  345. {
  346. pInst->SrcParam[pInst->SrcParamCount++] = *pToken++;
  347. }
  348. // process TEX ops
  349. //
  350. BOOL bLegacyTexOp = FALSE;
  351. switch (pInst->Opcode & D3DSI_OPCODE_MASK)
  352. {
  353. default: break;
  354. case D3DSIO_TEXBEM_LEGACY:
  355. case D3DSIO_TEXBEML_LEGACY:
  356. bLegacyTexOp = TRUE;
  357. // fall through
  358. case D3DSIO_TEXCOORD:
  359. case D3DSIO_TEXKILL:
  360. case D3DSIO_TEX:
  361. case D3DSIO_TEXBEM:
  362. case D3DSIO_TEXBEML:
  363. case D3DSIO_TEXREG2AR:
  364. case D3DSIO_TEXREG2GB:
  365. case D3DSIO_TEXM3x2PAD:
  366. case D3DSIO_TEXM3x2TEX:
  367. case D3DSIO_TEXM3x3PAD:
  368. case D3DSIO_TEXM3x3TEX:
  369. case D3DSIO_TEXM3x3SPEC:
  370. case D3DSIO_TEXM3x3VSPEC:
  371. case D3DSIO_TEXM3x2DEPTH:
  372. case D3DSIO_TEXDP3:
  373. case D3DSIO_TEXREG2RGB:
  374. case D3DSIO_TEXDEPTH:
  375. case D3DSIO_TEXDP3TEX:
  376. case D3DSIO_TEXM3x3:
  377. pInst->bTexOp = TRUE;
  378. break;
  379. }
  380. if (pInst->bTexOp)
  381. {
  382. // update stage count and assign ptr to TSS for this op
  383. if (bLegacyTexOp)
  384. {
  385. m_cActiveTextureStages =
  386. max(m_cActiveTextureStages,(pInst->DstParam&D3DSP_REGNUM_MASK)+1);
  387. pInst->uiTSSNum = (pInst->DstParam&D3DSP_REGNUM_MASK)-1;
  388. }
  389. else
  390. {
  391. UINT Stage;
  392. BOOL bStageUsed = TRUE;
  393. switch(pInst->Opcode & D3DSI_OPCODE_MASK)
  394. {
  395. case D3DSIO_TEXCOORD:
  396. case D3DSIO_TEXDEPTH:
  397. case D3DSIO_TEXKILL:
  398. if( bMinimizeReferencedTexCoords )
  399. {
  400. bStageUsed = FALSE;
  401. break;
  402. }
  403. // falling through
  404. case D3DSIO_TEX:
  405. default:
  406. Stage = pInst->DstParam&D3DSP_REGNUM_MASK;
  407. break;
  408. }
  409. if( bStageUsed )
  410. {
  411. m_cActiveTextureStages = max(m_cActiveTextureStages,Stage+1);
  412. pInst->uiTSSNum = Stage;
  413. }
  414. }
  415. }
  416. if( pPrevious_NonTrivial_Inst )
  417. {
  418. // Queue write of last instruction if the current instruction has the
  419. // COISSUE flag.
  420. if( pInst->Opcode & D3DSI_COISSUE )
  421. {
  422. pPrevious_NonTrivial_Inst->bQueueWrite = TRUE;
  423. }
  424. // Flush writes after the previous instruction if it had the COISSUE
  425. // flag and the current instruction doesn't have it.
  426. if( !(pInst->Opcode & D3DSI_COISSUE) && (pPrevious_NonTrivial_Inst->Opcode & D3DSI_COISSUE) )
  427. {
  428. pPrevious_NonTrivial_Inst->bFlushQueue = TRUE;
  429. }
  430. }
  431. pPrevious_NonTrivial_Inst = pInst;
  432. if( bMinimizeReferencedTexCoords )
  433. {
  434. UpdateReferencedTexCoords(pInst, &m_ReferencedTexCoordMask);
  435. }
  436. pInst++;
  437. }
  438. if( !bMinimizeReferencedTexCoords )
  439. {
  440. m_ReferencedTexCoordMask = (1<<m_cActiveTextureStages) - 1;
  441. }
  442. }
  443. // ------------------------------------------------------------------------
  444. //
  445. // Third pass through the shader (through the list of instructions made
  446. // in the last pass) to translate instructions into a more basic ("RISC")
  447. // instruction set for the refrast executor.
  448. //
  449. // ------------------------------------------------------------------------
  450. {
  451. #define _Set(RegType, RegNum) Set(RegType,RegNum,pRast)
  452. #define _NewPSInst(__INST) \
  453. { \
  454. RDPSOffset = pRDPSInst - pRDPSInstBuffer + LastRDPSInstSize; \
  455. m_RDPSInstBuffer.SetGrowSize(MAX(512,RDPSOffset)); \
  456. if( FAILED(m_RDPSInstBuffer.Grow(RDPSOffset + sizeof(__INST##_PARAMS)))) \
  457. {return E_OUTOFMEMORY;} \
  458. pRDPSInstBuffer = &m_RDPSInstBuffer[0]; \
  459. pRDPSInst = pRDPSInstBuffer + RDPSOffset; \
  460. ((__INST##_PARAMS UNALIGNED64*)pRDPSInst)->Inst = __INST; \
  461. LastRDPSInstSize = sizeof(__INST##_PARAMS); \
  462. }
  463. #define _InstParam(__INST) (*(__INST##_PARAMS UNALIGNED64*)pRDPSInst)
  464. #define _NoteInstructionEvent _NewPSInst(RDPSINST_NEXTD3DPSINST); \
  465. _InstParam(RDPSINST_NEXTD3DPSINST).pInst = pInst;
  466. #define _EnterQuadPixelLoop if(!bInQuadPixelLoop) \
  467. { \
  468. _NewPSInst(RDPSINST_QUADLOOPBEGIN); \
  469. RDPSLoopOffset = RDPSOffset + sizeof(RDPSINST_QUADLOOPBEGIN_PARAMS); \
  470. bInQuadPixelLoop = TRUE; \
  471. }
  472. #define _LeaveQuadPixelLoop if(bInQuadPixelLoop) \
  473. { \
  474. _NewPSInst(RDPSINST_QUADLOOPEND); \
  475. _InstParam(RDPSINST_QUADLOOPEND).JumpBackByOffset = \
  476. RDPSOffset - RDPSLoopOffset;\
  477. bInQuadPixelLoop = FALSE; \
  478. }
  479. #define _EmitDstMod(__dstReg,__mask) _NewPSInst(RDPSINST_DSTMOD); \
  480. _InstParam(RDPSINST_DSTMOD).DstReg = __dstReg; \
  481. _InstParam(RDPSINST_DSTMOD).WriteMask = __mask; \
  482. _InstParam(RDPSINST_DSTMOD).fScale = DstScale; \
  483. _InstParam(RDPSINST_DSTMOD).fRangeMin = DstRange[0]; \
  484. _InstParam(RDPSINST_DSTMOD).fRangeMax = DstRange[1];
  485. // The macro _EmitProj emits instructions to do the following:
  486. // - Put reciprocal of source (x,y,z,w) component __COMPONENT (ex. w) into scratch register 0 component (for w example:) 4
  487. // - Replicate reciprocal to rgb components of scratch register 0 (w example yields: 1/w,1/w,1/w, <--1/w)
  488. // - Multiply source register by scratch register (x/w,y/w,z/w,1) and put the result into the dest register.
  489. #define _EmitProj(__DESTTYPE,__DESTNUM,__SRCTYPE,__SRCNUM,__COMPONENT) \
  490. _NewPSInst(RDPSINST_RCP); \
  491. _InstParam(RDPSINST_RCP).DstReg._Set(RDPSREG_SCRATCH,0); \
  492. _InstParam(RDPSINST_RCP).SrcReg0._Set(__SRCTYPE,__SRCNUM); \
  493. _InstParam(RDPSINST_RCP).bSrcReg0_Negate = FALSE; \
  494. _InstParam(RDPSINST_RCP).WriteMask = __COMPONENT; \
  495. \
  496. _NewPSInst(RDPSINST_SWIZZLE); \
  497. _InstParam(RDPSINST_SWIZZLE).DstReg._Set(RDPSREG_SCRATCH,0); \
  498. _InstParam(RDPSINST_SWIZZLE).SrcReg0._Set(RDPSREG_SCRATCH,0); \
  499. _InstParam(RDPSINST_SWIZZLE).WriteMask = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 \
  500. | RDPS_COMPONENTMASK_2 | RDPS_COMPONENTMASK_3; \
  501. _InstParam(RDPSINST_SWIZZLE).Swizzle = \
  502. (RDPS_COMPONENTMASK_0 == __COMPONENT) ? RDPS_REPLICATERED : \
  503. (RDPS_COMPONENTMASK_1 == __COMPONENT) ? RDPS_REPLICATEGREEN : \
  504. (RDPS_COMPONENTMASK_2 == __COMPONENT) ? RDPS_REPLICATEBLUE : RDPS_REPLICATEALPHA; \
  505. \
  506. _NewPSInst(RDPSINST_MUL); \
  507. _InstParam(RDPSINST_MUL).DstReg._Set(__DESTTYPE,__DESTNUM); \
  508. _InstParam(RDPSINST_MUL).SrcReg0._Set(RDPSREG_SCRATCH,0); \
  509. _InstParam(RDPSINST_MUL).SrcReg1._Set(__SRCTYPE,__SRCNUM); \
  510. _InstParam(RDPSINST_MUL).bSrcReg0_Negate = FALSE; \
  511. _InstParam(RDPSINST_MUL).bSrcReg1_Negate = FALSE; \
  512. _InstParam(RDPSINST_MUL).WriteMask = \
  513. (RDPS_COMPONENTMASK_0 == __COMPONENT) ? RDPS_COMPONENTMASK_0 : \
  514. (RDPS_COMPONENTMASK_1 == __COMPONENT) ? RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 : \
  515. (RDPS_COMPONENTMASK_2 == __COMPONENT) ? RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 | \
  516. RDPS_COMPONENTMASK_2 : RDPS_COMPONENTMASK_ALL;
  517. BYTE ComponentSwizzle[4] = {RDPS_REPLICATERED, RDPS_REPLICATEGREEN, RDPS_REPLICATEBLUE, RDPS_REPLICATEALPHA};
  518. BYTE ComponentMask[4] = {RDPS_COMPONENTMASK_0, RDPS_COMPONENTMASK_1, RDPS_COMPONENTMASK_2, RDPS_COMPONENTMASK_3};
  519. int QueueIndex = -1; // current queue location (for staging results when simulating coissue)
  520. UINT i;
  521. BOOL bInQuadPixelLoop = FALSE;
  522. RefRast* pRast = &m_pRD->m_Rast;
  523. RDPSRegister ZeroReg; ZeroReg._Set(RDPSREG_ZERO,0);
  524. RDPSRegister OneReg; OneReg._Set(RDPSREG_ONE,0);
  525. RDPSRegister TwoReg; TwoReg._Set(RDPSREG_TWO,0);
  526. // destination parameter controls
  527. RDPSRegister DstReg;
  528. FLOAT DstScale; // Result Shift Scale - +/- 2**n only
  529. FLOAT DstRange[2]; // clamp dest to this range
  530. BYTE DstWriteMask; // per-component write mask
  531. PRGBAVEC pDstReg; // address of dest register
  532. // source parameter controls
  533. RDPSRegister SrcReg[3];
  534. BYTE* pRDPSInstBuffer = NULL;
  535. BYTE* pRDPSInst = pRDPSInstBuffer;
  536. size_t RDPSOffset, RDPSLoopOffset;
  537. size_t LastRDPSInstSize = 0;
  538. DWORD Version = *m_pCode;
  539. for (UINT CurrentPSInst=0; CurrentPSInst < m_cInst; CurrentPSInst++)
  540. {
  541. PixelShaderInstruction* pInst = m_pInst + CurrentPSInst;
  542. DWORD Opcode = pInst->Opcode & D3DSI_OPCODE_MASK;
  543. DWORD SrcSwizzle[3];
  544. BYTE SourceReadMasks[3];
  545. BYTE SourceReadMasksAfterSwizzle[3];
  546. BOOL bForceNeg1To1Clamp[3] = {FALSE, FALSE, FALSE};
  547. BOOL bEmitQueueWrite = FALSE;
  548. RDPSRegister QueuedWriteDstReg;
  549. BYTE QueuedWriteDstWriteMask;
  550. BYTE ProjComponent[3] = {0,0,0};
  551. BOOL bEmitProj[3] = {FALSE, FALSE, FALSE};
  552. BOOL bProjOnEval[3] = {FALSE, FALSE, FALSE};
  553. BOOL bEmitSrcMod[3] = {FALSE, FALSE, FALSE};
  554. BOOL bEmitSwizzle[3] = {FALSE, FALSE, FALSE};
  555. BOOL bSrcNegate[3] = {FALSE, FALSE, FALSE};
  556. BOOL bSrcBias[3] = {FALSE, FALSE, FALSE};
  557. BOOL bSrcTimes2[3] = {FALSE, FALSE, FALSE};
  558. BOOL bSrcComplement[3] = {FALSE, FALSE, FALSE};
  559. switch( Opcode )
  560. {
  561. continue;
  562. case D3DSIO_DEF:
  563. // nothing to do -> DEF has already been processed out and is not a true instruction
  564. continue;
  565. case D3DSIO_COMMENT:
  566. continue;
  567. case D3DSIO_PHASE:
  568. case D3DSIO_NOP:
  569. #if DBG
  570. _NoteInstructionEvent
  571. #endif
  572. continue;
  573. }
  574. #if DBG
  575. _NoteInstructionEvent
  576. #endif
  577. // do some preliminary setup for this instruction
  578. UINT RegNum = pInst->DstParam & D3DSP_REGNUM_MASK;
  579. switch (pInst->DstParam & D3DSP_REGTYPE_MASK)
  580. {
  581. case D3DSPR_TEXTURE:
  582. DstReg._Set(RDPSREG_TEXTURE, RegNum); break;
  583. case D3DSPR_TEMP:
  584. DstReg._Set(RDPSREG_TEMP, RegNum); break;
  585. default:
  586. _ASSERT( FALSE, "RDPShader::Initialize - Unexpected destination register type." );
  587. break;
  588. }
  589. DstWriteMask = (pInst->DstParam & D3DSP_WRITEMASK_ALL) >> RDPS_COMPONENTMASK_SHIFT;
  590. if( pInst->bQueueWrite )
  591. {
  592. QueueIndex++;
  593. QueuedWriteDstReg = DstReg;
  594. QueuedWriteDstWriteMask = DstWriteMask;
  595. DstReg._Set(RDPSREG_QUEUEDWRITE,QueueIndex);
  596. _ASSERT(QueueIndex <= RDPS_MAX_NUMQUEUEDWRITEREG, "Too many queued writes in pixelshader (improperly handled co-issue)." );
  597. bEmitQueueWrite = TRUE;
  598. }
  599. CalculateSourceReadMasks(pInst, SourceReadMasks, FALSE,Version);
  600. CalculateSourceReadMasks(pInst, SourceReadMasksAfterSwizzle, TRUE,Version);
  601. for (i=0; i < pInst->SrcParamCount; i++)
  602. {
  603. RegNum = pInst->SrcParam[i]&D3DSP_REGNUM_MASK;
  604. switch (pInst->SrcParam[i] & D3DSP_REGTYPE_MASK)
  605. {
  606. case D3DSPR_TEMP:
  607. SrcReg[i]._Set(RDPSREG_TEMP, RegNum); break;
  608. case D3DSPR_TEXTURE:
  609. SrcReg[i]._Set(RDPSREG_TEXTURE, RegNum); break;
  610. case D3DSPR_INPUT:
  611. SrcReg[i]._Set(RDPSREG_INPUT, RegNum); break;
  612. case D3DSPR_CONST:
  613. SrcReg[i]._Set(RDPSREG_CONST, RegNum);
  614. // Force a [-1,1] clamp after applying modifier (for constants only)
  615. // This overrides the standard [-MaxPixelShaderValue,MaxPixelShaderValue] clamp.
  616. // An IHV that supports MaxPixelShaderValue > 1 forgot to do this for constants.
  617. bForceNeg1To1Clamp[i] = TRUE;
  618. break;
  619. default:
  620. _ASSERT( FALSE, "RDPShader::Initialize - Unexpected source register type." );
  621. break;
  622. }
  623. if( (D3DSPSM_DZ == (pInst->SrcParam[i] & D3DSP_SRCMOD_MASK)) ||
  624. (D3DSPSM_DW == (pInst->SrcParam[i] & D3DSP_SRCMOD_MASK)) )
  625. {
  626. if( D3DSPSM_DZ == (pInst->SrcParam[i] & D3DSP_SRCMOD_MASK))
  627. {
  628. ProjComponent[i] = RDPS_COMPONENTMASK_2;
  629. }
  630. else // _DW
  631. {
  632. if( D3DPS_VERSION(1,4) == Version )
  633. ProjComponent[i] = RDPS_COMPONENTMASK_2;
  634. else
  635. ProjComponent[i] = RDPS_COMPONENTMASK_3;
  636. }
  637. if( D3DSPR_TEXTURE == (pInst->SrcParam[i] & D3DSP_REGTYPE_MASK ) ) // t# register being used to represent evaluated texcoord.
  638. {
  639. bProjOnEval[i] = TRUE;
  640. }
  641. else
  642. bEmitProj[i] = TRUE;
  643. }
  644. else
  645. {
  646. bEmitSrcMod[i] = TRUE;
  647. switch (pInst->SrcParam[i] & D3DSP_SRCMOD_MASK)
  648. {
  649. default:
  650. case D3DSPSM_NONE:
  651. if( !bForceNeg1To1Clamp[i] )
  652. bEmitSrcMod[i] = FALSE;
  653. break;
  654. case D3DSPSM_NEG:
  655. bSrcNegate[i] = TRUE; // negate is not part of source modifier
  656. if( !bForceNeg1To1Clamp[i] )
  657. bEmitSrcMod[i] = FALSE;
  658. break;
  659. case D3DSPSM_BIAS:
  660. bSrcBias[i] = TRUE;
  661. break;
  662. case D3DSPSM_BIASNEG:
  663. bSrcNegate[i] = TRUE;
  664. bSrcBias[i] = TRUE;
  665. break;
  666. case D3DSPSM_SIGN: // _bx2
  667. bSrcBias[i] = TRUE;
  668. bSrcTimes2[i] = TRUE;
  669. break;
  670. case D3DSPSM_SIGNNEG: // negative _bx2
  671. bSrcNegate[i] = TRUE; // negate is not part of source modifier
  672. bSrcBias[i] = TRUE;
  673. bSrcTimes2[i] = TRUE;
  674. break;
  675. case D3DSPSM_COMP:
  676. bSrcComplement[i] = TRUE;
  677. break;
  678. case D3DSPSM_X2:
  679. bSrcTimes2[i] = TRUE;
  680. break;
  681. case D3DSPSM_X2NEG:
  682. bSrcNegate[i] = TRUE; // negate is not part of source modifier
  683. bSrcTimes2[i] = TRUE;
  684. break;
  685. }
  686. _ASSERT(!(bSrcComplement[i] && (bSrcTimes2[i]||bSrcBias[i]||bSrcNegate[i])),"RDPShader::Initialize - Complement cannot be combined with other modifiers.");
  687. }
  688. SrcSwizzle[i] = (pInst->SrcParam[i] & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
  689. bEmitSwizzle[i] = (D3DSP_NOSWIZZLE != (pInst->SrcParam[i] & D3DSP_SWIZZLE_MASK));
  690. }
  691. // set clamp values
  692. switch (pInst->DstParam & D3DSP_DSTMOD_MASK)
  693. {
  694. default:
  695. case D3DSPDM_NONE:
  696. if(pInst->bTexOp)
  697. {
  698. DstRange[0] = -FLT_MAX;
  699. DstRange[1] = FLT_MAX;
  700. }
  701. else
  702. {
  703. DstRange[0] = fMin;
  704. DstRange[1] = fMax;
  705. }
  706. break;
  707. case D3DSPDM_SATURATE:
  708. DstRange[0] = 0.F;
  709. DstRange[1] = 1.F;
  710. break;
  711. }
  712. UINT ShiftScale =
  713. (pInst->DstParam & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
  714. if (ShiftScale & 0x8)
  715. {
  716. ShiftScale = ((~ShiftScale)&0x7)+1; // negative magnitude
  717. DstScale = 1.f/(FLOAT)(1<<ShiftScale);
  718. }
  719. else
  720. {
  721. DstScale = (FLOAT)(1<<ShiftScale);
  722. }
  723. // finished preliminary setup, now start emitting ops...
  724. _EnterQuadPixelLoop
  725. if( bEmitQueueWrite )
  726. {
  727. _NewPSInst(RDPSINST_QUEUEWRITE);
  728. _InstParam(RDPSINST_QUEUEWRITE).DstReg = QueuedWriteDstReg;
  729. _InstParam(RDPSINST_QUEUEWRITE).WriteMask = QueuedWriteDstWriteMask;
  730. }
  731. for (i=0; i < pInst->SrcParamCount; i++)
  732. {
  733. if( bEmitProj[i] )
  734. {
  735. _EmitProj(RDPSREG_POSTMODSRC,i,SrcReg[i].GetRegType(),SrcReg[i].GetRegNum(),ProjComponent[i]);
  736. SrcReg[i]._Set(RDPSREG_POSTMODSRC,i);
  737. }
  738. if( bEmitSrcMod[i] )
  739. {
  740. _NewPSInst(RDPSINST_SRCMOD);
  741. _InstParam(RDPSINST_SRCMOD).DstReg._Set(RDPSREG_POSTMODSRC,i);
  742. _InstParam(RDPSINST_SRCMOD).SrcReg0 = SrcReg[i];
  743. _InstParam(RDPSINST_SRCMOD).WriteMask = SourceReadMasks[i];
  744. _InstParam(RDPSINST_SRCMOD).bBias = bSrcBias[i];
  745. _InstParam(RDPSINST_SRCMOD).bTimes2 = bSrcTimes2[i];
  746. _InstParam(RDPSINST_SRCMOD).bComplement = bSrcComplement[i];
  747. _InstParam(RDPSINST_SRCMOD).fRangeMin = bForceNeg1To1Clamp[i] ? -1.0f : fMin;
  748. _InstParam(RDPSINST_SRCMOD).fRangeMax = bForceNeg1To1Clamp[i] ? 1.0f : fMax;
  749. SrcReg[i]._Set(RDPSREG_POSTMODSRC,i);
  750. }
  751. if( bEmitSwizzle[i] && !bProjOnEval[i] )
  752. {
  753. _NewPSInst(RDPSINST_SWIZZLE);
  754. _InstParam(RDPSINST_SWIZZLE).DstReg._Set(RDPSREG_POSTMODSRC,i);
  755. _InstParam(RDPSINST_SWIZZLE).SrcReg0 = SrcReg[i];
  756. _InstParam(RDPSINST_SWIZZLE).WriteMask = SourceReadMasksAfterSwizzle[i];
  757. _InstParam(RDPSINST_SWIZZLE).Swizzle = SrcSwizzle[i];
  758. SrcReg[i]._Set(RDPSREG_POSTMODSRC,i);
  759. }
  760. }
  761. switch(Opcode)
  762. {
  763. case D3DSIO_TEXCOORD:
  764. case D3DSIO_TEXKILL:
  765. {
  766. if( !( (D3DSIO_TEXKILL == Opcode) &&
  767. (D3DSPR_TEMP == (pInst->DstParam & D3DSP_REGTYPE_MASK))
  768. )
  769. )
  770. {
  771. UINT CoordSet = pInst->SrcParam[0] ? (pInst->SrcParam[0] & D3DSP_REGNUM_MASK) :
  772. (pInst->DstParam & D3DSP_REGNUM_MASK);
  773. RDPSRegister CoordReg;
  774. if(bProjOnEval[0])
  775. CoordReg._Set(RDPSREG_POSTMODSRC,0);
  776. else
  777. CoordReg = DstReg;
  778. // For TEXCOORD, clamp 0. to 1 only if there is no source parameter (ps.1.0, ps.1.1)
  779. // For TEXKILL, never clamp
  780. // NOTE: the TEXCOORD clamp is a temporary limitation for DX8 shader models
  781. BOOL bTexCoordClamp = ((D3DSIO_TEXCOORD == Opcode) && (!pInst->SrcParam[0])) ? TRUE : FALSE;
  782. _NewPSInst(RDPSINST_EVAL);
  783. _InstParam(RDPSINST_EVAL).DstReg = CoordReg;
  784. _InstParam(RDPSINST_EVAL).uiCoordSet = CoordSet;
  785. _InstParam(RDPSINST_EVAL).bIgnoreD3DTTFF_PROJECTED = TRUE; // projection disabled (unless _p modifier used -> _EmitProj below)
  786. _InstParam(RDPSINST_EVAL).bClamp = bTexCoordClamp;
  787. if( bProjOnEval[0] )
  788. {
  789. if( bEmitSwizzle[0] )
  790. {
  791. _NewPSInst(RDPSINST_SWIZZLE);
  792. _InstParam(RDPSINST_SWIZZLE).DstReg = DstReg;
  793. _InstParam(RDPSINST_SWIZZLE).SrcReg0 = CoordReg;
  794. _InstParam(RDPSINST_SWIZZLE).WriteMask = SourceReadMasksAfterSwizzle[0];
  795. _InstParam(RDPSINST_SWIZZLE).Swizzle = SrcSwizzle[0];
  796. }
  797. _EmitProj(DstReg.GetRegType(),DstReg.GetRegNum(),DstReg.GetRegType(),DstReg.GetRegNum(),ProjComponent[0]);
  798. }
  799. // check version (first DWORD of code token stream), and always
  800. // set 4th component to 1.0 for ps.1.3 or earlier
  801. if ( D3DPS_VERSION(1,3) >= Version )
  802. {
  803. _NewPSInst(RDPSINST_MOV);
  804. _InstParam(RDPSINST_MOV).DstReg = DstReg;
  805. _InstParam(RDPSINST_MOV).SrcReg0 = OneReg; // 1.0f
  806. _InstParam(RDPSINST_MOV).bSrcReg0_Negate = FALSE;
  807. _InstParam(RDPSINST_MOV).WriteMask = RDPS_COMPONENTMASK_3;
  808. }
  809. }
  810. _EmitDstMod(DstReg,DstWriteMask)
  811. if( D3DSIO_TEXKILL == Opcode )
  812. {
  813. _NewPSInst(RDPSINST_KILL);
  814. _InstParam(RDPSINST_KILL).DstReg = DstReg;
  815. }
  816. }
  817. break;
  818. case D3DSIO_TEX:
  819. {
  820. RDPSRegister CoordReg;
  821. BOOL bDoSampleCoords = TRUE;
  822. UINT CoordSet = pInst->SrcParam[0] ? (pInst->SrcParam[0] & D3DSP_REGNUM_MASK) :
  823. (pInst->DstParam & D3DSP_REGNUM_MASK);
  824. if( pInst->SrcParam[0] )
  825. {
  826. CoordReg = SrcReg[0];
  827. if( D3DSPR_TEMP == (pInst->SrcParam[0] & D3DSP_REGTYPE_MASK) )
  828. bDoSampleCoords = FALSE;
  829. }
  830. else // no source param.
  831. {
  832. CoordReg._Set(RDPSREG_SCRATCH,0);
  833. }
  834. if( bDoSampleCoords )
  835. {
  836. _NewPSInst(RDPSINST_EVAL);
  837. _InstParam(RDPSINST_EVAL).DstReg = CoordReg;
  838. _InstParam(RDPSINST_EVAL).uiCoordSet = CoordSet;
  839. _InstParam(RDPSINST_EVAL).bIgnoreD3DTTFF_PROJECTED = bProjOnEval[0]; // if we have _p modifier, we do _EmitProj below
  840. _InstParam(RDPSINST_EVAL).bClamp = FALSE;
  841. }
  842. if( bProjOnEval[0] )
  843. {
  844. if( bEmitSwizzle[0] )
  845. {
  846. _NewPSInst(RDPSINST_SWIZZLE);
  847. _InstParam(RDPSINST_SWIZZLE).DstReg._Set(RDPSREG_POSTMODSRC,0);
  848. _InstParam(RDPSINST_SWIZZLE).SrcReg0 = CoordReg;
  849. _InstParam(RDPSINST_SWIZZLE).WriteMask = SourceReadMasksAfterSwizzle[0];
  850. _InstParam(RDPSINST_SWIZZLE).Swizzle = SrcSwizzle[0];
  851. CoordReg._Set(RDPSREG_POSTMODSRC,0);
  852. }
  853. _EmitProj(RDPSREG_POSTMODSRC,0,CoordReg.GetRegType(),CoordReg.GetRegNum(),ProjComponent[0]);
  854. CoordReg._Set(RDPSREG_POSTMODSRC,0);
  855. }
  856. _LeaveQuadPixelLoop
  857. PRGBAVEC pCoordReg = CoordReg.GetRegPtr();
  858. _NewPSInst(RDPSINST_TEXCOVERAGE);
  859. _InstParam(RDPSINST_TEXCOVERAGE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  860. _InstParam(RDPSINST_TEXCOVERAGE).pGradients = pRast->m_Gradients; // where to store gradients
  861. // data from which to compute gradients. i.e.: du/dx = DUDX_0 - DUDX_1
  862. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_0 = &pCoordReg[1][0]; // du/dx
  863. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_1 = &pCoordReg[0][0];
  864. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_0 = &pCoordReg[2][0]; // du/dy
  865. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_1 = &pCoordReg[0][0];
  866. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_0 = &pCoordReg[1][1]; // dv/dx
  867. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_1 = &pCoordReg[0][1];
  868. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_0 = &pCoordReg[2][1]; // dv/dy
  869. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_1 = &pCoordReg[0][1];
  870. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_0 = &pCoordReg[1][2]; // dw/dx
  871. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_1 = &pCoordReg[0][2];
  872. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_0 = &pCoordReg[2][2]; // dw/dy
  873. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_1 = &pCoordReg[0][2];
  874. _EnterQuadPixelLoop
  875. _NewPSInst(RDPSINST_SAMPLE);
  876. _InstParam(RDPSINST_SAMPLE).DstReg = DstReg;
  877. _InstParam(RDPSINST_SAMPLE).CoordReg = CoordReg;
  878. _InstParam(RDPSINST_SAMPLE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  879. _EmitDstMod(DstReg,DstWriteMask)
  880. }
  881. break;
  882. case D3DSIO_TEXDP3:
  883. case D3DSIO_TEXDP3TEX:
  884. {
  885. RDPSRegister CoordReg;
  886. CoordReg._Set(RDPSREG_SCRATCH,0);
  887. _NewPSInst(RDPSINST_EVAL);
  888. _InstParam(RDPSINST_EVAL).DstReg = CoordReg;
  889. _InstParam(RDPSINST_EVAL).uiCoordSet = pInst->DstParam & D3DSP_REGNUM_MASK;
  890. _InstParam(RDPSINST_EVAL).bIgnoreD3DTTFF_PROJECTED = TRUE; // no projection
  891. _InstParam(RDPSINST_EVAL).bClamp = FALSE;
  892. if( D3DSIO_TEXDP3 == Opcode )
  893. {
  894. _NewPSInst(RDPSINST_DP3);
  895. _InstParam(RDPSINST_DP3).DstReg = DstReg;
  896. _InstParam(RDPSINST_DP3).SrcReg0 = SrcReg[0];
  897. _InstParam(RDPSINST_DP3).SrcReg1 = CoordReg;
  898. _InstParam(RDPSINST_DP3).bSrcReg0_Negate = FALSE;
  899. _InstParam(RDPSINST_DP3).bSrcReg1_Negate = FALSE;
  900. _InstParam(RDPSINST_DP3).WriteMask = RDPS_COMPONENTMASK_ALL;
  901. _EmitDstMod(DstReg,DstWriteMask)
  902. }
  903. else // D3DSIO_TEXDP3TEX
  904. {
  905. _NewPSInst(RDPSINST_DP3);
  906. _InstParam(RDPSINST_DP3).DstReg = CoordReg;
  907. _InstParam(RDPSINST_DP3).SrcReg0 = SrcReg[0];
  908. _InstParam(RDPSINST_DP3).SrcReg1 = CoordReg;
  909. _InstParam(RDPSINST_DP3).bSrcReg0_Negate = FALSE;
  910. _InstParam(RDPSINST_DP3).bSrcReg1_Negate = FALSE;
  911. _InstParam(RDPSINST_DP3).WriteMask = RDPS_COMPONENTMASK_0;
  912. _NewPSInst(RDPSINST_MOV);
  913. _InstParam(RDPSINST_MOV).DstReg = CoordReg;
  914. _InstParam(RDPSINST_MOV).SrcReg0 = ZeroReg; // 0.0f
  915. _InstParam(RDPSINST_MOV).bSrcReg0_Negate = FALSE;
  916. _InstParam(RDPSINST_MOV).WriteMask = RDPS_COMPONENTMASK_1 | RDPS_COMPONENTMASK_2;
  917. _LeaveQuadPixelLoop
  918. PRGBAVEC pCoordReg = CoordReg.GetRegPtr();
  919. _NewPSInst(RDPSINST_TEXCOVERAGE);
  920. _InstParam(RDPSINST_TEXCOVERAGE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  921. _InstParam(RDPSINST_TEXCOVERAGE).pGradients = pRast->m_Gradients; // where to store gradients
  922. // data from which to compute gradients. i.e.: du/dx = DUDX_0 - DUDX_1
  923. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_0 = &pCoordReg[1][0]; // du/dx
  924. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_1 = &pCoordReg[0][0];
  925. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_0 = &pCoordReg[2][0]; // du/dy
  926. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_1 = &pCoordReg[0][0];
  927. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_0 = // dv/dx
  928. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_1 =
  929. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_0 = // dv/dy
  930. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_1 =
  931. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_0 = // dw/dx
  932. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_1 =
  933. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_0 = // dw/dy
  934. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_1 = &ZeroReg.GetRegPtr()[0][0]; // 0.0f
  935. _EnterQuadPixelLoop
  936. _NewPSInst(RDPSINST_SAMPLE);
  937. _InstParam(RDPSINST_SAMPLE).DstReg = DstReg;
  938. _InstParam(RDPSINST_SAMPLE).CoordReg = CoordReg;
  939. _InstParam(RDPSINST_SAMPLE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  940. _EmitDstMod(DstReg,DstWriteMask)
  941. }
  942. }
  943. break;
  944. case D3DSIO_TEXREG2AR:
  945. case D3DSIO_TEXREG2GB:
  946. case D3DSIO_TEXREG2RGB:
  947. {
  948. UINT I0, I1;
  949. PRGBAVEC pSrcReg0 = SrcReg[0].GetRegPtr();
  950. switch( Opcode )
  951. {
  952. case D3DSIO_TEXREG2AR:
  953. I0 = 3;
  954. I1 = 0;
  955. break;
  956. case D3DSIO_TEXREG2GB:
  957. I0 = 1;
  958. I1 = 2;
  959. break;
  960. case D3DSIO_TEXREG2RGB:
  961. I0 = 0;
  962. I1 = 1;
  963. break;
  964. }
  965. _LeaveQuadPixelLoop
  966. _NewPSInst(RDPSINST_TEXCOVERAGE);
  967. _InstParam(RDPSINST_TEXCOVERAGE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  968. _InstParam(RDPSINST_TEXCOVERAGE).pGradients = pRast->m_Gradients; // where to store gradients
  969. // data from which to compute gradients. i.e.: du/dx = DUDX_0 - DUDX_1
  970. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_0 = &pSrcReg0[1][I0]; // du/dx
  971. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_1 = &pSrcReg0[0][I0];
  972. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_0 = &pSrcReg0[2][I0]; // du/dy
  973. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_1 = &pSrcReg0[0][I0];
  974. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_0 = &pSrcReg0[1][I1]; // dv/dx
  975. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_1 = &pSrcReg0[0][I1];
  976. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_0 = &pSrcReg0[2][I1]; // dv/dy
  977. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_1 = &pSrcReg0[0][I1];
  978. switch( Opcode )
  979. {
  980. case D3DSIO_TEXREG2AR:
  981. case D3DSIO_TEXREG2GB:
  982. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_0 = // dw/dx
  983. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_1 =
  984. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_0 = // dw/dy
  985. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_1 = &ZeroReg.GetRegPtr()[0][0]; // 0.0f
  986. break;
  987. case D3DSIO_TEXREG2RGB:
  988. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_0 = &pSrcReg0[1][2]; // dw/dx
  989. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_1 = &pSrcReg0[0][2];
  990. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_0 = &pSrcReg0[2][2]; // dw/dy
  991. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_1 = &pSrcReg0[0][2];
  992. break;
  993. }
  994. _EnterQuadPixelLoop
  995. RDPSRegister CoordReg;
  996. CoordReg._Set(RDPSREG_SCRATCH,0);
  997. _NewPSInst(RDPSINST_SWIZZLE);
  998. _InstParam(RDPSINST_SWIZZLE).DstReg = CoordReg;
  999. _InstParam(RDPSINST_SWIZZLE).SrcReg0 = SrcReg[0];
  1000. _InstParam(RDPSINST_SWIZZLE).WriteMask = RDPS_COMPONENTMASK_0;
  1001. _InstParam(RDPSINST_SWIZZLE).Swizzle = ComponentSwizzle[I0];
  1002. _NewPSInst(RDPSINST_SWIZZLE);
  1003. _InstParam(RDPSINST_SWIZZLE).DstReg = CoordReg;
  1004. _InstParam(RDPSINST_SWIZZLE).SrcReg0 = SrcReg[0];
  1005. _InstParam(RDPSINST_SWIZZLE).WriteMask = RDPS_COMPONENTMASK_1;
  1006. _InstParam(RDPSINST_SWIZZLE).Swizzle = ComponentSwizzle[I1];
  1007. _NewPSInst(RDPSINST_MOV);
  1008. _InstParam(RDPSINST_MOV).DstReg = CoordReg;
  1009. _InstParam(RDPSINST_MOV).SrcReg0 = (D3DSIO_TEXREG2RGB == Opcode ? SrcReg[0] : ZeroReg );
  1010. _InstParam(RDPSINST_MOV).bSrcReg0_Negate = FALSE;
  1011. _InstParam(RDPSINST_MOV).WriteMask = RDPS_COMPONENTMASK_2;
  1012. _NewPSInst(RDPSINST_SAMPLE);
  1013. _InstParam(RDPSINST_SAMPLE).DstReg = DstReg;
  1014. _InstParam(RDPSINST_SAMPLE).CoordReg = CoordReg;
  1015. _InstParam(RDPSINST_SAMPLE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  1016. _EmitDstMod(DstReg,DstWriteMask)
  1017. }
  1018. break;
  1019. case D3DSIO_TEXBEM:
  1020. case D3DSIO_TEXBEML:
  1021. case D3DSIO_TEXBEM_LEGACY: // refrast only -> used with legacy fixed function rasterizer
  1022. case D3DSIO_TEXBEML_LEGACY: // refrast only -> used with legacy fixed function rasterizer
  1023. {
  1024. BOOL bDoLuminance = ((D3DSIO_TEXBEML == Opcode) || (D3DSIO_TEXBEML_LEGACY == Opcode));
  1025. RDPSRegister CoordReg;
  1026. CoordReg._Set(RDPSREG_SCRATCH,0);
  1027. _NewPSInst(RDPSINST_EVAL);
  1028. _InstParam(RDPSINST_EVAL).DstReg = CoordReg;
  1029. _InstParam(RDPSINST_EVAL).uiCoordSet = pInst->DstParam & D3DSP_REGNUM_MASK;
  1030. _InstParam(RDPSINST_EVAL).bIgnoreD3DTTFF_PROJECTED = FALSE;
  1031. _InstParam(RDPSINST_EVAL).bClamp = FALSE;
  1032. _NewPSInst(RDPSINST_BEM);
  1033. _InstParam(RDPSINST_BEM).DstReg = CoordReg;
  1034. _InstParam(RDPSINST_BEM).SrcReg0 = CoordReg;
  1035. _InstParam(RDPSINST_BEM).SrcReg1 = SrcReg[0];
  1036. _InstParam(RDPSINST_BEM).bSrcReg0_Negate = FALSE;
  1037. _InstParam(RDPSINST_BEM).bSrcReg1_Negate = FALSE;
  1038. _InstParam(RDPSINST_BEM).WriteMask = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1;
  1039. _InstParam(RDPSINST_BEM).uiStage = pInst->uiTSSNum;
  1040. _EmitDstMod(CoordReg,RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1)
  1041. _LeaveQuadPixelLoop
  1042. PRGBAVEC pCoordReg = CoordReg.GetRegPtr();
  1043. _NewPSInst(RDPSINST_TEXCOVERAGE);
  1044. _InstParam(RDPSINST_TEXCOVERAGE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  1045. _InstParam(RDPSINST_TEXCOVERAGE).pGradients = pRast->m_Gradients; // where to store gradients
  1046. // data from which to compute gradients. i.e.: du/dx = DUDX_0 - DUDX_1
  1047. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_0 = &pCoordReg[1][0]; // du/dx
  1048. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_1 = &pCoordReg[0][0];
  1049. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_0 = &pCoordReg[2][0]; // du/dy
  1050. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_1 = &pCoordReg[0][0];
  1051. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_0 = &pCoordReg[1][1]; // dv/dx
  1052. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_1 = &pCoordReg[0][1];
  1053. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_0 = &pCoordReg[2][1]; // dv/dy
  1054. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_1 = &pCoordReg[0][1];
  1055. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_0 = // dw/dx
  1056. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_1 =
  1057. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_0 = // dw/dy
  1058. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_1 = &ZeroReg.GetRegPtr()[0][0]; // 0.0f
  1059. _EnterQuadPixelLoop
  1060. _NewPSInst(RDPSINST_SAMPLE);
  1061. _InstParam(RDPSINST_SAMPLE).DstReg = DstReg;
  1062. _InstParam(RDPSINST_SAMPLE).CoordReg = CoordReg;
  1063. _InstParam(RDPSINST_SAMPLE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  1064. if( bDoLuminance )
  1065. {
  1066. _NewPSInst(RDPSINST_LUMINANCE);
  1067. _InstParam(RDPSINST_LUMINANCE).DstReg = DstReg;
  1068. _InstParam(RDPSINST_LUMINANCE).SrcReg0 = DstReg;
  1069. _InstParam(RDPSINST_LUMINANCE).SrcReg1 = SrcReg[0];
  1070. _InstParam(RDPSINST_LUMINANCE).bSrcReg0_Negate = FALSE;
  1071. _InstParam(RDPSINST_LUMINANCE).bSrcReg1_Negate = FALSE;
  1072. _InstParam(RDPSINST_LUMINANCE).uiStage = pInst->uiTSSNum;
  1073. }
  1074. _EmitDstMod(DstReg,DstWriteMask)
  1075. }
  1076. break;
  1077. case D3DSIO_TEXDEPTH:
  1078. _NewPSInst(RDPSINST_DEPTH);
  1079. _InstParam(RDPSINST_DEPTH).DstReg = DstReg;
  1080. break;
  1081. case D3DSIO_TEXM3x2PAD:
  1082. {
  1083. RDPSRegister CoordReg;
  1084. CoordReg._Set(RDPSREG_SCRATCH,0);
  1085. // do dot product for first row of matrix multiply
  1086. // evaluate texture coordinate; projection disabled
  1087. _NewPSInst(RDPSINST_EVAL);
  1088. _InstParam(RDPSINST_EVAL).DstReg = CoordReg;
  1089. _InstParam(RDPSINST_EVAL).uiCoordSet = pInst->DstParam & D3DSP_REGNUM_MASK;
  1090. _InstParam(RDPSINST_EVAL).bIgnoreD3DTTFF_PROJECTED = TRUE; // no projection
  1091. _InstParam(RDPSINST_EVAL).bClamp = FALSE;
  1092. // do row of transform - tex coord * vector loaded from texture (on previous stage)
  1093. _NewPSInst(RDPSINST_DP3);
  1094. _InstParam(RDPSINST_DP3).DstReg._Set(DstReg.GetRegType(),DstReg.GetRegNum()+1);
  1095. _InstParam(RDPSINST_DP3).SrcReg0 = SrcReg[0];
  1096. _InstParam(RDPSINST_DP3).SrcReg1 = CoordReg;
  1097. _InstParam(RDPSINST_DP3).bSrcReg0_Negate = FALSE;
  1098. _InstParam(RDPSINST_DP3).bSrcReg1_Negate = FALSE;
  1099. _InstParam(RDPSINST_DP3).WriteMask = RDPS_COMPONENTMASK_0;
  1100. }
  1101. break;
  1102. case D3DSIO_TEXM3x3PAD:
  1103. {
  1104. BOOL bSecondPad = (D3DSIO_TEXM3x3PAD != ((pInst + 1)->Opcode & D3DSI_OPCODE_MASK));
  1105. BOOL bInVSPECSequence = (D3DSIO_TEXM3x3VSPEC == (((pInst + (bSecondPad?1:2))->Opcode) & D3DSI_OPCODE_MASK));
  1106. RDPSRegister CoordReg, EyeReg;
  1107. CoordReg._Set(RDPSREG_SCRATCH,0);
  1108. EyeReg._Set(RDPSREG_SCRATCH,1);
  1109. // do dot product for first row of matrix multiply
  1110. // evaluate texture coordinate; projection disabled
  1111. _NewPSInst(RDPSINST_EVAL);
  1112. _InstParam(RDPSINST_EVAL).DstReg = CoordReg;
  1113. _InstParam(RDPSINST_EVAL).uiCoordSet = pInst->DstParam & D3DSP_REGNUM_MASK;
  1114. _InstParam(RDPSINST_EVAL).bIgnoreD3DTTFF_PROJECTED = TRUE; // no projection
  1115. _InstParam(RDPSINST_EVAL).bClamp = FALSE;
  1116. // do row of transform - tex coord * vector loaded from texture (on previous stage)
  1117. _NewPSInst(RDPSINST_DP3);
  1118. _InstParam(RDPSINST_DP3).DstReg._Set(DstReg.GetRegType(),DstReg.GetRegNum()+(bSecondPad?1:2));
  1119. _InstParam(RDPSINST_DP3).SrcReg0 = SrcReg[0];
  1120. _InstParam(RDPSINST_DP3).SrcReg1 = CoordReg;
  1121. _InstParam(RDPSINST_DP3).bSrcReg0_Negate = FALSE;
  1122. _InstParam(RDPSINST_DP3).bSrcReg1_Negate = FALSE;
  1123. _InstParam(RDPSINST_DP3).WriteMask = bSecondPad?RDPS_COMPONENTMASK_1:RDPS_COMPONENTMASK_0;
  1124. if(bInVSPECSequence)
  1125. {
  1126. // eye vector encoded in 4th element of texture coordinates
  1127. _NewPSInst(RDPSINST_SWIZZLE);
  1128. _InstParam(RDPSINST_SWIZZLE).DstReg = EyeReg;
  1129. _InstParam(RDPSINST_SWIZZLE).SrcReg0 = CoordReg;
  1130. _InstParam(RDPSINST_SWIZZLE).WriteMask = bSecondPad?RDPS_COMPONENTMASK_1:RDPS_COMPONENTMASK_0;
  1131. _InstParam(RDPSINST_SWIZZLE).Swizzle = RDPS_REPLICATEALPHA;
  1132. }
  1133. }
  1134. break;
  1135. case D3DSIO_TEXM3x2TEX:
  1136. case D3DSIO_TEXM3x3:
  1137. case D3DSIO_TEXM3x3TEX:
  1138. case D3DSIO_TEXM3x3SPEC:
  1139. case D3DSIO_TEXM3x3VSPEC:
  1140. case D3DSIO_TEXM3x2DEPTH:
  1141. {
  1142. BOOL bIs3D = (D3DSIO_TEXM3x2TEX != Opcode) && (D3DSIO_TEXM3x2DEPTH != Opcode);
  1143. RDPSRegister CoordReg, EyeReg;
  1144. CoordReg._Set(RDPSREG_SCRATCH,0);
  1145. EyeReg._Set(RDPSREG_SCRATCH,1);
  1146. // do dot product for last row of matrix multiply
  1147. // evaluate texture coordinate; projection disabled
  1148. _NewPSInst(RDPSINST_EVAL);
  1149. _InstParam(RDPSINST_EVAL).DstReg = CoordReg;
  1150. _InstParam(RDPSINST_EVAL).uiCoordSet = pInst->DstParam & D3DSP_REGNUM_MASK;
  1151. _InstParam(RDPSINST_EVAL).bIgnoreD3DTTFF_PROJECTED = TRUE; // no projection
  1152. _InstParam(RDPSINST_EVAL).bClamp = FALSE;
  1153. // do row of transform - tex coord * vector loaded from texture (on previous stage)
  1154. _NewPSInst(RDPSINST_DP3);
  1155. _InstParam(RDPSINST_DP3).DstReg = DstReg;
  1156. _InstParam(RDPSINST_DP3).SrcReg0 = SrcReg[0];
  1157. _InstParam(RDPSINST_DP3).SrcReg1 = CoordReg;
  1158. _InstParam(RDPSINST_DP3).bSrcReg0_Negate = FALSE;
  1159. _InstParam(RDPSINST_DP3).bSrcReg1_Negate = FALSE;
  1160. _InstParam(RDPSINST_DP3).WriteMask = bIs3D ? RDPS_COMPONENTMASK_2 : RDPS_COMPONENTMASK_1;
  1161. if(D3DSIO_TEXM3x3VSPEC == Opcode)
  1162. {
  1163. // eye vector encoded in 4th element of texture coordinates
  1164. _NewPSInst(RDPSINST_SWIZZLE);
  1165. _InstParam(RDPSINST_SWIZZLE).DstReg = EyeReg;
  1166. _InstParam(RDPSINST_SWIZZLE).SrcReg0 = CoordReg;
  1167. _InstParam(RDPSINST_SWIZZLE).WriteMask = RDPS_COMPONENTMASK_2;
  1168. _InstParam(RDPSINST_SWIZZLE).Swizzle = RDPS_REPLICATEALPHA;
  1169. }
  1170. // Now do stuff that depends on which TEXM3x* instruction this is...
  1171. if( D3DSIO_TEXM3x3 == Opcode )
  1172. {
  1173. _NewPSInst(RDPSINST_MOV);
  1174. _InstParam(RDPSINST_MOV).DstReg = DstReg;
  1175. _InstParam(RDPSINST_MOV).SrcReg0 = OneReg; // 1.0f
  1176. _InstParam(RDPSINST_MOV).bSrcReg0_Negate = FALSE;
  1177. _InstParam(RDPSINST_MOV).WriteMask = RDPS_COMPONENTMASK_3;
  1178. _EmitDstMod(DstReg,DstWriteMask)
  1179. }
  1180. else if ( (D3DSIO_TEXM3x2TEX == Opcode) ||
  1181. (D3DSIO_TEXM3x3TEX == Opcode) )
  1182. {
  1183. // do straight lookup with transformed tex coords - this
  1184. // vector is not normalized, but normalization is not necessary
  1185. // for a cubemap lookup
  1186. // compute gradients for diffuse lookup
  1187. _LeaveQuadPixelLoop
  1188. PRGBAVEC pDstReg = DstReg.GetRegPtr();
  1189. _NewPSInst(RDPSINST_TEXCOVERAGE);
  1190. _InstParam(RDPSINST_TEXCOVERAGE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  1191. _InstParam(RDPSINST_TEXCOVERAGE).pGradients = pRast->m_Gradients; // where to store gradients
  1192. // data from which to compute gradients. i.e.: du/dx = DUDX_0 - DUDX_1
  1193. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_0 = &pDstReg[1][0]; // du/dx
  1194. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_1 = &pDstReg[0][0];
  1195. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_0 = &pDstReg[2][0]; // du/dy
  1196. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_1 = &pDstReg[0][0];
  1197. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_0 = &pDstReg[1][1]; // dv/dx
  1198. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_1 = &pDstReg[0][1];
  1199. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_0 = &pDstReg[2][1]; // dv/dy
  1200. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_1 = &pDstReg[0][1];
  1201. if( bIs3D )
  1202. {
  1203. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_0 = &pDstReg[1][2]; // dw/dx
  1204. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_1 = &pDstReg[0][2];
  1205. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_0 = &pDstReg[2][2]; // dw/dy
  1206. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_1 = &pDstReg[0][2];
  1207. }
  1208. else
  1209. {
  1210. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_0 = // dw/dx
  1211. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_1 =
  1212. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_0 = // dw/dy
  1213. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_1 = &ZeroReg.GetRegPtr()[0][0]; // 0.0f
  1214. }
  1215. _EnterQuadPixelLoop
  1216. // do lookup
  1217. if( !bIs3D )
  1218. {
  1219. _NewPSInst(RDPSINST_MOV);
  1220. _InstParam(RDPSINST_MOV).DstReg = DstReg;
  1221. _InstParam(RDPSINST_MOV).SrcReg0 = ZeroReg; // 0.0f
  1222. _InstParam(RDPSINST_MOV).bSrcReg0_Negate = FALSE;
  1223. _InstParam(RDPSINST_MOV).WriteMask = RDPS_COMPONENTMASK_2;
  1224. }
  1225. _NewPSInst(RDPSINST_SAMPLE);
  1226. _InstParam(RDPSINST_SAMPLE).DstReg = DstReg;
  1227. _InstParam(RDPSINST_SAMPLE).CoordReg = DstReg;
  1228. _InstParam(RDPSINST_SAMPLE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  1229. _EmitDstMod(DstReg,DstWriteMask)
  1230. }
  1231. else if ( Opcode == D3DSIO_TEXM3x2DEPTH )
  1232. {
  1233. // Take resulting u,v values and compute u/v, which
  1234. // can be interpreted as z/w = perspective correct depth.
  1235. // Then perturb the z coord for the pixel.
  1236. _NewPSInst(RDPSINST_DEPTH);
  1237. _InstParam(RDPSINST_DEPTH).DstReg = DstReg;
  1238. }
  1239. else if ( (Opcode == D3DSIO_TEXM3x3SPEC) ||
  1240. (Opcode == D3DSIO_TEXM3x3VSPEC) )
  1241. {
  1242. RDPSRegister NdotE, NdotN, RCPNdotN, Scale, ReflReg;
  1243. NdotE._Set(RDPSREG_SCRATCH,2);
  1244. NdotN._Set(RDPSREG_SCRATCH,3);
  1245. RCPNdotN = NdotN; // reuse same register
  1246. Scale = NdotE; // reuse same register
  1247. ReflReg = CoordReg; // reuse same register
  1248. // compute reflection vector and do lookup - the normal needs
  1249. // to be normalized here, which is included in this expression
  1250. if (D3DSIO_TEXM3x3SPEC == Opcode)
  1251. {
  1252. // eye vector is constant register
  1253. EyeReg = SrcReg[1];
  1254. } // else (TEXM3x3VSPEC) -> eye is what was copied out of the 4th component of 3 texcoords
  1255. // Compute reflection vector: 2(NdotE/NdotN) * N - E ...
  1256. // Calculate NdotE
  1257. _NewPSInst(RDPSINST_DP3);
  1258. _InstParam(RDPSINST_DP3).DstReg = NdotE;
  1259. _InstParam(RDPSINST_DP3).SrcReg0 = DstReg; // N
  1260. _InstParam(RDPSINST_DP3).SrcReg1 = EyeReg; // E
  1261. _InstParam(RDPSINST_DP3).bSrcReg0_Negate = FALSE;
  1262. _InstParam(RDPSINST_DP3).bSrcReg1_Negate = FALSE;
  1263. _InstParam(RDPSINST_DP3).WriteMask = RDPS_COMPONENTMASK_3;
  1264. // Calculate NdotN
  1265. _NewPSInst(RDPSINST_DP3);
  1266. _InstParam(RDPSINST_DP3).DstReg = NdotN;
  1267. _InstParam(RDPSINST_DP3).SrcReg0 = DstReg; // N
  1268. _InstParam(RDPSINST_DP3).SrcReg1 = DstReg; // N
  1269. _InstParam(RDPSINST_DP3).bSrcReg0_Negate = FALSE;
  1270. _InstParam(RDPSINST_DP3).bSrcReg1_Negate = FALSE;
  1271. _InstParam(RDPSINST_DP3).WriteMask = RDPS_COMPONENTMASK_3;
  1272. // Calculate scale = 2(NdotE/NdotN):
  1273. // a) Calculate reciprocal of NdotN
  1274. _NewPSInst(RDPSINST_RCP);
  1275. _InstParam(RDPSINST_RCP).DstReg = RCPNdotN;
  1276. _InstParam(RDPSINST_RCP).SrcReg0 = NdotN;
  1277. _InstParam(RDPSINST_RCP).bSrcReg0_Negate = FALSE;
  1278. _InstParam(RDPSINST_RCP).WriteMask = RDPS_COMPONENTMASK_3;
  1279. // b) Multiply NdotE by reciprocal NdotN
  1280. _NewPSInst(RDPSINST_MUL);
  1281. _InstParam(RDPSINST_MUL).DstReg = Scale;
  1282. _InstParam(RDPSINST_MUL).SrcReg0 = NdotE;
  1283. _InstParam(RDPSINST_MUL).SrcReg1 = RCPNdotN;
  1284. _InstParam(RDPSINST_MUL).bSrcReg0_Negate = FALSE;
  1285. _InstParam(RDPSINST_MUL).bSrcReg1_Negate = FALSE;
  1286. _InstParam(RDPSINST_MUL).WriteMask = RDPS_COMPONENTMASK_3;
  1287. // c) Multiply by 2
  1288. _NewPSInst(RDPSINST_MUL);
  1289. _InstParam(RDPSINST_MUL).DstReg = Scale;
  1290. _InstParam(RDPSINST_MUL).SrcReg0 = Scale;
  1291. _InstParam(RDPSINST_MUL).SrcReg1 = TwoReg; // 2.0f
  1292. _InstParam(RDPSINST_MUL).bSrcReg0_Negate = FALSE;
  1293. _InstParam(RDPSINST_MUL).bSrcReg1_Negate = FALSE;
  1294. _InstParam(RDPSINST_MUL).WriteMask = RDPS_COMPONENTMASK_3;
  1295. // d) Replicate result to rgb
  1296. _NewPSInst(RDPSINST_SWIZZLE);
  1297. _InstParam(RDPSINST_SWIZZLE).DstReg = Scale;
  1298. _InstParam(RDPSINST_SWIZZLE).SrcReg0 = Scale;
  1299. _InstParam(RDPSINST_SWIZZLE).WriteMask = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 | RDPS_COMPONENTMASK_2;
  1300. _InstParam(RDPSINST_SWIZZLE).Swizzle = RDPS_REPLICATEALPHA;
  1301. // Calculate reflection = scale * N - E
  1302. _NewPSInst(RDPSINST_MUL);
  1303. _InstParam(RDPSINST_MUL).DstReg = ReflReg;
  1304. _InstParam(RDPSINST_MUL).SrcReg0 = Scale; // scale *
  1305. _InstParam(RDPSINST_MUL).SrcReg1 = DstReg; // N
  1306. _InstParam(RDPSINST_MUL).bSrcReg0_Negate = FALSE;
  1307. _InstParam(RDPSINST_MUL).bSrcReg1_Negate = FALSE;
  1308. _InstParam(RDPSINST_MUL).WriteMask = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 | RDPS_COMPONENTMASK_2;
  1309. _NewPSInst(RDPSINST_SUB);
  1310. _InstParam(RDPSINST_SUB).DstReg = ReflReg;
  1311. _InstParam(RDPSINST_SUB).SrcReg0 = ReflReg; // (scale * N) -
  1312. _InstParam(RDPSINST_SUB).SrcReg1 = EyeReg; // E
  1313. _InstParam(RDPSINST_SUB).bSrcReg0_Negate = FALSE;
  1314. _InstParam(RDPSINST_SUB).bSrcReg1_Negate = FALSE;
  1315. _InstParam(RDPSINST_SUB).WriteMask = RDPS_COMPONENTMASK_0 | RDPS_COMPONENTMASK_1 | RDPS_COMPONENTMASK_2;
  1316. // compute gradients for reflection lookup
  1317. _LeaveQuadPixelLoop
  1318. PRGBAVEC pReflReg = ReflReg.GetRegPtr();
  1319. _NewPSInst(RDPSINST_TEXCOVERAGE);
  1320. _InstParam(RDPSINST_TEXCOVERAGE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  1321. _InstParam(RDPSINST_TEXCOVERAGE).pGradients = pRast->m_Gradients; // where to store gradients
  1322. // data from which to compute gradients. i.e.: du/dx = DUDX_0 - DUDX_1
  1323. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_0 = &pReflReg[1][0]; // du/dx
  1324. _InstParam(RDPSINST_TEXCOVERAGE).pDUDX_1 = &pReflReg[0][0];
  1325. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_0 = &pReflReg[2][0]; // du/dy
  1326. _InstParam(RDPSINST_TEXCOVERAGE).pDUDY_1 = &pReflReg[0][0];
  1327. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_0 = &pReflReg[1][1]; // dv/dx
  1328. _InstParam(RDPSINST_TEXCOVERAGE).pDVDX_1 = &pReflReg[0][1];
  1329. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_0 = &pReflReg[2][1]; // dv/dy
  1330. _InstParam(RDPSINST_TEXCOVERAGE).pDVDY_1 = &pReflReg[0][1];
  1331. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_0 = &pReflReg[1][2]; // dw/dx
  1332. _InstParam(RDPSINST_TEXCOVERAGE).pDWDX_1 = &pReflReg[0][2];
  1333. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_0 = &pReflReg[2][2]; // dw/dy
  1334. _InstParam(RDPSINST_TEXCOVERAGE).pDWDY_1 = &pReflReg[0][2];
  1335. _EnterQuadPixelLoop
  1336. // do lookup
  1337. _NewPSInst(RDPSINST_SAMPLE);
  1338. _InstParam(RDPSINST_SAMPLE).DstReg = DstReg;
  1339. _InstParam(RDPSINST_SAMPLE).CoordReg = ReflReg;
  1340. _InstParam(RDPSINST_SAMPLE).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  1341. _EmitDstMod(DstReg,DstWriteMask)
  1342. }
  1343. }
  1344. break;
  1345. case D3DSIO_BEM:
  1346. _NewPSInst(RDPSINST_BEM);
  1347. _InstParam(RDPSINST_BEM).DstReg = DstReg;
  1348. _InstParam(RDPSINST_BEM).SrcReg0 = SrcReg[0];
  1349. _InstParam(RDPSINST_BEM).SrcReg1 = SrcReg[1];
  1350. _InstParam(RDPSINST_BEM).bSrcReg0_Negate = bSrcNegate[0];
  1351. _InstParam(RDPSINST_BEM).bSrcReg1_Negate = bSrcNegate[1];
  1352. _InstParam(RDPSINST_BEM).WriteMask = DstWriteMask;
  1353. _InstParam(RDPSINST_BEM).uiStage = pInst->DstParam & D3DSP_REGNUM_MASK;
  1354. _EmitDstMod(DstReg,DstWriteMask)
  1355. break;
  1356. case D3DSIO_MOV:
  1357. _NewPSInst(RDPSINST_MOV);
  1358. _InstParam(RDPSINST_MOV).DstReg = DstReg;
  1359. _InstParam(RDPSINST_MOV).SrcReg0 = SrcReg[0];
  1360. _InstParam(RDPSINST_MOV).bSrcReg0_Negate = bSrcNegate[0];
  1361. _InstParam(RDPSINST_MOV).WriteMask = DstWriteMask;
  1362. _EmitDstMod(DstReg,DstWriteMask)
  1363. break;
  1364. case D3DSIO_FRC:
  1365. _NewPSInst(RDPSINST_FRC);
  1366. _InstParam(RDPSINST_FRC).DstReg = DstReg;
  1367. _InstParam(RDPSINST_FRC).SrcReg0 = SrcReg[0];
  1368. _InstParam(RDPSINST_FRC).bSrcReg0_Negate = bSrcNegate[0];
  1369. _InstParam(RDPSINST_FRC).WriteMask = DstWriteMask;
  1370. _EmitDstMod(DstReg,DstWriteMask)
  1371. break;
  1372. case D3DSIO_ADD:
  1373. _NewPSInst(RDPSINST_ADD);
  1374. _InstParam(RDPSINST_ADD).DstReg = DstReg;
  1375. _InstParam(RDPSINST_ADD).SrcReg0 = SrcReg[0];
  1376. _InstParam(RDPSINST_ADD).SrcReg1 = SrcReg[1];
  1377. _InstParam(RDPSINST_ADD).bSrcReg0_Negate = bSrcNegate[0];
  1378. _InstParam(RDPSINST_ADD).bSrcReg1_Negate = bSrcNegate[1];
  1379. _InstParam(RDPSINST_ADD).WriteMask = DstWriteMask;
  1380. _EmitDstMod(DstReg,DstWriteMask)
  1381. break;
  1382. case D3DSIO_SUB:
  1383. _NewPSInst(RDPSINST_SUB);
  1384. _InstParam(RDPSINST_SUB).DstReg = DstReg;
  1385. _InstParam(RDPSINST_SUB).SrcReg0 = SrcReg[0];
  1386. _InstParam(RDPSINST_SUB).SrcReg1 = SrcReg[1];
  1387. _InstParam(RDPSINST_SUB).bSrcReg0_Negate = bSrcNegate[0];
  1388. _InstParam(RDPSINST_SUB).bSrcReg1_Negate = bSrcNegate[1];
  1389. _InstParam(RDPSINST_SUB).WriteMask = DstWriteMask;
  1390. _EmitDstMod(DstReg,DstWriteMask)
  1391. break;
  1392. case D3DSIO_MUL:
  1393. _NewPSInst(RDPSINST_MUL);
  1394. _InstParam(RDPSINST_MUL).DstReg = DstReg;
  1395. _InstParam(RDPSINST_MUL).SrcReg0 = SrcReg[0];
  1396. _InstParam(RDPSINST_MUL).SrcReg1 = SrcReg[1];
  1397. _InstParam(RDPSINST_MUL).bSrcReg0_Negate = bSrcNegate[0];
  1398. _InstParam(RDPSINST_MUL).bSrcReg1_Negate = bSrcNegate[1];
  1399. _InstParam(RDPSINST_MUL).WriteMask = DstWriteMask;
  1400. _EmitDstMod(DstReg,DstWriteMask)
  1401. break;
  1402. case D3DSIO_DP3:
  1403. _NewPSInst(RDPSINST_DP3);
  1404. _InstParam(RDPSINST_DP3).DstReg = DstReg;
  1405. _InstParam(RDPSINST_DP3).SrcReg0 = SrcReg[0];
  1406. _InstParam(RDPSINST_DP3).SrcReg1 = SrcReg[1];
  1407. _InstParam(RDPSINST_DP3).bSrcReg0_Negate = bSrcNegate[0];
  1408. _InstParam(RDPSINST_DP3).bSrcReg1_Negate = bSrcNegate[1];
  1409. _InstParam(RDPSINST_DP3).WriteMask = DstWriteMask;
  1410. _EmitDstMod(DstReg,DstWriteMask)
  1411. break;
  1412. case D3DSIO_DP4:
  1413. _NewPSInst(RDPSINST_DP4);
  1414. _InstParam(RDPSINST_DP4).DstReg = DstReg;
  1415. _InstParam(RDPSINST_DP4).SrcReg0 = SrcReg[0];
  1416. _InstParam(RDPSINST_DP4).SrcReg1 = SrcReg[1];
  1417. _InstParam(RDPSINST_DP4).bSrcReg0_Negate = bSrcNegate[0];
  1418. _InstParam(RDPSINST_DP4).bSrcReg1_Negate = bSrcNegate[1];
  1419. _InstParam(RDPSINST_DP4).WriteMask = DstWriteMask;
  1420. _EmitDstMod(DstReg,DstWriteMask)
  1421. break;
  1422. case D3DSIO_MAD:
  1423. _NewPSInst(RDPSINST_MAD);
  1424. _InstParam(RDPSINST_MAD).DstReg = DstReg;
  1425. _InstParam(RDPSINST_MAD).SrcReg0 = SrcReg[0];
  1426. _InstParam(RDPSINST_MAD).SrcReg1 = SrcReg[1];
  1427. _InstParam(RDPSINST_MAD).SrcReg2 = SrcReg[2];
  1428. _InstParam(RDPSINST_MAD).bSrcReg0_Negate = bSrcNegate[0];
  1429. _InstParam(RDPSINST_MAD).bSrcReg1_Negate = bSrcNegate[1];
  1430. _InstParam(RDPSINST_MAD).bSrcReg2_Negate = bSrcNegate[2];
  1431. _InstParam(RDPSINST_MAD).WriteMask = DstWriteMask;
  1432. _EmitDstMod(DstReg,DstWriteMask)
  1433. break;
  1434. case D3DSIO_LRP:
  1435. _NewPSInst(RDPSINST_LRP);
  1436. _InstParam(RDPSINST_LRP).DstReg = DstReg;
  1437. _InstParam(RDPSINST_LRP).SrcReg0 = SrcReg[0];
  1438. _InstParam(RDPSINST_LRP).SrcReg1 = SrcReg[1];
  1439. _InstParam(RDPSINST_LRP).SrcReg2 = SrcReg[2];
  1440. _InstParam(RDPSINST_LRP).bSrcReg0_Negate = bSrcNegate[0];
  1441. _InstParam(RDPSINST_LRP).bSrcReg1_Negate = bSrcNegate[1];
  1442. _InstParam(RDPSINST_LRP).bSrcReg2_Negate = bSrcNegate[2];
  1443. _InstParam(RDPSINST_LRP).WriteMask = DstWriteMask;
  1444. _EmitDstMod(DstReg,DstWriteMask)
  1445. break;
  1446. case D3DSIO_CND:
  1447. _NewPSInst(RDPSINST_CND);
  1448. _InstParam(RDPSINST_CND).DstReg = DstReg;
  1449. _InstParam(RDPSINST_CND).SrcReg0 = SrcReg[0];
  1450. _InstParam(RDPSINST_CND).SrcReg1 = SrcReg[1];
  1451. _InstParam(RDPSINST_CND).SrcReg2 = SrcReg[2];
  1452. _InstParam(RDPSINST_CND).bSrcReg0_Negate = bSrcNegate[0];
  1453. _InstParam(RDPSINST_CND).bSrcReg1_Negate = bSrcNegate[1];
  1454. _InstParam(RDPSINST_CND).bSrcReg2_Negate = bSrcNegate[2];
  1455. _InstParam(RDPSINST_CND).WriteMask = DstWriteMask;
  1456. _EmitDstMod(DstReg,DstWriteMask)
  1457. break;
  1458. case D3DSIO_CMP:
  1459. _NewPSInst(RDPSINST_CMP);
  1460. _InstParam(RDPSINST_CMP).DstReg = DstReg;
  1461. _InstParam(RDPSINST_CMP).SrcReg0 = SrcReg[0];
  1462. _InstParam(RDPSINST_CMP).SrcReg1 = SrcReg[1];
  1463. _InstParam(RDPSINST_CMP).SrcReg2 = SrcReg[2];
  1464. _InstParam(RDPSINST_CMP).bSrcReg0_Negate = bSrcNegate[0];
  1465. _InstParam(RDPSINST_CMP).bSrcReg1_Negate = bSrcNegate[1];
  1466. _InstParam(RDPSINST_CMP).bSrcReg2_Negate = bSrcNegate[2];
  1467. _InstParam(RDPSINST_CMP).WriteMask = DstWriteMask;
  1468. _EmitDstMod(DstReg,DstWriteMask)
  1469. break;
  1470. default:
  1471. break;
  1472. }
  1473. if( pInst->bFlushQueue )
  1474. {
  1475. _EnterQuadPixelLoop
  1476. _NewPSInst(RDPSINST_FLUSHQUEUE);
  1477. QueueIndex = -1;
  1478. }
  1479. #if DBG
  1480. _LeaveQuadPixelLoop
  1481. #endif
  1482. }
  1483. // Flush queue at end of shader if there is anything on it
  1484. if( -1 != QueueIndex )
  1485. {
  1486. _EnterQuadPixelLoop
  1487. _NewPSInst(RDPSINST_FLUSHQUEUE);
  1488. QueueIndex = -1;
  1489. }
  1490. _LeaveQuadPixelLoop
  1491. _NewPSInst(RDPSINST_END);
  1492. #if DBG
  1493. if( pRast->m_bDebugPrintTranslatedPixelShaderTokens )
  1494. RDPSDisAsm(pRDPSInstBuffer, m_pConstDefs, m_cConstDefs,pCaps->MaxPixelShaderValue, Version);
  1495. #endif
  1496. }
  1497. return S_OK;
  1498. }
  1499. ///////////////////////////////////////////////////////////////////////////////
  1500. // end