Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

372 lines
14 KiB

  1. ///////////////////////////////////////////////////////////////////////////////
  2. // Copyright (C) Microsoft Corporation, 2000.
  3. //
  4. // psexec.cpp
  5. //
  6. // Direct3D Reference Device - Pixel Shader Execution
  7. //
  8. ///////////////////////////////////////////////////////////////////////////////
  9. #include "pch.cpp"
  10. #pragma hdrstop
  11. //-----------------------------------------------------------------------------
  12. //
  13. // ExecShader - Executes the current pixel shader.
  14. //
  15. //-----------------------------------------------------------------------------
  16. void
  17. RefRast::ExecShader( void )
  18. {
  19. #define _InstParam(__INST) (*(__INST##_PARAMS UNALIGNED64*)pRDPSInstBuffer)
  20. #define _StepOverInst(__INST) pRDPSInstBuffer += sizeof(__INST##_PARAMS);
  21. #define _DeclArgs(__INST) __INST##_PARAMS& Args = _InstParam(__INST);
  22. #define _PerChannel(__STATEMENT) \
  23. for( iChn=0; iChn<4; iChn++ ) \
  24. { \
  25. __STATEMENT \
  26. } \
  27. #define _PerChannelMasked(__STATEMENT) \
  28. for( iChn=0; iChn<4; iChn++ ) \
  29. { \
  30. if( !(Args.WriteMask & ComponentMask[iChn] ) ) \
  31. continue; \
  32. __STATEMENT \
  33. } \
  34. #define _Dst Args.DstReg.GetRegPtr()[m_iPix][iChn]
  35. #define _DstC(__chn) Args.DstReg.GetRegPtr()[m_iPix][__chn]
  36. #define _Src0 Args.SrcReg0.GetRegPtr()[m_iPix][iChn]
  37. #define _Src1 Args.SrcReg1.GetRegPtr()[m_iPix][iChn]
  38. #define _Src2 Args.SrcReg2.GetRegPtr()[m_iPix][iChn]
  39. #define _Src0C(__chn) Args.SrcReg0.GetRegPtr()[m_iPix][__chn]
  40. #define _Src1C(__chn) Args.SrcReg1.GetRegPtr()[m_iPix][__chn]
  41. #define _Src2C(__chn) Args.SrcReg2.GetRegPtr()[m_iPix][__chn]
  42. #define _Src0N (Args.bSrcReg0_Negate?(-_Src0):_Src0)
  43. #define _Src1N (Args.bSrcReg1_Negate?(-_Src1):_Src1)
  44. #define _Src2N (Args.bSrcReg2_Negate?(-_Src2):_Src2)
  45. #define _Src0NC(__chn) (Args.bSrcReg0_Negate?(-_Src0C(__chn)):_Src0C(__chn))
  46. #define _Src1NC(__chn) (Args.bSrcReg1_Negate?(-_Src1C(__chn)):_Src1C(__chn))
  47. #define _Src2NC(__chn) (Args.bSrcReg2_Negate?(-_Src2C(__chn)):_Src2C(__chn))
  48. BYTE ComponentMask[4] = {RDPS_COMPONENTMASK_0, RDPS_COMPONENTMASK_1, RDPS_COMPONENTMASK_2, RDPS_COMPONENTMASK_3};
  49. BYTE* pRDPSInstBuffer = &m_pCurrentPixelShader->m_RDPSInstBuffer[0]; // Buffer of "RISC" RDPS_* instructions to execute.
  50. int QueueIndex[4] = {-1,-1,-1,-1}; // For simulating co-issue sequentially ("parallel" writes staged in queue)
  51. int iChn; // For macros
  52. #if DBG
  53. PixelShaderInstruction* pCurrD3DPSInst = NULL; // Current true D3DSIO_ instruction being simulated.
  54. #endif
  55. m_bPixelDiscard[0] = m_bPixelDiscard[1] = m_bPixelDiscard[2] = m_bPixelDiscard[3] = FALSE;
  56. while(RDPSINST_END != _InstParam(RDPSINST_BASE).Inst)
  57. {
  58. switch(_InstParam(RDPSINST_BASE).Inst)
  59. {
  60. case RDPSINST_EVAL:
  61. {
  62. _DeclArgs(RDPSINST_EVAL)
  63. m_Attr[RDATTR_TEXTURE0+Args.uiCoordSet].Sample( Args.DstReg.GetRegPtr()[m_iPix],
  64. (FLOAT)m_iX[m_iPix], (FLOAT)m_iY[m_iPix],
  65. Args.bIgnoreD3DTTFF_PROJECTED, Args.bClamp );
  66. }
  67. _StepOverInst(RDPSINST_EVAL)
  68. break;
  69. case RDPSINST_SAMPLE:
  70. {
  71. _DeclArgs(RDPSINST_SAMPLE)
  72. ComputeTextureFilter( Args.uiStage, Args.CoordReg.GetRegPtr()[m_iPix] );
  73. SampleTexture( Args.uiStage, Args.DstReg.GetRegPtr()[m_iPix] );
  74. }
  75. _StepOverInst(RDPSINST_SAMPLE)
  76. break;
  77. case RDPSINST_KILL:
  78. {
  79. _DeclArgs(RDPSINST_KILL)
  80. DWORD TexKillFlags = 0x0; // TODO: get these from TSS or per-instruction
  81. _PerChannel(
  82. // compare against zero according to kill flags
  83. if ( TexKillFlags & (1<<iChn) )
  84. {
  85. if ( _Dst >= 0. )
  86. m_bPixelDiscard[m_iPix] |= 0x1;
  87. }
  88. else
  89. {
  90. if ( _Dst < 0. )
  91. m_bPixelDiscard[m_iPix] |= 0x1;
  92. }
  93. )
  94. }
  95. _StepOverInst(RDPSINST_KILL)
  96. break;
  97. case RDPSINST_BEM:
  98. {
  99. _DeclArgs(RDPSINST_BEM)
  100. RDTextureStageState* pTSS = &m_pRD->m_TextureStageState[Args.uiStage];
  101. // Just assuming Args.WriteMask is .rg
  102. _DstC(0) = _Src0NC(0) +
  103. pTSS->m_fVal[D3DTSS_BUMPENVMAT00] * _Src1NC(0) +
  104. pTSS->m_fVal[D3DTSS_BUMPENVMAT10] * _Src1NC(1);
  105. _DstC(1) = _Src0NC(1) +
  106. pTSS->m_fVal[D3DTSS_BUMPENVMAT01] * _Src1NC(0) +
  107. pTSS->m_fVal[D3DTSS_BUMPENVMAT11] * _Src1NC(1);
  108. }
  109. _StepOverInst(RDPSINST_BEM)
  110. break;
  111. case RDPSINST_LUMINANCE:
  112. {
  113. _DeclArgs(RDPSINST_LUMINANCE)
  114. RDTextureStageState* pTSS = &m_pRD->m_TextureStageState[Args.uiStage];
  115. FLOAT fLum = _Src1NC(2) *
  116. pTSS->m_fVal[D3DTSS_BUMPENVLSCALE] +
  117. pTSS->m_fVal[D3DTSS_BUMPENVLOFFSET];
  118. fLum = min(max(fLum, 0.0f), 1.0F);
  119. // apply luminance modulation to RGB only
  120. _DstC(0) = _Src0C(0)*fLum;
  121. _DstC(1) = _Src0C(1)*fLum;
  122. _DstC(2) = _Src0C(2)*fLum;
  123. }
  124. _StepOverInst(RDPSINST_LUMINANCE)
  125. break;
  126. case RDPSINST_DEPTH:
  127. {
  128. _DeclArgs(RDPSINST_DEPTH)
  129. FLOAT result;
  130. FLOAT* pDstReg = Args.DstReg.GetRegPtr()[m_iPix];
  131. if( pDstReg[1] )
  132. result = pDstReg[0] / pDstReg[1];
  133. else
  134. result = 1.0f;
  135. // clamp
  136. m_Depth[m_iPix] = MAX(0, MIN(1, result));
  137. // snap off extra bits by converting to/from buffer format - necessary
  138. // to make depth buffer equality tests function correctly
  139. SnapDepth();
  140. do
  141. {
  142. m_SampleDepth[m_CurrentSample][m_iPix] = m_Depth[m_iPix];
  143. }
  144. while (NextSample());
  145. }
  146. _StepOverInst(RDPSINST_DEPTH)
  147. break;
  148. case RDPSINST_SRCMOD:
  149. {
  150. _DeclArgs(RDPSINST_SRCMOD)
  151. _PerChannelMasked(
  152. if( Args.bComplement )
  153. _Dst = 1 - _Src0;
  154. else if( Args.bBias && Args.bTimes2 )
  155. _Dst = 2*(_Src0 - 0.5);
  156. else if( Args.bBias )
  157. _Dst = _Src0 - 0.5f;
  158. else if( Args.bTimes2 )
  159. _Dst = 2*_Src0;
  160. else
  161. _Dst = _Src0;
  162. _Dst = MAX( _Dst, Args.fRangeMin );
  163. _Dst = MIN( _Dst, Args.fRangeMax );
  164. )
  165. }
  166. _StepOverInst(RDPSINST_SRCMOD)
  167. break;
  168. case RDPSINST_SWIZZLE:
  169. {
  170. _DeclArgs(RDPSINST_SWIZZLE)
  171. BYTE Swizzle = Args.Swizzle;
  172. _PerChannelMasked(
  173. _Dst = _Src0C(Swizzle&0x3);
  174. Swizzle >>= 2;
  175. )
  176. }
  177. _StepOverInst(RDPSINST_SWIZZLE)
  178. break;
  179. case RDPSINST_DSTMOD:
  180. {
  181. _DeclArgs(RDPSINST_DSTMOD)
  182. _PerChannelMasked(
  183. _Dst *= Args.fScale;
  184. // clamp to range
  185. _Dst = MAX( _Dst, Args.fRangeMin );
  186. _Dst = MIN( _Dst, Args.fRangeMax );
  187. )
  188. }
  189. _StepOverInst(RDPSINST_DSTMOD)
  190. break;
  191. case RDPSINST_MOV:
  192. {
  193. _DeclArgs(RDPSINST_MOV)
  194. _PerChannelMasked(_Dst = _Src0N;)
  195. }
  196. _StepOverInst(RDPSINST_MOV)
  197. break;
  198. case RDPSINST_RCP:
  199. {
  200. _DeclArgs(RDPSINST_RCP)
  201. _PerChannelMasked(_Dst = _Src0N ? 1/_Src0N : 1.0f;)
  202. }
  203. _StepOverInst(RDPSINST_RCP)
  204. break;
  205. case RDPSINST_FRC:
  206. {
  207. _DeclArgs(RDPSINST_FRC)
  208. _PerChannelMasked(_Dst = _Src0N - (float)floor(_Src0N);)
  209. }
  210. _StepOverInst(RDPSINST_FRC)
  211. break;
  212. case RDPSINST_ADD:
  213. {
  214. _DeclArgs(RDPSINST_ADD)
  215. _PerChannelMasked(_Dst = _Src0N + _Src1N;)
  216. }
  217. _StepOverInst(RDPSINST_ADD)
  218. break;
  219. case RDPSINST_SUB:
  220. {
  221. _DeclArgs(RDPSINST_SUB)
  222. _PerChannelMasked(_Dst = _Src0N - _Src1N;)
  223. }
  224. _StepOverInst(RDPSINST_SUB)
  225. break;
  226. case RDPSINST_MUL:
  227. {
  228. _DeclArgs(RDPSINST_MUL)
  229. _PerChannelMasked(_Dst = _Src0N * _Src1N;);
  230. }
  231. _StepOverInst(RDPSINST_MUL)
  232. break;
  233. case RDPSINST_DP3:
  234. {
  235. _DeclArgs(RDPSINST_DP3)
  236. FLOAT dp3 = _Src0NC(0) * _Src1NC(0) +
  237. _Src0NC(1) * _Src1NC(1) +
  238. _Src0NC(2) * _Src1NC(2);
  239. _PerChannelMasked(_Dst = dp3;)
  240. }
  241. _StepOverInst(RDPSINST_DP3)
  242. break;
  243. case RDPSINST_DP4:
  244. {
  245. _DeclArgs(RDPSINST_DP4)
  246. FLOAT dp4 = _Src0NC(0) * _Src1NC(0) +
  247. _Src0NC(1) * _Src1NC(1) +
  248. _Src0NC(2) * _Src1NC(2) +
  249. _Src0NC(3) * _Src1NC(3);
  250. _PerChannelMasked(_Dst = dp4;)
  251. }
  252. _StepOverInst(RDPSINST_DP4)
  253. break;
  254. case RDPSINST_MAD:
  255. {
  256. _DeclArgs(RDPSINST_MAD)
  257. _PerChannelMasked(_Dst = _Src0N * _Src1N + _Src2N;)
  258. }
  259. _StepOverInst(RDPSINST_MAD)
  260. break;
  261. case RDPSINST_LRP:
  262. {
  263. _DeclArgs(RDPSINST_LRP)
  264. _PerChannelMasked(_Dst = (_Src0N*(_Src1N - _Src2N)) + _Src2N;)
  265. }
  266. _StepOverInst(RDPSINST_LRP)
  267. break;
  268. case RDPSINST_CND:
  269. {
  270. _DeclArgs(RDPSINST_CND)
  271. _PerChannelMasked(_Dst = _Src0N > 0.5f ? _Src1N : _Src2N;)
  272. }
  273. _StepOverInst(RDPSINST_CND)
  274. break;
  275. case RDPSINST_CMP:
  276. {
  277. _DeclArgs(RDPSINST_CMP)
  278. _PerChannelMasked(_Dst = _Src0N >= 0.f ? _Src1N : _Src2N;)
  279. }
  280. _StepOverInst(RDPSINST_CMP)
  281. break;
  282. case RDPSINST_TEXCOVERAGE:
  283. {
  284. _DeclArgs(RDPSINST_TEXCOVERAGE);
  285. Args.pGradients[0][0] = *Args.pDUDX_0 - *Args.pDUDX_1; // du/dx
  286. Args.pGradients[0][1] = *Args.pDUDY_0 - *Args.pDUDY_1; // du/dy
  287. Args.pGradients[1][0] = *Args.pDVDX_0 - *Args.pDVDX_1; // dv/dx
  288. Args.pGradients[1][1] = *Args.pDVDY_0 - *Args.pDVDY_1; // dv/dy
  289. Args.pGradients[2][0] = *Args.pDWDX_0 - *Args.pDWDX_1; // dw/dx
  290. Args.pGradients[2][1] = *Args.pDWDY_0 - *Args.pDWDY_1; // dw/dy
  291. ComputeTextureCoverage( Args.uiStage, Args.pGradients );
  292. }
  293. _StepOverInst(RDPSINST_TEXCOVERAGE)
  294. break;
  295. case RDPSINST_QUADLOOPBEGIN:
  296. m_iPix = 0;
  297. _StepOverInst(RDPSINST_QUADLOOPBEGIN)
  298. break;
  299. case RDPSINST_QUADLOOPEND:
  300. {
  301. _DeclArgs(RDPSINST_QUADLOOPEND);
  302. if( 4 > ++m_iPix )
  303. pRDPSInstBuffer -= Args.JumpBackByOffset;
  304. else
  305. _StepOverInst(RDPSINST_QUADLOOPEND)
  306. }
  307. break;
  308. case RDPSINST_QUEUEWRITE:
  309. {
  310. _DeclArgs(RDPSINST_QUEUEWRITE);
  311. QueueIndex[m_iPix]++;
  312. m_QueuedWriteDst[QueueIndex[m_iPix]].DstReg = Args.DstReg;
  313. m_QueuedWriteDst[QueueIndex[m_iPix]].WriteMask = Args.WriteMask;
  314. }
  315. _StepOverInst(RDPSINST_QUEUEWRITE)
  316. break;
  317. case RDPSINST_FLUSHQUEUE:
  318. {
  319. _ASSERT(QueueIndex[m_iPix] >= 0, "Nothing in pixelshader write queue to flush. Refrast mistranslated this pixelshader." );
  320. _ASSERT(QueueIndex[m_iPix] < RDPS_MAX_NUMQUEUEDWRITEREG, "Pixelshader write queue overflow. Refrast mistranslated this pixelshader." );
  321. for( int i = 0; i <= QueueIndex[m_iPix]; i++ )
  322. {
  323. _PerChannel(
  324. if (m_QueuedWriteDst[i].WriteMask & ComponentMask[iChn])
  325. m_QueuedWriteDst[i].DstReg.GetRegPtr()[m_iPix][iChn] = m_QueuedWriteReg[i][m_iPix][iChn];
  326. )
  327. }
  328. QueueIndex[m_iPix] = -1;
  329. }
  330. _StepOverInst(RDPSINST_FLUSHQUEUE)
  331. break;
  332. case RDPSINST_NEXTD3DPSINST:
  333. #if DBG
  334. if (m_pRD->m_pDbgMon)
  335. m_pRD->m_pDbgMon->NextEvent( D3DDM_EVENT_PIXELSHADERINST );
  336. pCurrD3DPSInst = _InstParam(RDPSINST_NEXTD3DPSINST).pInst; // Handy to look at when debugging.
  337. #endif
  338. _StepOverInst(RDPSINST_NEXTD3DPSINST)
  339. break;
  340. default:
  341. _ASSERT(FALSE,"Refrast::ExecShader() - Unrecognized micro-instruction!");
  342. break;
  343. }
  344. }
  345. }
  346. ///////////////////////////////////////////////////////////////////////////////
  347. // end