Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

405 lines
12 KiB

  1. dnl
  2. dnl d_StencilOp
  3. dnl
  4. dnl Macro to do stencil operation test routine
  5. dnl
  6. dnl $1 is an operand that yields the stencil operation code (one of D3DSTENCILOP_*)
  7. dnl returns new stencil value (uS) in mm3
  8. dnl
  9. dnl uSB is in mm3 and uSR is in mm4. Result in mm3.
  10. dnl
  11. dnl mm1 is uZ (iterated)
  12. dnl mm2 is uZB (original)
  13. dnl mm5 is the shifted uZB used for the compare on entry. The macro overwrites mm5 with a copy of uSB, which the write-mask merge at its end needs.
  14. dnl mm5 contains the final Z mask for the conditional Z write.
  15. define(`testVars',`
  ; External MMWORD sized constant declared for the generated code.
  ; NOTE(review): the Val0x.. constants used by the stencil code are not
  ; declared here - presumably they come from another file; confirm.
  16. EXTERN DW_One_One:MMWORD
  17. ')
  18. define(`d_StencSwitchcnt', 0)dnl
  19. define(`d_StencilOp', `
  ; Stencil operation step: loads the operation code from the operand given
  ; to this expansion and computes the new stencil value.
  ; In:  mm3 = uSB, the stencil value read from the buffer
  ;      mm4 = uSR, the stencil reference value
  ; Out: mm3 = new stencil value, merged with uSB under the stencil write
  ;      mask and limited to the low 8 bits
  ; Clobbers: eax, mm5, mm6, flags
  ; Every expansion bumps a counter so that the exit label stays unique.
  20. define(`d_StencSwitchcnt', eval(d_StencSwitchcnt+1))dnl
  ; mm6 = stencil write mask, consumed by the merge after the switch.
  21. movd mm6, XpCtx(pdwRenderState + RS_STENCILWRITEMASK)
  22. movq mm5, mm3 ; Need to save this since it is used below.
  23. ;switch($1)
  24. ;{
  25. mov eax, $1 ;Compare from register instead of from memory.
  26. ;case D3DSTENCILOP_KEEP:
  27. cmp eax, D3DSTENCILOP_KEEP
  28. je doneswitch`'d_StencSwitchcnt
  29. ;uS = uSB;
  30. ;break;
  31. ; Dont have to do anything since input and output in same reg.
  32. ;case D3DSTENCILOP_ZERO:
  33. cmp eax, D3DSTENCILOP_ZERO
  34. jne @f
  35. ;uS = 0;
  36. pxor mm3, mm3
  37. ;break;
  38. jmp doneswitch`'d_StencSwitchcnt
  39. @@:
  40. ;case D3DSTENCILOP_REPLACE:
  41. cmp eax, D3DSTENCILOP_REPLACE
  42. jne @f
  43. ;uS = uSR;
  44. movq mm3, mm4
  45. ;break;
  46. jmp doneswitch`'d_StencSwitchcnt
  47. @@:
  48. ;case D3DSTENCILOP_INCRSAT:
  49. cmp eax, D3DSTENCILOP_INCRSAT
  50. jne @f
  51. ;uS = min(uSB + 1, 0xff);
  ; Unsigned saturating byte add clamps at 0xff.
  ; Assumes Val0x01 holds 1 in the low byte only - TODO confirm.
  52. paddusb mm3, MMWORD PTR Val0x01
  53. ;break;
  54. jmp doneswitch`'d_StencSwitchcnt
  55. @@:
  56. ;case D3DSTENCILOP_DECRSAT:
  57. cmp eax, D3DSTENCILOP_DECRSAT
  58. jne @f
  59. ;uS = max(uSB - 1, 0x00);
  ; Unsigned saturating byte subtract clamps at zero.
  60. psubusb mm3, MMWORD PTR Val0x01
  61. ;break;
  62. jmp doneswitch`'d_StencSwitchcnt
  63. @@:
  64. ;case D3DSTENCILOP_INVERT:
  65. cmp eax, D3DSTENCILOP_INVERT
  66. jne @f
  67. ;uS = ~uSB;
  ; Bits above the low byte are flipped too; the final 0xff mask drops them.
  68. pxor mm3, MMWORD PTR Val0xffffffff
  69. ;break;
  70. jmp doneswitch`'d_StencSwitchcnt
  71. @@:
  72. ;case D3DSTENCILOP_INCR:
  73. cmp eax, D3DSTENCILOP_INCR
  74. jne @f
  75. ;uS = uSB + 1;
  ; Non saturating: the carry out of the low byte is dropped by the final
  ; 0xff mask, so the value wraps.
  76. paddd mm3, MMWORD PTR Val0x01
  77. ;break;
  78. jmp doneswitch`'d_StencSwitchcnt
  79. @@:
  80. ;case D3DSTENCILOP_DECR:
  81. cmp eax, D3DSTENCILOP_DECR
  82. jne @f
  83. ;uS = uSB - 1;
  ; Non saturating: the borrow into the upper bits is dropped by the final
  ; 0xff mask, so the value wraps.
  84. psubd mm3, MMWORD PTR Val0x01
  85. ;break;
  86. jmp doneswitch`'d_StencSwitchcnt
  87. @@:
  88. ; Not a valid Stencil Case, just fall through
  ; mm3 is left untouched here, which gives the same result as KEEP.
  89. ;}
  90. doneswitch`'d_StencSwitchcnt`':
  91. ;// do write mask, do not let stencil mess up Z bits
  92. ;uS &= pCtx->pdwRenderState[D3DRENDERSTATE_STENCILWRITEMASK];
  93. pand mm3, mm6
  94. ;uSB &= ~(pCtx->pdwRenderState[D3DRENDERSTATE_STENCILWRITEMASK]);
  ; pandn inverts its destination first: mm6 = NOT mm6 AND mm5, where mm5
  ; is the copy of uSB saved on entry.
  95. pandn mm6, mm5
  96. ;uS |= uSB;
  97. por mm3, mm6
  98. ;uS &= 0xff;
  99. pand mm3, MMWORD PTR Val0xff
  100. ')dnl
  101. dnl
  102. dnl d_Test
  103. dnl
  104. dnl Macro to build test routine
  105. dnl
  106. dnl $1 is one of 16 32
  107. dnl $2 is one of ZWrite NoZWrite
  108. dnl $3 is one of ZDeferred NoZDeferred
  109. dnl $4 is one of ZAll ZNeverAlways ZLessGreaterEqual ZEqualNotEqual ZGreaterLessEqual
  110. dnl $5 is one of NoStencil Stencil
  111. dnl $6 is one of NotMonolithic Monolithic
  112. dnl $7 is jump label when Monolithic fails
  113. dnl
  114. dnl uZB mm2
  115. dnl uZBS mm5
  116. dnl uSB eax mm3
  117. dnl uSR edx mm4
  118. dnl
  119. define(`d_StencilFailcnt', 0)dnl
  120. define(`d_ZFailcnt', 0)dnl
  121. define(`d_Test', `
  ; Depth test with optional stencil test. Steps the span Z by iDZDX,
  ; compares the iterated Z against the buffer Z, writes or defers the
  ; resulting buffer value and leaves through a handler taken from the
  ; context, or through the caller supplied label in the monolithic form.
  ; Register roles: mm1 = uZ iterated, mm2 = uZB from the buffer,
  ; mm5 = uZBS used for the compare, esi = address of the Z buffer entry.
  ; In the stencil path mm3 = uSB and mm4 = uSR.
  122. ifelse(`$6', `NotMonolithic', `
  123. ;void Test_$1_$2_$3_$4_$5(PD3DI_RASTCTX pCtx, PD3DI_RASTPRIM pP,
  124. ; PD3DI_RASTSPAN pS)
  125. ;{
  126. PUBLIC _MMX_Test_$1_$2_$3_$4_$5
  127. _MMX_Test_$1_$2_$3_$4_$5:
  128. ')
  129. define(`d_StencilFailcnt', eval(d_StencilFailcnt+1))dnl
  130. define(`d_ZFailcnt', eval(d_ZFailcnt+1))dnl
  131. ifelse(eval((d_index(`$1', `16') == 0) && (d_index(`$5', `Stencil') == 0)), `1', `
  132. ;D3D_WARN(0, "16 bit Z with stencil should never be called");
  133. ;DDASSERT(0);
  134. ', eval((d_index(`$4', `ZAll') == 0) && (d_index(`$5', `NoStencil') == 0)), `1', `
  135. ;D3D_WARN(0, "ZAll AND NoStencil, more optimal code should be called");
  136. ;DDASSERT(0);
  137. ', `
  ; mm4 keeps the span Z so it can be stepped by iDZDX further down.
  138. movd mm4, XpS(uZ)
  139. ifelse(`$1', `16', `
  140. ;// 16 bit unsigned format
  141. ;UINT16 uZ = (UINT16)(pS->uZ>>15);
  142. movq mm1, mm4
  143. psrld mm1, 15 ; 31 bit value down 15 to 16 bit z value.
  144. ;UINT16 uZB = *((UINT16*)pS->pZ);
  145. mov esi, XpS(pZ)
  146. movzx eax, word ptr[esi]
  147. movd mm5, eax
  148. movq mm2, mm5
  149. ', `
  150. ;// 24S8 bit unsigned format
  151. ;UINT32 uZ = pS->uZ;
  152. movq mm1, mm4 ; No need for shift since setup gives 31 bits.
  153. ;UINT32 uZB = *((UINT32*)pS->pZ);
  154. mov esi, XpS(pZ)
  155. movd mm5, dword ptr[esi]
  156. movq mm2, mm5
  157. ;UINT32 uSB = uZB & 0xff;
  158. movd eax, mm2
  159. and eax, 0ffh
  160. movd mm7, eax ; for safe keeping, need this below
  161. ;UINT32 uZBS = uZB >> 1;
  162. psrld mm5, 1
  163. ') dnl 16 or 24s8
  164. ifelse(eval((d_index(`$4', `ZNeverAlways') != 0) && (d_index(`$1', `16') != 0)), `1', `
  165. ;uZ &= ~0x7f; // clear stencil bits so equal compare will work
  166. ;uZBS &= ~0x7f;
  167. pand mm1, MMWORD PTR Val0xffffff80 ; ~0x7f = 0xffffff80
  168. pand mm5, MMWORD PTR Val0xffffff80 ; ~0x7f = 0xffffff80
  169. ')
  170. ;pS->uZ += pP->iDZDX;
  171. paddd mm4, XpP(iDZDX)
  172. movd XpS(uZ), mm4
  173. ifelse(`$5', `Stencil', `
  174. ;uZ &= ~0x7f; // clear stencil region
  175. pand mm1, MMWORD PTR Val0xffffff80 ; ~0x7f = 0xffffff80
  176. ;uZB &= ~0xff;
  177. pand mm2, MMWORD PTR Val0xffffff00 ; ~0xff = 0xffffff00
  178. ;UINT32 uS; // holds final stencil value
  179. ;UINT32 uSR = pCtx->pdwRenderState[D3DRENDERSTATE_STENCILREF];
  180. mov edx, XpCtx(pdwRenderState + RS_STENCILREF)
  181. ;UINT32 uMask = pCtx->pdwRenderState[D3DRENDERSTATE_STENCILMASK];
  182. ;mov esi, XpCtx(pdwRenderState + RS_STENCILMASK)
  183. ; Not moved into a register to preserve pointer to ZBuffer location.
  184. movd mm4, edx ; Save values for later
  ; eax still holds uSB from the 24S8 load above.
  185. movd mm3, eax
  ; bStencilPass starts at 0 and is set to 1 only when both tests pass.
  186. mov word ptr XpCtxSI(bStencilPass), 0
  187. ;if (SCMP(pCtx, uSB&uMask, uSR&uMask))
  188. ;{
  ; NOTE(review): the code computes uSR minus uSB, while the pseudo code
  ; above lists uSB first - confirm the SCMP argument order.
  189. and edx, XpCtx(pdwRenderState + RS_STENCILMASK) ; uSR&uMask
  190. and eax, XpCtx(pdwRenderState + RS_STENCILMASK) ; uSB&uMask
  ; Branchless compare: the arithmetic shift by 31 leaves edx as zero or all
  ; ones, and the xor mask selects the sense. Zero means the test failed.
  191. sub edx, eax
  192. and edx, XpCtx(iSAndMask)
  193. sub edx, XpCtx(iSNeg)
  194. sar edx, 31
  195. xor edx, XpCtx(iSXorMask)
  196. test edx, edx ; Not needed! Avoiding Flag stall?
  197. jz StencilFail`'d_StencilFailcnt
  198. ;if (ZCMP32(pCtx, uZ, uZBS))
  199. ;{
  ; Same branchless compare scheme as the stencil test, using the Z masks.
  200. movd edx, mm1
  201. movd eax, mm5
  202. sub edx, eax
  203. and edx, XpCtx(iZAndMask)
  204. sub edx, XpCtx(iZNeg)
  205. sar edx, 31
  206. xor edx, XpCtx(iZXorMask)
  207. test edx, edx ; Not needed! Avoiding Flag stall?
  208. jz ZFail`'d_ZFailcnt
  209. mov word ptr XpCtxSI(bStencilPass), 1
  ; Both tests passed: apply the stencil pass operation.
  210. d_StencilOp(XpCtx(pdwRenderState + RS_STENCILPASS))
  ; The handler is loaded only now because the stencil step uses eax as scratch.
  211. mov eax, XpCtx(pfnTestPassEnd)
  212. ifelse(`$3', `NoZDeferred', `ifelse(`$2', `ZWrite', `
  213. ;*((UINT32*)pS->pZ) = (uZ<<1) | uS;
  214. pslld mm1, 1
  215. por mm1, mm3
  216. movd dword ptr [esi], mm1', `
  217. ;*((UINT32*)pS->pZ) = uZB | uS;
  218. por mm2, mm3
  219. movd dword ptr [esi], mm2')
  220. ', `ifelse(`$2', `ZWrite', `
  221. ;pCtx->SI.uZDeferred = (uZ<<1) | uS;
  222. pslld mm1, 1
  223. por mm1, mm3
  224. movd dword ptr XpCtxSI(uZDeferred), mm1',`
  225. ;pCtx->SI.uZDeferred = uZB | uS;
  226. por mm2, mm3
  227. movd dword ptr XpCtxSI(uZDeferred), mm2')
  228. ')
  229. ;pCtx->pfnTestPassEnd(pCtx, pP, pS);
  230. jmp eax
  231. ;}
  232. ;else
  233. ;{
  234. ZFail`'d_ZFailcnt`':
  ; Stencil passed but Z failed: apply the Z fail stencil operation and keep
  ; the buffer Z.
  235. d_StencilOp(XpCtx(pdwRenderState + RS_STENCILZFAIL))
  236. mov eax, XpCtx(pfnTestFailEnd)
  237. ifelse(`$3', `NoZDeferred', `
  238. ;*((UINT32*)pS->pZ) = uZB | uS;
  239. por mm2, mm3
  240. movd dword ptr [esi], mm2
  241. ',`
  242. ;pCtx->SI.uZDeferred = uZB | uS;
  243. por mm2, mm3
  244. movd dword ptr XpCtxSI(uZDeferred), mm2
  245. ')
  246. ;pCtx->pfnTestFailEnd(pCtx, pP, pS);
  247. jmp eax
  248. ;}
  249. ;}
  250. ;else
  251. ;{
  252. StencilFail`'d_StencilFailcnt`':
  ; Stencil test failed: apply the stencil fail operation and keep the
  ; buffer Z.
  253. d_StencilOp(XpCtx(pdwRenderState + RS_STENCILFAIL))
  254. mov eax, XpCtx(pfnTestFailEnd)
  255. ifelse(`$3', `NoZDeferred', `
  256. ;*((UINT32*)pS->pZ) = uZB | uS;
  257. por mm2, mm3
  258. movd dword ptr [esi], mm2
  259. ',`
  260. ;pCtx->SI.uZDeferred = uZB | uS;
  261. por mm2, mm3
  262. movd dword ptr XpCtxSI(uZDeferred), mm2
  263. ')
  264. ;pCtx->pfnTestFailEnd(pCtx, pP, pS);
  265. jmp eax
  266. ;}
  267. ', ` dnl Above code is for Stencil. Below Code is for Standard Z buffer.
  268. ifelse(`$3', `ZDeferred', `
  269. ;pCtx->SI.uZDeferred = uZB;
  270. movd XpCtxSI(uZDeferred), mm2
  271. ')
  ; Each compare variant below leaves a pass mask in mm3; its low dword is
  ; used both to blend the Z value and to pick the exit.
  272. ifelse(`$4', `ZAll', `
  273. dnl 16 bit and 32 bit compare are the same.
  274. movd edx, mm1
  275. movd eax, mm5
  276. sub edx, eax
  277. and edx, XpCtx(iZAndMask)
  278. sub edx, XpCtx(iZNeg)
  279. sar edx, 31
  280. xor edx, XpCtx(iZXorMask)
  281. movd mm3, edx
  282. ',
  283. `$4', `ZNeverAlways', `
  284. ;if (~(pCtx->iZXorMask))
  285. movq mm3, XpCtx(iZXorMask)
  286. ',
  287. `$4', `ZGreaterLessEqual', `
  288. ;if ((pCtx->iZXorMask)^(uZ > uZB))
  289. pcmpeqd mm4, mm4
  290. movq mm3, mm1
  291. pcmpgtd mm3, mm5
  292. pxor mm3, XpCtx(iZXorMask)
  293. pxor mm3, mm4
  294. ',
  295. `$4', `ZEqualNotEqual', `
  296. ;if ((pCtx->iZXorMask)^(uZ != uZB))
  297. movq mm3, mm5
  298. pcmpeqd mm3, mm1
  299. pxor mm3, XpCtx(iZXorMask)
  300. ',
  301. `$4', `ZLessGreaterEqual', `
  302. ;if ((pCtx->iZXorMask)^(uZ >= uZB))
  303. movq mm3, mm5
  304. pcmpgtd mm3, mm1
  305. pxor mm3, XpCtx(iZXorMask)
  306. ', `
  307. #error Invalid arg to d_Test') dnl Matches with ZAll test above
  308. movd eax, mm3
  309. ifelse(`$6', `NotMonolithic', `
  ; The mask in eax indexes the handler slot relative to the fail handler.
  ; Presumably 0 picks the fail handler and all ones picks the slot just
  ; before it; verify against the context layout.
  310. mov eax, XpCtx(pfnTestFailEnd+eax*4)
  311. ')
  ; NOTE(review): the write back selection that follows tests the ZWrite
  ; argument without m4 quotes, unlike the stencil path. Harmless unless
  ; that word is ever made a macro.
  312. ifelse(`$3', `NoZDeferred',
  313. `ifelse(`$1', `16',
  314. `ifelse($2, `ZWrite',
  315. `
  316. ; *((UINT16*)pS->pZ) = uZ;
  ; Blend by mask: new Z where the test passed, buffer Z where it failed.
  317. pand mm1, mm3
  318. pandn mm3, mm2
  319. por mm1, mm3
  320. movd edx, mm1
  321. mov word ptr [esi], dx
  322. ', `
  323. ; *((UINT16*)pS->pZ) = uZB; Done already
  324. ')', `ifelse($2, `ZWrite', `
  325. ; *((UINT32*)pS->pZ) = (uZ<<1) | uSB;
  ; NOTE(review): for ZNeverAlways the low bits of uZ are not cleared
  ; earlier, so the shifted value may overlap the stencil byte - confirm.
  326. pslld mm1, 1 ; Shift iterated Z into position if 24 bit Z
  327. pand mm1, mm3
  328. pandn mm3, mm2
  329. por mm1, mm3
  330. por mm1, mm7
  331. movd dword ptr [esi], mm1
  332. ',`
  333. ; *((UINT32*)pS->pZ) = uZB | uSB;
  334. por mm2, mm7
  335. movd dword ptr [esi], mm2
  336. ')')',`ifelse(`$1', `16',`ifelse($2, `ZWrite',`
  337. ; pCtx->SI.uZDeferred = uZ;
  338. pand mm1, mm3
  339. pandn mm3, mm2
  340. por mm1, mm3
  341. movd XpCtxSI(uZDeferred), mm1
  342. ',`
  343. ; pCtx->SI.uZDeferred = uZB;
  344. movd XpCtxSI(uZDeferred), mm2
  345. ')',`ifelse($2, `ZWrite',`
  346. ; pCtx->SI.uZDeferred = (uZ<<1) | uSB;
  347. pslld mm1, 1 ; Shift iterated Z into position if 24 bit Z
  348. pand mm1, mm3
  349. pandn mm3, mm2
  350. por mm1, mm3
  351. por mm1, mm7
  352. movd XpCtxSI(uZDeferred), mm1
  353. ',`
  354. ; pCtx->SI.uZDeferred = uZB | uSB;
  355. por mm2, mm7
  356. movd XpCtxSI(uZDeferred), mm2
  357. ')')')
  358. ifelse(`$6', `NotMonolithic', `
  ; Tail jump into the handler selected above.
  359. jmp eax
  360. ', `
  ; A zero mask means the test failed: leave through the caller supplied label.
  361. test eax, eax
  362. jz $7
  363. ')
  364. ') dnl matches with stencil on or off.
  365. ') dnl matches with error testing code above.
  366. ') dnl matches with definition of macro
  367. dnl