Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

501 lines
12 KiB

  1. ;-----------------------------------------------------------------------------
  2. ;
  3. ; Monolith 3. Perspective Correct Nearest texturing
  4. ; 16 bit Zbuffer (LE or GR) 565
  5. ;
  6. ; Globals
  7. ;
  8. ; StackPos - stack pos holder
  9. ; uSpans - Number of spans to process
  10. ; iSurfaceStep - what to add to screen pointer
  11. ; iZStep - what to add to Z buffer pointer
  12. ; uPix - Pixel Count
  13. ;
  14. ; Changes from general MMX assembly.
  15. ; 1) Since texture and screen color formats are the same,
  16. ; the conversion to and from internal color formats was
  17. ; unnecessary.
  18. ; 2) Texture color does not need to be saved off and can stay
  19. ; in a register.
  20. ; 3) A few registers were changed to remove a few unnecessary
  21. ; moves.
  22. ;
  23. ;-----------------------------------------------------------------------------
  24. INCLUDE iammx.inc
  25. INCLUDE offs_acp.inc
  26. ; Names are read LSB to MSB, so B5G6R5 means five bits of blue starting
  27. ; at the LSB, then six bits of green, then five bits of red.
  28. ;TBD check to see if this value is correct. (COLOR_SHIFT is not referenced by the code visible in this file.)
  29. COLOR_SHIFT equ 8
  30. .586
  31. .model flat
  32. .data
  33. EXTERN IncHighandLow16:MMWORD
  34. EXTERN UFracVFracMask:MMWORD
  35. EXTERN UV32to15Mask:MMWORD
  36. EXTERN Makelow16one:MMWORD ; used: seed for the pmaddwd multiplier that turns (U,V) into a texel index
  37. EXTERN MaskKeepUValues:MMWORD
  38. EXTERN MaskKeepVValues:MMWORD
  39. EXTERN UFrac:MMWORD
  40. EXTERN VFrac:MMWORD
  41. EXTERN Zero:MMWORD ; used: pcmpeqw operand in the flip-mask (mirror) test
  42. EXTERN memD3DTFG_POINT:MMWORD
  43. EXTERN GiveUp:MMWORD ; used: countdown for the special-W refinement loop
  44. EXTERN LastW:MMWORD ; used: holds the previous iW so the new iDW can be derived
  45. EXTERN Val0x000a000a:MMWORD ; used: base value the texture shift counts are subtracted from
  46. EXTERN Val0xffff:MMWORD ; used: isolates the low word of the shift-count pair
  47. EXTERN Val0x0000002000000020:MMWORD
  48. EXTERN Val0x0000ffff0000ffff:MMWORD
  49. opt_MaskRed565to888 MMWORD 000000000000F800H ; defined locally; not referenced by the code visible in this file
  50. EXTERN MaskRed565to888:MMWORD
  51. EXTERN MaskGreen565to888:MMWORD
  52. EXTERN MaskBlue565to888:MMWORD
  53. EXTERN MaskRed555to888:MMWORD
  54. EXTERN MaskGreen555to888:MMWORD
  55. EXTERN MaskBlue555to888:MMWORD
  56. EXTERN MaskAlpha1555to8888:MMWORD
  57. EXTERN MaskRed1555to8888:MMWORD
  58. EXTERN MaskGreen1555to8888:MMWORD
  59. EXTERN MaskBlue1555to8888:MMWORD
  60. EXTERN SetAlphato0xffff:MMWORD
  61. EXTERN SetAlphato0xff:MMWORD
  62. RedShift565to888 equ 8 ; the *Shift* equates below are not referenced by the code visible in this file
  63. GreenShift565to888 equ 5
  64. BlueShift565to888 equ 3
  65. RedShift555to888 equ 9
  66. GreenShift555to888 equ 6
  67. BlueShift555to888 equ 3
  68. AlphaShift1555to8888 equ 16
  69. RedShift1555to8888 equ 9
  70. GreenShift1555to8888 equ 6
  71. BlueShift1555to8888 equ 3
  72. EXTERN Zero:MMWORD ; repeats the declaration of Zero made earlier in this section
  73. EXTERN DW_One_One:MMWORD
  74. EXTERN MaskOffAlpha:MMWORD
  75. EXTERN ShiftTA:MMWORD
  76. EXTERN Val0x00ff00ff00ff00ff:MMWORD
  77. EXTERN Val0x000000ff00ff00ff:MMWORD
  78. EXTERN Val0X0000000001000000:MMWORD
  79. EXTERN AlphaVal128:MMWORD
  80. EXTERN RGBVal128:MMWORD
  81. EXTERN g_uDitherValue:MMWORD
  82. EXTERN SetAlphato0xff:MMWORD ; repeats the declaration of SetAlphato0xff made earlier in this section
  83. EXTERN u888to565RedBlueMask:MMWORD
  84. EXTERN u888to565GreenMask:MMWORD
  85. EXTERN u888to565Multiplier:MMWORD
  86. EXTERN uVal0x000007ff03ff07ff:MMWORD
  87. EXTERN uVal0x0000078003c00780:MMWORD
  88. EXTERN u888to555RedBlueMask:MMWORD
  89. EXTERN u888to555GreenMask:MMWORD
  90. EXTERN u888to555Multiplier:MMWORD
  91. EXTERN uVal0x000007ff07ff07ff:MMWORD
  92. EXTERN uVal0x0000078007800780:MMWORD
  93. ; Span Variables (static storage in .data, shared by every call of the routine)
  94. StackPos dd ? ; esp captured right after `push ebp`; reloaded into esp just before `pop ebp`
  95. uSpans dd ? ; spans still to process in the current primitive (decremented at the top of SpanLoop)
  96. ;-----------------------------------------------------------------------------
  97. ; Loop Variables (set up once per span, read once per pixel)
  98. iSurfaceStep dd ? ; added to pS->pSurface per pixel; negated when D3DI_RASTPRIM_X_DEC is set
  99. iZStep dd ? ; added to pS->pZ per pixel; negated when D3DI_RASTPRIM_X_DEC is set
  100. uPix dd ? ; pixels remaining in the current span
  101. ;-----------------------------------------------------------------------------
  102. .code
  103. PUBLIC _MMXMLRast_3 ; Exported entry point. The only stack argument read is the first one (pCtx).
  104. _MMXMLRast_3: ; Walks the pCtx->pPrim list, each span of each prim and each pixel of each span; returns 0 in eax.
  105. push ebp ; ebp is reused below as the span pointer (pS), not as a frame pointer
  106. mov StackPos, esp ; remember esp so the epilogue can unwind without tracking the local area
  107. mov eax, esp ; [eax] = saved ebp, [eax+4] = return address, [eax+8] = first argument
  108. sub esp, 0Ch ; This will need to change if stack frame size changes. (The reserved area is not referenced in this routine.)
  109. push ebx
  110. push esi
  111. push edi
  112. ; Put pCtx into ebx (ebx stays pCtx for the whole routine)
  113. mov ebx, [eax+8]
  114. ;PD3DI_RASTPRIM pP = pCtx->pPrim;
  115. mov ecx, [ebx+RASTCTX_pPrim] ; ecx = pP for the whole primitive loop
  116. ;while (pP)
  117. ;{
  118. PrimLoop:
  119. test ecx, ecx ; ZF set when pP is NULL (same flags outcome as comparing against 0)
  120. je ExitPrimLoop
  121. ;UINT16 uSpans = pP->uSpans;
  122. movzx eax, word ptr [ecx+RASTPRIM_uSpans]
  123. mov uSpans, eax
  124. ;PD3DI_RASTSPAN pS = (PD3DI_RASTSPAN)(pP + 1);
  125. mov ebp, ecx
  126. add ebp, SIZEOF_RASTPRIM ; ebp = pS, the first span stored right after the primitive header
  127. ;while (uSpans-- > 0)
  128. ;{
  129. SpanLoop:
  130. mov edx, uSpans ; edx = count before the decrement
  131. mov eax, edx
  132. dec eax
  133. mov uSpans, eax ; store the post-decrement count
  134. test edx, edx
  135. jle ExitSpanLoop ; leave when the pre-decrement count was <= 0
  136. ;pCtx->pfnBegin(pCtx, pP, pS);
  137. ;-----------------------------------------------------------------------------
  138. ; LoopAny code inserted here. This is to get rid of an extra
  139. ; jump.
  140. ;-----------------------------------------------------------------------------
  141. ; Setup Code begins
  142. ; get values to iterate
  143. ;uPix = pS->uPix;
  144. movzx eax, word ptr [ebp+RASTSPAN_uPix]
  145. mov uPix, eax
  146. ;pCtx->SI.iDW = 0x0;
  147. mov dword ptr [ebx+RASTCTX_SI+SPANITER_iDW], 0
  148. mov esi, [ebp+RASTSPAN_iW] ; esi = W for the first pixel
  149. movq mm5, MMWORD PTR [ebp+RASTSPAN_iUoW1] ; low dword feeds iU1, the dword after it feeds iV1
  150. ;pCtx->SI.iUd_TexNum = d_WTimesUVoW(pS->iW,pS->iUoW1);
  151. ;pCtx->SI.iVd_TexNum = d_WTimesUVoW(pS->iW,pS->iVoW1);
  152. pslld mm5, 8 ; pre-scale both dwords by 2^8
  153. shl esi, 4 ; pre-scale W by 2^4
  154. movd eax, mm5 ; eax = scaled low dword (U/W)
  155. psrlq mm5, 32 ; move the high dword (V/W) down for the second multiply
  156. imul esi ; signed edx:eax = eax * esi; only the high dword (edx) is kept
  157. mov [ebx+RASTCTX_SI+SPANITER_iU1], edx
  158. movd eax, mm5 ; eax = scaled V/W
  159. imul esi
  160. mov [ebx+RASTCTX_SI+SPANITER_iV1], edx
  161. ;if (pP->iDOoWDX > 0)
  162. ;{
  163. cmp dword ptr [ecx+RASTPRIM_iDOoWDX], 0
  164. jg SpecialWLastMonTest ; NOTE(review): taken when iDOoWDX > 0, i.e. it lands on the block the pseudo-code labels "else" - confirm the intended sense
  165. ;// iSpecialW should be negative for the first 3 pixels of span
  166. ;pCtx->SI.iSpecialW = -3;
  167. mov word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW], -3
  168. jmp DoneSpecialWifMonTest
  169. ;}
  170. ;else
  171. ;{
  172. SpecialWLastMonTest:
  173. ;// iSpecialW should be negative for the last 3 pixels of span
  174. ;pCtx->SI.iSpecialW = 0x7fff - uPix;
  175. mov eax, 07fffh
  176. sub eax, uPix
  177. ;pCtx->SI.iSpecialW += 5; // this may wrap, but it should
  178. add eax, 5
  179. mov word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW], ax ; 16-bit store, matching every other access to iSpecialW; a dword store would also overwrite the two bytes after the field
  180. ;}
  181. DoneSpecialWifMonTest:
  182. ;if (pP->uFlags & D3DI_RASTPRIM_X_DEC)
  183. ;{
  184. mov eax, [ecx+RASTPRIM_uFlags]
  185. and eax, D3DI_RASTPRIM_X_DEC
  186. test eax, eax
  187. jz LeftToRightSpan ; flag clear: use the steps as stored in pCtx
  188. ;iZStep = -pCtx->iZStep;
  189. mov eax, [ebx+RASTCTX_iZStep]
  190. neg eax
  191. mov iZStep, eax
  192. ;iSurfaceStep = -pCtx->iSurfaceStep;
  193. mov eax, [ebx+RASTCTX_iSurfaceStep]
  194. neg eax
  195. mov iSurfaceStep, eax
  196. ;}
  197. jmp DoneSpanDirif
  198. ;else
  199. ;{
  200. LeftToRightSpan:
  201. ;iZStep = pCtx->iZStep;
  202. mov eax, [ebx+RASTCTX_iZStep]
  203. mov iZStep, eax
  204. ;iSurfaceStep = pCtx->iSurfaceStep;
  205. mov eax, [ebx+RASTCTX_iSurfaceStep]
  206. mov iSurfaceStep, eax
  207. ;}
  208. DoneSpanDirif:
  209. ; Setup Code Ends
  210. ; ----------------------------------------------------------------------------------------------------------------
  211. ; Loop Code Begins (one pass per pixel; ebx = pCtx, ecx = pP, ebp = pS throughout)
  212. PixelLoop:
  213. ; Ztestcode
  214. ; edx is uZ
  215. ; eax is uZB
  216. ; 16 bit unsigned format
  217. ;UINT16 uZ = (UINT16)(pS->uZ>>15);
  218. ;UINT16 uZB = *((UINT16*)pS->pZ);
  219. mov edx, [ebp+RASTSPAN_uZ]
  220. movd mm4, edx ; keep the unshifted uZ for the per-pixel increment
  221. mov esi, [ebp+RASTSPAN_pZ] ; esi = address of this pixel's Z-buffer entry
  222. shr edx, 15 ; edx = uZ >> 15, the value compared and (on pass) written
  223. movzx eax, word ptr [esi] ; eax = uZB, the depth currently in the buffer
  224. ;pS->uZ += pP->iDZDX;
  225. ;if ((pCtx->iZXorMask)^(uZ > uZB))
  226. ; !(uZ > uZB) <==>
  227. ; (uZ <= uZB) <==>
  228. ; (uZ < uZB+1) <==>
  229. ;
  230. sub eax, edx ; eax = uZB - uZ; negative exactly when uZ > uZB
  231. paddd mm4, [ecx+RASTPRIM_iDZDX] ; low dword = uZ + iDZDX
  232. movd [ebp+RASTSPAN_uZ], mm4 ; uZ is advanced whether or not the pixel passes
  233. xor eax, [ebx+RASTCTX_iZXorMask] ; the mask can invert the sign, flipping the sense of the test
  234. test eax, eax
  235. js FailLabel ; sign set: Z test failed, skip the Z and color writes
  236. mov word ptr [esi], dx ; Z test passed: store the new depth
  237. ; texturecode
  238. mov esi, [ebx+RASTCTX_pTexture] ; esi = texture descriptor (SPANTEX_* offsets)
  239. movq mm5, MMWORD PTR Val0x000a000a ; This is TEX_FINAL_SHIFT - 6 = 10.
  240. movd mm4, [esi+SPANTEX_iShiftU] ; presumably iShiftU and iShiftV as two adjacent words - confirm against the SPANTEX layout
  241. psubw mm5, mm4 ; per-word: base value minus shift
  242. movq mm4, mm5
  243. pand mm5, MMWORD PTR Val0xffff ; mm5 = shift count taken from the low word (applied to U)
  244. psrld mm4, 16 ; mm4 = shift count taken from the high word (applied to V)
  245. movd mm1, [ebx+RASTCTX_SI+SPANITER_iU1]
  246. psrad mm1, mm5 ; arithmetic shift of U
  247. movd mm2, [ebx+RASTCTX_SI+SPANITER_iV1]
  248. psrad mm2, mm4 ; arithmetic shift of V
  249. punpckldq mm1, mm2 ; mm1 = { low dword: U, high dword: V }
  250. movzx edx, word ptr [esi+SPANTEX_iShiftPitch]
  251. add edx, 16 ; +16 so the shifted value lands in the high word of the dword
  252. movd mm2, edx
  253. movq mm5, MMWORD ptr Makelow16one
  254. pslld mm5, mm2
  255. por mm5, MMWORD ptr Makelow16one ; mm5 = pmaddwd multiplier; presumably { 1, 1 << iShiftPitch } per dword - depends on the value of Makelow16one
  256. psrad mm1, 6 ; drop 6 more low bits from U and V
  257. packssdw mm1, mm1 ; Value needs to be packed since all wrap/mirror
  258. movd mm0, [esi+SPANTEX_uMaskU] ; Load U and V mask
  259. movq mm7, mm1
  260. movd mm4, [esi+SPANTEX_iFlipMaskU]
  261. pand mm7, mm4 ; isolate the flip bits of each coordinate
  262. pcmpeqw mm7, MMWORD PTR Zero ; word = FFFF where no flip bit is set
  263. pandn mm7, mm0 ; mm7 = coordinate mask where a flip bit IS set, 0 elsewhere
  264. pand mm1, mm0 ; apply the coordinate masks
  265. pxor mm1, mm7 ; invert the masked bits of the coordinates whose flip bit was set
  266. pmaddwd mm1, mm5 ; low dword = word0 * multiplier0 + word1 * multiplier1 = texel index
  267. mov edi, [esi+SPANTEX_pBits]
  268. movd eax, mm1
  269. movzx edx, word ptr [edi+2*eax] ; fetch the 16-bit texel (2 bytes per texel)
  270. mov edi, [ebp+RASTSPAN_pSurface]
  271. mov [edi], dx ; write the texel to the surface unchanged (no format conversion)
  272. FailLabel:
  273. dec uPix
  274. jle ExitPixelLoop ; span finished once the remaining count reaches 0 or below
  275. movq mm1, [ebp+RASTSPAN_uB] ; advance the four 16-bit values starting at uB ...
  276. paddw mm1, [ecx+RASTPRIM_iDBDX] ; ... by the four 16-bit deltas starting at iDBDX
  277. movq [ebp+RASTSPAN_uB], mm1
  278. movq mm5, [ebp+RASTSPAN_iUoW1] ; advance the U/W, V/W dword pair; mm5 is reused below to rebuild iU1/iV1
  279. paddd mm5, [ecx+RASTPRIM_iDUoW1DX]
  280. movq [ebp+RASTSPAN_iUoW1], mm5
  281. xor eax, eax
  282. mov ax, [ebp+RASTSPAN_iLOD] ; iLOD += iDLOD (16-bit)
  283. add ax, [ebp+RASTSPAN_iDLOD]
  284. mov [ebp+RASTSPAN_iLOD], ax
  285. mov eax, [ebp+RASTSPAN_iOoW] ; iOoW += iDOoWDX; eax keeps the new iOoW for the multiplies below
  286. add eax, [ecx+RASTPRIM_iDOoWDX]
  287. mov [ebp+RASTSPAN_iOoW], eax
  288. mov edx, [ebp+RASTSPAN_iW]
  289. mov LastW, edx ; Save iW to calc iDW for next time.
  290. add edx, [ebx+RASTCTX_SI+SPANITER_iDW] ; first guess for the new W: previous W plus previous delta
  291. xor edi, edi
  292. cmp di, word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW]
  293. jle DontDoSpecialW1 ; iSpecialW >= 0: take the single-step path
  294. cmp edx, edi
  295. jl WOutOfRange1 ; guess is negative: enter the loop with it as-is
  296. mov edx, LastW ; otherwise restart from half of the previous W
  297. sar edx, 1
  298. WOutOfRange1:
  299. mov GiveUp, 8 ; Pre decrementing instead of post decrementing. (So at most 7 refinement passes run.)
  300. SpecW1Loop1:
  301. dec GiveUp
  302. jz ExitSpecWLoop1 ; pass budget exhausted: keep the current edx
  303. mov esi, (1 SHL 16)
  304. mov edi, edx ; edi = current W guess
  305. imul edx ; signed edx:eax = iOoW * guess
  306. sub esi, edx ; esi = (1 SHL 16) - high dword of the product
  307. SpecW1Loop2:
  308. test esi, esi
  309. jns SpecW1ExitLoop2 ; This jump should be predicted correctly most of the time.
  310. add esi, (1 SHL 15) ; while esi is negative: esi = (esi + 2^15) >> 1
  311. sar esi, 1
  312. jmp SpecW1Loop2
  313. SpecW1ExitLoop2:
  314. mov eax, edi
  315. shl eax, 5 ; 1.15.16 << 5 = 1.10.21 TBD Can I shift off upper bits??
  316. shl esi, 12 ; 4.15 << 12 = 4.27 ;
  317. mul esi ; unsigned; edx (high dword) = next W candidate
  318. sub edi, edx ; edi = previous guess - candidate
  319. mov eax, edi ; next four instructions: edi = abs(edi)
  320. sar eax, 31
  321. xor edi, eax
  322. sub edi, eax
  323. cmp edi, 020h ;Assuming that loop will only happen once.
  324. jbe ExitSpecWLoop1 ; change is at most 20h: accept the candidate in edx
  325. mov eax, [ebp+RASTSPAN_iOoW] ; eax was clobbered above; reload iOoW for the next pass
  326. jmp SpecW1Loop1
  327. DontDoSpecialW1: ; single refinement step from the linear guess in edx (eax still holds iOoW)
  328. mov esi, (1 SHL 16)
  329. mov edi, edx ; edi = W guess
  330. mul edx ; unsigned edx:eax = iOoW * guess
  331. sub esi, edx
  332. shl esi, 15
  333. mov eax, esi
  334. mul edi ; 0.2.30 * 1.15.16 = 1.17.46 >> 32 = 1.17.14
  335. shl edx, 2 ; 1.17.14 << 2 = 1.15.16
  336. ExitSpecWLoop1: ; edx = new W on every path that reaches this label
  337. mov [ebp+RASTSPAN_iW], edx
  338. mov esi, edx ; Save W for multiplying by UoW and VoW
  339. sub edx, LastW
  340. mov [ebx+RASTCTX_SI+SPANITER_iDW], edx ; iDW = new W - previous W, used for the next pixel's guess
  341. inc word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW] ; counts up once per pixel
  342. pslld mm5, 8 ; same U/V reconstruction as in the span setup, using the advanced U/W, V/W pair
  343. shl esi, 4
  344. movd eax, mm5
  345. psrlq mm5, 32
  346. imul esi
  347. mov [ebx+RASTCTX_SI+SPANITER_iU1], edx
  348. movd eax, mm5
  349. imul esi
  350. mov [ebx+RASTCTX_SI+SPANITER_iV1], edx
  351. mov eax, dword ptr [ebp+RASTSPAN_pZ] ; step the Z-buffer and surface pointers to the next pixel
  352. mov edx, dword ptr [ebp+RASTSPAN_pSurface]
  353. add eax, iZStep
  354. add edx, iSurfaceStep
  355. mov dword ptr [ebp+RASTSPAN_pZ], eax
  356. mov dword ptr [ebp+RASTSPAN_pSurface], edx
  357. jmp PixelLoop
  358. ExitPixelLoop:
  359. ; Loop code ends
  360. ;-----------------------------------------------------------------------------
  361. ; LoopAny code ends here
  362. ;-----------------------------------------------------------------------------
  363. ;pS++;
  364. add ebp, SIZEOF_RASTSPAN
  365. ;}
  366. jmp SpanLoop
  367. ExitSpanLoop:
  368. ;pP = pP->pNext;
  369. mov ecx, [ecx+RASTPRIM_pNext]
  370. ;}
  371. jmp PrimLoop
  372. ExitPrimLoop:
  373. ;_asm{
  374. emms ; leave MMX state before returning to the caller
  375. ;}
  376. ;return S_OK;
  377. xor eax, eax
  378. ;}
  379. pop edi
  380. pop esi
  381. pop ebx
  382. mov esp, StackPos ; discard the local area by restoring the esp saved in the prologue
  383. pop ebp
  384. ret
  385. END