Leaked source code of windows server 2003

451 lines
11 KiB

  ;-----------------------------------------------------------------------------
;
; Monolith 24. Perspective Correct Nearest texturing
; NO Zbuffer X888
;
; Exactly the same as monolith 3 except color input is 32 bits and
; output is 32 bits and no Z buffer code
;
; Syntax: MASM (Intel operand order), 32-bit flat model.
;
;-----------------------------------------------------------------------------
INCLUDE iammx.inc
INCLUDE offs_acp.inc

; Names are read LSB to MSB, so B5G6R5 means five bits of blue starting
; at the LSB, then six bits of green, then five bits of red.

;TBD check to see if this value is correct.
COLOR_SHIFT             equ     8

.586
.model flat

.data

;-----------------------------------------------------------------------------
; Symbols defined in other modules.  Each symbol is declared exactly once
; (the original listing declared Zero and SetAlphato0xff twice).
;-----------------------------------------------------------------------------

; Texture addressing / perspective iteration
EXTERN IncHighandLow16:MMWORD
EXTERN UFracVFracMask:MMWORD
EXTERN UV32to15Mask:MMWORD
EXTERN Makelow16one:MMWORD
EXTERN MaskKeepUValues:MMWORD
EXTERN MaskKeepVValues:MMWORD
EXTERN UFrac:MMWORD
EXTERN VFrac:MMWORD
EXTERN Zero:MMWORD
EXTERN memD3DTFG_POINT:MMWORD
EXTERN GiveUp:MMWORD
EXTERN LastW:MMWORD
EXTERN Val0x000a000a:MMWORD
EXTERN Val0xffff:MMWORD
EXTERN Val0x0000002000000020:MMWORD
EXTERN Val0x0000ffff0000ffff:MMWORD

; Data defined by this module.  Kept ahead of the remaining EXTERNs, exactly
; where the original placed it, so the .data layout is unchanged.
opt_MaskRed565to888     MMWORD  000000000000F800H

; Texel format expansion masks
EXTERN MaskRed565to888:MMWORD
EXTERN MaskGreen565to888:MMWORD
EXTERN MaskBlue565to888:MMWORD
EXTERN MaskRed555to888:MMWORD
EXTERN MaskGreen555to888:MMWORD
EXTERN MaskBlue555to888:MMWORD
EXTERN MaskAlpha1555to8888:MMWORD
EXTERN MaskRed1555to8888:MMWORD
EXTERN MaskGreen1555to8888:MMWORD
EXTERN MaskBlue1555to8888:MMWORD
EXTERN SetAlphato0xffff:MMWORD
EXTERN SetAlphato0xff:MMWORD

; Shift counts that accompany the expansion masks above
RedShift565to888        equ     8
GreenShift565to888      equ     5
BlueShift565to888       equ     3
RedShift555to888        equ     9
GreenShift555to888      equ     6
BlueShift555to888       equ     3
AlphaShift1555to8888    equ     16
RedShift1555to8888      equ     9
GreenShift1555to8888    equ     6
BlueShift1555to8888     equ     3

; Miscellaneous constants
EXTERN DW_One_One:MMWORD
EXTERN MaskOffAlpha:MMWORD
EXTERN ShiftTA:MMWORD
EXTERN Val0x00ff00ff00ff00ff:MMWORD
EXTERN Val0x000000ff00ff00ff:MMWORD
EXTERN Val0X0000000001000000:MMWORD
EXTERN AlphaVal128:MMWORD
EXTERN RGBVal128:MMWORD
EXTERN g_uDitherValue:MMWORD

; 888 -> 565 / 555 conversion constants
EXTERN u888to565RedBlueMask:MMWORD
EXTERN u888to565GreenMask:MMWORD
EXTERN u888to565Multiplier:MMWORD
EXTERN uVal0x000007ff03ff07ff:MMWORD
EXTERN uVal0x0000078003c00780:MMWORD
EXTERN u888to555RedBlueMask:MMWORD
EXTERN u888to555GreenMask:MMWORD
EXTERN u888to555Multiplier:MMWORD
EXTERN uVal0x000007ff07ff07ff:MMWORD
EXTERN uVal0x0000078007800780:MMWORD

;-----------------------------------------------------------------------------
; Span Variables
; NOTE: these are static storage, not stack locals, so code that uses them
; is not reentrant.
StackPos                dd      ?       ; esp as saved by the routine prologue
uSpans                  dd      ?       ; spans left in the current primitive
;-----------------------------------------------------------------------------
; Loop Variables
iSurfaceStep            dd      ?       ; signed per-pixel step for pSurface
uPix                    dd      ?       ; pixels left in the current span
;-----------------------------------------------------------------------------
  .code

;-----------------------------------------------------------------------------
; _MMXMLRast_24
;
; Monolithic span rasterizer: perspective-correct, nearest (point sampled)
; texturing, 32-bit texels in, 32-bit X888 pixels out, no Z buffer.
; Walks every primitive in the pCtx->pPrim list, every span that follows
; each primitive header, and every pixel of each span.
;
; In:    first stack argument = pCtx (read from [eax+8] after ebp is pushed)
; Out:   eax = 0 (S_OK)
; Saves: ebp, ebx, esi, edi.  Returns with a plain RET (the argument is left
;        on the stack for the caller).  EMMS is executed before returning.
;
; Register roles for the whole routine:
;   ebx = pCtx
;   ecx = pP  (current primitive)
;   ebp = pS  (current span) -- ebp is NOT a frame pointer here
;   eax, edx, esi, edi and the MMX registers are scratch
;
; NOTE: esp is saved in the static StackPos, and uSpans, uPix, iSurfaceStep,
;       LastW and GiveUp are static storage too, so the routine is not
;       reentrant.
; NOTE: the 0Ch bytes reserved on the stack are never referenced below.
;-----------------------------------------------------------------------------
PUBLIC _MMXMLRast_24
_MMXMLRast_24:
        push    ebp
        mov     StackPos, esp           ; epilogue restores esp from here
        mov     eax, esp
        sub     esp, 0Ch                ; This will need to change if stack frame size changes.
        push    ebx
        push    esi
        push    edi

        ; Put pCtx into ebx
        mov     ebx, [eax+8]            ; [saved ebp][return address][pCtx]

        ;PD3DI_RASTPRIM pP = pCtx->pPrim;
        mov     ecx, [ebx+RASTCTX_pPrim]

        ;while (pP)
        ;{
PrimLoop:
        test    ecx, ecx
        jz      ExitPrimLoop

        ;UINT16 uSpans = pP->uSpans;
        movzx   eax, word ptr [ecx+RASTPRIM_uSpans]
        mov     uSpans, eax

        ;PD3DI_RASTSPAN pS = (PD3DI_RASTSPAN)(pP + 1);
        lea     ebp, [ecx+SIZEOF_RASTPRIM]

        ;while (uSpans-- > 0)
        ;{
SpanLoop:
        mov     edx, uSpans             ; edx = value before the decrement
        mov     eax, edx
        dec     eax
        mov     uSpans, eax
        test    edx, edx
        jle     ExitSpanLoop

        ;pCtx->pfnBegin(pCtx, pP, pS);
;-----------------------------------------------------------------------------
; LoopAny code inserted here. This is to get rid of an extra
; jump.
;-----------------------------------------------------------------------------
        ; Setup Code begins
        ; get values to iterate

        ;uPix = pS->uPix;
        movzx   eax, word ptr [ebp+RASTSPAN_uPix]
        mov     uPix, eax

        ;pCtx->SI.iDW = 0x0;
        mov     dword ptr [ebx+RASTCTX_SI+SPANITER_iDW], 0

        mov     esi, [ebp+RASTSPAN_iW]
        movq    mm5, MMWORD PTR [ebp+RASTSPAN_iUoW1] ; two dwords: iUoW1 low, next field high

        ;pCtx->SI.iUd_TexNum = d_WTimesUVoW(pS->iW,pS->iUoW1);
        ;pCtx->SI.iVd_TexNum = d_WTimesUVoW(pS->iW,pS->iVoW1);
        ; Both products keep only the high dword of the 64-bit signed result.
        pslld   mm5, 8
        shl     esi, 4
        movd    eax, mm5                ; low dword  (U over W)
        psrlq   mm5, 32
        imul    esi
        mov     [ebx+RASTCTX_SI+SPANITER_iU1], edx
        movd    eax, mm5                ; high dword (V over W)
        imul    esi
        mov     [ebx+RASTCTX_SI+SPANITER_iV1], edx

        ; NOTE(review): as assembled, the "-3" arm below runs when
        ; iDOoWDX <= 0 and the "0x7fff - uPix" arm when iDOoWDX > 0, which is
        ; the opposite of the pseudo-C quoted here.  Behaviour is left
        ; unchanged -- confirm which one is intended.
        ;if (pP->iDOoWDX > 0)
        ;{
        cmp     dword ptr [ecx+RASTPRIM_iDOoWDX], 0
        jg      SpecialWLastMonTest
        ;// iSpecialW should be negative for the first 3 pixels of span
        ;pCtx->SI.iSpecialW = -3;
        mov     word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW], -3
        jmp     DoneSpecialWifMonTest
        ;}
        ;else
        ;{
SpecialWLastMonTest:
        ;// iSpecialW should be negative for the last 3 pixels of span
        ;pCtx->SI.iSpecialW = 0x7fff - uPix;
        mov     eax, 07fffh
        sub     eax, uPix
        ;pCtx->SI.iSpecialW += 5; // this may wrap, but it should
        add     eax, 5
        ; iSpecialW is accessed as a 16-bit field everywhere else in this
        ; routine (word store above, word compare and word inc in the pixel
        ; loop), so store only ax.  The original stored all of eax, which
        ; also overwrote the two bytes following the field.
        mov     word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW], ax
        ;}
DoneSpecialWifMonTest:

        ;if (pP->uFlags & D3DI_RASTPRIM_X_DEC)
        ;{
        test    dword ptr [ecx+RASTPRIM_uFlags], D3DI_RASTPRIM_X_DEC
        jz      LeftToRightSpan
        ;iSurfaceStep = -pCtx->iSurfaceStep;
        mov     eax, [ebx+RASTCTX_iSurfaceStep]
        neg     eax
        mov     iSurfaceStep, eax
        ;}
        jmp     DoneSpanDirif
        ;else
        ;{
LeftToRightSpan:
        ;iSurfaceStep = pCtx->iSurfaceStep;
        mov     eax, [ebx+RASTCTX_iSurfaceStep]
        mov     iSurfaceStep, eax
        ;}
DoneSpanDirif:
        ; Setup Code Ends
; ----------------------------------------------------------------------------------------------------------------
        ; Loop Code Begins
PixelLoop:
        ; texturecode
        mov     esi, [ebx+RASTCTX_pTexture]

        ; Per-axis shift counts: (10 - shift) computed per 16-bit word from
        ; the dword at SPANTEX_iShiftU.  Low word -> mm5 (applied to U),
        ; next word -> mm4 (applied to V).
        movq    mm5, MMWORD PTR Val0x000a000a ; This is TEX_FINAL_SHIFT - 6 = 10.
        movd    mm4, [esi+SPANTEX_iShiftU]
        psubw   mm5, mm4
        movq    mm4, mm5
        pand    mm5, MMWORD PTR Val0xffff
        psrld   mm4, 16
        movd    mm1, [ebx+RASTCTX_SI+SPANITER_iU1]
        psrad   mm1, mm5
        movd    mm2, [ebx+RASTCTX_SI+SPANITER_iV1]
        psrad   mm2, mm4
        punpckldq mm1, mm2              ; mm1 = V:U as two dwords

        ; Build the pmaddwd multiplier:
        ;   (Makelow16one << (iShiftPitch + 16)) | Makelow16one
        movzx   edx, word ptr [esi+SPANTEX_iShiftPitch]
        add     edx, 16
        movd    mm2, edx
        movq    mm5, MMWORD ptr Makelow16one
        pslld   mm5, mm2
        por     mm5, MMWORD ptr Makelow16one

        psrad   mm1, 6
        packssdw mm1, mm1               ; Value needs to be packed since all wrap/mirror
        movd    mm0, [esi+SPANTEX_uMaskU] ; Load U and V mask

        ; Where (coord & flip mask) is non-zero, XOR the masked coordinate
        ; with the address mask; elsewhere only the mask is applied.
        movq    mm7, mm1
        movd    mm4, [esi+SPANTEX_iFlipMaskU]
        pand    mm7, mm4
        pcmpeqw mm7, MMWORD PTR Zero
        pandn   mm7, mm0
        pand    mm1, mm0
        pxor    mm1, mm7

        pmaddwd mm1, mm5                ; combine the two coordinates into a texel index
        mov     edi, [esi+SPANTEX_pBits]
        movd    eax, mm1
        mov     edx, dword ptr [edi+4*eax] ; fetch 32-bit texel

        ; Write the pixel with the top byte cleared (X888).
        mov     edi, [ebp+RASTSPAN_pSurface]
        and     edx, 000ffffffh
        mov     [edi], edx

        dec     uPix
        jle     ExitPixelLoop

        ; Step the span attributes by the primitive's per-pixel deltas.
        movq    mm1, [ebp+RASTSPAN_uB]
        paddw   mm1, [ecx+RASTPRIM_iDBDX]
        movq    [ebp+RASTSPAN_uB], mm1
        movq    mm5, [ebp+RASTSPAN_iUoW1]
        paddd   mm5, [ecx+RASTPRIM_iDUoW1DX]
        movq    [ebp+RASTSPAN_iUoW1], mm5 ; mm5 stays live until iU1/iV1 below
        xor     eax, eax
        mov     ax, [ebp+RASTSPAN_iLOD]
        add     ax, [ebp+RASTSPAN_iDLOD]
        mov     [ebp+RASTSPAN_iLOD], ax
        mov     eax, [ebp+RASTSPAN_iOoW]
        add     eax, [ecx+RASTPRIM_iDOoWDX]
        mov     [ebp+RASTSPAN_iOoW], eax ; eax = stepped iOoW, used by both paths below

        ; First guess for the new W: previous W plus the previous delta.
        mov     edx, [ebp+RASTSPAN_iW]
        mov     LastW, edx              ; Save iW to calc iDW for next time.
        add     edx, [ebx+RASTCTX_SI+SPANITER_iDW]

        ; iSpecialW >= 0 -> single-step path; negative -> iterative path.
        xor     edi, edi
        cmp     di, word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW]
        jle     DontDoSpecialW1

        ; Iterative path.  A non-negative guess is replaced by LastW / 2;
        ; a negative guess is used as it is.
        cmp     edx, edi
        jl      WOutOfRange1
        mov     edx, LastW
        sar     edx, 1
WOutOfRange1:
        mov     GiveUp, 8               ; Pre decrementing instead of post decrementing.
SpecW1Loop1:
        dec     GiveUp
        jz      ExitSpecWLoop1          ; iteration budget spent: keep edx as W
        mov     esi, (1 SHL 16)
        mov     edi, edx                ; edi = current W estimate
        imul    edx                     ; edx = high dword of iOoW * W
        sub     esi, edx
SpecW1Loop2:
        test    esi, esi
        jns     SpecW1ExitLoop2         ; This jump should be predicted correctly most of the time.
        add     esi, (1 SHL 15)
        sar     esi, 1
        jmp     SpecW1Loop2
SpecW1ExitLoop2:
        mov     eax, edi
        shl     eax, 5                  ; 1.15.16 << 5 = 1.10.21 TBD Can I shift off upper bits??
        shl     esi, 12                 ; 4.15 << 12 = 4.27 ;
        mul     esi                     ; edx = refined W estimate

        ; Converged when |old estimate - new estimate| <= 20h.
        sub     edi, edx
        mov     eax, edi
        sar     eax, 31
        xor     edi, eax
        sub     edi, eax                ; edi = abs(edi)
        cmp     edi, 020h               ;Assuming that loop will only happen once.
        jbe     ExitSpecWLoop1
        mov     eax, [ebp+RASTSPAN_iOoW] ; reload multiplicand for the next pass
        jmp     SpecW1Loop1

DontDoSpecialW1:
        ; Single refinement step from the extrapolated guess in edx.
        mov     esi, (1 SHL 16)
        mov     edi, edx
        mul     edx                     ; edx = high dword of iOoW * W (unsigned)
        sub     esi, edx
        shl     esi, 15
        mov     eax, esi
        mul     edi                     ; 0.2.30 * 1.15.16 = 1.17.46 >> 32 = 1.17.14
        shl     edx, 2                  ; 1.17.14 << 2 = 1.15.16

ExitSpecWLoop1:
        mov     [ebp+RASTSPAN_iW], edx
        mov     esi, edx                ; Save W for multiplying by UoW and VoW
        sub     edx, LastW
        mov     [ebx+RASTCTX_SI+SPANITER_iDW], edx
        inc     word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW]

        ; Same W * (U/W, V/W) products as in the span setup, using the
        ; stepped values still held in mm5.
        pslld   mm5, 8
        shl     esi, 4
        movd    eax, mm5
        psrlq   mm5, 32
        imul    esi
        mov     [ebx+RASTCTX_SI+SPANITER_iU1], edx
        movd    eax, mm5
        imul    esi
        mov     [ebx+RASTCTX_SI+SPANITER_iV1], edx

        ; Advance the destination pointer by the signed surface step.
        mov     edx, dword ptr [ebp+RASTSPAN_pSurface]
        add     edx, iSurfaceStep
        mov     dword ptr [ebp+RASTSPAN_pSurface], edx
        jmp     PixelLoop
ExitPixelLoop:
        ; Loop code ends
;-----------------------------------------------------------------------------
; LoopAny code ends here
;-----------------------------------------------------------------------------
        ;pS++;
        add     ebp, SIZEOF_RASTSPAN
        ;}
        jmp     SpanLoop
ExitSpanLoop:
        ;pP = pP->pNext;
        mov     ecx, [ecx+RASTPRIM_pNext]
        ;}
        jmp     PrimLoop
ExitPrimLoop:
        ;_asm{
        emms
        ;}
        ;return S_OK;
        xor     eax, eax
        ;}
        pop     edi
        pop     esi
        pop     ebx
        mov     esp, StackPos           ; discards the 0Ch bytes reserved in the prologue
        pop     ebp
        ret
END