Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

408 lines
13 KiB

  1. ;-----------------------------------------------------------------------------
  2. ;
  3. ; Monolith 9. Non-perspective 16 bit NO Z buffered 565
  4. ; Exactly the same as monolith 2 except Z buffer code removed.
  5. ;
  6. ;-----------------------------------------------------------------------------
  7. INCLUDE iammx.inc
  8. INCLUDE offs_acp.inc
  9. ; Names are read LSB to MSB, so B5G6R5 means five bits of blue starting
  10. ; at the LSB, then six bits of green, then five bits of red.
  11. ;TBD check to see if this value is correct.
  12. COLOR_SHIFT equ 8 ; NOTE(review): not referenced by any code visible in this file
  13. .586 ; accept Pentium-class instructions
  14. .model flat ; 32-bit flat memory model
  15. ; Big separating lines separate code into span code
  16. ; and loop code. If span and loop are not going to
  17. ; end up being combined then it will be easy to
  18. ; separate the code.
  19. .data
  20. ; Need externs for all of the variables that are needed for various beads
  21. EXTERN IncHighandLow16:MMWORD
  22. EXTERN UFracVFracMask:MMWORD
  23. EXTERN UV32to15Mask:MMWORD
  24. EXTERN Makelow16one:MMWORD
  25. EXTERN MaskKeepUValues:MMWORD
  26. EXTERN MaskKeepVValues:MMWORD
  27. EXTERN UFrac:MMWORD
  28. EXTERN VFrac:MMWORD
  29. EXTERN Zero:MMWORD
  30. EXTERN memD3DTFG_POINT:MMWORD
  31. EXTERN GiveUp:MMWORD
  32. EXTERN LastW:MMWORD
  33. EXTERN Val0x000a000a:MMWORD
  34. EXTERN Val0xffff:MMWORD
  35. EXTERN Val0x0000002000000020:MMWORD
  36. EXTERN Val0x0000ffff0000ffff:MMWORD
  37. EXTERN MaskRed565to888:MMWORD
  38. EXTERN MaskGreen565to888:MMWORD
  39. EXTERN MaskBlue565to888:MMWORD
  40. EXTERN MaskRed555to888:MMWORD
  41. EXTERN MaskGreen555to888:MMWORD
  42. EXTERN MaskBlue555to888:MMWORD
  43. EXTERN MaskAlpha1555to8888:MMWORD
  44. EXTERN MaskRed1555to8888:MMWORD
  45. EXTERN MaskGreen1555to8888:MMWORD
  46. EXTERN MaskBlue1555to8888:MMWORD
  47. ; TBD. I think that I want to do 0xffff instead of 0xff. This will
  48. ; have to be checked. There is a value very similar to this in
  49. ; buf write.
  50. EXTERN SetAlphato0xffff:MMWORD
  51. EXTERN SetAlphato0xff:MMWORD
  52. ; TODO These equates are identical to the ones in texread.mas. Maybe they should be in a common .inc file.
  53. RedShift565to888 equ 8 ; 565 -> 888 group: presumably per-channel bit shift counts (TODO confirm use); unused in this file
  54. GreenShift565to888 equ 5
  55. BlueShift565to888 equ 3
  56. RedShift555to888 equ 9 ; 555 -> 888 group: presumably per-channel bit shift counts (TODO confirm use); unused in this file
  57. GreenShift555to888 equ 6
  58. BlueShift555to888 equ 3
  59. AlphaShift1555to8888 equ 16 ; 1555 -> 8888 group: presumably per-channel bit shift counts (TODO confirm use); unused in this file
  60. RedShift1555to8888 equ 9
  61. GreenShift1555to8888 equ 6
  62. BlueShift1555to8888 equ 3
  63. EXTERN Zero:MMWORD
  64. EXTERN DW_One_One:MMWORD
  65. EXTERN MaskOffAlpha:MMWORD
  66. EXTERN ShiftTA:MMWORD
  67. EXTERN Val0x00ff00ff00ff00ff:MMWORD
  68. EXTERN Val0x000000ff00ff00ff:MMWORD
  69. EXTERN Val0X0000000001000000:MMWORD
  70. EXTERN AlphaVal128:MMWORD
  71. EXTERN RGBVal128:MMWORD
  72. EXTERN g_uDitherValue:MMWORD
  73. EXTERN SetAlphato0xff:MMWORD
  74. EXTERN u888to565RedBlueMask:MMWORD
  75. EXTERN u888to565GreenMask:MMWORD
  76. EXTERN u888to565Multiplier:MMWORD
  77. EXTERN uVal0x000007ff03ff07ff:MMWORD
  78. EXTERN uVal0x0000078003c00780:MMWORD
  79. EXTERN u888to555RedBlueMask:MMWORD
  80. EXTERN u888to555GreenMask:MMWORD
  81. EXTERN u888to555Multiplier:MMWORD
  82. EXTERN uVal0x000007ff07ff07ff:MMWORD
  83. EXTERN uVal0x0000078007800780:MMWORD
  ;-----------------------------------------------------------------------------
  ; Scratch state private to _MMXMLRast_9.
  ;
  ; These live in .data (static storage), so every invocation shares them.
  ; NOTE(review): this makes the routine non-reentrant - confirm that callers
  ; never run it concurrently.
  ;-----------------------------------------------------------------------------
  ; Span variables
  uMaskU          dq      ?       ; low dword = dword read from SPANTEX_uMaskU
                                  ; (U mask and V mask words), high dword = 0
  uSpans          dd      ?       ; spans still to draw for the current primitive
  pBits           dd      ?       ; texel base address read from SPANTEX_pBits
  ; Loop variables
  iSurfaceStep    dd      ?       ; signed byte step between destination pixels
  uPix            dd      ?       ; pixels still to draw in the current span

  .code

  PUBLIC _MMXMLRast_9

  ; Offset of the pCtx argument from esp once the four callee-saved registers
  ; have been pushed: 4 pushes * 4 bytes + 4 bytes of return address.
  ARG_pCtx        equ     14h

  ;-----------------------------------------------------------------------------
  ; _MMXMLRast_9
  ;
  ; Monolith 9: non-perspective, 16-bit destination, no Z buffer.
  ; Walks the primitive list hanging off the raster context and, for every
  ; pixel of every span, copies one 16-bit texel (point sampled, wrap
  ; addressing only) to the destination surface. No colour conversion,
  ; blending or depth work is done.
  ;
  ; C-style outline:
  ;     for (pP = pCtx->pPrim; pP; pP = pP->pNext)
  ;         for (pS = (PD3DI_RASTSPAN)(pP + 1), n = pP->uSpans; n; n--, pS++)
  ;             do { *pSurface = texel(iU, iV); } while (--uPix > 0);
  ;     return S_OK;
  ;
  ; In:     [esp+4] = pCtx (at entry)
  ; Out:    eax     = 0 (S_OK)
  ; Stack:  plain ret - the caller removes the argument.
  ; Saved:  ebx, esi, edi, ebp
  ; Clobb:  eax, ecx, edx, flags, mm0-mm7 (emms is issued before returning)
  ;
  ; Memory side effects (besides the destination pixels):
  ;   - pS->iUoW1 (8 bytes) is advanced in place by pP->iDUoW1DX for each pixel
  ;     after the first.
  ;   - pCtx->SI.iU1 (8 bytes), pCtx->SI.iDW and pCtx->SI.iSpecialW are written.
  ;
  ; Register roles inside the loops:
  ;   ebx = pCtx          ecx = pP (current primitive)
  ;   ebp = pS (span)     esi = pCtx->pTexture (span setup only)
  ;   edi = destination pixel pointer
  ;   mm6 = right-shift count for U     mm7 = right-shift count for V
  ;   mm3 = word multipliers that turn packed (U, V) into a texel index
  ;
  ; NOTE(review): the pixel store happens before the pixel counter is tested,
  ; so a span whose uPix is 0 still writes one pixel. The original source
  ; flagged this as a possible bug; the behaviour is kept unchanged here.
  ;-----------------------------------------------------------------------------
  _MMXMLRast_9:
          push    ebp
          push    ebx
          push    esi
          push    edi

          mov     ebx, [esp+ARG_pCtx]             ; ebx = pCtx
          mov     ecx, [ebx+RASTCTX_pPrim]        ; pP = pCtx->pPrim

  ;-----------------------------------------------------------------------------
  ; while (pP)
  ;-----------------------------------------------------------------------------
  PrimLoop:
          test    ecx, ecx
          jz      ExitPrimLoop

          movzx   eax, word ptr [ecx+RASTPRIM_uSpans]
          mov     uSpans, eax                     ; uSpans = pP->uSpans
          lea     ebp, [ecx+SIZEOF_RASTPRIM]      ; pS = (PD3DI_RASTSPAN)(pP + 1)

  ;-----------------------------------------------------------------------------
  ; for each span of this primitive
  ;-----------------------------------------------------------------------------
  SpanLoop:
          ; uSpans was zero-extended from 16 bits, so a borrow here means the
          ; count was already 0 and every span has been drawn.
          sub     uSpans, 1
          jb      ExitSpanLoop

          ; ---- Span setup: values to iterate --------------------------------
          movzx   eax, word ptr [ebp+RASTSPAN_uPix]
          mov     uPix, eax

          ; Non-perspective: iU/iV are simply iUoW/iVoW scaled down.
          movq    mm5, [ebp+RASTSPAN_iUoW1]
          psrad   mm5, TEX_TO_FINAL_SHIFT
          movq    [ebx+RASTCTX_SI+SPANITER_iU1], mm5
          mov     dword ptr [ebx+RASTCTX_SI+SPANITER_iDW], 0
          mov     word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW], 0

          ; Destination step: negated when the span runs in decreasing X.
          mov     eax, [ebx+RASTCTX_iSurfaceStep]
          test    dword ptr [ecx+RASTPRIM_uFlags], D3DI_RASTPRIM_X_DEC
          jz      StoreSurfaceStep
          neg     eax
  StoreSurfaceStep:
          mov     iSurfaceStep, eax

          ; ---- Span setup: texture constants --------------------------------
          mov     esi, [ebx+RASTCTX_pTexture]
          movd    mm0, dword ptr [esi+SPANTEX_uMaskU]     ; U and V masks
          movq    MMWORD PTR uMaskU, mm0
          mov     edx, [esi+SPANTEX_pBits]
          mov     pBits, edx
          mov     edi, [ebp+RASTSPAN_pSurface]

          ; Shift counts for U and V. They depend only on the texture, so they
          ; are built once per span rather than once per pixel.
          ;   word0 = 10 - iShiftU, word1 = 10 - iShiftV
          ; where 10 is TEX_FINAL_SHIFT - 6 (the remaining 6 bits are shifted
          ; out in the pixel loop). The dword at SPANTEX_iShiftU is assumed to
          ; hold iShiftU in its low word and iShiftV in its high word, and the
          ; Val0x... constants are assumed to match their names - TODO confirm.
          movq    mm6, MMWORD PTR Val0x000a000a
          movd    mm4, dword ptr [esi+SPANTEX_iShiftU]
          psubw   mm6, mm4
          movq    mm7, mm6
          pand    mm6, MMWORD PTR Val0xffff       ; mm6 = U shift count
          psrld   mm7, 16                         ; mm7 = V shift count

          ; Address multiplier. The texture pitch is only available as a shift,
          ; so build, in each dword, low word = 1 and high word =
          ; 1 << iShiftPitch; pmaddwd then yields U + (V << iShiftPitch).
          ; Assumes Makelow16one holds 1 in each dword - TODO confirm.
          movzx   edx, word ptr [esi+SPANTEX_iShiftPitch]
          add     edx, 16
          movd    mm2, edx
          movq    mm3, MMWORD PTR Makelow16one
          pslld   mm3, mm2
          por     mm3, MMWORD PTR Makelow16one

  ;-----------------------------------------------------------------------------
  ; for each pixel of this span
  ;-----------------------------------------------------------------------------
  PixelLoop:
          ; Fetch the current iU / iV and scale them to texel coordinates.
          ; iV1 is read back from the 8 bytes stored at iU1 above, which
          ; assumes SPANITER_iV1 == SPANITER_iU1 + 4 - TODO confirm.
          movd    mm1, [ebx+RASTCTX_SI+SPANITER_iU1]
          psrad   mm1, mm6
          movd    mm2, [ebx+RASTCTX_SI+SPANITER_iV1]
          psrad   mm2, mm7
          punpckldq mm1, mm2                      ; mm1 = (iV : iU) as dwords

          psrad   mm1, 6                          ; iU00 >>= 6; iV00 >>= 6
          packssdw mm1, mm1                       ; words = U, V, U, V

          ; Wrap addressing only: the general wrap/mirror sequence is reduced
          ; to a single mask. Monolith cases assume iLOD0 == 0, so the masks
          ; are used unshifted.
          pand    mm1, MMWORD PTR uMaskU

          ; Texel index = U + (V << iShiftPitch); texels are 16 bits wide.
          pmaddwd mm1, mm3
          movd    eax, mm1
          shl     eax, 1
          add     eax, pBits

          ; Copy the texel straight to the destination surface.
          mov     dx, word ptr [eax]
          mov     [edi], dx

          ; Test the count before updating so no extra update is performed
          ; after the last pixel.
          dec     uPix
          jle     ExitPixelLoop

          ; Step iUoW/iVoW in the span itself and refresh the scaled copy.
          movq    mm5, [ebp+RASTSPAN_iUoW1]
          paddd   mm5, [ecx+RASTPRIM_iDUoW1DX]
          movq    [ebp+RASTSPAN_iUoW1], mm5
          psrad   mm5, TEX_TO_FINAL_SHIFT
          movq    [ebx+RASTCTX_SI+SPANITER_iU1], mm5

          add     edi, iSurfaceStep
          jmp     PixelLoop

  ExitPixelLoop:
          add     ebp, SIZEOF_RASTSPAN            ; pS++
          jmp     SpanLoop

  ExitSpanLoop:
          mov     ecx, [ecx+RASTPRIM_pNext]       ; pP = pP->pNext
          jmp     PrimLoop

  ExitPrimLoop:
          emms                                    ; leave MMX state before returning
          xor     eax, eax                        ; return S_OK
          pop     edi
          pop     esi
          pop     ebx
          pop     ebp
          ret

  END