Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

398 lines
11 KiB

;-----------------------------------------------------------------------------
;
; Monolith 16: non-perspective texturing, 16-bit Z buffered, X888 output.
;
; Exactly the same as monolith 2, except that the color input is 32 bits
; and the output is 32 bits.
;
;-----------------------------------------------------------------------------
  10. INCLUDE iammx.inc
  11. INCLUDE offs_acp.inc
  12. ; Names are read LSB to MSB, so B5G6R5 means five bits of blue starting
  13. ; at the LSB, then six bits of green, then five bits of red.
  14. ;TBD check to see if this value is correct.
  15. COLOR_SHIFT equ 8
  16. .586
  17. .model flat
  18. ; Big separating lines seperate code into span code
  19. ; and loop code. If span and loop are not going to
  20. ; end up being combined then it will be easy to
  21. ; seperate the code.
  22. .data
  23. ; Need externs for all of the variables that are needed for various beads
  24. EXTERN IncHighandLow16:MMWORD
  25. EXTERN UFracVFracMask:MMWORD
  26. EXTERN UV32to15Mask:MMWORD
  27. EXTERN Makelow16one:MMWORD
  28. EXTERN MaskKeepUValues:MMWORD
  29. EXTERN MaskKeepVValues:MMWORD
  30. EXTERN UFrac:MMWORD
  31. EXTERN VFrac:MMWORD
  32. EXTERN Zero:MMWORD
  33. EXTERN memD3DTFG_POINT:MMWORD
  34. EXTERN GiveUp:MMWORD
  35. EXTERN LastW:MMWORD
  36. EXTERN Val0x000a000a:MMWORD
  37. EXTERN Val0xffff:MMWORD
  38. EXTERN Val0x0000002000000020:MMWORD
  39. EXTERN Val0x0000ffff0000ffff:MMWORD
  40. EXTERN MaskRed565to888:MMWORD
  41. EXTERN MaskGreen565to888:MMWORD
  42. EXTERN MaskBlue565to888:MMWORD
  43. EXTERN MaskRed555to888:MMWORD
  44. EXTERN MaskGreen555to888:MMWORD
  45. EXTERN MaskBlue555to888:MMWORD
  46. EXTERN MaskAlpha1555to8888:MMWORD
  47. EXTERN MaskRed1555to8888:MMWORD
  48. EXTERN MaskGreen1555to8888:MMWORD
  49. EXTERN MaskBlue1555to8888:MMWORD
  50. ; TBD. I think that I want to do 0xffff instead of 0xff. This will
  51. ; have to be checked. There is a value very similiar to this in
  52. ; buf write.
  53. EXTERN SetAlphato0xffff:MMWORD
  54. EXTERN SetAlphato0xff:MMWORD
  55. ; TODO This equate are identical to the ones in texread.mas. Maybe they should be in a common .inc file.
  56. RedShift565to888 equ 8
  57. GreenShift565to888 equ 5
  58. BlueShift565to888 equ 3
  59. RedShift555to888 equ 9
  60. GreenShift555to888 equ 6
  61. BlueShift555to888 equ 3
  62. AlphaShift1555to8888 equ 16
  63. RedShift1555to8888 equ 9
  64. GreenShift1555to8888 equ 6
  65. BlueShift1555to8888 equ 3
  66. EXTERN Zero:MMWORD
  67. EXTERN DW_One_One:MMWORD
  68. EXTERN MaskOffAlpha:MMWORD
  69. EXTERN ShiftTA:MMWORD
  70. EXTERN Val0x00ff00ff00ff00ff:MMWORD
  71. EXTERN Val0x000000ff00ff00ff:MMWORD
  72. EXTERN Val0X0000000001000000:MMWORD
  73. EXTERN AlphaVal128:MMWORD
  74. EXTERN RGBVal128:MMWORD
  75. EXTERN g_uDitherValue:MMWORD
  76. EXTERN SetAlphato0xff:MMWORD
  77. EXTERN u888to565RedBlueMask:MMWORD
  78. EXTERN u888to565GreenMask:MMWORD
  79. EXTERN u888to565Multiplier:MMWORD
  80. EXTERN uVal0x000007ff03ff07ff:MMWORD
  81. EXTERN uVal0x0000078003c00780:MMWORD
  82. EXTERN u888to555RedBlueMask:MMWORD
  83. EXTERN u888to555GreenMask:MMWORD
  84. EXTERN u888to555Multiplier:MMWORD
  85. EXTERN uVal0x000007ff07ff07ff:MMWORD
  86. EXTERN uVal0x0000078007800780:MMWORD
  87. ;-----------------------------------------------------------------------------
  88. ; Span Variables
  89. uMaskU dq ?
  90. StackPos dd ?
  91. uSpans dd ?
  92. iShiftU dd ?
  93. iShiftPitch dd ?
  94. pBits dd ?
  95. ;-----------------------------------------------------------------------------
  96. ;-----------------------------------------------------------------------------
  97. ; Loop Variables
  98. iSurfaceStep dd ?
  99. iZStep dd ?
  100. uPix dd ?
  101. ;-----------------------------------------------------------------------------
  102. .code
  103. PUBLIC _MMXMLRast_16
  104. _MMXMLRast_16:
  105. push ebp
  106. mov StackPos, esp
  107. mov eax, esp
  108. sub esp, 0Ch ; This will need to change if stack frame size changes.
  109. push ebx
  110. push esi
  111. push edi
  112. ; Put pCtx into ebx
  113. mov ebx, [eax+8]
  114. ;PD3DI_RASTPRIM pP = pCtx->pPrim;
  115. mov ecx, [ebx+RASTCTX_pPrim]
  116. ;while (pP)
  117. ;{
  118. PrimLoop:
  119. cmp ecx, 0
  120. je ExitPrimLoop
  121. ;UINT16 uSpans = pP->uSpans;
  122. movzx eax, word ptr [ecx+RASTPRIM_uSpans]
  123. mov uSpans, eax
  124. ;PD3DI_RASTSPAN pS = (PD3DI_RASTSPAN)(pP + 1);
  125. mov ebp, ecx
  126. add ebp, SIZEOF_RASTPRIM
  127. SpanLoop:
  128. mov edx, uSpans
  129. mov eax, edx
  130. dec eax
  131. mov uSpans, eax
  132. test edx, edx
  133. jle ExitSpanLoop
  134. ;pCtx->pfnBegin(pCtx, pP, pS);
  135. ;-----------------------------------------------------------------------------
  136. ; LoopAny code inserted here. This is to get rid of an extra
  137. ; jump.
  138. ;-----------------------------------------------------------------------------
  139. ; Setup Code begins - get values to iterate
  140. movzx eax, word ptr [ebp+RASTSPAN_uPix]
  141. mov uPix, eax
  142. movq mm5, [ebp+RASTSPAN_iUoW1]
  143. ; non perspective correct.
  144. psrad mm5, TEX_TO_FINAL_SHIFT
  145. movq [ebx+RASTCTX_SI+SPANITER_iU1], mm5
  146. mov dword ptr [ebx+RASTCTX_SI+SPANITER_iDW], 0
  147. mov word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW], 0
  148. mov eax, [ecx+RASTPRIM_uFlags]
  149. and eax, D3DI_RASTPRIM_X_DEC
  150. test eax, eax
  151. jz LeftToRightSpan
  152. mov eax, [ebx+RASTCTX_iZStep]
  153. neg eax
  154. mov iZStep, eax
  155. mov eax, [ebx+RASTCTX_iSurfaceStep]
  156. neg eax
  157. mov iSurfaceStep, eax
  158. jmp DoneSpanDirif
  159. LeftToRightSpan:
  160. mov eax, [ebx+RASTCTX_iZStep]
  161. mov iZStep, eax
  162. mov eax, [ebx+RASTCTX_iSurfaceStep]
  163. mov iSurfaceStep, eax
  164. DoneSpanDirif:
  165. ;******************************************
  166. mov esi, [ebx+RASTCTX_pTexture]
  167. mov edx, [esi + SPANTEX_iShiftU]
  168. mov iShiftU, edx
  169. movzx edx, word ptr [esi + SPANTEX_iShiftPitch]
  170. mov iShiftPitch, edx
  171. movd mm0, dword ptr [esi+SPANTEX_uMaskU] ; Load U and V mask
  172. movq MMWORD PTR uMaskU, mm0
  173. mov edx, [esi+SPANTEX_pBits]
  174. mov pBits, edx
  175. mov edi, [ebp+RASTSPAN_pSurface]
  176. mov esi, [ebp+RASTSPAN_pZ]
  177. ;******************************************
  178. PixelLoop:
  179. ; Ztestcode
  180. ; edx is uZ
  181. ; eax is uZB
  182. ; 16 bit unsigned format
  183. ;UINT16 uZ = (UINT16)(pS->uZ>>15);
  184. ;UINT16 uZB = *((UINT16*)pS->pZ);
  185. mov edx, [ebp+RASTSPAN_uZ]
  186. movd mm4, edx
  187. shr edx, 15
  188. movzx eax, word ptr [esi]
  189. ;pS->uZ += pP->iDZDX;
  190. ;if ((pCtx->iZXorMask)^(uZ > uZB))
  191. sub eax, edx
  192. paddd mm4, [ecx+RASTPRIM_iDZDX]
  193. movd [ebp+RASTSPAN_uZ], mm4
  194. xor eax, [ebx+RASTCTX_iZXorMask]
  195. test eax, eax
  196. js FailLabel
  197. mov word ptr [esi], dx
  198. ; texturecode
  199. xor eax, eax
  200. ; Doing UV calculation a little more accurate
  201. ; Exactly like C code.
  202. ; I iU and iV to the right not by (TEX_FINAL_SHIFT - iShiftU0) but by
  203. ; (TEX_FINAL_SHIFT - iShiftU0 - 6). iShiftU0 = pTex->iShiftU - iLOD0
  204. ; (TEX_FINAL_SHIFT - (pTex->iShiftU - iLOD0))
  205. ; (TEX_FINAL_SHIFT + iLOD0 - pTex->iShiftU)
  206. ; COMMENT1**
  207. ; If textures have a max of 1024 then shiftU0 would be at most 10 which would
  208. ; make (TEXT_FINAL_SHIFT - iShiftU - 6) at most zero. This is why I choose 6
  209. ; It will also give bi-linear 6 bits of precision I think it was said that
  210. ; only five was needed.
  211. ;INT16 iShiftU0 = pTex->iShiftU - iLOD0;
  212. ;INT16 iShiftV0 = pTex->iShiftV - iLOD0;
  213. movq mm5, MMWORD PTR Val0x000a000a ; This is TEX_FINAL_SHIFT - 6 = 10.
  214. ;******************************************
  215. ;movd mm4, [esi+SPANTEX_iShiftU]
  216. movd mm4, iShiftU
  217. ;******************************************
  218. psubw mm5, mm4
  219. movq mm4, mm5
  220. pand mm5, MMWORD PTR Val0xffff
  221. psrld mm4, 16
  222. movd mm1, [ebx+RASTCTX_SI+SPANITER_iU1]
  223. psrad mm1, mm5
  224. movd mm2, [ebx+RASTCTX_SI+SPANITER_iV1]
  225. psrad mm2, mm4
  226. punpckldq mm1, mm2
  227. ; Texture Pitch cannot be calculated so it must be looked up in the iShiftPitch table
  228. ; ----------------- Start of hack
  229. ; ATTENTION This is really hacked right now. Just to get it working
  230. ; Pitch would be better for me, instead of pitch.
  231. ; With actual pitch, this would be two moves and a .
  232. ;******************************************
  233. ;movzx edx, word ptr [esi+SPANTEX_iShiftPitch]
  234. mov edx, iShiftPitch
  235. ;******************************************
  236. add edx, 16
  237. movd mm2, edx
  238. movq mm5, MMWORD ptr Makelow16one
  239. pslld mm5, mm2
  240. ; ----------------- End of hack
  241. por mm5, MMWORD ptr Makelow16one
  242. ; Make the low 16 bits of dword one
  243. ; This helps in calculating texture address.
  244. ; Gets U and V value into mm1 so that it can be mirrored, wrapped or
  245. ; clamped. This can be done for two values in the point case
  246. ; or four values in the bilinear case.
  247. ;iU00 >>= 6;
  248. ;iV00 >>= 6;
  249. psrad mm1, 6
  250. packssdw mm1, mm1 ; Value needs to be packed since all wrap/mirror
  251. ; operations assume UV in low 32 bits.
  252. ;UINT16 uMaskU0 = pTex->uMaskU >> iLOD0; UINT16 uMaskV0 = pTex->uMaskV >> iLOD0;
  253. ; put mask in mm3 and replicate to match location for wrap/mirror/clamp
  254. pand mm1, MMWORD PTR uMaskU
  255. ; Making other two cases for texture addressing has to be simplier than
  256. ; this and not use so many registers. Puts U1 V0 U0 V1 into mm3.
  257. ; TBD Make this better.
  258. ; values are still stored as iV01, iU00, iV00, iU01
  259. movq mm4, mm1
  260. pmaddwd mm4, mm5 ; Throw in first address calculation.
  261. ; Just to get it started. Calculate
  262. ; iU0+iV1*iShiftU0 and iU1+iV0*iShiftU0
  263. ; iV0 iU1 address should be done by now.
  264. movd eax, mm4
  265. shl eax, 2
  266. ;add eax, [esi+SPANTEX_pBits]
  267. add eax, pBits
  268. ;pCtx->SI.TexCol[0] = pCtx->pfnTexRead(iU00, iV00, pTex->iShiftU,
  269. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  270. ; -------------------- In Monolithic version calls are inlined.
  271. ;pcmpeqd mm5, mm5
  272. mov edx, dword ptr [eax]
  273. and edx, 000ffffffh ; Have to make alpha 0x00 in 32 bit cases.
  274. mov [edi], edx
  275. FailLabel:
  276. dec uPix ;// BUG BUG?? uPix should never start as zero should it?
  277. ;// if so, this is a bug.
  278. jle ExitPixelLoop
  279. ; Doing update code after span length test so that an extra update is not done.
  280. movq mm5, [ebp+RASTSPAN_iUoW1]
  281. paddd mm5, [ecx+RASTPRIM_iDUoW1DX]
  282. movq [ebp+RASTSPAN_iUoW1], mm5
  283. ; mm5 still contains iUoW and iVoW which are the iU and iV values for
  284. ; non perspective correct.
  285. psrad mm5, TEX_TO_FINAL_SHIFT
  286. movq [ebx+RASTCTX_SI+SPANITER_iU1], mm5
  287. add esi, iZStep
  288. add edi, iSurfaceStep
  289. jmp PixelLoop
  290. ExitPixelLoop:
  291. ; Loop code ends
  292. ;-----------------------------------------------------------------------------
  293. ; LoopAny code ends here
  294. ;-----------------------------------------------------------------------------
  295. ;pS++;
  296. add ebp, SIZEOF_RASTSPAN
  297. ;}
  298. jmp SpanLoop
  299. ExitSpanLoop:
  300. ;pP = pP->pNext;
  301. mov ecx, [ecx+RASTPRIM_pNext]
  302. ;}
  303. jmp PrimLoop
  304. ExitPrimLoop:
  305. ;_asm{
  306. emms
  307. ;}
  308. ;return S_OK;
  309. xor eax, eax
  310. ;}
  311. pop edi
  312. pop esi
  313. pop ebx
  314. mov esp, StackPos
  315. pop ebp
  316. ret
  317. END