Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

426 lines
12 KiB

  ;-----------------------------------------------------------------------------
;
; Monolith 2. Non-perspective 16 bit Z buffered 565
;
; Syntax / target: MASM (Intel operand order), 32-bit x86 flat model, MMX.
;
; Entry point
;   _MMXMLRast_2(pCtx)
;     In:    [esp+4] = pCtx, pointer to the rasterizer context (fields are
;            addressed through the RASTCTX_* offsets from offs_acp.inc).
;     Out:   eax = 0 (S_OK).
;     Saves: ebx, esi, edi, ebp.  Clobbers eax, ecx, edx, flags, mm0-mm7.
;     The routine returns with a plain "ret", so the caller removes the
;     argument.  emms is executed before returning.
;
; What it does
;   Walks the primitive list starting at pCtx->pPrim (linked by pNext).  The
;   spans of a primitive follow it directly in memory (first span is at
;   pP + SIZEOF_RASTPRIM).  For every pixel of every span it performs a 16 bit
;   Z test, and when the test passes it writes the new Z value and copies one
;   16 bit texel, addressed with wrap (mask) addressing, to the surface.
;
; Per-call state
;   The original version kept its working variables in file-level globals
;   (uSpans, StackPos, uPix, iSurfaceStep, iZStep, uMaskU, iShiftU,
;   iShiftPitch, pBits) and carried a note that they needed to move to the
;   stack.  They now live in a small esp-relative frame (see the ML2_*
;   equates) or in MMX registers, so the routine no longer has private static
;   state and no longer restores esp from a global.
;   ebp is used as the span pointer, so locals are addressed through esp.
;   esp does not move between the prologue and the epilogue (no push, pop or
;   call in the body).
;
; Register Usage
;
;   ebx - pCtx
;   ecx - pP, current primitive
;   ebp - pS, current span
;   esi - Z buffer pointer (texture pointer while a span is being set up)
;   edi - Screen buffer pointer
;   eax, edx - temporaries
;
;   mm0 - U/V wrap mask                         (constant for a span)
;   mm3 - multiplier pair for the texel address (constant for a span)
;   mm6 - right shift count applied to iU       (constant for a span)
;   mm7 - right shift count applied to iV       (constant for a span)
;   mm1, mm2, mm4, mm5 - temporaries
;
; The only differences between this monolith and the regular
; MMX assembly code are:
;
; 1) Uses LE/GR Z compare code that all other monoliths use.
; 2) All texture info is copied out of the texture structure during span
;    setup to free up esi.
; 3) This code does Wrap only for texture addressing
; 4) Since there is no modulation or bi-linear and the source
;    and destination color formats are the same, there is
;    no need to convert to and from the internal color format.
; 5) esi is reserved for the Zbuffer
; 6) edi is reserved for the screen buffer.
; 7) Texture read does not use edi as pBits so that edi can be
;    preserved for screen buffer.
;
;-----------------------------------------------------------------------------

INCLUDE iammx.inc
INCLUDE offs_acp.inc

; Names are read LSB to MSB, so B5G6R5 means five bits of blue starting
; at the LSB, then six bits of green, then five bits of red.

;TBD check to see if this value is correct.
COLOR_SHIFT             equ 8

.586
.model flat

; Big separating lines separate code into span code
; and loop code.  If span and loop are not going to
; end up being combined then it will be easy to
; separate the code.

.data

; Need externs for all of the variables that are needed for various beads.
; Only Val0x000a000a, Val0xffff and Makelow16one are referenced by this file;
; the remaining declarations are kept unchanged.
EXTERN IncHighandLow16:MMWORD
EXTERN UFracVFracMask:MMWORD
EXTERN UV32to15Mask:MMWORD
EXTERN Makelow16one:MMWORD
EXTERN MaskKeepUValues:MMWORD
EXTERN MaskKeepVValues:MMWORD
EXTERN UFrac:MMWORD
EXTERN VFrac:MMWORD
EXTERN Zero:MMWORD
EXTERN memD3DTFG_POINT:MMWORD
EXTERN GiveUp:MMWORD
EXTERN LastW:MMWORD
EXTERN Val0x000a000a:MMWORD
EXTERN Val0xffff:MMWORD
EXTERN Val0x0000002000000020:MMWORD
EXTERN Val0x0000ffff0000ffff:MMWORD
EXTERN MaskRed565to888:MMWORD
EXTERN MaskGreen565to888:MMWORD
EXTERN MaskBlue565to888:MMWORD
EXTERN MaskRed555to888:MMWORD
EXTERN MaskGreen555to888:MMWORD
EXTERN MaskBlue555to888:MMWORD
EXTERN MaskAlpha1555to8888:MMWORD
EXTERN MaskRed1555to8888:MMWORD
EXTERN MaskGreen1555to8888:MMWORD
EXTERN MaskBlue1555to8888:MMWORD

; TBD. I think that I want to do 0xffff instead of 0xff.  This will
; have to be checked.  There is a value very similar to this in
; buf write.
EXTERN SetAlphato0xffff:MMWORD
EXTERN SetAlphato0xff:MMWORD

; TODO These equates are identical to the ones in texread.mas.  Maybe they
; should be in a common .inc file.
RedShift565to888        equ 8
GreenShift565to888      equ 5
BlueShift565to888       equ 3
RedShift555to888        equ 9
GreenShift555to888      equ 6
BlueShift555to888       equ 3
AlphaShift1555to8888    equ 16
RedShift1555to8888      equ 9
GreenShift1555to8888    equ 6
BlueShift1555to8888     equ 3

; (Zero and SetAlphato0xff were declared a second time in this list; the
; duplicate declarations have been dropped.)
EXTERN DW_One_One:MMWORD
EXTERN MaskOffAlpha:MMWORD
EXTERN ShiftTA:MMWORD
EXTERN Val0x00ff00ff00ff00ff:MMWORD
EXTERN Val0x000000ff00ff00ff:MMWORD
EXTERN Val0X0000000001000000:MMWORD
EXTERN AlphaVal128:MMWORD
EXTERN RGBVal128:MMWORD
EXTERN g_uDitherValue:MMWORD
EXTERN u888to565RedBlueMask:MMWORD
EXTERN u888to565GreenMask:MMWORD
EXTERN u888to565Multiplier:MMWORD
EXTERN uVal0x000007ff03ff07ff:MMWORD
EXTERN uVal0x0000078003c00780:MMWORD
EXTERN u888to555RedBlueMask:MMWORD
EXTERN u888to555GreenMask:MMWORD
EXTERN u888to555Multiplier:MMWORD
EXTERN uVal0x000007ff07ff07ff:MMWORD
EXTERN uVal0x0000078007800780:MMWORD

;-----------------------------------------------------------------------------
; Stack frame layout (offsets from esp after the prologue).
;
;   [esp + ML2_uSpans]        spans left in the current primitive
;   [esp + ML2_uPix]          pixels left in the current span
;   [esp + ML2_iSurfaceStep]  signed step added to the screen pointer
;   [esp + ML2_iZStep]        signed step added to the Z buffer pointer
;   [esp + ML2_pBits]         base address of the texel data
;   [esp + ML2_LOCALS_SIZE]   saved edi, esi, ebx, ebp (ML2_SAVED_REGS bytes)
;   then the return address, then pCtx at [esp + ML2_ARG_pCtx]
;
; ML2_SAVED_REGS must match the number of pushes in the prologue.
;-----------------------------------------------------------------------------
ML2_uSpans              equ 0
ML2_uPix                equ 4
ML2_iSurfaceStep        equ 8
ML2_iZStep              equ 12
ML2_pBits               equ 16
ML2_LOCALS_SIZE         equ 20
ML2_SAVED_REGS          equ 16
ML2_ARG_pCtx            equ ML2_LOCALS_SIZE + ML2_SAVED_REGS + 4

.code

PUBLIC _MMXMLRast_2

_MMXMLRast_2:
        push    ebp
        push    ebx
        push    esi
        push    edi
        sub     esp, ML2_LOCALS_SIZE

        ; Put pCtx into ebx
        mov     ebx, [esp+ML2_ARG_pCtx]

        ;PD3DI_RASTPRIM pP = pCtx->pPrim;
        mov     ecx, [ebx+RASTCTX_pPrim]

        ;while (pP)
        ;{
PrimLoop:
        test    ecx, ecx
        jz      ExitPrimLoop

        ;UINT16 uSpans = pP->uSpans;
        movzx   eax, word ptr [ecx+RASTPRIM_uSpans]
        mov     [esp+ML2_uSpans], eax

        ;PD3DI_RASTSPAN pS = (PD3DI_RASTSPAN)(pP + 1);
        lea     ebp, [ecx+SIZEOF_RASTPRIM]

        ;while (uSpans-- > 0)
        ;{
SpanLoop:
        ; The counter starts as a zero-extended 16 bit value, so it becomes
        ; negative exactly when it was 0 before the decrement.
        dec     dword ptr [esp+ML2_uSpans]
        js      ExitSpanLoop

        ;pCtx->pfnBegin(pCtx, pP, pS);
;-----------------------------------------------------------------------------
; LoopAny code inserted here.  This is to get rid of an extra
; jump.
;-----------------------------------------------------------------------------
        ; Setup Code begins - get values to iterate
        movzx   eax, word ptr [ebp+RASTSPAN_uPix]
        mov     [esp+ML2_uPix], eax
        ; NOTE(review): the pixel loop below tests the count only after the
        ; first pixel, so a span whose uPix is 0 still processes one pixel
        ; (unchanged from the original).  Presumably spans always carry at
        ; least one pixel - confirm against the span generator.

        ; Non perspective correct: iUoW/iVoW are the iU/iV values once shifted
        ; down by TEX_TO_FINAL_SHIFT.  One 64 bit access covers both.
        movq    mm5, [ebp+RASTSPAN_iUoW1]
        psrad   mm5, TEX_TO_FINAL_SHIFT
        movq    [ebx+RASTCTX_SI+SPANITER_iU1], mm5
        mov     dword ptr [ebx+RASTCTX_SI+SPANITER_iDW], 0
        mov     word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW], 0

        ; Step direction: when D3DI_RASTPRIM_X_DEC is set in pP->uFlags both
        ; pointer steps are negated, otherwise they are used as stored in the
        ; context.
        mov     eax, [ebx+RASTCTX_iZStep]
        mov     edx, [ebx+RASTCTX_iSurfaceStep]
        test    dword ptr [ecx+RASTPRIM_uFlags], D3DI_RASTPRIM_X_DEC
        jz      StoreSteps
        neg     eax
        neg     edx
StoreSteps:
        mov     [esp+ML2_iZStep], eax
        mov     [esp+ML2_iSurfaceStep], edx

        ;******************************************
        ; Texture info.  esi holds the texture pointer only inside this
        ; section; everything the pixel loop needs is copied out of it here.
        mov     esi, [ebx+RASTCTX_pTexture]

        mov     edx, [esi+SPANTEX_pBits]
        mov     [esp+ML2_pBits], edx

        ; Load U and V mask (one dword, upper half of mm0 is zero).
        movd    mm0, dword ptr [esi+SPANTEX_uMaskU]

        ; Doing UV calculation a little more accurate
        ; Exactly like C code.
        ; iU and iV are shifted to the right not by
        ; (TEX_FINAL_SHIFT - iShiftU0) but by (TEX_FINAL_SHIFT - iShiftU0 - 6).
        ;   iShiftU0 = pTex->iShiftU - iLOD0
        ;   (TEX_FINAL_SHIFT - (pTex->iShiftU - iLOD0))
        ;   (TEX_FINAL_SHIFT + iLOD0 - pTex->iShiftU)
        ; If textures have a max of 1024 then iShiftU0 would be at most 10,
        ; which would make (TEX_FINAL_SHIFT - iShiftU - 6) at most zero.  This
        ; is why 6 was chosen.  It will also give bi-linear 6 bits of
        ; precision; it was said that only five was needed.
        ;INT16 iShiftU0 = pTex->iShiftU - iLOD0;
        ;INT16 iShiftV0 = pTex->iShiftV - iLOD0;
        ;
        ; The dword at SPANTEX_iShiftU is handled as two packed 16 bit shifts:
        ; the low word gives the U count and the high word the V count
        ; (presumably iShiftU followed by iShiftV).
        movq    mm6, MMWORD PTR Val0x000a000a   ; This is TEX_FINAL_SHIFT - 6 = 10.
        movd    mm7, dword ptr [esi+SPANTEX_iShiftU]
        psubw   mm6, mm7                        ; word0 = 10 - U shift, word1 = 10 - V shift
        movq    mm7, mm6
        pand    mm6, MMWORD PTR Val0xffff       ; mm6 = shift count for iU
        psrld   mm7, 16                         ; mm7 = shift count for iV

        ; Texture Pitch cannot be calculated so it must be looked up in the
        ; iShiftPitch table (first 16 bit entry).  Shifting Makelow16one left
        ; by iShiftPitch + 16 and ORing Makelow16one back in makes the low
        ; 16 bits of the dword one with the pitch above them, so pmaddwd can
        ; form U*1 + V*pitch in a single step (assumes Makelow16one holds 1
        ; in each dword, as its name and the original comments suggest).
        movzx   edx, word ptr [esi+SPANTEX_iShiftPitch]
        add     edx, 16
        movd    mm2, edx
        movq    mm3, MMWORD PTR Makelow16one
        pslld   mm3, mm2
        por     mm3, MMWORD PTR Makelow16one

        mov     edi, [ebp+RASTSPAN_pSurface]
        mov     esi, [ebp+RASTSPAN_pZ]
        ;******************************************

PixelLoop:
        ; Ztestcode
        ; edx is uZ
        ; eax is uZB
        ; 16 bit unsigned format
        ;UINT16 uZ = (UINT16)(pS->uZ>>15);
        ;UINT16 uZB = *((UINT16*)pS->pZ);
        mov     edx, [ebp+RASTSPAN_uZ]
        movd    mm4, edx
        shr     edx, 15
        movzx   eax, word ptr [esi]

        ;pS->uZ += pP->iDZDX;
        ;if ((pCtx->iZXorMask)^(uZ > uZB))
        sub     eax, edx                        ; eax = uZB - uZ
        paddd   mm4, [ecx+RASTPRIM_iDZDX]       ; MMX ops leave the flags alone
        movd    [ebp+RASTSPAN_uZ], mm4
        xor     eax, [ebx+RASTCTX_iZXorMask]    ; sign of the result decides the test
        js      FailLabel

        mov     word ptr [esi], dx              ; Z test passed: store new Z

        ; texturecode
        ; Bring iU/iV down to texel coordinates using the per-span counts.
        movd    mm1, [ebx+RASTCTX_SI+SPANITER_iU1]
        psrad   mm1, mm6
        movd    mm2, [ebx+RASTCTX_SI+SPANITER_iV1]
        psrad   mm2, mm7
        punpckldq mm1, mm2                      ; mm1 = iV : iU

        ;iU00 >>= 6;
        ;iV00 >>= 6;
        psrad   mm1, 6
        packssdw mm1, mm1                       ; Value needs to be packed since the
                                                ; wrap operation assumes UV in low 32 bits.

        ;UINT16 uMaskU0 = pTex->uMaskU >> iLOD0; UINT16 uMaskV0 = pTex->uMaskV >> iLOD0;
        ; Specific wrap code (replaces the general purpose wrap/mirror code).
        pand    mm1, mm0

        ; Texel index: low dword = U*(low word of mm3) + V*(high word of mm3).
        pmaddwd mm1, mm3
        movd    eax, mm1

        ;pCtx->SI.TexCol[0] = pCtx->pfnTexRead(iU00, iV00, pTex->iShiftU,
        ;                                      pTex->pBits[iLOD0], &pCtx->Texture[0]);
        ; Texels are 16 bits wide, hence the index is scaled by 2.
        mov     edx, [esp+ML2_pBits]
        mov     dx, word ptr [edx+eax*2]

        ; Write Texture.
        mov     word ptr [edi], dx

FailLabel:
        dec     dword ptr [esp+ML2_uPix]
        jle     ExitPixelLoop

        ; Doing update code after span length test so that an extra update is
        ; not done.
        movq    mm5, [ebp+RASTSPAN_iUoW1]
        paddd   mm5, [ecx+RASTPRIM_iDUoW1DX]
        movq    [ebp+RASTSPAN_iUoW1], mm5

        ; mm5 still contains iUoW and iVoW which are the iU and iV values for
        ; non perspective correct.
        psrad   mm5, TEX_TO_FINAL_SHIFT
        movq    [ebx+RASTCTX_SI+SPANITER_iU1], mm5

        add     esi, [esp+ML2_iZStep]
        add     edi, [esp+ML2_iSurfaceStep]
        jmp     PixelLoop

ExitPixelLoop:
        ; Loop code ends
;-----------------------------------------------------------------------------
; LoopAny code ends here
;-----------------------------------------------------------------------------
        ;pS++;
        add     ebp, SIZEOF_RASTSPAN
        ;}
        jmp     SpanLoop

ExitSpanLoop:
        ;pP = pP->pNext;
        mov     ecx, [ecx+RASTPRIM_pNext]
        ;}
        jmp     PrimLoop

ExitPrimLoop:
        ;_asm{
        emms
        ;}

        ;return S_OK;
        xor     eax, eax
        ;}
        add     esp, ML2_LOCALS_SIZE
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret

END