Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

541 lines
14 KiB

  1. ;-----------------------------------------------------------------------------
  2. ;
  3. ; Monolith 6. Perspective Correct Nearest Gouraud Modulated
  4. ; Z buffer (LE or GT) 565.
  5. ;
  6. ; Globals (ATTENTION)
  7. ;
  8. ; StackPos - stack pos holder
  9. ; uSpans - Number of spans to process
  10. ; iSurfaceStep - what to add to screen pointer
  11. ; iZStep - what to add to Z buffer pointer
  12. ; uPix - Pixel Count
  13. ;
  14. ; Changes from general MMX code.
  15. ; 1) Convert directly from 565 to internal format to remove
  16. ; extra unpack. Remove alpha set.
  17. ; 2) Didn't need to save texture color or blended color, so these
  18. ; are kept in registers.
  19. ; 3) All calls and jumps were removed.
  20. ; 4) Removed alpha masking in modulate code.
  21. ; 5) Change registers usage to prevent extra moves.
  22. ;
  23. ;-----------------------------------------------------------------------------
  ;-----------------------------------------------------------------------------
  ; Includes, constants, external data and module-level variables.
  ;
  ; Many of the EXTERN symbols and shift constants below are declared but not
  ; referenced by the code in this file.
  ;-----------------------------------------------------------------------------
          INCLUDE iammx.inc
          INCLUDE offs_acp.inc

  ; Names are read LSB to MSB, so B5G6R5 means five bits of blue starting
  ; at the LSB, then six bits of green, then five bits of red.

  ; Right shift applied to the iterated span color words before modulation.
  ; TBD (original author): check to see if this value is correct.
  COLOR_SHIFT                     EQU     8

          .586
          .model flat
          .data

  ; ---- External tables / scratch values (defined elsewhere) -------------------
          EXTERN  IncHighandLow16:MMWORD
          EXTERN  UFracVFracMask:MMWORD
          EXTERN  UV32to15Mask:MMWORD
          EXTERN  Makelow16one:MMWORD
          EXTERN  MaskKeepUValues:MMWORD
          EXTERN  MaskKeepVValues:MMWORD
          EXTERN  UFrac:MMWORD
          EXTERN  VFrac:MMWORD
          EXTERN  Zero:MMWORD
          EXTERN  memD3DTFG_POINT:MMWORD
          EXTERN  GiveUp:MMWORD
          EXTERN  LastW:MMWORD
          EXTERN  Val0x000a000a:MMWORD
          EXTERN  Val0xffff:MMWORD
          EXTERN  Val0x0000002000000020:MMWORD
          EXTERN  Val0x0000ffff0000ffff:MMWORD

  ; Local red mask (bits 11..15 of a 16-bit texel); used by the texel unpack
  ; in place of the external MaskRed565to888.
  opt_MaskRed565to888             MMWORD  000000000000F800H

  ; ---- External masks named for 565 / 555 / 1555 unpacking --------------------
          EXTERN  MaskRed565to888:MMWORD
          EXTERN  MaskGreen565to888:MMWORD
          EXTERN  MaskBlue565to888:MMWORD
          EXTERN  MaskRed555to888:MMWORD
          EXTERN  MaskGreen555to888:MMWORD
          EXTERN  MaskBlue555to888:MMWORD
          EXTERN  MaskAlpha1555to8888:MMWORD
          EXTERN  MaskRed1555to8888:MMWORD
          EXTERN  MaskGreen1555to8888:MMWORD
          EXTERN  MaskBlue1555to8888:MMWORD
          EXTERN  SetAlphato0xffff:MMWORD
          EXTERN  SetAlphato0xff:MMWORD

  ; ---- Shift counts named for the same conversions ----------------------------
  RedShift565to888                EQU     8
  GreenShift565to888              EQU     5
  BlueShift565to888               EQU     3
  RedShift555to888                EQU     9
  GreenShift555to888              EQU     6
  BlueShift555to888               EQU     3
  AlphaShift1555to8888            EQU     16
  RedShift1555to8888              EQU     9
  GreenShift1555to8888            EQU     6
  BlueShift1555to8888             EQU     3

  ; ---- More external constants -------------------------------------------------
          EXTERN  Zero:MMWORD                     ; repeated declaration (see above)
          EXTERN  DW_One_One:MMWORD
          EXTERN  MaskOffAlpha:MMWORD
          EXTERN  ShiftTA:MMWORD
          EXTERN  Val0x00ff00ff00ff00ff:MMWORD
          EXTERN  Val0x000000ff00ff00ff:MMWORD
          EXTERN  Val0X0000000001000000:MMWORD
          EXTERN  AlphaVal128:MMWORD
          EXTERN  RGBVal128:MMWORD
          EXTERN  g_uDitherValue:MMWORD
          EXTERN  SetAlphato0xff:MMWORD           ; repeated declaration (see above)
          EXTERN  u888to565RedBlueMask:MMWORD
          EXTERN  u888to565GreenMask:MMWORD
          EXTERN  u888to565Multiplier:MMWORD
          EXTERN  uVal0x000007ff03ff07ff:MMWORD
          EXTERN  uVal0x0000078003c00780:MMWORD
          EXTERN  u888to555RedBlueMask:MMWORD
          EXTERN  u888to555GreenMask:MMWORD
          EXTERN  u888to555Multiplier:MMWORD
          EXTERN  uVal0x000007ff07ff07ff:MMWORD
          EXTERN  uVal0x0000078007800780:MMWORD

  ;-----------------------------------------------------------------------------
  ; Span variables
  StackPos                        DWORD   ?       ; esp saved right after "push ebp"
  uSpans                          DWORD   ?       ; spans still to process in the current primitive
  ;-----------------------------------------------------------------------------
  ; Loop variables
  iSurfaceStep                    DWORD   ?       ; added to pS->pSurface per pixel (negated for X_DEC spans)
  iZStep                          DWORD   ?       ; added to pS->pZ per pixel (negated for X_DEC spans)
  uPix                            DWORD   ?       ; pixels still to process in the current span
  ;-----------------------------------------------------------------------------
  101. ;-----------------------------------------------------------------------------
  102. .code
  ;-----------------------------------------------------------------------------
  ; _MMXMLRast_6
  ;
  ; Walks the primitive list hanging off the rasterizer context and draws every
  ; span pixel by pixel: Z test and Z write, one nearest texel fetch from a
  ; 16-bit texture, modulation by the span's iterated color, and a 16-bit
  ; write to the surface (packed with the u888to565* constants).
  ;
  ; In:    [esp+4] = pCtx (the only stack argument read)
  ; Out:   eax = 0 (S_OK in the original C)
  ; Saves: ebp, ebx, esi, edi (restored before return)
  ; Uses:  eax, ecx, edx, mm0-mm5, mm7; EMMS is executed before returning
  ; Stack: plain RET, the argument is left for the caller to remove
  ;
  ; Register roles inside the loops:
  ;   ebx = pCtx    ecx = pP (current primitive)    ebp = pS (current span)
  ;
  ; Working state lives in module-level variables (StackPos, uSpans, uPix,
  ; iZStep, iSurfaceStep, LastW, GiveUp), so simultaneous calls would share it.
  ;-----------------------------------------------------------------------------
          PUBLIC  _MMXMLRast_6
  _MMXMLRast_6:
          push    ebp
          mov     StackPos, esp           ; epilogue restores esp from here
          mov     eax, esp
          sub     esp, 0Ch                ; This will need to change if stack frame size changes.
          push    ebx
          push    esi
          push    edi

          mov     ebx, [eax+8]            ; ebx = pCtx ([eax] = saved ebp, [eax+4] = return address)

          ; PD3DI_RASTPRIM pP = pCtx->pPrim;
          mov     ecx, [ebx+RASTCTX_pPrim]

  ; while (pP) {
  PrimLoop:
          test    ecx, ecx
          jz      ExitPrimLoop

          ; UINT16 uSpans = pP->uSpans;
          movzx   eax, word ptr [ecx+RASTPRIM_uSpans]
          mov     uSpans, eax

          ; PD3DI_RASTSPAN pS = (PD3DI_RASTSPAN)(pP + 1);
          mov     ebp, ecx
          add     ebp, SIZEOF_RASTPRIM

  ; while (uSpans-- > 0) {
  SpanLoop:
          mov     edx, uSpans             ; edx = value before the decrement
          mov     eax, edx
          dec     eax
          mov     uSpans, eax
          test    edx, edx
          jle     ExitSpanLoop

  ;-----------------------------------------------------------------------------
  ; Span setup (the pfnBegin / LoopAny code is inlined here to save a jump).
  ;-----------------------------------------------------------------------------
          ; uPix = pS->uPix;
          movzx   eax, word ptr [ebp+RASTSPAN_uPix]
          mov     uPix, eax

          ; pCtx->SI.iDW = 0x0;
          mov     dword ptr [ebx+RASTCTX_SI+SPANITER_iDW], 0

          ; SI.iU1 = high 32 bits of (iUoW1 << 8) * (iW << 4)
          ; SI.iV1 = the same, using the dword that follows iUoW1 (iVoW1 per
          ;          the original C comment).
          mov     esi, [ebp+RASTSPAN_iW]
          movq    mm5, MMWORD PTR [ebp+RASTSPAN_iUoW1]
          pslld   mm5, 8
          shl     esi, 4
          movd    eax, mm5
          psrlq   mm5, 32
          imul    esi
          mov     [ebx+RASTCTX_SI+SPANITER_iU1], edx
          movd    eax, mm5
          imul    esi
          mov     [ebx+RASTCTX_SI+SPANITER_iV1], edx

          ; iSpecialW is a 16-bit counter, incremented once per pixel step; while
          ; it is negative the W update below takes the iterative path.
          ;   fall-through : iSpecialW = -3
          ;                  ("should be negative for the first 3 pixels of span")
          ;   jump taken   : iSpecialW = 0x7fff - uPix + 5   (may wrap, by design)
          ;                  ("should be negative for the last 3 pixels of span")
          ; NOTE(review): the C pseudo-code this was derived from reads
          ; "if (pP->iDOoWDX > 0) iSpecialW = -3; else ...", which is the opposite
          ; branch sense from the JG below - confirm which one is intended.
          cmp     dword ptr [ecx+RASTPRIM_iDOoWDX], 0
          jg      SpecialWLastMonTest
          mov     word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW], -3
          jmp     DoneSpecialWifMonTest
  SpecialWLastMonTest:
          mov     eax, 07fffh
          sub     eax, uPix
          add     eax, 5
          ; 16-bit store: every other access to iSpecialW in this routine is
          ; word-sized, so do not write the two bytes that follow the field.
          mov     word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW], ax
  DoneSpecialWifMonTest:

          ; if (pP->uFlags & D3DI_RASTPRIM_X_DEC) step backwards, else forwards.
          mov     eax, [ecx+RASTPRIM_uFlags]
          and     eax, D3DI_RASTPRIM_X_DEC ; AND sets ZF; no separate TEST needed
          jz      LeftToRightSpan

          ; iZStep = -pCtx->iZStep;  iSurfaceStep = -pCtx->iSurfaceStep;
          mov     eax, [ebx+RASTCTX_iZStep]
          neg     eax
          mov     iZStep, eax
          mov     eax, [ebx+RASTCTX_iSurfaceStep]
          neg     eax
          mov     iSurfaceStep, eax
          jmp     DoneSpanDirif

  LeftToRightSpan:
          ; iZStep = pCtx->iZStep;  iSurfaceStep = pCtx->iSurfaceStep;
          mov     eax, [ebx+RASTCTX_iZStep]
          mov     iZStep, eax
          mov     eax, [ebx+RASTCTX_iSurfaceStep]
          mov     iSurfaceStep, eax
  DoneSpanDirif:

  ;-----------------------------------------------------------------------------
  ; Per-pixel loop
  ;-----------------------------------------------------------------------------
  PixelLoop:
          ; ---- Z test (16-bit unsigned Z buffer) ----
          ; UINT16 uZ  = (UINT16)(pS->uZ >> 15);      -> edx
          ; UINT16 uZB = *((UINT16*)pS->pZ);          -> eax
          ; pS->uZ += pP->iDZDX;                      (done whether or not the test passes)
          mov     edx, [ebp+RASTSPAN_uZ]
          movd    mm4, edx
          mov     esi, [ebp+RASTSPAN_pZ]
          shr     edx, 15
          movzx   eax, word ptr [esi]
          sub     eax, edx                ; eax = uZB - uZ
          paddd   mm4, [ecx+RASTPRIM_iDZDX]
          movd    [ebp+RASTSPAN_uZ], mm4
          ; The pixel is rejected when (uZB - uZ) XOR pCtx->iZXorMask has its
          ; sign bit set.  XOR sets SF itself and the MMX instructions above do
          ; not touch EFLAGS, so no separate TEST is needed.
          xor     eax, [ebx+RASTCTX_iZXorMask]
          js      FailLabel
          mov     word ptr [esi], dx      ; Z write on pass

          ; ---- Texture address ----
          ; Shift counts are 10 - iShiftU in the low word and 10 minus the word
          ; that follows iShiftU in the next word (presumably iShiftV - confirm
          ; against the SPANTEX layout).
          mov     esi, [ebx+RASTCTX_pTexture]
          movq    mm5, MMWORD PTR Val0x000a000a   ; This is TEX_FINAL_SHIFT - 6 = 10.
          movd    mm4, [esi+SPANTEX_iShiftU]
          psubw   mm5, mm4
          movq    mm4, mm5
          pand    mm5, MMWORD PTR Val0xffff       ; mm5 = shift count for U
          psrld   mm4, 16                         ; mm4 = shift count for V
          movd    mm1, [ebx+RASTCTX_SI+SPANITER_iU1]
          psrad   mm1, mm5
          movd    mm2, [ebx+RASTCTX_SI+SPANITER_iV1]
          psrad   mm2, mm4
          punpckldq mm1, mm2                      ; mm1 = V : U (dwords)

          ; mm5 = multiplier pair for PMADDWD below: Makelow16one OR'ed with
          ; Makelow16one shifted left by (iShiftPitch + 16).  Assuming
          ; Makelow16one has only bit 0 set, that is words { 1, 1 << iShiftPitch }.
          movzx   edx, word ptr [esi+SPANTEX_iShiftPitch]
          add     edx, 16
          movd    mm2, edx
          movq    mm5, MMWORD ptr Makelow16one
          pslld   mm5, mm2
          por     mm5, MMWORD ptr Makelow16one

          psrad   mm1, 6
          packssdw mm1, mm1               ; Value needs to be packed since all wrap/mirror
                                          ; masks are applied on 16-bit words.

          ; Wrap / mirror: keep (coord AND mask); additionally XOR with the mask
          ; for each coordinate whose flip-mask bits are non-zero.
          movd    mm0, [esi+SPANTEX_uMaskU]       ; Load U and V mask
          movq    mm7, mm1
          movd    mm4, [esi+SPANTEX_iFlipMaskU]
          pand    mm7, mm4
          pcmpeqw mm7, MMWORD PTR Zero            ; 0xFFFF where no flip bit is set
          pandn   mm7, mm0                        ; mask where a flip bit is set, else 0
          pand    mm1, mm0
          pxor    mm1, mm7

          ; Texel index = U * (low multiplier) + V * (high multiplier).
          pmaddwd mm1, mm5
          mov     edi, [esi+SPANTEX_pBits]
          movd    eax, mm1
          movzx   eax, word ptr [edi+2*eax]       ; fetch the 16-bit texel

          ; ---- Unpack the texel straight into one color per 16-bit word ----
          ; (no separate unpack-with-zero step)
          ;   word 2 <- (texel AND 0F800h) << 24        (red)
          ;   word 1 <- (texel AND MaskGreen565to888) << 13
          ;   word 0 <- (texel << 3) AND 0FFh           (blue)
          movd    mm1, eax                ; Make two more copies of input color
          movq    mm2, mm1
          pand    mm1, dword ptr MaskGreen565to888
          pand    mm2, dword ptr opt_MaskRed565to888
          psllq   mm2, 24
          psllq   mm1, 13
          shl     eax, 3
          por     mm1, mm2
          and     eax, 0FFH
          movd    mm2, eax
          por     mm2, mm1                ; mm2 = unpacked texture color

          ; ---- Modulate: (span color >> COLOR_SHIFT) * texture color ----
          movq    mm1, [ebp+RASTSPAN_uB]
          psrlw   mm1, COLOR_SHIFT        ; COLOR_SHIFT is set to 8.
          pmullw  mm1, mm2

          ; ---- Pack and write the 16-bit pixel ----
          mov     edi, [ebp+RASTSPAN_pSurface]
          psrlw   mm1, 8                  ; Convert color1 from 8.8 to 0.8
          packuswb mm1, mm7               ; pack one color (upper half is don't-care;
                                          ; only the low dword is used below)
          movq    mm3, mm1
          pand    mm1, MMWORD PTR u888to565RedBlueMask
          pmaddwd mm1, MMWORD PTR u888to565Multiplier
          pand    mm3, MMWORD PTR u888to565GreenMask
          por     mm1, mm3
          psrld   mm1, 5
          movd    edx, mm1
          mov     [edi], dx

  FailLabel:
          ; Z-rejected pixels join here: nothing is written, iteration continues.
          dec     uPix                    ;// BUG BUG?? uPix should never start as zero should it?
          jle     ExitPixelLoop

          ; ---- Step the iterated span values ----
          ; color words at pS->uB += pP->iDBDX...
          movq    mm1, [ebp+RASTSPAN_uB]
          paddw   mm1, [ecx+RASTPRIM_iDBDX]
          movq    [ebp+RASTSPAN_uB], mm1

          ; iUoW1 / following dword += iDUoW1DX / following dword.
          ; mm5 keeps the updated pair for the U/V recompute further down.
          movq    mm5, [ebp+RASTSPAN_iUoW1]
          paddd   mm5, [ecx+RASTPRIM_iDUoW1DX]
          movq    [ebp+RASTSPAN_iUoW1], mm5

          ; pS->iLOD += pS->iDLOD;  (16-bit)
          xor     eax, eax
          mov     ax, [ebp+RASTSPAN_iLOD]
          add     ax, [ebp+RASTSPAN_iDLOD]
          mov     [ebp+RASTSPAN_iLOD], ax

          ; pS->iOoW += pP->iDOoWDX;  eax keeps the new iOoW for the W update.
          mov     eax, [ebp+RASTSPAN_iOoW]
          add     eax, [ecx+RASTPRIM_iDOoWDX]
          mov     [ebp+RASTSPAN_iOoW], eax

          ; ---- Update W from the new OoW ----
          ; First guess: edx = iW + SI.iDW (previous W plus the previous delta).
          mov     edx, [ebp+RASTSPAN_iW]
          mov     LastW, edx              ; Save iW to calc iDW for next time.
          add     edx, [ebx+RASTCTX_SI+SPANITER_iDW]
          xor     edi, edi
          cmp     di, word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW]
          jle     DontDoSpecialW1         ; iSpecialW >= 0: single refinement step

          ; iSpecialW < 0: iterative path.
          ; NOTE(review): as coded, a non-negative guess is replaced by LastW/2,
          ; while a negative guess is kept as-is - confirm the branch sense.
          cmp     edx, edi
          jl      WOutOfRange1
          mov     edx, LastW
          sar     edx, 1
  WOutOfRange1:
          mov     GiveUp, 8               ; Pre decrementing instead of post decrementing,
                                          ; so at most 7 refinement passes run.
  SpecW1Loop1:
          dec     GiveUp
          jz      ExitSpecWLoop1
          ; esi = (1 << 16) - high32(iOoW * W); edi keeps W.
          mov     esi, (1 SHL 16)
          mov     edi, edx
          imul    edx
          sub     esi, edx
  SpecW1Loop2:
          ; While esi is negative: esi = (esi + (1 << 15)) >> 1.
          test    esi, esi
          jns     SpecW1ExitLoop2         ; This jump should be predicted correctly most of the time.
          add     esi, (1 SHL 15)
          sar     esi, 1
          jmp     SpecW1Loop2
  SpecW1ExitLoop2:
          ; edx = new W = high32((W << 5) * (esi << 12))
          mov     eax, edi
          shl     eax, 5                  ; 1.15.16 << 5 = 1.10.21 TBD Can I shift off upper bits??
          shl     esi, 12                 ; 4.15 << 12 = 4.27 ;
          mul     esi
          ; edi = |old W - new W|; stop once the change is at most 20h.
          sub     edi, edx
          mov     eax, edi
          sar     eax, 31
          xor     edi, eax
          sub     edi, eax
          cmp     edi, 020h               ;Assuming that loop will only happen once.
          jbe     ExitSpecWLoop1
          mov     eax, [ebp+RASTSPAN_iOoW] ; reload iOoW for the next pass
          jmp     SpecW1Loop1

  DontDoSpecialW1:
          ; One refinement step:
          ;   edx = high32(((1 << 16) - high32(iOoW * W)) << 15) * W) << 2
          mov     esi, (1 SHL 16)
          mov     edi, edx
          mul     edx
          sub     esi, edx
          shl     esi, 15
          mov     eax, esi
          mul     edi                     ; 0.2.30 * 1.15.16 = 1.17.46 >> 32 = 1.17.14
          shl     edx, 2                  ; 1.17.14 << 2 = 1.15.16

  ExitSpecWLoop1:
          ; Commit W, remember the delta for the next first guess, advance iSpecialW.
          mov     [ebp+RASTSPAN_iW], edx
          mov     esi, edx                ; Save W for multiplying by UoW and VoW
          sub     edx, LastW
          mov     [ebx+RASTCTX_SI+SPANITER_iDW], edx
          inc     word ptr [ebx+RASTCTX_SI+SPANITER_iSpecialW]

          ; Recompute SI.iU1 / SI.iV1 exactly as in the span setup, from the
          ; stepped UoW/VoW pair still held in mm5.
          pslld   mm5, 8
          shl     esi, 4
          movd    eax, mm5
          psrlq   mm5, 32
          imul    esi
          mov     [ebx+RASTCTX_SI+SPANITER_iU1], edx
          movd    eax, mm5
          imul    esi
          mov     [ebx+RASTCTX_SI+SPANITER_iV1], edx

          ; pS->pZ += iZStep;  pS->pSurface += iSurfaceStep;
          mov     eax, dword ptr [ebp+RASTSPAN_pZ]
          mov     edx, dword ptr [ebp+RASTSPAN_pSurface]
          add     eax, iZStep
          add     edx, iSurfaceStep
          mov     dword ptr [ebp+RASTSPAN_pZ], eax
          mov     dword ptr [ebp+RASTSPAN_pSurface], edx
          jmp     PixelLoop

  ExitPixelLoop:
  ;-----------------------------------------------------------------------------
  ; End of the inlined LoopAny code
  ;-----------------------------------------------------------------------------
          ; pS++;
          add     ebp, SIZEOF_RASTSPAN
          jmp     SpanLoop
  ; }  (span loop)

  ExitSpanLoop:
          ; pP = pP->pNext;
          mov     ecx, [ecx+RASTPRIM_pNext]
          jmp     PrimLoop
  ; }  (primitive loop)

  ExitPrimLoop:
          emms                            ; leave MMX state before returning

          ; return S_OK;
          xor     eax, eax
          pop     edi
          pop     esi
          pop     ebx
          mov     esp, StackPos           ; drops the 0Ch bytes reserved in the prologue
          pop     ebp
          ret
  412. END