Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

568 lines
19 KiB

  1. ;-----------------------------------------------------------------------------
  2. ;
  3. ; This file contains the general span parsing code combined with loop code.
  4. ;
  5. ;
  6. ; WARNING WARNING WARNING
  7. ; This asm file generated from mas file.
  8. ; EDIT THE MAS FILE.
  9. ; I warned you.
  10. ; WARNING WARNING WARNING
  11. ;
  12. ;-----------------------------------------------------------------------------
  13. INCLUDE iammx.inc
  14. INCLUDE offs_acp.inc
  15. include(`m4hdr.mh')dnl
  16. include(`cvars.mh')dnl
  17. include(`texaddra.mh')dnl
  18. EXTERN g_uDitherValue:MMWORD
  19. .586
  20. .model flat
  21. ; Big separating lines separate code into span code
  22. ; and loop code. If span and loop are not going to
  23. ; end up being combined then it will be easy to
  24. ; separate the code.
  25. .data
  26. ;-----------------------------------------------------------------------------
  27. ; Span Variables (module-level storage shared by every call, so the routine is not reentrant)
  28. StackPos dd ? ; esp as captured right after push ebp, reloaded in the epilogue
  29. uSpans dd ? ; countdown of spans left in the current primitive
  30. ;-----------------------------------------------------------------------------
  31. ;-----------------------------------------------------------------------------
  32. ; Loop Variables
  33. ;// Table is needed to get starting value for dither, but can use xor trick afterwards to generate consecutive values.
  34. ;// Need to compare table based method with Xor method and compare timing/memory usage. It is good to keep xor method
  35. ;// around since it can be used more efficiently when there are more free registers (i.e. a monolithic routine Probably
  36. ;// only enough registers to do it in a gouraud or gouraud/specular case).
  37. ;static UINT64 uMMXDitherTable[16] =
  38. ;{
  39. ; 0x0000000000000000 >> 6, 0x0000800080008000 >> 6, 0x0000200020002000 >> 6, 0x0000a000a000a000 >> 6,
  40. ; 0x0000c000c000c000 >> 6, 0x0000400040004000 >> 6, 0x0000e000e000e000 >> 6, 0x0000600060006000 >> 6,
  41. ; 0x0000300030003000 >> 6, 0x0000b000b000b000 >> 6, 0x0000100010001000 >> 6, 0x0000900090009000 >> 6,
  42. ; 0x0000f000f000f000 >> 6, 0x0000700070007000 >> 6, 0x0000d000d000d000 >> 6, 0x0000500050005000 >> 6
  43. ;};
  44. uMMXDitherTable dq 000000000000000h , 000800080008000h , 000200020002000h , 000a000a000a000h ; stored unshifted, the >> 6 from the C table is applied after the load
  45. dq 000c000c000c000h , 000400040004000h , 000e000e000e000h , 000600060006000h
  46. dq 000300030003000h , 000b000b000b000h , 000100010001000h , 000900090009000h
  47. dq 000f000f000f000h , 000700070007000h , 000d000d000d000h , 000500050005000h
  48. u565MultShifter dq 00000000200010002h ; pmullw factors per word, low to high: 2, 1, 2, 0
  49. u555MultShifter dq 00000000200020002h ; pmullw factors per word, low to high: 2, 2, 2, 0
  50. uFogDXAdd dq 00000000400040004h ; 4 in each of the three low words, added to negative fog deltas
  51. iSurfaceStep dd ? ; per-pixel step for pS->pSurface, sign follows the span direction
  52. iZStep dd ? ; per-pixel step for pS->pZ, sign follows the span direction
  53. uDitherXorXorMask dq 0 ; working copy, rebuilt from the init value for every span
  54. uDitherXorMask dq 0 ; working copy, rebuilt from the init value for every span
  55. uDitherXorXorMaskInitVal dq 0000200020002000h ; seed value before the >> 6
  56. uDitherXorMaskInitVal dq 0000800080008000h ; seed value before the >> 6
  57. uPix dd ? ; countdown of pixels left in the current span
  58. ;-----------------------------------------------------------------------------
  59. .code
  60. ;HRESULT MMX_RenderSpansAny(PD3DI_RASTCTX pCtx) -- walks every span of every primitive reached from pCtx->pPrim
  61. ;{ Register roles set up below: ebx = pCtx, ecx = pP, ebp = pS (so ebp is not a frame pointer here)
  62. PUBLIC _MMX_RenderSpansAny
  63. _MMX_RenderSpansAny:
  64. push ebp
  65. mov StackPos, esp ; remember esp in .data because ebp is reused as pS; the epilogue reloads it
  66. mov eax, esp ; eax -> saved ebp, so [eax+4] is the return address and [eax+8] the first argument
  67. sub esp, 0Ch ; This will need to change if stack frame size changes.
  68. push ebx
  69. push esi
  70. push edi
  71. ; Put pCtx into ebx
  72. mov ebx, [eax+8]
  73. ;PD3DI_RASTPRIM pP = pCtx->pPrim;
  74. mov ecx, XpCtx(pPrim)
  75. ; ATTENTION?? Should these be set by validation? I dont know
  76. ; why they would need to be since every span routine knows
  77. ; where the code needs to return. Also, How is pfnAlphaTestFailEnd
  78. ; different than pfnPixelEnd?
  79. mov eax, _MMX_LoopAnyEndPixel
  80. mov XpCtx(pfnPixelEnd), eax ; both end-of-pixel targets point at the loop tail in this file
  81. mov XpCtx(pfnAlphaTestFailEnd), eax
  82. ;while (pP)
  83. ;{
  84. PrimLoop:
  85. cmp ecx, 0 ; ecx = pP, a null pointer ends the primitive list
  86. je ExitPrimLoop
  87. ;UINT16 uSpans = pP->uSpans;
  88. movzx eax, word ptr XpP(uSpans)
  89. mov uSpans, eax
  90. ;PD3DI_RASTSPAN pS = (PD3DI_RASTSPAN)(pP + 1);
  91. mov ebp, ecx
  92. add ebp, SIZEOF_RASTPRIM ; ebp = first span, stored directly behind the primitive header
  93. ;while (uSpans-- > 0)
  94. ;{
  95. SpanLoop:
  96. mov edx, uSpans ; edx keeps the value from before the decrement for the test below
  97. mov eax, edx
  98. dec eax
  99. mov uSpans, eax ; post-decrement is stored even on the final, failing test
  100. test edx, edx
  101. jle ExitSpanLoop
  102. ;pCtx->pfnBegin(pCtx, pP, pS);
  103. ;-----------------------------------------------------------------------------
  104. ; LoopAny code inserted here. This is to get rid of an extra
  105. ; jump.
  106. ;-----------------------------------------------------------------------------
  107. ; Setup Code begins (per-span: pixel count, then fog color and fog delta terms when fog is enabled)
  108. ; get values to iterate
  109. ;uPix = pS->uPix;
  110. movzx eax, word ptr XpS(uPix)
  111. mov uPix, eax
  112. ; TODO Copy uFog and iDFog from pS to pCtx.SI
  113. ; so fog increment can be done faster in MMX.
  114. ; dont need to do this if there is no fog.
  115. ;if (pCtx->pdwRenderState[D3DRENDERSTATE_FOGENABLE]) {
  116. cmp dword ptr XpCtx(pdwRenderState+RS_FOGENABLE), 0
  117. je NoFogSetup
  118. ;D3DCOLOR FogColor = pCtx->pdwRenderState[D3DRENDERSTATE_FOGCOLOR];
  119. ;UINT16 FR = (UINT16)RGBA_GETRED(FogColor);
  120. ;UINT16 FG = (UINT16)RGBA_GETGREEN(FogColor);
  121. ;UINT16 FB = (UINT16)RGBA_GETBLUE(FogColor);
  122. pxor mm0, mm0 ; mm0 = 0, used to widen bytes into words
  123. movd mm1, XpCtx(pdwRenderState+RS_FOGCOLOR) ; mm1 = packed fog color bytes
  124. ;UINT16 uMFog = 0xff - (pS->uFog>>8);
  125. pcmpeqd mm2, mm2 ; mm2 = all ones, so every byte is 0xff
  126. movzx eax, word ptr XpS(uFog)
  127. shr eax, 8
  128. movd mm3, eax
  129. psubb mm2, mm3 ; low byte becomes 0xff - (uFog >> 8)
  130. punpcklbw mm2, mm0 ; widen to words, low word now holds uMFog
  131. punpcklwd mm2, mm2 ; Replicate uMFog
  132. punpckldq mm2, mm2
  133. ;pCtx->SI.uFogR = uMFog * FR; // 0.8 * 0.8 = 8.8
  134. ;pCtx->SI.uFogG = uMFog * FG;
  135. ;pCtx->SI.uFogB = uMFog * FB;
  136. punpcklbw mm1, mm0 ; fog color bytes widened into four words
  137. pmullw mm2, mm1
  138. movq XpCtxSI(uFogB), mm2 ; one 8-byte store starting at uFogB; presumably covers B, G, R and one more word -- TODO confirm layout
  139. ;INT32 iMDFog = -pS->iDFog;
  140. movsx eax, word ptr XpS(iDFog)
  141. neg eax
  142. movd mm3, eax
  143. punpcklwd mm3, mm3 ; replicate the low 16 bits of the negated delta
  144. punpckldq mm3, mm3
  145. ;// 1.7.8 * 8.0 >> 8 = 1.7.8 (ATTENTION this could overflow, but it is naturally aligned for
  146. ;// doing the walking. Can fix by changing precision of uFogR values, or by clamping
  147. ;// range of iDFog.
  148. ;pCtx->SI.iFogRDX = (INT16)((iMDFog * FR) >> 8);
  149. ;pCtx->SI.iFogGDX = (INT16)((iMDFog * FG) >> 8);
  150. ;pCtx->SI.iFogBDX = (INT16)((iMDFog * FB) >> 8);
  151. psllw mm1, 7 ; Have to lose a bit on fog or add some extra code
  152. pmulhw mm3, mm1 ; signed high half of the product, i.e. (delta * color << 7) >> 16
  153. psllw mm3, 1 ; net effect is a >> 8 with the lowest bit cleared
  154. ;// if iFog*DX is positive, iFog*DX will always be too small, hence no overflow
  155. ;// but if iFog*DX is negative, add some to make sure overflow does not
  156. ;// occur
  157. ;if (pCtx->SI.iFogRDX < 0)
  158. ;{
  159. ; pCtx->SI.iFogRDX = min(pCtx->SI.iFogRDX+4, 0);
  160. ;}
  161. pxor mm4, mm4 ; make zero for compare
  162. pcmpgtw mm4, mm3 ; ffff mask of all negative deltas
  163. movq mm5, mm4 ; save copy of mask
  164. pand mm4, MMWORD PTR uFogDXAdd ; 4 for negative deltas
  165. paddw mm3, mm4 ; 4 added to negative deltas
  166. movq mm2, mm3 ; copy of deltas after add
  167. pxor mm4, mm4 ; make zero for compare
  168. pcmpgtw mm2, mm4 ; ffff mask for all positive values
  169. pand mm2, mm5 ; ffff mask for all created positive values
  170. pandn mm2, mm3 ; all created positive values anded out to zero
  171. movq XpCtxSI(iFogBDX), mm2 ; save deltas
  172. ; Copy these values to Span Iterator so that they can be done at the same time
  173. ; as other increments.
  174. xor eax, eax ; NOTE(review): appears unused, the loads below only write ax
  175. mov ax, XpS(uFog)
  176. mov XpCtxSI(uFog), ax
  177. mov ax, XpS(iDFog)
  178. mov XpCtxSI(iDFog), ax
  179. ;}
  180. NoFogSetup:
  181. ; dont need to do this if not texture mapping (per-span texture coordinate and iSpecialW setup)
  182. ;if (pCtx->pdwRenderState[D3DRENDERSTATE_TEXTUREPERSPECTIVE])
  183. ;{
  184. cmp dword ptr XpCtx(pdwRenderState+RS_TEXTUREPERSPECTIVE), 0
  185. je SetupNonPerspective
  186. ;//pCtx->SI.iU1 = (pS->iW*(pS->iUoW1>>4))>>16; // 8.16 * 1.11.16 == 1.15.32 >> 16 == 1.15.16
  187. ;//pCtx->SI.iV1 = (pS->iW*(pS->iVoW1>>4))>>16;
  188. ;//pCtx->SI.iU2 = (pS->iW*(pS->iUoW2>>4))>>16;
  189. ;//pCtx->SI.iV2 = (pS->iW*(pS->iVoW2>>4))>>16;
  190. ;pCtx->SI.iDW = 0x0;
  191. mov dword ptr XpCtxSI(iDW), 0
  192. ; edi now is used to store the texture index
  193. push edi
  194. mov edi, 0
  195. LoopSetTexturePers:
  196. cmp edi, dword ptr XpCtx(cActTex) ; one pass per active texture, none when cActTex is 0
  197. je DoneSetTexturePers
  198. mov esi, XpS(iW) ; inputs for the macro below: esi = iW, mm5 = UoW/VoW pair for texture edi
  199. movq mm5, MMWORD PTR XpS(UVoW + edi * SIZEOF_UV_UNION)
  200. d_UoWVoWTimesW()
  201. inc edi
  202. jmp LoopSetTexturePers
  203. DoneSetTexturePers:
  204. ; Restore edi
  205. pop edi
  206. ;if (pP->iDOoWDX > 0)
  207. ;{
  208. cmp dword ptr XpP(iDOoWDX), 0
  209. jg SpecialWLast3 ; NOTE(review): the C pseudo-code takes the -3 branch when iDOoWDX > 0, this jump takes the other one -- confirm which is intended
  210. ;// iSpecialW should be negative for the first 3 pixels of span
  211. ;pCtx->SI.iSpecialW = -3;
  212. mov word ptr XpCtxSI(iSpecialW), -3
  213. jmp DoneSpecialWif
  214. ;}
  215. ;else
  216. ;{
  217. SpecialWLast3:
  218. ;// iSpecialW should be negative for the last 3 pixels of span
  219. ;pCtx->SI.iSpecialW = 0x7fff - uPix;
  220. mov eax, 07fffh
  221. sub eax, uPix
  222. ;pCtx->SI.iSpecialW += 5; // this may wrap, but it should
  223. add eax, 5
  224. mov XpCtxSI(iSpecialW), ax ; 16-bit store like the other two iSpecialW writes; storing eax also overwrote the two bytes after the field
  225. ;}
  226. DoneSpecialWif:
  227. jmp DonePerspectiveif
  228. ;}
  229. ;else
  230. ;{
  231. SetupNonPerspective:
  232. ; TODO Add assembly code for affine setup.
  233. ;pCtx->SI.iU1 = pS->iUoW1>>TEX_TO_FINAL_SHIFT; // 1.11.20 >> 4 == 1.15.16
  234. ;pCtx->SI.iV1 = pS->iVoW1>>TEX_TO_FINAL_SHIFT;
  235. ; edi now is used to store the texture index
  236. push edi
  237. mov edi, 0
  238. LoopSetTexture:
  239. cmp edi, dword ptr XpCtx(cActTex) ; one pass per active texture, none when cActTex is 0
  240. je DoneSetTexture
  241. movq mm5, XpS(UVoW + edi * SIZEOF_UV_UNION)
  242. d_UpdateNonPersp()
  243. inc edi
  244. jmp LoopSetTexture
  245. DoneSetTexture:
  246. ; Restore edi
  247. pop edi
  248. ;pCtx->SI.iDW = 0x0;
  249. mov dword ptr XpCtxSI(iDW), 0
  250. ;pCtx->SI.iSpecialW = 0;
  251. mov word ptr XpCtxSI(iSpecialW), 0
  252. ;}
  253. DonePerspectiveif:
  254. ; Static variables are placed in the .data section at the top of this file.
  255. ;static INT iSurfaceStep;
  256. ;static INT iZStep;
  257. ; Note: Dither code needs to be setup if either color dithering or alpha dithering are on.
  258. ;
  259. ;// Dither code depends on rendering direction.
  260. ;// Shift everything down by 6 then use multiply to shift up one to have an end result of either 565 or 555.
  261. ;static UINT64 uDitherXorMask; // will be either 1010b or 1000b (even or odd)
  262. ;static UINT64 uDitherXorXorMask;
  263. ;uDitherXorXorMask = 0x0000200020002000 >> 6;
  264. ;uDitherXorMask = 0x0000800080008000 >> 6;
  265. movq mm0, MMWORD PTR uDitherXorXorMaskInitVal
  266. psrlw mm0, 6 ; 2000h >> 6 = 0080h in each of the three low words
  267. movq MMWORD PTR uDitherXorXorMask, mm0
  268. movq mm0, MMWORD PTR uDitherXorMaskInitVal
  269. psrlw mm0, 6 ; 8000h >> 6 = 0200h in each of the three low words
  270. movq MMWORD PTR uDitherXorMask, mm0
  271. ;if (pP->uFlags & D3DI_RASTPRIM_X_DEC)
  272. ;{
  273. mov eax, XpP(uFlags)
  274. and eax, D3DI_RASTPRIM_X_DEC
  275. test eax, eax ; redundant, the and above already set ZF
  276. jz LeftToRightSpan
  277. ;iZStep = -pCtx->iZStep;
  278. mov eax, XpCtx(iZStep)
  279. neg eax
  280. mov iZStep, eax
  281. ;iSurfaceStep = -pCtx->iSurfaceStep;
  282. mov eax, XpCtx(iSurfaceStep)
  283. neg eax
  284. mov iSurfaceStep, eax
  285. ;pCtx->SI.iXStep = -1; // for dithering.
  286. ; This shouldnt be needed for dithering
  287. ; since I do it differently. TODO check this
  288. ;_asm{
  289. ; Dither xor mask starting value changes
  290. movq mm1, MMWORD PTR uDitherXorMask
  291. por mm1, MMWORD PTR uDitherXorXorMask ; X_DEC spans start with both mask bits set (0280h per word)
  292. movq MMWORD PTR uDitherXorMask, mm1
  293. ;}
  294. ;}
  295. jmp DoneSpanDirif
  296. ;else
  297. ;{
  298. LeftToRightSpan:
  299. ;iZStep = pCtx->iZStep;
  300. mov eax, XpCtx(iZStep)
  301. mov iZStep, eax
  302. ;iSurfaceStep = pCtx->iSurfaceStep;
  303. mov eax, XpCtx(iSurfaceStep)
  304. mov iSurfaceStep, eax
  305. ;pCtx->SI.iXStep = 1;
  306. ; iXStep shouldnt be needed. TODO check this.
  307. ;}
  308. DoneSpanDirif:
  309. ;// ----------------------------------------------------------------------------------------------------------------
  310. ;// Doing dither setup code even if dither is not turned on.
  311. ;// This code is not very clean. TODO clean it up after it works.
  312. ;_asm{
  313. ;//if(pS->uX & 1) uDitherXorValue |= uDitherXorXorValue;
  314. movzx eax, word ptr XpS(uX)
  315. ;// Create Zero or uDitherXorXorValue based on low bit of uX
  316. and eax, 1
  317. shl eax, (13 - 6) ; 0 or 0080h, the same bit as uDitherXorXorMask holds per word
  318. movd mm1, eax
  319. punpcklwd mm1, mm1 ; replicate into all four words, including the top word that the init values leave at zero
  320. punpckldq mm1, mm1
  321. ; TODO Do I need to and here so that I dont disrupt Alpha channel???
  322. pxor mm1, MMWORD PTR uDitherXorMask ; NOTE(review): this toggles the bit while the C comment above describes an OR; they differ once the X_DEC path has set the bit
  323. movq MMWORD PTR uDitherXorMask, mm1
  324. ;}
  325. ;// Keep dither pattern up to date directly, so keeping SI.uX up
  326. ;// to date is not necessary, except for debug
  327. ;//pCtx->SI.uDitherOffset = (pS->uY & 3) | ((pS->uX & 3)<<2);
  328. ;// I move along the dithertable completely orthogonal to the way the C code does. This should not make a difference.
  329. ;g_uDitherValue = uMMXDitherTable[( ((pS->uY & 3)<<2) | (pS->uX & 3))] >> 6; // the asm table is stored unshifted, the psrlw below does the shift
  330. movzx eax, word ptr XpS(uY)
  331. and eax, 3
  332. shl eax, 2
  333. movzx edx, word ptr XpS(uX)
  334. and edx, 3
  335. or eax, edx ; eax = table index 0..15
  336. shl eax, 3 ; scale by 8 bytes per qword entry
  337. movq mm1, MMWORD PTR uMMXDitherTable[eax]
  338. psrlw mm1, 6
  339. movq MMWORD PTR g_uDitherValue, mm1
  340. ;//if colormode is 565 then shift all green values down by one more.
  341. ;// TODO Add RAST_STYPE_B5G5R5A1 when code is done for that format.
  342. ;// Are these multiplies noticeable or should I use two tables?
  343. ;switch(pCtx->iSurfaceType)
  344. ;{
  345. ;case RAST_STYPE_B5G6R5:
  346. cmp dword ptr XpCtx(iSurfaceType), RAST_STYPE_B5G6R5
  347. jne Test555 ; every surface type other than 565 takes the 555 scaling below
  348. ;_asm{
  349. movq mm1, MMWORD PTR uDitherXorMask
  350. pmullw mm1, MMWORD PTR u565MultShifter ; word factors 2, 1, 2, 0: second word stays as is, top word is zeroed
  351. movq MMWORD PTR uDitherXorMask, mm1
  352. movq mm1, MMWORD PTR uDitherXorXorMask
  353. pmullw mm1, MMWORD PTR u565MultShifter
  354. movq MMWORD PTR uDitherXorXorMask, mm1
  355. movq mm1, MMWORD PTR g_uDitherValue
  356. pmullw mm1, MMWORD PTR u565MultShifter
  357. movq MMWORD PTR g_uDitherValue, mm1
  358. ;}
  359. ;break;
  360. jmp DoneModDitherValues
  361. Test555:
  362. ;case RAST_STYPE_B5G5R5:
  363. ; Commented out this conditional because dither needs to be on for alpha dithering
  364. ; which is independent of what type of color output we want.
  365. ;
  366. ;cmp dword ptr XpCtx(iSurfaceType), RAST_STYPE_B5G5R5
  367. ;jne DoneModDitherValues
  368. ;_asm{
  369. movq mm1, MMWORD PTR uDitherXorMask
  370. pmullw mm1, MMWORD PTR u555MultShifter ; word factors 2, 2, 2, 0: three low words doubled, top word is zeroed
  371. movq MMWORD PTR uDitherXorMask, mm1
  372. movq mm1, MMWORD PTR uDitherXorXorMask
  373. pmullw mm1, MMWORD PTR u555MultShifter
  374. movq MMWORD PTR uDitherXorXorMask, mm1
  375. movq mm1, MMWORD PTR g_uDitherValue
  376. pmullw mm1, MMWORD PTR u555MultShifter
  377. movq MMWORD PTR g_uDitherValue, mm1
  378. ;}
  379. ;break;
  380. ;}
  381. DoneModDitherValues:
  382. ; Setup Code Ends
  383. ; ----------------------------------------------------------------------------------------------------------------
  384. ; Loop Code Begins (one trip per pixel: jump into the pixel code, come back at the end-of-pixel label, step pointers and dither)
  385. ;//while (1)
  386. ;//{
  387. PixelLoop:
  388. ; uncomment to look at a span in a particular range
  389. ; movzx eax, word ptr XpS(uX)
  390. ; cmp eax, 340
  391. ; jl NotSpecial
  392. ; cmp eax, 363
  393. ; jg NotSpecial
  394. ; cmp word ptr XpS(uY), 330
  395. ; jne NotSpecial
  396. ;
  397. ; ; Special
  398. ; xor eax, eax
  399. ;
  400. ;NotSpecial:
  401. ; Probably dont need to move this into a register first.
  402. mov eax, XpCtx(pfnLoopEnd)
  403. ;pCtx->pfnLoopEnd(pCtx, pP, pS);
  404. jmp eax ; a jump, not a call: the target presumably continues through pfnPixelEnd or pfnAlphaTestFailEnd, both set to the label below on entry
  405. ; Just put EndBead here for now. After Kent and Drew decide on beads, code can be moved around.
  406. PUBLIC _MMX_LoopAnyEndPixel
  407. _MMX_LoopAnyEndPixel:
  408. ;//if (--uPix <= 0)
  409. ;// break;
  410. dec uPix ;// BUG BUG?? uPix should never start as zero should it?
  411. ;// if so, this is a bug.
  412. jle ExitPixelLoop
  413. ;//pS->pZ += iZStep;
  414. ;//pS->pSurface += iSurfaceStep;
  415. mov eax, dword ptr XpS(pZ)
  416. mov edx, dword ptr XpS(pSurface)
  417. add eax, iZStep ; step values already carry the sign for the span direction
  418. add edx, iSurfaceStep
  419. mov dword ptr XpS(pZ), eax
  420. mov dword ptr XpS(pSurface), edx
  421. ;// dont update this in dithered write functions because of alpha test
  422. ;// ATTENTION could specialize loop routines based on things like dither and Z buffer
  423. ;//pCtx->SI.uDitherOffset = (pCtx->SI.uDitherOffset + (pCtx->SI.iXStep<<2)) & 0xf;
  424. ;// May Not need DitherOffset, but I might have to update xor masks.
  425. movq mm3, MMWORD PTR g_uDitherValue ; four bit value from table
  426. movq mm4, MMWORD PTR uDitherXorMask ; will be either 1010b or 1000b (even or odd)
  427. pxor mm3, mm4 ; change dither value
  428. pxor mm4, MMWORD PTR uDitherXorXorMask ; always 0010b
  429. movq MMWORD PTR uDitherXorMask, mm4 ; save new xor mask
  430. movq MMWORD PTR g_uDitherValue, mm3 ; save new dither value.
  431. ;#ifdef DBG
  432. ;// handy for debug to see where we are
  433. ;//pS->uX += (INT16)pCtx->SI.iXStep;
  434. ;#endif
  435. ;// } // while
  436. jmp PixelLoop
  437. ExitPixelLoop:
  438. ; Loop code ends
  439. ;-----------------------------------------------------------------------------
  440. ; LoopAny code ends here (what follows advances to the next span, then the next primitive, then returns)
  441. ;-----------------------------------------------------------------------------
  442. ;pS++;
  443. add ebp, SIZEOF_RASTSPAN
  444. ;}
  445. jmp SpanLoop
  446. ExitSpanLoop:
  447. ;pP = pP->pNext;
  448. mov ecx, XpP(pNext)
  449. ;}
  450. jmp PrimLoop
  451. ExitPrimLoop:
  452. ;_asm{
  453. emms ; clear MMX state before returning to the caller
  454. ;}
  455. ;return S_OK;
  456. xor eax, eax ; eax = 0 is the S_OK return value
  457. ;}
  458. pop edi ; undo the three entry pushes in reverse order
  459. pop esi
  460. pop ebx
  461. mov esp, StackPos ; back to the saved ebp slot, which also drops the 0Ch bytes reserved on entry
  462. pop ebp
  463. ret
  464. ; ATTENTION Just putting this here, because selection code needs a function pointer
  465. PUBLIC _MMX_LoopAny
  466. _MMX_LoopAny:
  467. ; This Should never be called by anything.
  468. ret ; placeholder body, the real loop code is inlined in _MMX_RenderSpansAny above
  469. END