Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

554 lines
18 KiB

  1. ;-----------------------------------------------------------------------------
  2. ;
  3. ; This file contains the general span parsing code combined with loop code.
  4. ;
  5. ;
  6. ; WARNING WARNING WARNING
  7. ; This asm file generated from mas file.
  8. ; EDIT THE MAS FILE.
  9. ; I warned you.
  10. ; WARNING WARNING WARNING
  11. ;
  12. ;-----------------------------------------------------------------------------
  13. INCLUDE iammx.inc
  14. INCLUDE offs_acp.inc
  15. include(`m4hdr.mh')dnl
  16. include(`cvars.mh')dnl
  17. include(`texaddra.mh')dnl
  18. EXTERN g_uDitherValue:MMWORD
  19. .586
  20. .model flat
  21. ; Big separating lines separate code into span code
  22. ; and loop code. If span and loop are not going to
  23. ; end up being combined then it will be easy to
  24. ; separate the code.
  25. .data
  ; Static working storage for the span and pixel loops. These live in .data
  ; rather than on the stack, so the values are shared by every call.
  26. ;-----------------------------------------------------------------------------
  27. ; Span Variables
  28. StackPos dd ? ; esp as it was right after the entry push of ebp; reloaded into esp before returning
  29. uSpans dd ? ; spans left in the current primitive (decremented at the top of SpanLoop)
  30. ;-----------------------------------------------------------------------------
  31. ;-----------------------------------------------------------------------------
  32. ; Loop Variables
  33. ;// Table is needed to get starting value for dither, but can use xor trick afterwards to generate consecutive values.
  34. ;// Need to compare table based method with Xor method and compare timing/memory usage. It is good to keep xor method
  35. ;// around since it can be used more efficiently when there are more free registers (i.e. a monolithic routine Probably
  36. ;// only enough registers to do it in a gouraud or gouraud/specular case).
  37. ;static UINT64 uMMXDitherTable[16] =
  38. ;{
  39. ; 0x0000000000000000 >> 6, 0x0000800080008000 >> 6, 0x0000200020002000 >> 6, 0x0000a000a000a000 >> 6,
  40. ; 0x0000c000c000c000 >> 6, 0x0000400040004000 >> 6, 0x0000e000e000e000 >> 6, 0x0000600060006000 >> 6,
  41. ; 0x0000300030003000 >> 6, 0x0000b000b000b000 >> 6, 0x0000100010001000 >> 6, 0x0000900090009000 >> 6,
  42. ; 0x0000f000f000f000 >> 6, 0x0000700070007000 >> 6, 0x0000d000d000d000 >> 6, 0x0000500050005000 >> 6
  43. ;};
  ; 16 qword entries, selected by ((uY & 3) << 2) | (uX & 3). Unlike the C table
  ; above, the entries here are stored unshifted; the setup code shifts each
  ; loaded entry right by 6 per word.
  44. uMMXDitherTable dq 000000000000000h , 000800080008000h , 000200020002000h , 000a000a000a000h
  45. dq 000c000c000c000h , 000400040004000h , 000e000e000e000h , 000600060006000h
  46. dq 000300030003000h , 000b000b000b000h , 000100010001000h , 000900090009000h
  47. dq 000f000f000f000h , 000700070007000h , 000d000d000d000h , 000500050005000h
  48. u565MultShifter dq 00000000200010002h ; pmullw factors per word, low to high: 2, 1, 2, 0
  49. u555MultShifter dq 00000000200020002h ; pmullw factors per word, low to high: 2, 2, 2, 0
  50. uFogDXAdd dq 00000000400040004h ; 4 in each of the three low words; added to negative fog deltas
  51. iSurfaceStep dd ? ; per-pixel surface pointer step; negated copy for X-decreasing primitives
  52. iZStep dd ? ; per-pixel Z pointer step; negated copy for X-decreasing primitives
  53. uDitherXorXorMask dq 0 ; rebuilt from the init value below for every span
  54. uDitherXorMask dq 0 ; rebuilt from the init value below for every span, then updated per pixel
  55. uDitherXorXorMaskInitVal dq 0000200020002000h
  56. uDitherXorMaskInitVal dq 0000800080008000h
  57. uPix dd ? ; pixels left in the current span
  58. ;-----------------------------------------------------------------------------
  59. .code
  60. ;HRESULT MMX_RenderSpansAny(PD3DI_RASTCTX pCtx)
  61. ;{
  ; Entry point. Walks every primitive chained from pCtx->pPrim and every span
  ; that follows each primitive header.
  ; In:  pCtx is the first stack argument ([esp+4] on entry).
  ; Out: eax = 0 (S_OK) on the return path at the end of the routine.
  ; Registers as set up below: ebx = pCtx, ecx = pP (current primitive),
  ; ebp = pS (current span). The struct accessor macros used below are defined
  ; outside this file; presumably they address through those registers - confirm.
  ; StackPos, uSpans and uPix are .data variables, so this state is shared
  ; between calls.
  62. PUBLIC _MMX_RenderSpansAny
  63. _MMX_RenderSpansAny:
  64. push ebp
  65. mov StackPos, esp ; remembered so the epilogue can reload esp from it
  66. mov eax, esp ; eax -> saved ebp, so [eax+4] is the return address and [eax+8] is pCtx
  67. sub esp, 0Ch ; This will need to change if stack frame size changes.
  68. push ebx
  69. push esi
  70. push edi
  71. ; Put pCtx into ebx
  72. mov ebx, [eax+8]
  73. ;PD3DI_RASTPRIM pP = pCtx->pPrim;
  74. mov ecx, XpCtx(pPrim)
  75. ; ATTENTION?? Should these be set by validation? I dont know
  76. ; why they would need to be since every span routine knows
  77. ; where the code needs to return. Also, How is pfnAlphaTestFailEnd
  78. ; different than pfnPixelEnd?
  79. mov eax, _MMX_LoopAnyEndPixel ; both callbacks below are pointed at the end-of-pixel label
  80. mov XpCtx(pfnPixelEnd), eax
  81. mov XpCtx(pfnAlphaTestFailEnd), eax
  82. ;while (pP)
  83. ;{
  84. PrimLoop:
  85. cmp ecx, 0
  86. je ExitPrimLoop
  87. ;UINT16 uSpans = pP->uSpans;
  88. movzx eax, word ptr XpP(uSpans)
  89. mov uSpans, eax
  90. ;PD3DI_RASTSPAN pS = (PD3DI_RASTSPAN)(pP + 1);
  91. mov ebp, ecx
  92. add ebp, SIZEOF_RASTPRIM ; first span sits directly after the primitive header
  93. ;while (uSpans-- > 0)
  94. ;{
  95. SpanLoop:
  96. mov edx, uSpans ; edx keeps the value from before the decrement for the test below
  97. mov eax, edx
  98. dec eax
  99. mov uSpans, eax
  100. test edx, edx
  101. jle ExitSpanLoop ; leave when the pre-decrement count was <= 0
  102. ;pCtx->pfnBegin(pCtx, pP, pS);
  ; Per-span setup, part 1: latch the pixel count, then (only when fog is
  ; enabled) precompute the fog colour terms and their per-pixel deltas into
  ; the span iterator.
  103. ;-----------------------------------------------------------------------------
  104. ; LoopAny code inserted here. This is to get rid of an extra
  105. ; jump.
  106. ;-----------------------------------------------------------------------------
  107. ; Setup Code begins
  108. ; get values to iterate
  109. ;uPix = pS->uPix;
  110. movzx eax, word ptr XpS(uPix)
  111. mov uPix, eax
  112. ; TODO Copy uFog and iDFog from pS to pCtx.SI
  113. ; so fog increment can be done faster in MMX.
  114. ; dont need to do this if there is no fog.
  115. ;if (pCtx->pdwRenderState[D3DRENDERSTATE_FOGENABLE]) {
  116. cmp dword ptr XpCtx(pdwRenderState+RS_FOGENABLE), 0
  117. je NoFogSetup
  118. ;D3DCOLOR FogColor = pCtx->pdwRenderState[D3DRENDERSTATE_FOGCOLOR];
  119. ;UINT16 FR = (UINT16)RGBA_GETRED(FogColor);
  120. ;UINT16 FG = (UINT16)RGBA_GETGREEN(FogColor);
  121. ;UINT16 FB = (UINT16)RGBA_GETBLUE(FogColor);
  122. pxor mm0, mm0 ; zero, used below to widen bytes to words
  123. movd mm1, XpCtx(pdwRenderState+RS_FOGCOLOR)
  124. ;UINT16 uMFog = 0xff - (pS->uFog>>8);
  125. pcmpeqd mm2, mm2 ; all ones, so the low byte starts as 0xff
  126. movzx eax, word ptr XpS(uFog)
  127. shr eax, 8
  128. movd mm3, eax
  129. psubb mm2, mm3 ; low byte = 0xff - (uFog >> 8)
  130. punpcklbw mm2, mm0 ; widen that byte to a word
  131. punpcklwd mm2, mm2 ; Replicate uMFog
  132. punpckldq mm2, mm2
  133. ;pCtx->SI.uFogR = uMFog * FR; // 0.8 * 0.8 = 8.8
  134. ;pCtx->SI.uFogG = uMFog * FG;
  135. ;pCtx->SI.uFogB = uMFog * FB;
  136. punpcklbw mm1, mm0 ; fog colour bytes widened to four words
  137. pmullw mm2, mm1
  138. movq XpCtxSI(uFogB), mm2 ; single 64-bit store starting at uFogB
  139. ;INT32 iMDFog = -pS->iDFog;
  140. movsx eax, word ptr XpS(iDFog)
  141. neg eax
  142. movd mm3, eax
  143. punpcklwd mm3, mm3 ; replicate the low word of -iDFog into all four words
  144. punpckldq mm3, mm3
  145. ;// 1.7.8 * 8.0 >> 8 = 1.7.8 (ATTENTION this could overflow, but it is naturally aligned for
  146. ;// doing the walking. Can fix by changing precision of uFogR values, or by clamping
  147. ;// range of iDFog.
  148. ;pCtx->SI.iFogRDX = (INT16)((iMDFog * FR) >> 8);
  149. ;pCtx->SI.iFogGDX = (INT16)((iMDFog * FG) >> 8);
  150. ;pCtx->SI.iFogBDX = (INT16)((iMDFog * FB) >> 8);
  151. psllw mm1, 7 ; Have to lose a bit on fog or add some extra code
  152. pmulhw mm3, mm1 ; high word of the product, i.e. (iMDFog * colour) >> 9
  153. psllw mm3, 1 ; back up by one bit; the lowest bit of each delta is therefore zero
  154. ;// if iFog*DX is positive, iFog*DX will always be too small, hence no overflow
  155. ;// but if iFog*DX is negative, add some to make sure overflow does not
  156. ;// occur
  157. ;if (pCtx->SI.iFogRDX < 0)
  158. ;{
  159. ; pCtx->SI.iFogRDX = min(pCtx->SI.iFogRDX+4, 0);
  160. ;}
  161. pxor mm4, mm4 ; make zero for compare
  162. pcmpgtw mm4, mm3 ; ffff mask of all negative deltas
  163. movq mm5, mm4 ; save copy of mask
  164. pand mm4, MMWORD PTR uFogDXAdd ; 4 for negative deltas
  165. paddw mm3, mm4 ; 4 added to negative deltas
  166. movq mm2, mm3 ; copy of deltas after add
  167. pxor mm4, mm4 ; make zero for compare
  168. pcmpgtw mm2, mm4 ; ffff mask for all positive values
  169. pand mm2, mm5 ; ffff mask for all created positive values
  170. pandn mm2, mm3 ; all created positive values anded out to zero
  171. movq XpCtxSI(iFogBDX), mm2 ; save deltas
  172. ; Copy these values to Span Iterator so that they can be done at the same time
  173. ; as other increments.
  174. xor eax, eax
  175. mov ax, XpS(uFog)
  176. mov XpCtxSI(uFog), ax ; 16-bit copy
  177. mov ax, XpS(iDFog)
  178. mov XpCtxSI(iDFog), ax ; 16-bit copy
  179. ;}
  180. NoFogSetup:
  ; Per-span setup, part 2: texture coordinates and iSpecialW.
  ; With perspective on, the two texture address macros are run with esi = iW
  ; and mm5 = the packed UoW/VoW pair for stage 1 and then stage 2, and
  ; iSpecialW is set to -3 or to 0x7fff - uPix + 5 depending on the sign of
  ; iDOoWDX. With perspective off, the affine macros are run and iSpecialW = 0.
  ; iDW is cleared on both paths. iSpecialW is a 16-bit field: every store to
  ; it in this block is a word store.
  181. ; dont need to do this if not texture mapping
  182. ;if (pCtx->pdwRenderState[D3DRENDERSTATE_TEXTUREPERSPECTIVE])
  183. ;{
  184. cmp dword ptr XpCtx(pdwRenderState+RS_TEXTUREPERSPECTIVE), 0
  185. je SetupNonPerspective
  186. ;//pCtx->SI.iU1 = (pS->iW*(pS->iUoW1>>4))>>16; // 8.16 * 1.11.16 == 1.15.32 >> 16 == 1.15.16
  187. ;//pCtx->SI.iV1 = (pS->iW*(pS->iVoW1>>4))>>16;
  188. ;//pCtx->SI.iU2 = (pS->iW*(pS->iUoW2>>4))>>16;
  189. ;//pCtx->SI.iV2 = (pS->iW*(pS->iVoW2>>4))>>16;
  190. ;pCtx->SI.iDW = 0x0;
  191. mov dword ptr XpCtxSI(iDW), 0
  192. mov esi, XpS(iW)
  193. movq mm5, MMWORD PTR XpS(iUoW1)
  194. d_UoWVoWTimesW(1)
  195. mov esi, XpS(iW) ; reloaded for the second stage; presumably the macro above does not preserve esi
  196. movq mm5, MMWORD PTR XpS(iUoW2)
  197. d_UoWVoWTimesW(2)
  198. ;if (pP->iDOoWDX > 0)
  199. ;{
  200. cmp dword ptr XpP(iDOoWDX), 0
  201. jle SpecialWLast3 ; else arm is taken when iDOoWDX <= 0, matching the C condition above
  202. ;// iSpecialW should be negative for the first 3 pixels of span
  203. ;pCtx->SI.iSpecialW = -3;
  204. mov word ptr XpCtxSI(iSpecialW), -3
  205. jmp DoneSpecialWif
  206. ;}
  207. ;else
  208. ;{
  209. SpecialWLast3:
  210. ;// iSpecialW should be negative for the last 3 pixels of span
  211. ;pCtx->SI.iSpecialW = 0x7fff - uPix;
  212. mov eax, 07fffh
  213. sub eax, uPix
  214. ;pCtx->SI.iSpecialW += 5; // this may wrap, but it should
  215. add eax, 5
  216. mov word ptr XpCtxSI(iSpecialW), ax ; word store only, so the two bytes after the field are left alone
  217. ;}
  218. DoneSpecialWif:
  219. jmp DonePerspectiveif
  220. ;}
  221. ;else
  222. ;{
  223. SetupNonPerspective:
  224. ; TODO Add assembly code for affine setup.
  225. ;pCtx->SI.iU1 = pS->iUoW1>>TEX_TO_FINAL_SHIFT; // 1.11.20 >> 4 == 1.15.16
  226. ;pCtx->SI.iV1 = pS->iVoW1>>TEX_TO_FINAL_SHIFT;
  227. movq mm5, XpS(iUoW1)
  228. d_UpdateNonPersp(1)
  229. ;pCtx->SI.iU2 = pS->iUoW2>>TEX_TO_FINAL_SHIFT;
  230. ;pCtx->SI.iV2 = pS->iVoW2>>TEX_TO_FINAL_SHIFT;
  231. movq mm5, XpS(iUoW2)
  232. d_UpdateNonPersp(2)
  233. ;pCtx->SI.iDW = 0x0;
  234. mov dword ptr XpCtxSI(iDW), 0
  235. ;pCtx->SI.iSpecialW = 0;
  236. mov word ptr XpCtxSI(iSpecialW), 0
  237. ;}
  238. DonePerspectiveif:
  239. ; Static variables are placed in
  ; Per-span setup, part 3: reset the two dither xor masks from their init
  ; values (each word shifted right by 6), then pick the sign of the Z and
  ; surface steps from the X_DEC flag of the primitive.
  240. ;static INT iSurfaceStep;
  241. ;static INT iZStep;
  242. ; Note: Dither code needs to be setup if either color dithering or alpha dithering are on.
  243. ;
  244. ;// Dither code depends on rendering direction.
  245. ;// Shift everything down by 6 then use multiply to shift up one to have an end result of either 565 or 555.
  246. ;static UINT64 uDitherXorMask; // will be either 1010b or 1000b (even or odd)
  247. ;static UINT64 uDitherXorXorMask;
  248. ;uDitherXorXorMask = 0x0000200020002000 >> 6;
  249. ;uDitherXorMask = 0x0000800080008000 >> 6;
  250. movq mm0, MMWORD PTR uDitherXorXorMaskInitVal
  251. psrlw mm0, 6
  252. movq MMWORD PTR uDitherXorXorMask, mm0
  253. movq mm0, MMWORD PTR uDitherXorMaskInitVal
  254. psrlw mm0, 6
  255. movq MMWORD PTR uDitherXorMask, mm0
  256. ;if (pP->uFlags & D3DI_RASTPRIM_X_DEC)
  257. ;{
  258. mov eax, XpP(uFlags)
  259. and eax, D3DI_RASTPRIM_X_DEC
  260. test eax, eax ; the and above already set ZF; this repeats the test
  261. jz LeftToRightSpan
  262. ;iZStep = -pCtx->iZStep;
  263. mov eax, XpCtx(iZStep)
  264. neg eax
  265. mov iZStep, eax
  266. ;iSurfaceStep = -pCtx->iSurfaceStep;
  267. mov eax, XpCtx(iSurfaceStep)
  268. neg eax
  269. mov iSurfaceStep, eax
  270. ;pCtx->SI.iXStep = -1; // for dithering.
  271. ; This shouldnt be needed for dithering
  272. ; since I do it differently. TODO check this
  273. ;_asm{
  274. ; Dither xor mask starting value changes
  275. movq mm1, MMWORD PTR uDitherXorMask
  276. por mm1, MMWORD PTR uDitherXorXorMask ; start from mask | xorxor when walking in decreasing X
  277. movq MMWORD PTR uDitherXorMask, mm1
  278. ;}
  279. ;}
  280. jmp DoneSpanDirif
  281. ;else
  282. ;{
  283. LeftToRightSpan:
  284. ;iZStep = pCtx->iZStep;
  285. mov eax, XpCtx(iZStep)
  286. mov iZStep, eax
  287. ;iSurfaceStep = pCtx->iSurfaceStep;
  288. mov eax, XpCtx(iSurfaceStep)
  289. mov iSurfaceStep, eax
  290. ;pCtx->SI.iXStep = 1;
  291. ; iXStep shouldnt be needed. TODO check this.
  292. ;}
  293. DoneSpanDirif:
  ; Per-span setup, part 4: seed the dither state for the first pixel. The low
  ; bit of uX toggles the xor mask, the starting dither value is fetched from
  ; the table by (uY, uX), and all three dither qwords are then scaled by the
  ; 565 or 555 multiplier depending on the surface type.
  294. ;// ----------------------------------------------------------------------------------------------------------------
  295. ;// Doing dither setup code even if dither is not turned on.
  296. ;// This code is not very clean. TODO clean it up after it works.
  297. ;_asm{
  298. ;//if(pS->uX & 1) uDitherXorValue |= uDitherXorXorValue;
  299. movzx eax, word ptr XpS(uX)
  300. ;// Create Zero or uDitherXorXorValue based on low bit of uX
  301. and eax, 1
  302. shl eax, (13 - 6) ; bit 0 becomes 0x80, which is 0x2000 >> 6
  303. movd mm1, eax
  304. punpcklwd mm1, mm1 ; replicate the word across the qword
  305. punpckldq mm1, mm1
  306. ; TODO Do I need to and here so that I dont disrupt Alpha channel???
  307. pxor mm1, MMWORD PTR uDitherXorMask ; note: xor, although the C line above says or
  308. movq MMWORD PTR uDitherXorMask, mm1
  309. ;}
  310. ;// Keep dither pattern up to date directly, so keeping SI.uX up
  311. ;// to date is not necessary, except for debug
  312. ;//pCtx->SI.uDitherOffset = (pS->uY & 3) | ((pS->uX & 3)<<2);
  313. ;// I move along the dithertable completely orthogonal to the way the C code does. This should not make a difference.
  314. ;g_uDitherValue = uMMXDitherTable[( ((pS->uY & 3)<<2) | (pS->uX & 3))] >> 6; // the table here is unshifted, so the shift is done after the load
  315. movzx eax, word ptr XpS(uY)
  316. and eax, 3
  317. shl eax, 2
  318. movzx edx, word ptr XpS(uX)
  319. and edx, 3
  320. or eax, edx
  321. shl eax, 3 ; entry number times 8 bytes per qword entry
  322. movq mm1, MMWORD PTR uMMXDitherTable[eax]
  323. psrlw mm1, 6
  324. movq MMWORD PTR g_uDitherValue, mm1
  325. ;//if colormode is 565 then shift all green values down by one more.
  326. ;// TODO Add RR_STYPE_B5G5R5A1 when code is done for that format.
  327. ;// Are these multiplies noticeable or should I use two tables?
  328. ;switch(pCtx->iSurfaceType)
  329. ;{
  330. ;case RR_STYPE_B5G6R5:
  331. cmp dword ptr XpCtx(iSurfaceType), RR_STYPE_B5G6R5
  332. jne Test555
  333. ;_asm{
  334. movq mm1, MMWORD PTR uDitherXorMask
  335. pmullw mm1, MMWORD PTR u565MultShifter
  336. movq MMWORD PTR uDitherXorMask, mm1
  337. movq mm1, MMWORD PTR uDitherXorXorMask
  338. pmullw mm1, MMWORD PTR u565MultShifter
  339. movq MMWORD PTR uDitherXorXorMask, mm1
  340. movq mm1, MMWORD PTR g_uDitherValue
  341. pmullw mm1, MMWORD PTR u565MultShifter
  342. movq MMWORD PTR g_uDitherValue, mm1
  343. ;}
  344. ;break;
  345. jmp DoneModDitherValues
  346. Test555:
  347. ;case RR_STYPE_B5G5R5:
  348. ; Commented out this conditional because dither needs to be on for alpha dithering
  349. ; which is independent of what type of color output we want.
  350. ; As a result every surface type other than 565 takes the 555 scaling below.
  351. ;cmp dword ptr XpCtx(iSurfaceType), RR_STYPE_B5G5R5
  352. ;jne DoneModDitherValues
  353. ;_asm{
  354. movq mm1, MMWORD PTR uDitherXorMask
  355. pmullw mm1, MMWORD PTR u555MultShifter
  356. movq MMWORD PTR uDitherXorMask, mm1
  357. movq mm1, MMWORD PTR uDitherXorXorMask
  358. pmullw mm1, MMWORD PTR u555MultShifter
  359. movq MMWORD PTR uDitherXorXorMask, mm1
  360. movq mm1, MMWORD PTR g_uDitherValue
  361. pmullw mm1, MMWORD PTR u555MultShifter
  362. movq MMWORD PTR g_uDitherValue, mm1
  363. ;}
  364. ;break;
  365. ;}
  366. DoneModDitherValues:
  367. ; Setup Code Ends
  368. ; ----------------------------------------------------------------------------------------------------------------
  369. ; Loop Code Begins
  ; Top of the per-pixel loop. Control leaves through an indirect jump to the
  ; address held in pCtx->pfnLoopEnd; the target code is not in this file.
  ; NOTE(review): it is presumably expected to finish by jumping to the
  ; pfnPixelEnd / pfnAlphaTestFailEnd address stored at entry - confirm.
  370. ;//while (1)
  371. ;//{
  372. PixelLoop:
  373. ; uncomment to look at a span in a particular range
  374. ; movzx eax, word ptr XpS(uX)
  375. ; cmp eax, 340
  376. ; jl NotSpecial
  377. ; cmp eax, 363
  378. ; jg NotSpecial
  379. ; cmp word ptr XpS(uY), 330
  380. ; jne NotSpecial
  381. ;
  382. ; ; Special
  383. ; xor eax, eax
  384. ;
  385. ;NotSpecial:
  386. ; Probably dont need to move this into a register first.
  387. mov eax, XpCtx(pfnLoopEnd)
  388. ;pCtx->pfnLoopEnd(pCtx, pP, pS);
  389. jmp eax ; a jump, not a call: no return address is pushed
  390. ; Just put EndBead here for now. After Kent and Drew decide on beads, code can be moved around.
  ; End-of-pixel step. Counts the pixel off; if more remain it advances the Z
  ; and surface pointers of the span by the signed steps, rolls the dither
  ; value and xor mask forward, and loops. Otherwise it moves on to the next
  ; span. Its address is what the entry code stores into pfnPixelEnd and
  ; pfnAlphaTestFailEnd.
  391. PUBLIC _MMX_LoopAnyEndPixel
  392. _MMX_LoopAnyEndPixel:
  393. ;//if (--uPix <= 0)
  394. ;// break;
  395. dec uPix ;// BUG BUG?? uPix should never start as zero should it?
  396. ;// if so, this is a bug.
  397. jle ExitPixelLoop ; uses the flags from the dec above
  398. ;//pS->pZ += iZStep;
  399. ;//pS->pSurface += iSurfaceStep;
  400. mov eax, dword ptr XpS(pZ)
  401. mov edx, dword ptr XpS(pSurface)
  402. add eax, iZStep
  403. add edx, iSurfaceStep
  404. mov dword ptr XpS(pZ), eax
  405. mov dword ptr XpS(pSurface), edx
  406. ;// dont update this in dithered write functions because of alpha test
  407. ;// ATTENTION could specialize loop routines based on things like dither and Z buffer
  408. ;//pCtx->SI.uDitherOffset = (pCtx->SI.uDitherOffset + (pCtx->SI.iXStep<<2)) & 0xf;
  409. ;// May Not need DitherOffset, but I might have to update xor masks.
  410. movq mm3, MMWORD PTR g_uDitherValue ; four bit value from table
  411. movq mm4, MMWORD PTR uDitherXorMask ; will be either 1010b or 1000b (even or odd)
  412. pxor mm3, mm4 ; change dither value
  413. pxor mm4, MMWORD PTR uDitherXorXorMask ; always 0010b
  414. movq MMWORD PTR uDitherXorMask, mm4 ; save new xor mask
  415. movq MMWORD PTR g_uDitherValue, mm3 ; save new dither value.
  416. ;#ifdef DBG
  417. ;// handy for debug to see where we are
  418. ;//pS->uX += (INT16)pCtx->SI.iXStep;
  419. ;#endif
  420. ;// } // while
  421. jmp PixelLoop
  422. ExitPixelLoop:
  423. ; Loop code ends
  424. ;-----------------------------------------------------------------------------
  425. ; LoopAny code ends here
  426. ;-----------------------------------------------------------------------------
  427. ;pS++;
  428. add ebp, SIZEOF_RASTSPAN
  429. ;}
  430. jmp SpanLoop
  431. ExitSpanLoop:
  ; Loop tails and epilogue: step to the next primitive, and once the list is
  ; exhausted clear the MMX state, set the return value to 0 and unwind.
  432. ;pP = pP->pNext;
  433. mov ecx, XpP(pNext)
  434. ;}
  435. jmp PrimLoop
  436. ExitPrimLoop:
  437. ;_asm{
  438. emms
  439. ;}
  440. ;return S_OK;
  441. xor eax, eax
  442. ;}
  443. pop edi ; reverse order of the three pushes at entry
  444. pop esi
  445. pop ebx
  446. mov esp, StackPos ; drops the 0Ch bytes reserved at entry by reloading the saved esp
  447. pop ebp
  448. ret
  449. ; ATTENTION Just putting this here, because selection code needs a function pointer
  ; Placeholder symbol: the body is a bare ret. The real loop code is inlined
  ; into the span routine above.
  450. PUBLIC _MMX_LoopAny
  451. _MMX_LoopAny:
  452. ; This Should never be called by anything.
  453. ret
  454. END