Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1148 lines
39 KiB

  1. ;
  2. ; WARNING WARNING WARNING
  3. ; This asm file generated from mas file.
  4. ; EDIT THE MAS FILE.
  5. ; I warned you.
  6. ; WARNING WARNING WARNING
  7. ;
  8. ;-----------------------------------------------------------------------------
  9. include(`m4hdr.mh')dnl
  10. include(`cvars.mh')dnl
  11. INCLUDE iammx.inc
  12. INCLUDE offs_acp.inc
  13. .586
  14. .model flat
  15. .data
  16. ;-----------------------------------------------------------------------------
  17. ; Current Texture: number of the texture being processed. The code below loads it to scale into the per-texture tables (TexUV, TexCol, pfnTexRead).
  18. iTex dd ?
  19. PUBLIC IncHighandLow16
  20. IncHighandLow16 dq 0001000000000001h ; 1 in words 0 and 3 only. Added with paddw to the packed V,U,V,U quad to form iV01, iU00, iV00, iU01.
  21. PUBLIC UFracVFracMask
  22. UFracVFracMask dq 0000003f0000003fh ; Used to be 00000fff00000fffh. Change to 6 bits. Keeps the low 6 fraction bits of each dword (U in the low dword, V in the high dword).
  23. PUBLIC UV32to15Mask
  24. UV32to15Mask dq 0000ffff0000ffffh ; ffff or 7fff???? dunno. NOTE(review): not referenced in the visible part of this file - confirm it is still needed.
  25. PUBLIC Makelow16one
  26. Makelow16one dq 0000000100000001h ; 1 in each dword. Shifted left by (iShiftPitch + 16) and ORed with itself to build the pmaddwd multiplier, so each (U, V) word pair becomes U + (V << iShiftPitch).
  27. PUBLIC MaskKeepUValues
  28. MaskKeepUValues dq 00000ffff0000ffffh ; Keeps words 0 and 2, the U lanes of the packed UV quad.
  29. PUBLIC MaskKeepVValues
  30. MaskKeepVValues dq 0ffff0000ffff0000h ; Keeps words 1 and 3, the V lanes of the packed UV quad.
  31. PUBLIC UFrac
  32. UFrac dq ? ; Scratch: 8 bit U fraction replicated into all four words. Written before the texel reads and used as a pmullw operand for the bilinear blend.
  33. PUBLIC VFrac
  34. VFrac dq ? ; Scratch: 8 bit V fraction replicated into all four words. Written before the texel reads and used as a pmullw operand for the bilinear blend.
  35. PUBLIC Val0x000a000a
  36. Val0x000a000a dq 000000000000a000ah ; 10 in each of the two low words. Per the comment at its use this is TEX_FINAL_SHIFT - 6.
  37. PUBLIC Val0xffff
  38. Val0xffff dq 0ffffh ; Keeps only the low 16 bits of a quadword. Used to split the packed U and V amounts and to reduce the LOD back to one copy.
  39. PUBLIC Val0x0000002000000020
  40. Val0x0000002000000020 dq 0000002000000020h ; 20h in each dword: half of the 6 bit fraction range. Subtracted from U and V before they are split into whole and fraction parts for bilinear.
  41. PUBLIC Val0x0000ffff0000ffff
  42. Val0x0000ffff0000ffff dq 0000ffff0000ffffh ; NOTE(review): only referenced by commented-out pand lines in the visible part of this file.
  43. PUBLIC Zero
  44. Zero dq 0 ; All-zero quadword used as the pcmpeqw operand when testing the mirror (flip) bits.
  45. PUBLIC memD3DTFG_POINT
  46. memD3DTFG_POINT dq D3DTFG_POINT ; NOTE(review): not referenced in the visible part of this file.
  47. ; Used as a counter inside the SpecialW loop.
  48. PUBLIC GiveUp
  49. GiveUp dd ?
  50. PUBLIC LastW
  51. LastW dd ? ; NOTE(review): not referenced in the visible part of this file. Presumably belongs to the same SpecialW code as GiveUp - confirm.
  52. .code
  53. include(`texaddra.mh')dnl ; NOTE(review): presumably supplies the d_TexAddr template that the nested replace-string calls below instantiate once per AA/BB/CC/DD combination - confirm against the header. m4 discards everything after dnl, so this note never reaches the assembler.
  54. d_RepStr(`d_RepStr(`d_RepStr(`d_RepStr(`d_TexAddr(0, AA, BB, CC, DD, NotMonolithic)',
  55. `AA', `TexAddrWrapMirror', `TexAddrAll')',
  56. `BB', `NoPersp', `Persp')',
  57. `CC', ifelse(DD, NoLOD, `Point, Bilinear', `Point, Bilinear, MaybeBilinear'))',
  58. `DD', `NoLOD', `LOD')
  59. ;// All singing all dancing mip mapping address calculation and filtering.
  60. ;// No texture filtering code need be called after this bead.
  61. ;void Tex1AddrFilt_All_Mip(PD3DI_RASTCTX pCtx, PD3DI_RASTPRIM pP,
  62. ; PD3DI_RASTSPAN pS, INT32 iTex)
  63. ;{
  64. PUBLIC _MMX_TexAddr_Filt_All_Mip
  65. _MMX_TexAddr_Filt_All_Mip:
  66. ;PD3DI_SPANTEX pTex = &pCtx->pTexture[0];
  67. mov esi, XpCtx(pTexture + eax*SIZEOF_PSPANTEX)
  68. ;INT16 iLOD0 = min(max(pS->iLOD >> 11, 0), pTex->cLOD);
  69. ;INT32 iU00 = pCtx->SI.iU`'d_TexNum<<(pTex->iShiftU - iLOD0);
  70. ;INT32 iV00 = pCtx->SI.iV`'d_TexNum<<(pTex->iShiftV - iLOD0);
  71. movq mm1, XpCtxSI(TexUV + eax * SIZEOF_UV_UNION)
  72. movsx eax, word ptr XpS(iLOD)
  73. sar eax, 11
  74. mov edx, eax
  75. sar edx, 31
  76. not edx
  77. ;xor edx, 0ffffffffh
  78. and eax, edx
  79. define(`d_MaxCLODcnt', eval(d_MaxCLODcnt+1))dnl
  80. cmp eax, XpTex(cLOD)
  81. jb NotMax`'d_MaxCLODcnt`'
  82. mov eax, XpTex(cLOD)
  83. NotMax`'d_MaxCLODcnt`':
  84. ; eax is use below so we will keep iLOD0 in mm3 and put it into eax later.
  85. movd mm3, eax
  86. ; ----------------------------------------
  87. ; Doing UV calculation a little more accurate
  88. ; Exactly like C code.
  89. ; I shift iU and iV to the right not by (TEX_FINAL_SHIFT - iShiftU0) but by
  90. ; (TEX_FINAL_SHIFT - iShiftU0 - 6). iShiftU0 = pTex->iShiftU - iLOD0
  91. ; (TEX_FINAL_SHIFT - (pTex->iShiftU - iLOD0))
  92. ; (TEX_FINAL_SHIFT + iLOD0 - pTex->iShiftU)
  93. ; COMMENT1**
  94. ; If textures have a max of 1024 then shiftU0 would be at most 10 which would
  95. ; make (TEXT_FINAL_SHIFT - iShiftU - 6) at most zero. This is why I choose 6
  96. ; It will also give bi-linear 6 bits of precision I think it was said that
  97. ; only five was needed.
  98. ;INT16 iShiftU0 = pTex->iShiftU - iLOD0;
  99. ;INT16 iShiftV0 = pTex->iShiftV - iLOD0;
  100. movq mm5, MMWORD PTR Val0x000a000a ; This is TEX_FINAL_SHIFT - 6 = 10.
  101. punpcklwd mm3, mm3 ; Make two copys of iLOD to subtract U and V
  102. movd mm4, XpTex(iShiftU)
  103. psubw mm4, mm3
  104. psubw mm5, mm4
  105. movq mm4, mm5
  106. pand mm5, MMWORD PTR Val0xffff
  107. pand mm3, MMWORD PTR Val0xffff ; Make iLOD back to only one copy
  108. psrld mm4, 16
  109. mov eax, iTex
  110. movd mm1, XpCtxSI(TexUV + eax * SIZEOF_UV_UNION)
  111. psrad mm1, mm5
  112. movd mm2, XpCtxSI(TexUV + eax * SIZEOF_UV_UNION + 4)
  113. psrad mm2, mm4
  114. punpckldq mm1, mm2
  115. ;// select filter based on whether we are minifying or magnifying
  116. ;D3DTEXTUREMINFILTER uFilter;
  117. ;if (pS->iLOD < 0)
  118. ;{
  119. ; // depends on the first two entries (POINT and LINEAR)
  120. ; // being the same for min and mag
  121. ; uFilter = (D3DTEXTUREMINFILTER)pTex->uMagFilter;
  122. ;}
  123. ;else
  124. ;{
  125. ; uFilter = pTex->uMinFilter;
  126. ;}
  127. ; Use edx mask from above to determine if iLOD is less than 0.
  128. mov eax, XpTex(uMinFilter)
  129. and eax, edx
  130. not edx
  131. and edx, XpTex(uMagFilter)
  132. or eax, edx
  133. ;if (uFilter == D3DTFG_LINEAR)
  134. ;{
  135. cmp eax, D3DTFG_LINEAR
  136. jne NotLinear
  137. ; Get LOD from mm3 and put in eax.
  138. movd eax, mm3
  139. ; Save this off because there is no way to keep it in a register until next time.
  140. mov edx, iTex
  141. movd XpCtxSI(TexCol+edx*4), mm3
  142. ; This helps in calculating texture address.
  143. movzx edx, word ptr XpTex(iShiftPitch+eax*2)
  144. add edx, 16
  145. movd mm2, edx
  146. movq mm5, MMWORD ptr Makelow16one
  147. pslld mm5, mm2
  148. por mm5, MMWORD ptr Makelow16one
  149. ;INT32 iHalf = 1<<(TEX_FINAL_SHIFT - iShiftU0 - 1);
  150. ;INT32 iUAlign = pCtx->SI.iU1 - iHalf;
  151. ;iHalf = 1<<(TEX_FINAL_SHIFT - iShiftV0 - 1);
  152. ;INT32 iVAlign = pCtx->SI.iV1 - iHalf;
  153. ;iU00 = iUAlign >> (TEX_FINAL_SHIFT - iShiftU0);
  154. ;iV00 = iVAlign >> (TEX_FINAL_SHIFT - iShiftV0);
  155. ;iUFrac0 = (iUAlign<<iShiftU0) & TEX_FINAL_FRAC_MASK;
  156. ;iVFrac0 = (iVAlign<<iShiftV0) & TEX_FINAL_FRAC_MASK;
  157. psubd mm1, MMWORD PTR Val0x0000002000000020
  158. ;INT32 iUFrac = iU00 & 0x03f;
  159. ;INT32 iVFrac = iV00 & 0x03f;
  160. ;iU00 >>= 6;
  161. ;iV00 >>= 6;
  162. movq mm2, mm1
  163. psrad mm1, 6
  164. ;pand mm1, MMWORD PTR Val0x0000ffff0000ffff
  165. pand mm2, dword ptr UFracVFracMask ; UFracVFracMask = 0x0000003f0000003f
  166. ; Going to use only 8 bits for bi-linear so that I can do a pmullw.
  167. ; Currently at 6 bits so shift up by 2.
  168. psllw mm2, 2
  169. movq mm0, mm2
  170. ; Replicate VFrac value for bilinear
  171. punpckhwd mm2, mm2
  172. punpcklwd mm2, mm2
  173. ; Replicate UFrac Value for bilinear
  174. punpcklwd mm0, mm0
  175. punpcklwd mm0, mm0
  176. movq dword ptr VFrac, mm2
  177. movq dword ptr UFrac, mm0
  178. ;INT32 iU01 = iU00 + 1;
  179. ;INT32 iV01 = iV00 + 1;
  180. packssdw mm1, mm1 ; replicate U and V value to upper 16 bit locations
  181. paddw mm1, dword ptr IncHighandLow16
  182. ; This will make texture values be (High word to low word):
  183. ; iV01, iU00, iV00, iU01
  184. ; Need to do this to make texture look up for bilinear easier.
  185. ; I have to combine to get all combinations anyway. It just
  186. ; happens to be better for me to have iV00, iU01 pair first.
  187. ;UINT16 uMaskU0 = pTex->uMaskU >> iLOD0; UINT16 uMaskV0 = pTex->uMaskV >> iLOD0;
  188. ; put mask in mm3 and replicate to match location for wrap/mirror/clamp
  189. movd mm0, XpTex(uMaskU) ; Load U and V mask
  190. ; replicate mask if doing bilinear
  191. punpckldq mm0, mm0
  192. psrlw mm0, mm3
  193. ;INT16 iFlip, iClamp1, iClamp2, iClampMinT, iClampMaxT;
  194. ;INT16 iUoWAdj = (INT16)(pS->iUoW`'d_TexNum >> 12); // adjust to match W
  195. ;INT16 iVoWAdj = (INT16)(pS->iVoW`'d_TexNum >> 12);
  196. ;movq mm6, XpS(iUoW`'d_TexNum)
  197. ;movq mm6, MMWORD PTR Zero
  198. pxor mm6, mm6
  199. ; TBD Data in SPANTEX needs to be rearange to make life simpler.
  200. ; I have rearranged some of it, but there still needs to be some
  201. ; fixes to it.
  202. ;iFlip1 = iU00 & pTex->iFlipMaskU; ;iFlip2 = iV00 & pTex->iFlipMaskV; ;iFlip3 = iU01 & pTex->iFlipMaskU; ;iFlip4 = iV01 & pTex->iFlipMaskV;
  203. movq mm7, mm1
  204. movd mm4, XpTex(iFlipMaskU) ; This should copy U and V mask at the same time.
  205. punpckldq mm4, mm4 ; copy UV
  206. psrlw mm4, mm3 ; Shifts mirror mask to correct bit location
  207. pand mm7, mm4
  208. ;iFlip1 = MMX_cmpeqw(iFlip1, 0); ;iFlip2 = MMX_cmpeqw(iFlip2, 0); ;iFlip3 = MMX_cmpeqw(iFlip3, 0); ;iFlip4 = MMX_cmpeqw(iFlip4, 0);
  209. pcmpeqw mm7, MMWORD PTR Zero
  210. ;iFlip1 = uMaskU0 &~ iFlip1; ;iFlip2 = uMaskV0 &~ iFlip2; ;iFlip3 = uMaskU0 &~ iFlip3; ;iFlip4 = uMaskV0 &~ iFlip4;
  211. pandn mm7, mm0
  212. ;iU00 &= uMaskU0; ;iV00 &= uMaskV0; ;iU01 &= uMaskU0; ;iV01 &= uMaskV0;
  213. pand mm1, mm0
  214. ;iU00 ^= iFlip1; ;iV00 ^= iFlip2; ;iU01 ^= iFlip3; ;iV01 ^= iFlip4;
  215. pxor mm1, mm7
  216. ;iClamp11 = MMX_cmpgtw(0, iUoWAdj); ;iClamp12 = MMX_cmpgtw(0, iVoWAdj);
  217. mov edi, iTex
  218. pcmpgtd mm6, XpS(UVoW + edi * SIZEOF_UV_UNION)
  219. packssdw mm6, mm6
  220. ;iClamp21 = MMX_cmpgtw(iOoWAdj, iUoWAdj); ;iClamp22 = MMX_cmpgtw(iOoWAdj, iVoWAdj);
  221. movd mm7, XpS(iOoW)
  222. punpckldq mm7, mm7 ; Make a copy of OoW to compare both UoW and VoW.
  223. psrld mm7, 11 ; Make OoWs Precision Match UoWs.
  224. pcmpgtd mm7, XpS(UVoW + edi * SIZEOF_UV_UNION)
  225. packssdw mm7, mm7
  226. ;iClampMinT1 = pTex->iClampMinU & iClamp11; ;iClampMinT2 = pTex->iClampMinV & iClamp12; ;iClampMinT3 = pTex->iClampMinU & iClamp13; ;iClampMinT4 = pTex->iClampMinV & iClamp14;
  227. movd mm0, XpTex(iClampMinU)
  228. punpckldq mm0, mm0
  229. pand mm0, mm6
  230. ; Save clamp2 because pandn will destory value.
  231. movq mm4, mm7
  232. ;iClampMaxT1 = pTex->iClampMaxU &~ iClamp21; ;iClampMaxT2 = pTex->iClampMaxV &~ iClamp22; ;iClampMaxT3 = pTex->iClampMaxU &~ iClamp23; ;iClampMaxT4 = pTex->iClampMaxV &~ iClamp24;
  233. movd mm2, XpTex(iClampMaxU)
  234. punpckldq mm2, mm2
  235. psraw mm2, mm3 ; Shifts clamp max to correct bit location
  236. pandn mm7, mm2 ; Since iClamp2 is already negated, I can just do an AND.
  237. ;iClamp21 &= ~iClamp11; ;iClamp22 &= ~iClamp12; ;iClamp23 &= ~iClamp13; ;iClamp24 &= ~iClamp14;
  238. pandn mm6, mm4
  239. ;iClamp21 = pTex->iClampEnU &~ iClamp21; ;iClamp22 = pTex->iClampEnU &~ iClamp22; ;iClamp23 = pTex->iClampEnU &~ iClamp23; ;iClamp24 = pTex->iClampEnU &~ iClamp24;
  240. movd mm2, XpTex(iClampEnU)
  241. punpckldq mm2, mm2
  242. pandn mm6, mm2
  243. ;iU00 &= ~iClamp21; ;iV00 &= ~iClamp22; ;iU01 &= ~iClamp23; ;iV01 &= ~iClamp24;
  244. pandn mm6, mm1
  245. ;iU00 |= iClampMinT1; ;iV00 |= iClampMinT2; ;iU01 |= iClampMinT3; ;iV01 |= iClampMinT4;
  246. por mm6, mm0
  247. ;iU00 |= iClampMaxT1; ;iV00 |= iClampMaxT2; ;iU01 |= iClampMaxT3; ;iV01 |= iClampMaxT4;
  248. por mm6, mm7
  249. movq mm4, mm6
  250. ; Making other two cases for texture addressing has to be simplier than
  251. ; this and not use so many registers. Puts U1 V0 U0 V1 into mm3.
  252. ; TBD Make this better.
  253. ; values are still stored as iV01, iU00, iV00, iU01
  254. movq mm2, mm4
  255. movq mm3, mm4
  256. movq mm0, mm4
  257. pmaddwd mm4, mm5 ; Throw in first address calculation.
  258. ; Just to get it started. Calculate
  259. ; iU0+iV1*iShiftU0 and iU1+iV0*iShiftU0
  260. ; values are being changed to iV01, iU01, iV00, iU00
  261. ; seven instructions for this seems excessive.
  262. pand mm2, MMWORD ptr MaskKeepUValues
  263. pand mm3, MMWORD ptr MaskKeepVValues
  264. movq mm1, mm2
  265. psllq mm2, 32
  266. psrlq mm1, 32
  267. por mm3, mm2
  268. por mm3, mm1
  269. ; From here until mov edi is code that is needed for border.
  270. ; all sign bits are stored in bytes so that border code can tell if uv went below zero.
  271. ; mm0 = iV01, iU00, iV00, iU01
  272. ; mm3 = iV01, iU01, iV00, iU00
  273. ; Need to rearrange values to be like so v1 u0 v1 u1 v0 u0 v0 u1 in bytes
  274. ; This is really bad. Just doing whatever to get it to work.
  275. movq mm1, mm0
  276. punpckldq mm1, mm3 ; This will make mm1 = v0 u0 v0 u1
  277. movq mm2, mm3
  278. punpckhdq mm2, mm0 ; This will make mm0 = v1 u0 v1 u1
  279. packsswb mm1, mm2
  280. movq mm0, mm1
  281. pmaddwd mm3, mm5 ; Calculates iU1+iV0*iShiftU0 and iU0+iV1*iShiftU0
  282. mov edi, XpTex(pBits+eax*4)
  283. ; was esi. Cant change to esi because it is the pointer to pTex
  284. ; which is used by Border and ColorKey. Use edi for now and
  285. ; call routines through memory. Figure out if this is bad.
  286. ; load the read texture routine address into a register early
  287. ;mov edi, XpCtx(pfnTexRead)
  288. ; iV0 iU1 address should be done by now.
  289. movd eax, mm4
  290. ;UINT32 uTex00 = pCtx->pfnTexRead[0](iU00, iV00, pTex->iShiftU,
  291. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  292. ; Combine U and V values before making call.
  293. ;call edi
  294. mov edx, iTex
  295. call dword ptr XpCtx(pfnTexRead+edx*4)
  296. movd eax, mm3
  297. movq mm7, mm1 ; Put TColor[iU0, uV0] in mm7
  298. ;UINT32 uTex10 = pCtx->pfnTexRead[0](iU01, iV00, pTex->iShiftU,
  299. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  300. ;call edi
  301. mov edx, iTex
  302. call dword ptr XpCtx(pfnTexRead+edx*4)
  303. psrlq mm3, 32
  304. psubw mm7, mm1
  305. psllw mm1, 8
  306. pmullw mm7, dword ptr UFrac
  307. paddw mm7, mm1 ; Should I copy mm1 to another variable and do shift/add later?
  308. movd eax, mm3
  309. ;UINT32 uTex01 = pCtx->pfnTexRead[0](iU00, iV01, pTex->iShiftU,
  310. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  311. ;call edi
  312. mov edx, iTex
  313. call dword ptr XpCtx(pfnTexRead+edx*4)
  314. psrlq mm4, 32
  315. movq mm6, mm1
  316. movd eax, mm4
  317. ;UINT32 uTex11 = pCtx->pfnTexRead[0](iU01, iV01, pTex->iShiftU,
  318. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  319. ;call edi
  320. mov edx, iTex
  321. call dword ptr XpCtx(pfnTexRead+edx*4)
  322. ;TexFiltBilinear(&pCtx->SI.TexCol[0], iUFrac, iVFrac, uTex00, uTex10, uTex01, uTex11);
  323. ; The amount of shifting instructions for this makes the other approach
  324. ; look pretty good.
  325. psubw mm6, mm1
  326. psllw mm1, 8
  327. pmullw mm6, dword ptr UFrac ; TBD explain this code better.
  328. movq mm4, mm7
  329. paddw mm6, mm1
  330. psrlw mm6, 8
  331. psrlw mm7, 8
  332. psubw mm6, mm7
  333. pmullw mm6, dword ptr VFrac
  334. paddw mm4, mm6
  335. psrlw mm4, 8
  336. ; TBD shouldnt have to pack and then unpack later. Should keep in a register
  337. ;packuswb mm4, mm4
  338. ;movd XpCtxSI(TexCol+edi*4), mm4
  339. movq MMWORD PTR XpCtxSI(uBB), mm4
  340. ;----Calc second mip level pixel------------------------------------------------------------------------------
  341. ;INT16 iLOD1 = (INT16)(min(iLOD0+(pS->iLOD > 0), pTex->cLOD));
  342. ;****** Need to save iLOD0 from above somehow.
  343. ; Saving it in second texture color for now.
  344. mov edx, iTex
  345. movd mm3, XpCtxSI(TexCol+edx * 4)
  346. pxor mm5, mm5
  347. movd mm2, XpS(iLOD)
  348. pcmpgtw mm2, mm5
  349. psubw mm3, mm2
  350. movd mm1, XpTex(cLOD)
  351. movq mm2, mm3
  352. pcmpgtw mm3, mm1
  353. pand mm1, mm3
  354. pandn mm3, mm2
  355. por mm3, mm1
  356. pand mm3, MMWORD PTR Val0xffff ; Get rid of any data in the high word.
  357. ; Get LOD from mm3 and put in eax.
  358. movd eax, mm3
  359. mov edx, iTex
  360. movq mm1, XpCtxSI(TexUV + edx * SIZEOF_UV_UNION)
  361. ;INT16 iShiftU1 = pTex->iShiftU - iLOD1;
  362. ;INT16 iShiftV1 = pTex->iShiftV - iLOD1;
  363. movq mm5, MMWORD PTR Val0x000a000a ; This is TEX_FINAL_SHIFT - 6 = 10.
  364. punpcklwd mm3, mm3 ; Make two copys of iLOD to subtract U and V
  365. movd mm4, XpTex(iShiftU)
  366. psubw mm4, mm3
  367. psubw mm5, mm4
  368. movq mm4, mm5
  369. pand mm5, MMWORD PTR Val0xffff
  370. pand mm3, MMWORD PTR Val0xffff ; Make iLOD back to only one copy
  371. psrld mm4, 16
  372. movd mm1, XpCtxSI(TexUV + edx * SIZEOF_UV_UNION)
  373. psrad mm1, mm5
  374. movd mm2, XpCtxSI(TexUV + edx * SIZEOF_UV_UNION + 4)
  375. psrad mm2, mm4
  376. punpckldq mm1, mm2
  377. ; This helps in calculating texture address.
  378. movzx edx, word ptr XpTex(iShiftPitch+eax*2)
  379. add edx, 16
  380. movd mm2, edx
  381. movq mm5, MMWORD ptr Makelow16one
  382. pslld mm5, mm2
  383. por mm5, MMWORD ptr Makelow16one
  384. ;INT32 iHalf = 1<<(TEX_FINAL_SHIFT - iShiftU1 - 1);
  385. ;INT32 iUAlign = pCtx->SI.iU1 - iHalf;
  386. ;iHalf = 1<<(TEX_FINAL_SHIFT - iShiftV1 - 1);
  387. ;INT32 iVAlign = pCtx->SI.iV1 - iHalf;
  388. ;iU10 = iUAlign >> (TEX_FINAL_SHIFT - iShiftU0);
  389. ;iV10 = iVAlign >> (TEX_FINAL_SHIFT - iShiftV0);
  390. ;iUFrac0 = (iUAlign<<iShiftU0) & TEX_FINAL_FRAC_MASK;
  391. ;iVFrac0 = (iVAlign<<iShiftV0) & TEX_FINAL_FRAC_MASK;
  392. psubd mm1, MMWORD PTR Val0x0000002000000020
  393. ;INT32 iUFrac = iU00 & 0x03f;
  394. ;INT32 iVFrac = iV00 & 0x03f;
  395. ;iU00 >>= 6;
  396. ;iV00 >>= 6;
  397. movq mm2, mm1
  398. psrad mm1, 6
  399. ;pand mm1, MMWORD PTR Val0x0000ffff0000ffff
  400. pand mm2, dword ptr UFracVFracMask ; UFracVFracMask = 0x0000003f0000003f
  401. ; Going to use only 8 bits for bi-linear so that I can do a pmullw.
  402. ; Currently at 6 bits so shift up by 2.
  403. psllw mm2, 2
  404. movq mm0, mm2
  405. ; Replicate VFrac value for bilinear
  406. punpckhwd mm2, mm2
  407. punpcklwd mm2, mm2
  408. ; Replicate UFrac Value for bilinear
  409. punpcklwd mm0, mm0
  410. punpcklwd mm0, mm0
  411. movq dword ptr VFrac, mm2
  412. movq dword ptr UFrac, mm0
  413. ;INT32 iU01 = iU00 + 1;
  414. ;INT32 iV01 = iV00 + 1;
  415. packssdw mm1, mm1 ; replicate U and V value to upper 16 bit locations
  416. paddw mm1, dword ptr IncHighandLow16
  417. ; This will make texture values be (High word to low word):
  418. ; iV01, iU00, iV00, iU01
  419. ; Need to do this to make texture look up for bilinear easier.
  420. ; I have to combine to get all combinations anyway. It just
  421. ; happens to be better for me to have iV00, iU01 pair first.
  422. ;UINT16 uMaskU0 = pTex->uMaskU >> iLOD0; UINT16 uMaskV0 = pTex->uMaskV >> iLOD0;
  423. ; put mask in mm3 and replicate to match location for wrap/mirror/clamp
  424. movd mm0, XpTex(uMaskU) ; Load U and V mask
  425. ; replicate mask if doing bilinear
  426. punpckldq mm0, mm0
  427. psrlw mm0, mm3
  428. ;INT16 iFlip, iClamp1, iClamp2, iClampMinT, iClampMaxT;
  429. ;INT16 iUoWAdj = (INT16)(pS->iUoW`'d_TexNum >> 12); // adjust to match W
  430. ;INT16 iVoWAdj = (INT16)(pS->iVoW`'d_TexNum >> 12);
  431. ;movq mm6, XpS(iUoW`'d_TexNum)
  432. ;movq mm6, MMWORD PTR Zero
  433. pxor mm6, mm6
  434. ; TBD Data in SPANTEX needs to be rearange to make life simpler.
  435. ; I have rearranged some of it, but there still needs to be some
  436. ; fixes to it.
  437. ;iFlip1 = iU00 & pTex->iFlipMaskU; ;iFlip2 = iV00 & pTex->iFlipMaskV; ;iFlip3 = iU01 & pTex->iFlipMaskU; ;iFlip4 = iV01 & pTex->iFlipMaskV;
  438. movq mm7, mm1
  439. movd mm4, XpTex(iFlipMaskU) ; This should copy U and V mask at the same time.
  440. punpckldq mm4, mm4 ; copy UV
  441. psrlw mm4, mm3 ; Shifts mirror mask to correct bit location
  442. pand mm7, mm4
  443. ;iFlip1 = MMX_cmpeqw(iFlip1, 0); ;iFlip2 = MMX_cmpeqw(iFlip2, 0); ;iFlip3 = MMX_cmpeqw(iFlip3, 0); ;iFlip4 = MMX_cmpeqw(iFlip4, 0);
  444. pcmpeqw mm7, MMWORD PTR Zero
  445. ;iFlip1 = uMaskU0 &~ iFlip1; ;iFlip2 = uMaskV0 &~ iFlip2; ;iFlip3 = uMaskU0 &~ iFlip3; ;iFlip4 = uMaskV0 &~ iFlip4;
  446. pandn mm7, mm0
  447. ;iU00 &= uMaskU0; ;iV00 &= uMaskV0; ;iU01 &= uMaskU0; ;iV01 &= uMaskV0;
  448. pand mm1, mm0
  449. ;iU00 ^= iFlip1; ;iV00 ^= iFlip2; ;iU01 ^= iFlip3; ;iV01 ^= iFlip4;
  450. pxor mm1, mm7
  451. ;iClamp11 = MMX_cmpgtw(0, iUoWAdj); ;iClamp12 = MMX_cmpgtw(0, iVoWAdj);
  452. mov edi, iTex
  453. pcmpgtd mm6, XpS(UVoW + edi * SIZEOF_UV_UNION)
  454. packssdw mm6, mm6
  455. ;iClamp21 = MMX_cmpgtw(iOoWAdj, iUoWAdj); ;iClamp22 = MMX_cmpgtw(iOoWAdj, iVoWAdj);
  456. movd mm7, XpS(iOoW)
  457. punpckldq mm7, mm7 ; Make a copy of OoW to compare both UoW and VoW.
  458. psrld mm7, 11 ; Make OoWs Precision Match UoWs.
  459. pcmpgtd mm7, XpS(UVoW + edi * SIZEOF_UV_UNION)
  460. packssdw mm7, mm7
  461. ;iClampMinT1 = pTex->iClampMinU & iClamp11; ;iClampMinT2 = pTex->iClampMinV & iClamp12; ;iClampMinT3 = pTex->iClampMinU & iClamp13; ;iClampMinT4 = pTex->iClampMinV & iClamp14;
  462. movd mm0, XpTex(iClampMinU)
  463. punpckldq mm0, mm0
  464. pand mm0, mm6
  465. ; Save clamp2 because pandn will destory value.
  466. movq mm4, mm7
  467. ;iClampMaxT1 = pTex->iClampMaxU &~ iClamp21; ;iClampMaxT2 = pTex->iClampMaxV &~ iClamp22; ;iClampMaxT3 = pTex->iClampMaxU &~ iClamp23; ;iClampMaxT4 = pTex->iClampMaxV &~ iClamp24;
  468. movd mm2, XpTex(iClampMaxU)
  469. punpckldq mm2, mm2
  470. psraw mm2, mm3 ; Shifts clamp max to correct bit location
  471. pandn mm7, mm2 ; Since iClamp2 is already negated, I can just do an AND.
  472. ;iClamp21 &= ~iClamp11; ;iClamp22 &= ~iClamp12; ;iClamp23 &= ~iClamp13; ;iClamp24 &= ~iClamp14;
  473. pandn mm6, mm4
  474. ;iClamp21 = pTex->iClampEnU &~ iClamp21; ;iClamp22 = pTex->iClampEnU &~ iClamp22; ;iClamp23 = pTex->iClampEnU &~ iClamp23; ;iClamp24 = pTex->iClampEnU &~ iClamp24;
  475. movd mm2, XpTex(iClampEnU)
  476. punpckldq mm2, mm2
  477. pandn mm6, mm2
  478. ;iU00 &= ~iClamp21; ;iV00 &= ~iClamp22; ;iU01 &= ~iClamp23; ;iV01 &= ~iClamp24;
  479. pandn mm6, mm1
  480. ;iU00 |= iClampMinT1; ;iV00 |= iClampMinT2; ;iU01 |= iClampMinT3; ;iV01 |= iClampMinT4;
  481. por mm6, mm0
  482. ;iU00 |= iClampMaxT1; ;iV00 |= iClampMaxT2; ;iU01 |= iClampMaxT3; ;iV01 |= iClampMaxT4;
  483. por mm6, mm7
  484. movq mm4, mm6
  485. ; Making other two cases for texture addressing has to be simplier than
  486. ; this and not use so many registers. Puts U1 V0 U0 V1 into mm3.
  487. ; TBD Make this better.
  488. ; values are still stored as iV01, iU00, iV00, iU01
  489. movq mm2, mm4
  490. movq mm3, mm4
  491. movq mm0, mm4
  492. pmaddwd mm4, mm5 ; Throw in first address calculation.
  493. ; Just to get it started. Calculate
  494. ; iU0+iV1*iShiftU0 and iU1+iV0*iShiftU0
  495. ; values are being changed to iV01, iU01, iV00, iU00
  496. ; seven instructions for this seems excessive.
  497. pand mm2, MMWORD ptr MaskKeepUValues
  498. pand mm3, MMWORD ptr MaskKeepVValues
  499. movq mm1, mm2
  500. psllq mm2, 32
  501. psrlq mm1, 32
  502. por mm3, mm2
  503. por mm3, mm1
  504. ; From here until mov edi is code that is needed for border.
  505. ; all sign bits are stored in bytes so that border code can tell if uv went below zero.
  506. ; mm0 = iV01, iU00, iV00, iU01
  507. ; mm3 = iV01, iU01, iV00, iU00
  508. ; Need to rearrange values to be like so v1 u0 v1 u1 v0 u0 v0 u1 in bytes
  509. ; This is really bad. Just doing whatever to get it to work.
  510. movq mm1, mm0
  511. punpckldq mm1, mm3 ; This will make mm1 = v0 u0 v0 u1
  512. movq mm2, mm3
  513. punpckhdq mm2, mm0 ; This will make mm0 = v1 u0 v1 u1
  514. packsswb mm1, mm2
  515. movq mm0, mm1
  516. pmaddwd mm3, mm5 ; Calculates iU1+iV0*iShiftU0 and iU0+iV1*iShiftU0
  517. mov edi, XpTex(pBits+eax*4)
  518. ; was esi. Cant change to esi because it is the pointer to pTex
  519. ; which is used by Border and ColorKey. Use edi for now and
  520. ; call routines through memory. Figure out if this is bad.
  521. ; load the read texture routine address into a register early
  522. ;mov edi, XpCtx(pfnTexRead)
  523. ; iV0 iU1 address should be done by now.
  524. movd eax, mm4
  525. ;UINT32 uTex00 = pCtx->pfnTexRead[0](iU00, iV00, pTex->iShiftU,
  526. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  527. ; Combine U and V values before making call.
  528. ;call edi
  529. mov edx, iTex
  530. call dword ptr XpCtx(pfnTexRead + edx*4)
  531. movd eax, mm3
  532. movq mm7, mm1 ; Put TColor[iU0, uV0] in mm7
  533. ;UINT32 uTex10 = pCtx->pfnTexRead[0](iU01, iV00, pTex->iShiftU,
  534. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  535. ;call edi
  536. mov edx, iTex
  537. call dword ptr XpCtx(pfnTexRead + edx*4)
  538. psrlq mm3, 32
  539. psubw mm7, mm1
  540. psllw mm1, 8
  541. pmullw mm7, dword ptr UFrac
  542. paddw mm7, mm1 ; Should I copy mm1 to another variable and do shift/add later?
  543. movd eax, mm3
  544. ;UINT32 uTex01 = pCtx->pfnTexRead[0](iU00, iV01, pTex->iShiftU,
  545. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  546. ;call edi
  547. mov edx, iTex
  548. call dword ptr XpCtx(pfnTexRead + edx*4)
  549. psrlq mm4, 32
  550. movq mm6, mm1
  551. movd eax, mm4
  552. ;UINT32 uTex11 = pCtx->pfnTexRead[0](iU01, iV01, pTex->iShiftU,
  553. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  554. ;call edi
  555. mov edx, iTex
  556. call dword ptr XpCtx(pfnTexRead + edx*4)
  557. ;TexFiltBilinear(&pCtx->SI.TexCol[0], iUFrac, iVFrac, uTex00, uTex10, uTex01, uTex11);
  558. ; The amount of shifting instructions for this makes the other approach
  559. ; look pretty good.
  560. psubw mm6, mm1
  561. psllw mm1, 8
  562. pmullw mm6, dword ptr UFrac ; TBD explain this code better.
  563. movq mm4, mm7
  564. paddw mm6, mm1
  565. psrlw mm6, 8
  566. psrlw mm7, 8
  567. psubw mm6, mm7
  568. pmullw mm6, dword ptr VFrac
  569. paddw mm4, mm6
  570. psrlw mm4, 8
  571. ; TBD shouldnt have to pack and then unpack later. Should keep in a register
  572. ;packuswb mm4, mm4
  573. ;movd XpCtxSI(TexCol), mm4
  574. jmp mipinterp
  575. NotLinear:
  576. ; Get LOD from mm3 and put in eax.
  577. movd eax, mm3
  578. ; This helps in calculating texture address.
  579. movzx edx, word ptr XpTex(iShiftPitch+eax*2)
  580. add edx, 16
  581. movd mm2, edx
  582. movq mm5, MMWORD ptr Makelow16one
  583. pslld mm5, mm2
  584. por mm5, MMWORD ptr Makelow16one
  585. ;iU00 >>= 6;
  586. ;iV00 >>= 6;
  587. psrad mm1, 6
  588. packssdw mm1, mm1 ; Value needs to be packed since all wrap/mirror
  589. ; operations assume UV in low 32 bits.
  590. ;UINT16 uMaskU0 = pTex->uMaskU >> iLOD0; UINT16 uMaskV0 = pTex->uMaskV >> iLOD0;
  591. ; put mask in mm3 and replicate to match location for wrap/mirror/clamp
  592. movd mm0, XpTex(uMaskU) ; Load U and V mask
  593. ; iLOD0 shift value left over from above. TBD. Put this in in mip case
  594. ; Could do this one before or after the unpack also.
  595. psrlw mm0, mm3
  596. ;INT16 iFlip, iClamp1, iClamp2, iClampMinT, iClampMaxT;
  597. ;INT16 iUoWAdj = (INT16)(pS->iUoW`'d_TexNum >> 12); // adjust to match W
  598. ;INT16 iVoWAdj = (INT16)(pS->iVoW`'d_TexNum >> 12);
  599. ;movq mm6, XpS(iUoW`'d_TexNum)
  600. ;movq mm6, MMWORD PTR Zero
  601. pxor mm6, mm6
  602. ; TBD Data in SPANTEX needs to be rearange to make life simpler.
  603. ; I have rearranged some of it, but there still needs to be some
  604. ; fixes to it.
  605. ;iFlip1 = iU00 & pTex->iFlipMaskU; ;iFlip2 = iV00 & pTex->iFlipMaskV; ;iFlip3 = iU01 & pTex->iFlipMaskU; ;iFlip4 = iV01 & pTex->iFlipMaskV;
  606. movq mm7, mm1
  607. movd mm4, XpTex(iFlipMaskU) ; This should copy U and V mask at the same time.
  608. psrlw mm4, mm3 ; Shifts mirror mask to correct bit location
  609. pand mm7, mm4
  610. ;iFlip1 = MMX_cmpeqw(iFlip1, 0); ;iFlip2 = MMX_cmpeqw(iFlip2, 0); ;iFlip3 = MMX_cmpeqw(iFlip3, 0); ;iFlip4 = MMX_cmpeqw(iFlip4, 0);
  611. pcmpeqw mm7, MMWORD PTR Zero
  612. ;iFlip1 = uMaskU0 &~ iFlip1; ;iFlip2 = uMaskV0 &~ iFlip2; ;iFlip3 = uMaskU0 &~ iFlip3; ;iFlip4 = uMaskV0 &~ iFlip4;
  613. pandn mm7, mm0
  614. ;iU00 &= uMaskU0; ;iV00 &= uMaskV0; ;iU01 &= uMaskU0; ;iV01 &= uMaskV0;
  615. pand mm1, mm0
  616. ;iU00 ^= iFlip1; ;iV00 ^= iFlip2; ;iU01 ^= iFlip3; ;iV01 ^= iFlip4;
  617. pxor mm1, mm7
  618. ;iClamp11 = MMX_cmpgtw(0, iUoWAdj); ;iClamp12 = MMX_cmpgtw(0, iVoWAdj);
  619. mov edi, iTex
  620. pcmpgtd mm6, XpS(UVoW + edi * SIZEOF_UV_UNION)
  621. packssdw mm6, mm6
  622. ;iClamp21 = MMX_cmpgtw(iOoWAdj, iUoWAdj); ;iClamp22 = MMX_cmpgtw(iOoWAdj, iVoWAdj);
  623. movd mm7, XpS(iOoW)
  624. punpckldq mm7, mm7 ; Make a copy of OoW to compare both UoW and VoW.
  625. psrld mm7, 11 ; Make OoWs Precision Match UoWs.
  626. pcmpgtd mm7, XpS(UVoW + edi * SIZEOF_UV_UNION)
  627. packssdw mm7, mm7
  628. ;iClampMinT1 = pTex->iClampMinU & iClamp11; ;iClampMinT2 = pTex->iClampMinV & iClamp12; ;iClampMinT3 = pTex->iClampMinU & iClamp13; ;iClampMinT4 = pTex->iClampMinV & iClamp14;
  629. movd mm0, XpTex(iClampMinU)
  630. pand mm0, mm6
  631. ; Save clamp2 because pandn will destory value.
  632. movq mm4, mm7
  633. ;iClampMaxT1 = pTex->iClampMaxU &~ iClamp21; ;iClampMaxT2 = pTex->iClampMaxV &~ iClamp22; ;iClampMaxT3 = pTex->iClampMaxU &~ iClamp23; ;iClampMaxT4 = pTex->iClampMaxV &~ iClamp24;
  634. movd mm2, XpTex(iClampMaxU)
  635. psraw mm2, mm3 ; Shifts clamp max to correct bit location
  636. pandn mm7, mm2 ; Since iClamp2 is already negated, I can just do an AND.
  637. ;iClamp21 &= ~iClamp11; ;iClamp22 &= ~iClamp12; ;iClamp23 &= ~iClamp13; ;iClamp24 &= ~iClamp14;
  638. pandn mm6, mm4
  639. ;iClamp21 = pTex->iClampEnU &~ iClamp21; ;iClamp22 = pTex->iClampEnU &~ iClamp22; ;iClamp23 = pTex->iClampEnU &~ iClamp23; ;iClamp24 = pTex->iClampEnU &~ iClamp24;
  640. movd mm2, XpTex(iClampEnU)
  641. pandn mm6, mm2
  642. ;iU00 &= ~iClamp21; ;iV00 &= ~iClamp22; ;iU01 &= ~iClamp23; ;iV01 &= ~iClamp24;
  643. pandn mm6, mm1
  644. ;iU00 |= iClampMinT1; ;iV00 |= iClampMinT2; ;iU01 |= iClampMinT3; ;iV01 |= iClampMinT4;
  645. por mm6, mm0
  646. ;iU00 |= iClampMaxT1; ;iV00 |= iClampMaxT2; ;iU01 |= iClampMaxT3; ;iV01 |= iClampMaxT4;
  647. por mm6, mm7
  648. movq mm4, mm6
  649. movq mm0, mm4
  650. pmaddwd mm4, mm5 ; Throw in first address calculation.
  651. ; Just to get it started. Calculate
  652. ; iU0+iV1*iShiftU0 and iU1+iV0*iShiftU0
  653. ; Point needs to be in same format as bilinear for border
  654. packsswb mm0, mm0
  655. mov edi, XpTex(pBits+eax*4)
  656. ; iV0 iU1 address should be done by now.
  657. movd eax, mm4
  658. ;pCtx->SI.TexCol[0] = pCtx->pfnTexRead[0](iU00, iV00, pTex->iShiftU,
  659. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  660. mov edx, iTex
  661. call dword ptr XpCtx(pfnTexRead + edx*4)
  662. ; TBD Currently have to pack and then unpack later. Should be able
  663. ; to leave the value in some register for a while. I would think.
  664. ;packuswb mm1, mm1
  665. movq XpCtxSI(uBB), mm1
  666. ;----Calc second mip level pixel------------------------------------------------------------------------------
  667. ;****** iLOD0 was saved in mm3 from above.
  668. ;INT16 iLOD1 = (INT16)(min(iLOD0+(pS->iLOD > 0), pTex->cLOD));
  669. pxor mm5, mm5
  670. movd mm2, XpS(iLOD)
  671. pcmpgtw mm2, mm5
  672. psubw mm3, mm2
  673. movd mm1, XpTex(cLOD)
  674. movq mm2, mm3
  675. pcmpgtw mm3, mm1
  676. pand mm1, mm3
  677. pandn mm3, mm2
  678. por mm3, mm1
  679. pand mm3, MMWORD PTR Val0xffff ; Get rid of any data in the high word.
  680. ; Get LOD from mm3 and put in eax.
  681. movd eax, mm3
  682. mov edx, iTex
  683. movq mm1, XpCtxSI(TexUV + edx * SIZEOF_UV_UNION)
  684. ;INT16 iShiftU0 = pTex->iShiftU - iLOD0;
  685. ;INT16 iShiftV0 = pTex->iShiftV - iLOD0;
  686. movq mm5, MMWORD PTR Val0x000a000a ; This is TEX_FINAL_SHIFT - 6 = 10.
  687. punpcklwd mm3, mm3 ; Make two copys of iLOD to subtract U and V
  688. movd mm4, XpTex(iShiftU)
  689. psubw mm4, mm3
  690. psubw mm5, mm4
  691. movq mm4, mm5
  692. pand mm5, MMWORD PTR Val0xffff
  693. pand mm3, MMWORD PTR Val0xffff ; Make iLOD back to only one copy
  694. psrld mm4, 16
  695. movd mm1, XpCtxSI(TexUV + edx * SIZEOF_UV_UNION)
  696. psrad mm1, mm5
  697. movd mm2, XpCtxSI(TexUV + edx * SIZEOF_UV_UNION + 4)
  698. psrad mm2, mm4
  699. punpckldq mm1, mm2
  700. ; This helps in calculating texture address.
  701. movzx edx, word ptr XpTex(iShiftPitch+eax*2)
  702. add edx, 16
  703. movd mm2, edx
  704. movq mm5, MMWORD ptr Makelow16one
  705. pslld mm5, mm2
  706. por mm5, MMWORD ptr Makelow16one
  707. ;iU00 >>= 6;
  708. ;iV00 >>= 6;
  709. psrad mm1, 6
  710. packssdw mm1, mm1 ; Value needs to be packed since all wrap/mirror
  711. ; operations assume UV in low 32 bits.
  712. ;UINT16 uMaskU0 = pTex->uMaskU >> iLOD0; UINT16 uMaskV0 = pTex->uMaskV >> iLOD0;
  713. ; put mask in mm3 and replicate to match location for wrap/mirror/clamp
  714. movd mm0, XpTex(uMaskU) ; Load U and V mask
  715. ; iLOD0 shift value left over from above. TBD. Put this in in mip case
  716. ; Could do this one before or after the unpack also.
  717. psrlw mm0, mm3
  718. ;INT16 iFlip, iClamp1, iClamp2, iClampMinT, iClampMaxT;
  719. ;INT16 iUoWAdj = (INT16)(pS->iUoW`'d_TexNum >> 12); // adjust to match W
  720. ;INT16 iVoWAdj = (INT16)(pS->iVoW`'d_TexNum >> 12);
  721. ;movq mm6, XpS(iUoW`'d_TexNum)
  722. ;movq mm6, MMWORD PTR Zero
  723. pxor mm6, mm6
  724. ; TBD Data in SPANTEX needs to be rearranged to make life simpler.
  725. ; I have rearranged some of it, but it still needs some
  726. ; fixes.
  727. ;iFlip1 = iU00 & pTex->iFlipMaskU; ;iFlip2 = iV00 & pTex->iFlipMaskV; ;iFlip3 = iU01 & pTex->iFlipMaskU; ;iFlip4 = iV01 & pTex->iFlipMaskV;
  728. movq mm7, mm1
  729. movd mm4, XpTex(iFlipMaskU) ; This should copy U and V mask at the same time.
  730. psrlw mm4, mm3 ; Shifts mirror mask to correct bit location
  731. pand mm7, mm4
  732. ;iFlip1 = MMX_cmpeqw(iFlip1, 0); ;iFlip2 = MMX_cmpeqw(iFlip2, 0); ;iFlip3 = MMX_cmpeqw(iFlip3, 0); ;iFlip4 = MMX_cmpeqw(iFlip4, 0);
  733. pcmpeqw mm7, MMWORD PTR Zero
  734. ;iFlip1 = uMaskU0 &~ iFlip1; ;iFlip2 = uMaskV0 &~ iFlip2; ;iFlip3 = uMaskU0 &~ iFlip3; ;iFlip4 = uMaskV0 &~ iFlip4;
  735. pandn mm7, mm0
  736. ;iU00 &= uMaskU0; ;iV00 &= uMaskV0; ;iU01 &= uMaskU0; ;iV01 &= uMaskV0;
  737. pand mm1, mm0
  738. ;iU00 ^= iFlip1; ;iV00 ^= iFlip2; ;iU01 ^= iFlip3; ;iV01 ^= iFlip4;
  739. pxor mm1, mm7
  740. ;iClamp11 = MMX_cmpgtw(0, iUoWAdj); ;iClamp12 = MMX_cmpgtw(0, iVoWAdj);
  741. mov edi, iTex
  742. pcmpgtd mm6, XpS(UVoW + edi * SIZEOF_UV_UNION)
  743. packssdw mm6, mm6
  744. ;iClamp21 = MMX_cmpgtw(iOoWAdj, iUoWAdj); ;iClamp22 = MMX_cmpgtw(iOoWAdj, iVoWAdj);
  745. movd mm7, XpS(iOoW)
  746. punpckldq mm7, mm7 ; Make a copy of OoW to compare both UoW and VoW.
  747. psrld mm7, 11 ; Make OoWs Precision Match UoWs.
  748. pcmpgtd mm7, XpS(UVoW + edi * SIZEOF_UV_UNION)
  749. packssdw mm7, mm7
  750. ;iClampMinT1 = pTex->iClampMinU & iClamp11; ;iClampMinT2 = pTex->iClampMinV & iClamp12; ;iClampMinT3 = pTex->iClampMinU & iClamp13; ;iClampMinT4 = pTex->iClampMinV & iClamp14;
  751. movd mm0, XpTex(iClampMinU)
  752. pand mm0, mm6
  753. ; Save clamp2 because pandn will destroy the value.
  754. movq mm4, mm7
  755. ;iClampMaxT1 = pTex->iClampMaxU &~ iClamp21; ;iClampMaxT2 = pTex->iClampMaxV &~ iClamp22; ;iClampMaxT3 = pTex->iClampMaxU &~ iClamp23; ;iClampMaxT4 = pTex->iClampMaxV &~ iClamp24;
  756. movd mm2, XpTex(iClampMaxU)
  757. psraw mm2, mm3 ; Shifts clamp max to correct bit location
  758. pandn mm7, mm2 ; Since iClamp2 is already negated, I can just do an AND.
  759. ;iClamp21 &= ~iClamp11; ;iClamp22 &= ~iClamp12; ;iClamp23 &= ~iClamp13; ;iClamp24 &= ~iClamp14;
  760. pandn mm6, mm4
  761. ;iClamp21 = pTex->iClampEnU &~ iClamp21; ;iClamp22 = pTex->iClampEnU &~ iClamp22; ;iClamp23 = pTex->iClampEnU &~ iClamp23; ;iClamp24 = pTex->iClampEnU &~ iClamp24;
  762. movd mm2, XpTex(iClampEnU)
  763. pandn mm6, mm2
  764. ;iU00 &= ~iClamp21; ;iV00 &= ~iClamp22; ;iU01 &= ~iClamp23; ;iV01 &= ~iClamp24;
  765. pandn mm6, mm1
  766. ;iU00 |= iClampMinT1; ;iV00 |= iClampMinT2; ;iU01 |= iClampMinT3; ;iV01 |= iClampMinT4;
  767. por mm6, mm0
  768. ;iU00 |= iClampMaxT1; ;iV00 |= iClampMaxT2; ;iU01 |= iClampMaxT3; ;iV01 |= iClampMaxT4;
  769. por mm6, mm7
  770. movq mm4, mm6
  771. movq mm0, mm4
  772. pmaddwd mm4, mm5 ; Throw in first address calculation.
  773. ; Just to get it started. Calculate
  774. ; iU0+iV1*iShiftU0 and iU1+iV0*iShiftU0
  775. ; Point needs to be in same format as bilinear for border
  776. packsswb mm0, mm0
  777. mov edi, XpTex(pBits+eax*4)
  778. ; iV0 iU1 address should be done by now.
  779. movd eax, mm4
  780. ;pCtx->SI.TexCol[0] = pCtx->pfnTexRead[0](iU00, iV00, pTex->iShiftU,
  781. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  782. mov edx, iTex
  783. call dword ptr XpCtx(pfnTexRead + edx*4)
  784. ; TBD Currently have to pack and then unpack later. Should be able
  785. ; to leave the value in some register for a while. I would think.
  786. ;packuswb mm1, mm1
  787. ;movd XpCtxSI(TexCol), mm1
  788. movq mm4, mm1
  789. mipinterp:
  790. ;INT32 r0, r1;
  791. ;INT32 g0, g1;
  792. ;INT32 b0, b1;
  793. ;INT32 a0, a1;
  794. ;r0 = RGBA_GETRED(uTex0);
  795. ;r1 = RGBA_GETRED(uTex1);
  796. ;g0 = RGBA_GETGREEN(uTex0);
  797. ;g1 = RGBA_GETGREEN(uTex1);
  798. ;b0 = RGBA_GETBLUE(uTex0);
  799. ;b1 = RGBA_GETBLUE(uTex1);
  800. ;a0 = RGBA_GETALPHA(uTex0);
  801. ;a1 = RGBA_GETALPHA(uTex1);
  802. dnl d_bcom()
  803. ;Tex1 in mm4, tex0 will be in mm1
  804. movq mm1, XpCtxSI(uBB)
  805. movq mm2, mm1
  806. psubw mm4, mm1
  807. psllw mm2, 8
  808. ;INT32 t = pS->iLOD & 0x7ff;
  809. mov eax, XpS(iLOD)
  810. shr eax, 3
  811. and eax, 0ffh
  812. movd mm3, eax
  813. ; Replicate
  814. punpcklwd mm3, mm3
  815. punpckldq mm3, mm3
  816. ;INT32 mt = 0x7ff - t;
  817. ;r0 = (mt*r0 + t*r1)>>11;
  818. ;g0 = (mt*g0 + t*g1)>>11;
  819. ;b0 = (mt*b0 + t*b1)>>11;
  820. ;a0 = (mt*a0 + t*a1)>>11;
  821. pmullw mm4, mm3
  822. paddw mm4, mm2
  823. dnl d_ecom()
  824. ;movq mm4, XpCtxSI(uBB)
  825. psrlw mm4, 8
  826. packuswb mm4, mm4
  827. push edi
  828. mov edi, iTex
  829. movd XpCtxSI(TexCol + edi * 4), mm4
  830. d_UpdateUoWandVoW()
  831. pop edi
  832. d_UpdateLOD()
  833. d_UpdateOoW()
  834. ;pS->iW = 0x00800000/(pS->iOoW>>16); // 9.23/1.15 = 8.8
  835. d_WDivide()
  836. push edi
  837. mov edi, iTex
  838. d_UoWVoWTimesW()
  839. pop edi
  840. ; load the next bead address.
  841. ;mov eax, XpCtx(pfnTex1AddrEnd)
  842. ; pCtx->pfnTex1AddrEnd(pCtx, pP, pS);
  843. ;jmp eax
  844. ; We now need to return here
  845. ret
  846. ;}
  ;-----------------------------------------------------------------------------
  ; _MMX_TexAddr_Wrapper
  ;
  ; Calls one texture address routine per entry of the pfnTexAddr table, for
  ; cActTex entries, then jumps (not calls) to the pfnTexAddrEnd routine.
  ;
  ; C reference:
  ;   void TexAddr_Wrapper(PD3DI_RASTCTX pCtx, PD3DI_RASTPRIM pP,
  ;                        PD3DI_RASTSPAN pS)
  ;   {
  ;       for (INT32 i = 0; i < (INT32)pCtx->cActTex; i++)
  ;       {
  ;           pCtx->pfnTexAddr[i](pCtx, pP, pS, i);
  ;       }
  ;       pCtx->pfnTexAddrEnd(pCtx, pP, pS);
  ;   }
  ;
  ; Notes:
  ;   - The stage counter i is kept in the file-level variable iTex.  eax
  ;     carries the same value at the time of each call and is reloaded from
  ;     iTex afterwards, so the called routine is not required to keep eax.
  ;   - The exit test is an equality compare against cActTex, where the C
  ;     reference uses a less-than compare.  The two agree for any count that
  ;     is reached by counting up from zero.
  ;   - pfnTexAddrEnd is entered with eax holding its own address.
  ;-----------------------------------------------------------------------------
  PUBLIC _MMX_TexAddr_Wrapper
  _MMX_TexAddr_Wrapper:
          xor     eax, eax                        ; i = 0
          mov     iTex, eax                       ; publish i through iTex

  LOOP_TEXTURES:
          cmp     eax, dword ptr XpCtx(cActTex)   ; every entry handled?
          je      DONE_LOOP_TEXTURES

          ; pCtx->pfnTexAddr[i](pCtx, pP, pS, i);
          call    dword ptr XpCtx(pfnTexAddr + eax * 4)

          inc     iTex                            ; i++ in memory ...
          mov     eax, iTex                       ; ... then refresh eax from it
          jmp     LOOP_TEXTURES

  DONE_LOOP_TEXTURES:
          ; Load the next bead address and continue there.
          ; pCtx->pfnTexAddrEnd(pCtx, pP, pS);
          mov     eax, XpCtx(pfnTexAddrEnd)
          jmp     eax
  872. END