Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1076 lines
36 KiB

  1. ;
  2. ; WARNING WARNING WARNING
  3. ; This asm file generated from mas file.
  4. ; EDIT THE MAS FILE.
  5. ; I warned you.
  6. ; WARNING WARNING WARNING
  7. ;
  8. ;-----------------------------------------------------------------------------
  9. include(`m4hdr.mh')dnl
  10. include(`cvars.mh')dnl
  11. INCLUDE iammx.inc
  12. INCLUDE offs_acp.inc
  13. .586
  14. .model flat
  15. .data ; 64-bit MMX constants and scratch storage used by the texture address routines.
  16. PUBLIC IncHighandLow16
  17. IncHighandLow16 dq 0001000000000001h ; paddw operand: adds 1 to words 0 and 3 only, giving iV01, iU00, iV00, iU01 (high to low) in the bilinear path.
  18. PUBLIC UFracVFracMask
  19. UFracVFracMask dq 0000003f0000003fh ; Keeps the low 6 fraction bits of each dword. Used to be 00000fff00000fffh (12 bits). Changed to 6 bits.
  20. PUBLIC UV32to15Mask
  21. UV32to15Mask dq 0000ffff0000ffffh ; Keeps the low 16 bits of each dword. NOTE(review): original author was unsure whether this should be 7fffh (15 bits as the name suggests). No use is visible in this part of the file.
  22. PUBLIC Makelow16one
  23. Makelow16one dq 0000000100000001h ; 1 in each dword. Combined with itself after pslld by (pitch shift + 16) to build the pmaddwd multiplier: U*1 + V*(1 << pitch shift).
  24. PUBLIC MaskKeepUValues
  25. MaskKeepUValues dq 00000ffff0000ffffh ; Keeps words 0 and 2, which hold the U values in the iV01, iU00, iV00, iU01 layout.
  26. PUBLIC MaskKeepVValues
  27. MaskKeepVValues dq 0ffff0000ffff0000h ; Keeps words 1 and 3, which hold the V values in the same layout.
  28. PUBLIC UFrac
  29. UFrac dq ? ; Scratch: U fraction replicated to all four words, stored with movq and read back by pmullw. NOTE(review): global scratch, so presumably not safe for concurrent callers - confirm.
  30. PUBLIC VFrac
  31. VFrac dq ? ; Scratch: V fraction replicated to all four words, stored with movq and read back by pmullw.
  32. PUBLIC Val0x000a000a
  33. Val0x000a000a dq 000000000000a000ah ; 10 in each of the two low words. The code below documents this as TEX_FINAL_SHIFT - 6.
  34. PUBLIC Val0xffff
  35. Val0xffff dq 0ffffh ; Keeps only the low word of a quadword.
  36. PUBLIC Val0x0000002000000020
  37. Val0x0000002000000020 dq 0000002000000020h ; 20h in each dword. Subtracted from U and V before the 6-bit fraction split - presumably the one-half rounding term (iHalf) at 6 fraction bits.
  38. PUBLIC Val0x0000ffff0000ffff
  39. Val0x0000ffff0000ffff dq 0000ffff0000ffffh ; Same value as MaskKeepUValues. The only reference visible here is a commented-out pand.
  40. PUBLIC Zero
  41. Zero dq 0 ; All-zero quadword used as the pcmpeqw comparison operand.
  42. PUBLIC memD3DTFG_POINT
  43. memD3DTFG_POINT dq D3DTFG_POINT ; Memory copy of the D3DTFG_POINT constant, which is declared outside this file. No use is visible in this part of the file.
  44. ; Used as a counter inside the SpecialW loop.
  45. PUBLIC GiveUp
  46. GiveUp dd ? ; Uninitialized dword. See the counter note on the line above.
  47. PUBLIC LastW
  48. LastW dd ? ; Uninitialized dword. NOTE(review): no use is visible in this part of the file - purpose to be confirmed.
  49. .code ; Code section. NOTE(review): the nested macro call below presumably emits one texture address routine per wrap / perspective / filter / level-of-detail combination - confirm in texaddra.mh.
  50. include(`texaddra.mh')dnl
  51. d_RepStr(`d_RepStr(`d_RepStr(`d_RepStr(`d_TexAddr(0, AA, BB, CC, DD, NotMonolithic)',
  52. `AA', `TexAddrWrapMirror', `TexAddrAll')',
  53. `BB', `NoPersp', `Persp')',
  54. `CC', ifelse(DD, NoLOD, `Point, Bilinear', `Point, Bilinear, MaybeBilinear'))',
  55. `DD', `NoLOD', `LOD')
  56. ;// All singing all dancing mip mapping address calculation and filtering.
  57. ;// No texture filtering code need be called after this bead.
  58. ;void Tex1AddrFilt_All_Mip(PD3DI_RASTCTX pCtx, PD3DI_RASTPRIM pP,
  59. ; PD3DI_RASTSPAN pS)
  60. ;{
  61. PUBLIC _MMX_Tex1Addr_Filt_All_Mip
  62. _MMX_Tex1Addr_Filt_All_Mip:
  63. define(`d_TexNum', 1)dnl
  64. ;PD3DI_SPANTEX pTex = &pCtx->pTexture[0];
  65. mov esi, XpCtx(pTexture + 0*SIZEOF_PSPANTEX)
  66. ;INT16 iLOD0 = min(max(pS->iLOD >> 11, 0), pTex->cLOD);
  67. ;INT32 iU00 = pCtx->SI.iU`'d_TexNum<<(pTex->iShiftU - iLOD0);
  68. ;INT32 iV00 = pCtx->SI.iV`'d_TexNum<<(pTex->iShiftV - iLOD0);
  69. movq mm1, XpCtxSI(iU`'d_TexNum)
  70. movsx eax, word ptr XpS(iLOD)
  71. sar eax, 11
  72. mov edx, eax
  73. sar edx, 31
  74. not edx
  75. ;xor edx, 0ffffffffh
  76. and eax, edx
  77. define(`d_MaxCLODcnt', eval(d_MaxCLODcnt+1))dnl
  78. cmp eax, XpTex(cLOD)
  79. jb NotMax`'d_MaxCLODcnt`'
  80. mov eax, XpTex(cLOD)
  81. NotMax`'d_MaxCLODcnt`':
  82. ; eax is used below, so we keep iLOD0 in mm3 and move it into eax later.
  83. movd mm3, eax
  84. ; ----------------------------------------
  85. ; Doing the UV calculation a little more accurately.
  86. ; Exactly like C code.
  87. ; I shift iU and iV to the right not by (TEX_FINAL_SHIFT - iShiftU0) but by
  88. ; (TEX_FINAL_SHIFT - iShiftU0 - 6). iShiftU0 = pTex->iShiftU - iLOD0
  89. ; (TEX_FINAL_SHIFT - (pTex->iShiftU - iLOD0))
  90. ; (TEX_FINAL_SHIFT + iLOD0 - pTex->iShiftU)
  91. ; COMMENT1**
  92. ; If textures have a max size of 1024 then iShiftU0 is at most 10, which
  93. ; makes (TEX_FINAL_SHIFT - iShiftU0 - 6) at least zero. This is why I chose 6.
  94. ; It also gives bi-linear 6 bits of precision. I think it was said that
  95. ; only five were needed.
  96. ;INT16 iShiftU0 = pTex->iShiftU - iLOD0;
  97. ;INT16 iShiftV0 = pTex->iShiftV - iLOD0;
  98. movq mm5, MMWORD PTR Val0x000a000a ; This is TEX_FINAL_SHIFT - 6 = 10.
  99. punpcklwd mm3, mm3 ; Make two copys of iLOD to subtract U and V
  100. movd mm4, XpTex(iShiftU)
  101. psubw mm4, mm3
  102. psubw mm5, mm4
  103. movq mm4, mm5
  104. pand mm5, MMWORD PTR Val0xffff
  105. pand mm3, MMWORD PTR Val0xffff ; Make iLOD back to only one copy
  106. psrld mm4, 16
  107. movd mm1, XpCtxSI(iU`'d_TexNum)
  108. psrad mm1, mm5
  109. movd mm2, XpCtxSI(iV`'d_TexNum)
  110. psrad mm2, mm4
  111. punpckldq mm1, mm2
  112. ;// select filter based on whether we are minifying or magnifying
  113. ;D3DTEXTUREMINFILTER uFilter;
  114. ;if (pS->iLOD < 0)
  115. ;{
  116. ; // depends on the first two entries (POINT and LINEAR)
  117. ; // being the same for min and mag
  118. ; uFilter = (D3DTEXTUREMINFILTER)pTex->uMagFilter;
  119. ;}
  120. ;else
  121. ;{
  122. ; uFilter = pTex->uMinFilter;
  123. ;}
  124. ; Use edx mask from above to determine if iLOD is less than 0.
  125. mov eax, XpTex(uMinFilter)
  126. and eax, edx
  127. not edx
  128. and edx, XpTex(uMagFilter)
  129. or eax, edx
  130. ;if (uFilter == D3DTFG_LINEAR)
  131. ;{
  132. cmp eax, D3DTFG_LINEAR
  133. jne NotLinear
  134. ; Get LOD from mm3 and put in eax.
  135. movd eax, mm3
  136. ; Save this off because there is no way to keep it in a register until next time.
  137. movd XpCtxSI(TexCol+4), mm3
  138. ; This helps in calculating texture address.
  139. movzx edx, word ptr XpTex(iShiftPitch+eax*2)
  140. add edx, 16
  141. movd mm2, edx
  142. movq mm5, MMWORD ptr Makelow16one
  143. pslld mm5, mm2
  144. por mm5, MMWORD ptr Makelow16one
  145. ;INT32 iHalf = 1<<(TEX_FINAL_SHIFT - iShiftU0 - 1);
  146. ;INT32 iUAlign = pCtx->SI.iU1 - iHalf;
  147. ;iHalf = 1<<(TEX_FINAL_SHIFT - iShiftV0 - 1);
  148. ;INT32 iVAlign = pCtx->SI.iV1 - iHalf;
  149. ;iU00 = iUAlign >> (TEX_FINAL_SHIFT - iShiftU0);
  150. ;iV00 = iVAlign >> (TEX_FINAL_SHIFT - iShiftV0);
  151. ;iUFrac0 = (iUAlign<<iShiftU0) & TEX_FINAL_FRAC_MASK;
  152. ;iVFrac0 = (iVAlign<<iShiftV0) & TEX_FINAL_FRAC_MASK;
  153. psubd mm1, MMWORD PTR Val0x0000002000000020
  154. ;INT32 iUFrac = iU00 & 0x03f;
  155. ;INT32 iVFrac = iV00 & 0x03f;
  156. ;iU00 >>= 6;
  157. ;iV00 >>= 6;
  158. movq mm2, mm1
  159. psrad mm1, 6
  160. ;pand mm1, MMWORD PTR Val0x0000ffff0000ffff
  161. pand mm2, dword ptr UFracVFracMask ; UFracVFracMask = 0x0000003f0000003f
  162. ; Going to use only 8 bits for bi-linear so that I can do a pmullw.
  163. ; Currently at 6 bits so shift up by 2.
  164. psllw mm2, 2
  165. movq mm0, mm2
  166. ; Replicate VFrac value for bilinear
  167. punpckhwd mm2, mm2
  168. punpcklwd mm2, mm2
  169. ; Replicate UFrac Value for bilinear
  170. punpcklwd mm0, mm0
  171. punpcklwd mm0, mm0
  172. movq dword ptr VFrac, mm2
  173. movq dword ptr UFrac, mm0
  174. ;INT32 iU01 = iU00 + 1;
  175. ;INT32 iV01 = iV00 + 1;
  176. packssdw mm1, mm1 ; replicate U and V value to upper 16 bit locations
  177. paddw mm1, dword ptr IncHighandLow16
  178. ; This will make texture values be (High word to low word):
  179. ; iV01, iU00, iV00, iU01
  180. ; Need to do this to make texture look up for bilinear easier.
  181. ; I have to combine to get all combinations anyway. It just
  182. ; happens to be better for me to have iV00, iU01 pair first.
  183. ;UINT16 uMaskU0 = pTex->uMaskU >> iLOD0; UINT16 uMaskV0 = pTex->uMaskV >> iLOD0;
  184. ; put mask in mm3 and replicate to match location for wrap/mirror/clamp
  185. movd mm0, XpTex(uMaskU) ; Load U and V mask
  186. ; replicate mask if doing bilinear
  187. punpckldq mm0, mm0
  188. psrlw mm0, mm3
  189. ;INT16 iFlip, iClamp1, iClamp2, iClampMinT, iClampMaxT;
  190. ;INT16 iUoWAdj = (INT16)(pS->iUoW`'d_TexNum >> 12); // adjust to match W
  191. ;INT16 iVoWAdj = (INT16)(pS->iVoW`'d_TexNum >> 12);
  192. ;movq mm6, XpS(iUoW`'d_TexNum)
  193. ;movq mm6, MMWORD PTR Zero
  194. pxor mm6, mm6
  195. ; TBD Data in SPANTEX needs to be rearranged to make life simpler.
  196. ; I have rearranged some of it, but there still needs to be some
  197. ; fixes to it.
  198. ;iFlip1 = iU00 & pTex->iFlipMaskU; ;iFlip2 = iV00 & pTex->iFlipMaskV; ;iFlip3 = iU01 & pTex->iFlipMaskU; ;iFlip4 = iV01 & pTex->iFlipMaskV;
  199. movq mm7, mm1
  200. movd mm4, XpTex(iFlipMaskU) ; This should copy U and V mask at the same time.
  201. punpckldq mm4, mm4 ; copy UV
  202. pand mm7, mm4
  203. ;iFlip1 = MMX_cmpeqw(iFlip1, 0); ;iFlip2 = MMX_cmpeqw(iFlip2, 0); ;iFlip3 = MMX_cmpeqw(iFlip3, 0); ;iFlip4 = MMX_cmpeqw(iFlip4, 0);
  204. pcmpeqw mm7, MMWORD PTR Zero
  205. ;iFlip1 = uMaskU0 &~ iFlip1; ;iFlip2 = uMaskV0 &~ iFlip2; ;iFlip3 = uMaskU0 &~ iFlip3; ;iFlip4 = uMaskV0 &~ iFlip4;
  206. pandn mm7, mm0
  207. ;iU00 &= uMaskU0; ;iV00 &= uMaskV0; ;iU01 &= uMaskU0; ;iV01 &= uMaskV0;
  208. pand mm1, mm0
  209. ;iU00 ^= iFlip1; ;iV00 ^= iFlip2; ;iU01 ^= iFlip3; ;iV01 ^= iFlip4;
  210. pxor mm1, mm7
  211. ;iClamp11 = MMX_cmpgtw(0, iUoWAdj); ;iClamp12 = MMX_cmpgtw(0, iVoWAdj);
  212. pcmpgtd mm6, XpS(iUoW`'d_TexNum)
  213. packssdw mm6, mm6
  214. ;iClamp21 = MMX_cmpgtw(iOoWAdj, iUoWAdj); ;iClamp22 = MMX_cmpgtw(iOoWAdj, iVoWAdj);
  215. movd mm7, XpS(iOoW)
  216. punpckldq mm7, mm7 ; Make a copy of OoW to compare both UoW and VoW.
  217. psrld mm7, 11 ; Make OoWs Precision Match UoWs.
  218. pcmpgtd mm7, XpS(iUoW`'d_TexNum)
  219. packssdw mm7, mm7
  220. ;iClampMinT1 = pTex->iClampMinU & iClamp11; ;iClampMinT2 = pTex->iClampMinV & iClamp12; ;iClampMinT3 = pTex->iClampMinU & iClamp13; ;iClampMinT4 = pTex->iClampMinV & iClamp14;
  221. movd mm0, XpTex(iClampMinU)
  222. punpckldq mm0, mm0
  223. pand mm0, mm6
  224. ; Save clamp2 because pandn will destroy the value.
  225. movq mm4, mm7
  226. ;iClampMaxT1 = pTex->iClampMaxU &~ iClamp21; ;iClampMaxT2 = pTex->iClampMaxV &~ iClamp22; ;iClampMaxT3 = pTex->iClampMaxU &~ iClamp23; ;iClampMaxT4 = pTex->iClampMaxV &~ iClamp24;
  227. movd mm2, XpTex(iClampMaxU)
  228. punpckldq mm2, mm2
  229. pandn mm7, mm2 ; Since iClamp2 is already negated, I can just do an AND.
  230. ;iClamp21 &= ~iClamp11; ;iClamp22 &= ~iClamp12; ;iClamp23 &= ~iClamp13; ;iClamp24 &= ~iClamp14;
  231. pandn mm6, mm4
  232. ;iClamp21 = pTex->iClampEnU &~ iClamp21; ;iClamp22 = pTex->iClampEnU &~ iClamp22; ;iClamp23 = pTex->iClampEnU &~ iClamp23; ;iClamp24 = pTex->iClampEnU &~ iClamp24;
  233. movd mm2, XpTex(iClampEnU)
  234. punpckldq mm2, mm2
  235. pandn mm6, mm2
  236. ;iU00 &= ~iClamp21; ;iV00 &= ~iClamp22; ;iU01 &= ~iClamp23; ;iV01 &= ~iClamp24;
  237. pandn mm6, mm1
  238. ;iU00 |= iClampMinT1; ;iV00 |= iClampMinT2; ;iU01 |= iClampMinT3; ;iV01 |= iClampMinT4;
  239. por mm6, mm0
  240. ;iU00 |= iClampMaxT1; ;iV00 |= iClampMaxT2; ;iU01 |= iClampMaxT3; ;iV01 |= iClampMaxT4;
  241. por mm6, mm7
  242. movq mm4, mm6
  243. ; Making the other two cases for texture addressing has to be simpler than
  244. ; this and not use so many registers. Puts U1 V0 U0 V1 into mm3.
  245. ; TBD Make this better.
  246. ; values are still stored as iV01, iU00, iV00, iU01
  247. movq mm2, mm4
  248. movq mm3, mm4
  249. movq mm0, mm4
  250. pmaddwd mm4, mm5 ; Throw in first address calculation.
  251. ; Just to get it started. Calculate
  252. ; iU0+iV1*iShiftU0 and iU1+iV0*iShiftU0
  253. ; values are being changed to iV01, iU01, iV00, iU00
  254. ; seven instructions for this seems excessive.
  255. pand mm2, MMWORD ptr MaskKeepUValues
  256. pand mm3, MMWORD ptr MaskKeepVValues
  257. movq mm1, mm2
  258. psllq mm2, 32
  259. psrlq mm1, 32
  260. por mm3, mm2
  261. por mm3, mm1
  262. ; From here until mov edi is code that is needed for border.
  263. ; all sign bits are stored in bytes so that border code can tell if uv went below zero.
  264. ; mm0 = iV01, iU00, iV00, iU01
  265. ; mm3 = iV01, iU01, iV00, iU00
  266. ; Need to rearrange values to be like so v1 u0 v1 u1 v0 u0 v0 u1 in bytes
  267. ; This is really bad. Just doing whatever to get it to work.
  268. movq mm1, mm0
  269. punpckldq mm1, mm3 ; This will make mm1 = v0 u0 v0 u1
  270. movq mm2, mm3
  271. punpckhdq mm2, mm0 ; This will make mm0 = v1 u0 v1 u1
  272. packsswb mm1, mm2
  273. movq mm0, mm1
  274. pmaddwd mm3, mm5 ; Calculates iU1+iV0*iShiftU0 and iU0+iV1*iShiftU0
  275. mov edi, XpTex(pBits+eax*4)
  276. ; was esi. Cant change to esi because it is the pointer to pTex
  277. ; which is used by Border and ColorKey. Use edi for now and
  278. ; call routines through memory. Figure out if this is bad.
  279. ; load the read texture routine address into a register early
  280. ;mov edi, XpCtx(pfnTexRead)
  281. ; iV0 iU1 address should be done by now.
  282. movd eax, mm4
  283. ;UINT32 uTex00 = pCtx->pfnTexRead[0](iU00, iV00, pTex->iShiftU,
  284. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  285. ; Combine U and V values before making call.
  286. ;call edi
  287. call dword ptr XpCtx(pfnTexRead)
  288. movd eax, mm3
  289. movq mm7, mm1 ; Put TColor[iU0, uV0] in mm7
  290. ;UINT32 uTex10 = pCtx->pfnTexRead[0](iU01, iV00, pTex->iShiftU,
  291. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  292. ;call edi
  293. call dword ptr XpCtx(pfnTexRead)
  294. psrlq mm3, 32
  295. psubw mm7, mm1
  296. psllw mm1, 8
  297. pmullw mm7, dword ptr UFrac
  298. paddw mm7, mm1 ; Should I copy mm1 to another variable and do shift/add later?
  299. movd eax, mm3
  300. ;UINT32 uTex01 = pCtx->pfnTexRead[0](iU00, iV01, pTex->iShiftU,
  301. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  302. ;call edi
  303. call dword ptr XpCtx(pfnTexRead)
  304. psrlq mm4, 32
  305. movq mm6, mm1
  306. movd eax, mm4
  307. ;UINT32 uTex11 = pCtx->pfnTexRead[0](iU01, iV01, pTex->iShiftU,
  308. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  309. ;call edi
  310. call dword ptr XpCtx(pfnTexRead)
  311. ;TexFiltBilinear(&pCtx->SI.TexCol[0], iUFrac, iVFrac, uTex00, uTex10, uTex01, uTex11);
  312. ; The amount of shifting instructions for this makes the other approach
  313. ; look pretty good.
  314. psubw mm6, mm1
  315. psllw mm1, 8
  316. pmullw mm6, dword ptr UFrac ; TBD explain this code better.
  317. movq mm4, mm7
  318. paddw mm6, mm1
  319. psrlw mm6, 8
  320. psrlw mm7, 8
  321. psubw mm6, mm7
  322. pmullw mm6, dword ptr VFrac
  323. paddw mm4, mm6
  324. psrlw mm4, 8
  325. ; TBD shouldnt have to pack and then unpack later. Should keep in a register
  326. ;packuswb mm4, mm4
  327. ;movd XpCtxSI(TexCol), mm4
  328. movq MMWORD PTR XpCtxSI(uBB), mm4
  329. ;----Calc second mip level pixel------------------------------------------------------------------------------
  330. ;INT16 iLOD1 = (INT16)(min(iLOD0+(pS->iLOD > 0), pTex->cLOD));
  331. ;****** Need to save iLOD0 from above somehow.
  332. ; Saving it in second texture color for now.
  333. movd mm3, XpCtxSI(TexCol+4)
  334. pxor mm5, mm5
  335. movd mm2, XpS(iLOD)
  336. pcmpgtw mm2, mm5
  337. psubw mm3, mm2
  338. movd mm1, XpTex(cLOD)
  339. movq mm2, mm3
  340. pcmpgtw mm3, mm1
  341. pand mm1, mm3
  342. pandn mm3, mm2
  343. por mm3, mm1
  344. pand mm3, MMWORD PTR Val0xffff ; Get rid of any data in the high word.
  345. ; Get LOD from mm3 and put in eax.
  346. movd eax, mm3
  347. movq mm1, XpCtxSI(iU`'d_TexNum)
  348. ;INT16 iShiftU1 = pTex->iShiftU - iLOD1;
  349. ;INT16 iShiftV1 = pTex->iShiftV - iLOD1;
  350. movq mm5, MMWORD PTR Val0x000a000a ; This is TEX_FINAL_SHIFT - 6 = 10.
  351. punpcklwd mm3, mm3 ; Make two copys of iLOD to subtract U and V
  352. movd mm4, XpTex(iShiftU)
  353. psubw mm4, mm3
  354. psubw mm5, mm4
  355. movq mm4, mm5
  356. pand mm5, MMWORD PTR Val0xffff
  357. pand mm3, MMWORD PTR Val0xffff ; Make iLOD back to only one copy
  358. psrld mm4, 16
  359. movd mm1, XpCtxSI(iU`'d_TexNum)
  360. psrad mm1, mm5
  361. movd mm2, XpCtxSI(iV`'d_TexNum)
  362. psrad mm2, mm4
  363. punpckldq mm1, mm2
  364. ; This helps in calculating texture address.
  365. movzx edx, word ptr XpTex(iShiftPitch+eax*2)
  366. add edx, 16
  367. movd mm2, edx
  368. movq mm5, MMWORD ptr Makelow16one
  369. pslld mm5, mm2
  370. por mm5, MMWORD ptr Makelow16one
  371. ;INT32 iHalf = 1<<(TEX_FINAL_SHIFT - iShiftU1 - 1);
  372. ;INT32 iUAlign = pCtx->SI.iU1 - iHalf;
  373. ;iHalf = 1<<(TEX_FINAL_SHIFT - iShiftV1 - 1);
  374. ;INT32 iVAlign = pCtx->SI.iV1 - iHalf;
  375. ;iU10 = iUAlign >> (TEX_FINAL_SHIFT - iShiftU0);
  376. ;iV10 = iVAlign >> (TEX_FINAL_SHIFT - iShiftV0);
  377. ;iUFrac0 = (iUAlign<<iShiftU0) & TEX_FINAL_FRAC_MASK;
  378. ;iVFrac0 = (iVAlign<<iShiftV0) & TEX_FINAL_FRAC_MASK;
  379. psubd mm1, MMWORD PTR Val0x0000002000000020
  380. ;INT32 iUFrac = iU00 & 0x03f;
  381. ;INT32 iVFrac = iV00 & 0x03f;
  382. ;iU00 >>= 6;
  383. ;iV00 >>= 6;
  384. movq mm2, mm1
  385. psrad mm1, 6
  386. ;pand mm1, MMWORD PTR Val0x0000ffff0000ffff
  387. pand mm2, dword ptr UFracVFracMask ; UFracVFracMask = 0x0000003f0000003f
  388. ; Going to use only 8 bits for bi-linear so that I can do a pmullw.
  389. ; Currently at 6 bits so shift up by 2.
  390. psllw mm2, 2
  391. movq mm0, mm2
  392. ; Replicate VFrac value for bilinear
  393. punpckhwd mm2, mm2
  394. punpcklwd mm2, mm2
  395. ; Replicate UFrac Value for bilinear
  396. punpcklwd mm0, mm0
  397. punpcklwd mm0, mm0
  398. movq dword ptr VFrac, mm2
  399. movq dword ptr UFrac, mm0
  400. ;INT32 iU01 = iU00 + 1;
  401. ;INT32 iV01 = iV00 + 1;
  402. packssdw mm1, mm1 ; replicate U and V value to upper 16 bit locations
  403. paddw mm1, dword ptr IncHighandLow16
  404. ; This will make texture values be (High word to low word):
  405. ; iV01, iU00, iV00, iU01
  406. ; Need to do this to make texture look up for bilinear easier.
  407. ; I have to combine to get all combinations anyway. It just
  408. ; happens to be better for me to have iV00, iU01 pair first.
  409. ;UINT16 uMaskU0 = pTex->uMaskU >> iLOD0; UINT16 uMaskV0 = pTex->uMaskV >> iLOD0;
  410. ; put mask in mm3 and replicate to match location for wrap/mirror/clamp
  411. movd mm0, XpTex(uMaskU) ; Load U and V mask
  412. ; replicate mask if doing bilinear
  413. punpckldq mm0, mm0
  414. psrlw mm0, mm3
  415. ;INT16 iFlip, iClamp1, iClamp2, iClampMinT, iClampMaxT;
  416. ;INT16 iUoWAdj = (INT16)(pS->iUoW`'d_TexNum >> 12); // adjust to match W
  417. ;INT16 iVoWAdj = (INT16)(pS->iVoW`'d_TexNum >> 12);
  418. ;movq mm6, XpS(iUoW`'d_TexNum)
  419. ;movq mm6, MMWORD PTR Zero
  420. pxor mm6, mm6
  421. ; TBD Data in SPANTEX needs to be rearranged to make life simpler.
  422. ; I have rearranged some of it, but there still needs to be some
  423. ; fixes to it.
  424. ;iFlip1 = iU00 & pTex->iFlipMaskU; ;iFlip2 = iV00 & pTex->iFlipMaskV; ;iFlip3 = iU01 & pTex->iFlipMaskU; ;iFlip4 = iV01 & pTex->iFlipMaskV;
  425. movq mm7, mm1
  426. movd mm4, XpTex(iFlipMaskU) ; This should copy U and V mask at the same time.
  427. punpckldq mm4, mm4 ; copy UV
  428. pand mm7, mm4
  429. ;iFlip1 = MMX_cmpeqw(iFlip1, 0); ;iFlip2 = MMX_cmpeqw(iFlip2, 0); ;iFlip3 = MMX_cmpeqw(iFlip3, 0); ;iFlip4 = MMX_cmpeqw(iFlip4, 0);
  430. pcmpeqw mm7, MMWORD PTR Zero
  431. ;iFlip1 = uMaskU0 &~ iFlip1; ;iFlip2 = uMaskV0 &~ iFlip2; ;iFlip3 = uMaskU0 &~ iFlip3; ;iFlip4 = uMaskV0 &~ iFlip4;
  432. pandn mm7, mm0
  433. ;iU00 &= uMaskU0; ;iV00 &= uMaskV0; ;iU01 &= uMaskU0; ;iV01 &= uMaskV0;
  434. pand mm1, mm0
  435. ;iU00 ^= iFlip1; ;iV00 ^= iFlip2; ;iU01 ^= iFlip3; ;iV01 ^= iFlip4;
  436. pxor mm1, mm7
  437. ;iClamp11 = MMX_cmpgtw(0, iUoWAdj); ;iClamp12 = MMX_cmpgtw(0, iVoWAdj);
  438. pcmpgtd mm6, XpS(iUoW`'d_TexNum)
  439. packssdw mm6, mm6
  440. ;iClamp21 = MMX_cmpgtw(iOoWAdj, iUoWAdj); ;iClamp22 = MMX_cmpgtw(iOoWAdj, iVoWAdj);
  441. movd mm7, XpS(iOoW)
  442. punpckldq mm7, mm7 ; Make a copy of OoW to compare both UoW and VoW.
  443. psrld mm7, 11 ; Make OoWs Precision Match UoWs.
  444. pcmpgtd mm7, XpS(iUoW`'d_TexNum)
  445. packssdw mm7, mm7
  446. ;iClampMinT1 = pTex->iClampMinU & iClamp11; ;iClampMinT2 = pTex->iClampMinV & iClamp12; ;iClampMinT3 = pTex->iClampMinU & iClamp13; ;iClampMinT4 = pTex->iClampMinV & iClamp14;
  447. movd mm3, XpTex(iClampMinU)
  448. punpckldq mm3, mm3
  449. pand mm3, mm6
  450. ; Save clamp2 because pandn will destroy the value.
  451. movq mm4, mm7
  452. ;iClampMaxT1 = pTex->iClampMaxU &~ iClamp21; ;iClampMaxT2 = pTex->iClampMaxV &~ iClamp22; ;iClampMaxT3 = pTex->iClampMaxU &~ iClamp23; ;iClampMaxT4 = pTex->iClampMaxV &~ iClamp24;
  453. movd mm2, XpTex(iClampMaxU)
  454. punpckldq mm2, mm2
  455. pandn mm7, mm2 ; Since iClamp2 is already negated, I can just do an AND.
  456. ;iClamp21 &= ~iClamp11; ;iClamp22 &= ~iClamp12; ;iClamp23 &= ~iClamp13; ;iClamp24 &= ~iClamp14;
  457. pandn mm6, mm4
  458. ;iClamp21 = pTex->iClampEnU &~ iClamp21; ;iClamp22 = pTex->iClampEnU &~ iClamp22; ;iClamp23 = pTex->iClampEnU &~ iClamp23; ;iClamp24 = pTex->iClampEnU &~ iClamp24;
  459. movd mm2, XpTex(iClampEnU)
  460. punpckldq mm2, mm2
  461. pandn mm6, mm2
  462. ;iU00 &= ~iClamp21; ;iV00 &= ~iClamp22; ;iU01 &= ~iClamp23; ;iV01 &= ~iClamp24;
  463. pandn mm6, mm1
  464. ;iU00 |= iClampMinT1; ;iV00 |= iClampMinT2; ;iU01 |= iClampMinT3; ;iV01 |= iClampMinT4;
  465. por mm6, mm3
  466. ;iU00 |= iClampMaxT1; ;iV00 |= iClampMaxT2; ;iU01 |= iClampMaxT3; ;iV01 |= iClampMaxT4;
  467. por mm6, mm7
  468. movq mm4, mm6
  469. ; Making the other two cases for texture addressing has to be simpler than
  470. ; this and not use so many registers. Puts U1 V0 U0 V1 into mm3.
  471. ; TBD Make this better.
  472. ; values are still stored as iV01, iU00, iV00, iU01
  473. movq mm2, mm4
  474. movq mm3, mm4
  475. movq mm0, mm4
  476. pmaddwd mm4, mm5 ; Throw in first address calculation.
  477. ; Just to get it started. Calculate
  478. ; iU0+iV1*iShiftU0 and iU1+iV0*iShiftU0
  479. ; values are being changed to iV01, iU01, iV00, iU00
  480. ; seven instructions for this seems excessive.
  481. pand mm2, MMWORD ptr MaskKeepUValues
  482. pand mm3, MMWORD ptr MaskKeepVValues
  483. movq mm1, mm2
  484. psllq mm2, 32
  485. psrlq mm1, 32
  486. por mm3, mm2
  487. por mm3, mm1
  488. ; From here until mov edi is code that is needed for border.
  489. ; all sign bits are stored in bytes so that border code can tell if uv went below zero.
  490. ; mm0 = iV01, iU00, iV00, iU01
  491. ; mm3 = iV01, iU01, iV00, iU00
  492. ; Need to rearrange values to be like so v1 u0 v1 u1 v0 u0 v0 u1 in bytes
  493. ; This is really bad. Just doing whatever to get it to work.
  494. movq mm1, mm0
  495. punpckldq mm1, mm3 ; This will make mm1 = v0 u0 v0 u1
  496. movq mm2, mm3
  497. punpckhdq mm2, mm0 ; This will make mm0 = v1 u0 v1 u1
  498. packsswb mm1, mm2
  499. movq mm0, mm1
  500. pmaddwd mm3, mm5 ; Calculates iU1+iV0*iShiftU0 and iU0+iV1*iShiftU0
  501. mov edi, XpTex(pBits+eax*4)
  502. ; was esi. Cant change to esi because it is the pointer to pTex
  503. ; which is used by Border and ColorKey. Use edi for now and
  504. ; call routines through memory. Figure out if this is bad.
  505. ; load the read texture routine address into a register early
  506. ;mov edi, XpCtx(pfnTexRead)
  507. ; iV0 iU1 address should be done by now.
  508. movd eax, mm4
  509. ;UINT32 uTex00 = pCtx->pfnTexRead[0](iU00, iV00, pTex->iShiftU,
  510. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  511. ; Combine U and V values before making call.
  512. ;call edi
  513. call dword ptr XpCtx(pfnTexRead)
  514. movd eax, mm3
  515. movq mm7, mm1 ; Put TColor[iU0, uV0] in mm7
  516. ;UINT32 uTex10 = pCtx->pfnTexRead[0](iU01, iV00, pTex->iShiftU,
  517. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  518. ;call edi
  519. call dword ptr XpCtx(pfnTexRead)
  520. psrlq mm3, 32
  521. psubw mm7, mm1
  522. psllw mm1, 8
  523. pmullw mm7, dword ptr UFrac
  524. paddw mm7, mm1 ; Should I copy mm1 to another variable and do shift/add later?
  525. movd eax, mm3
  526. ;UINT32 uTex01 = pCtx->pfnTexRead[0](iU00, iV01, pTex->iShiftU,
  527. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  528. ;call edi
  529. call dword ptr XpCtx(pfnTexRead)
  530. psrlq mm4, 32
  531. movq mm6, mm1
  532. movd eax, mm4
  533. ;UINT32 uTex11 = pCtx->pfnTexRead[0](iU01, iV01, pTex->iShiftU,
  534. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  535. ;call edi
  536. call dword ptr XpCtx(pfnTexRead)
  537. ;TexFiltBilinear(&pCtx->SI.TexCol[0], iUFrac, iVFrac, uTex00, uTex10, uTex01, uTex11);
  538. ; The amount of shifting instructions for this makes the other approach
  539. ; look pretty good.
  540. psubw mm6, mm1
  541. psllw mm1, 8
  542. pmullw mm6, dword ptr UFrac ; TBD explain this code better.
  543. movq mm4, mm7
  544. paddw mm6, mm1
  545. psrlw mm6, 8
  546. psrlw mm7, 8
  547. psubw mm6, mm7
  548. pmullw mm6, dword ptr VFrac
  549. paddw mm4, mm6
  550. psrlw mm4, 8
  551. ; TBD shouldnt have to pack and then unpack later. Should keep in a register
  552. ;packuswb mm4, mm4
  553. ;movd XpCtxSI(TexCol), mm4
  554. jmp mipinterp
  555. NotLinear:
  556. ; Get LOD from mm3 and put in eax.
  557. movd eax, mm3
  558. ; This helps in calculating texture address.
  559. movzx edx, word ptr XpTex(iShiftPitch+eax*2)
  560. add edx, 16
  561. movd mm2, edx
  562. movq mm5, MMWORD ptr Makelow16one
  563. pslld mm5, mm2
  564. por mm5, MMWORD ptr Makelow16one
  565. ;iU00 >>= 6;
  566. ;iV00 >>= 6;
  567. psrad mm1, 6
  568. packssdw mm1, mm1 ; Value needs to be packed since all wrap/mirror
  569. ; operations assume UV in low 32 bits.
  570. ;UINT16 uMaskU0 = pTex->uMaskU >> iLOD0; UINT16 uMaskV0 = pTex->uMaskV >> iLOD0;
  571. ; put mask in mm3 and replicate to match location for wrap/mirror/clamp
  572. movd mm0, XpTex(uMaskU) ; Load U and V mask
  573. ; iLOD0 shift value left over from above. TBD. Put this in in mip case
  574. ; Could do this one before or after the unpack also.
  575. psrlw mm0, mm3
  576. ;INT16 iFlip, iClamp1, iClamp2, iClampMinT, iClampMaxT;
  577. ;INT16 iUoWAdj = (INT16)(pS->iUoW`'d_TexNum >> 12); // adjust to match W
  578. ;INT16 iVoWAdj = (INT16)(pS->iVoW`'d_TexNum >> 12);
  579. ;movq mm6, XpS(iUoW`'d_TexNum)
  580. ;movq mm6, MMWORD PTR Zero
  581. pxor mm6, mm6
  582. ; TBD Data in SPANTEX needs to be rearranged to make life simpler.
  583. ; I have rearranged some of it, but there still needs to be some
  584. ; fixes to it.
  585. ;iFlip1 = iU00 & pTex->iFlipMaskU; ;iFlip2 = iV00 & pTex->iFlipMaskV; ;iFlip3 = iU01 & pTex->iFlipMaskU; ;iFlip4 = iV01 & pTex->iFlipMaskV;
  586. movq mm7, mm1
  587. movd mm4, XpTex(iFlipMaskU) ; This should copy U and V mask at the same time.
  588. pand mm7, mm4
  589. ;iFlip1 = MMX_cmpeqw(iFlip1, 0); ;iFlip2 = MMX_cmpeqw(iFlip2, 0); ;iFlip3 = MMX_cmpeqw(iFlip3, 0); ;iFlip4 = MMX_cmpeqw(iFlip4, 0);
  590. pcmpeqw mm7, MMWORD PTR Zero
  591. ;iFlip1 = uMaskU0 &~ iFlip1; ;iFlip2 = uMaskV0 &~ iFlip2; ;iFlip3 = uMaskU0 &~ iFlip3; ;iFlip4 = uMaskV0 &~ iFlip4;
  592. pandn mm7, mm0
  593. ;iU00 &= uMaskU0; ;iV00 &= uMaskV0; ;iU01 &= uMaskU0; ;iV01 &= uMaskV0;
  594. pand mm1, mm0
  595. ;iU00 ^= iFlip1; ;iV00 ^= iFlip2; ;iU01 ^= iFlip3; ;iV01 ^= iFlip4;
  596. pxor mm1, mm7
  597. ;iClamp11 = MMX_cmpgtw(0, iUoWAdj); ;iClamp12 = MMX_cmpgtw(0, iVoWAdj);
  598. pcmpgtd mm6, XpS(iUoW`'d_TexNum)
  599. packssdw mm6, mm6
  600. ;iClamp21 = MMX_cmpgtw(iOoWAdj, iUoWAdj); ;iClamp22 = MMX_cmpgtw(iOoWAdj, iVoWAdj);
  601. movd mm7, XpS(iOoW)
  602. punpckldq mm7, mm7 ; Make a copy of OoW to compare both UoW and VoW.
  603. psrld mm7, 11 ; Make OoWs Precision Match UoWs.
  604. pcmpgtd mm7, XpS(iUoW`'d_TexNum)
  605. packssdw mm7, mm7
  606. ;iClampMinT1 = pTex->iClampMinU & iClamp11; ;iClampMinT2 = pTex->iClampMinV & iClamp12; ;iClampMinT3 = pTex->iClampMinU & iClamp13; ;iClampMinT4 = pTex->iClampMinV & iClamp14;
  607. movd mm0, XpTex(iClampMinU)
  608. pand mm0, mm6
  609. ; Save clamp2 because pandn will destroy the value.
  610. movq mm4, mm7
  611. ;iClampMaxT1 = pTex->iClampMaxU &~ iClamp21; ;iClampMaxT2 = pTex->iClampMaxV &~ iClamp22; ;iClampMaxT3 = pTex->iClampMaxU &~ iClamp23; ;iClampMaxT4 = pTex->iClampMaxV &~ iClamp24;
  612. movd mm2, XpTex(iClampMaxU)
  613. pandn mm7, mm2 ; Since iClamp2 is already negated, I can just do an AND.
  614. ;iClamp21 &= ~iClamp11; ;iClamp22 &= ~iClamp12; ;iClamp23 &= ~iClamp13; ;iClamp24 &= ~iClamp14;
  615. pandn mm6, mm4
  616. ;iClamp21 = pTex->iClampEnU &~ iClamp21; ;iClamp22 = pTex->iClampEnU &~ iClamp22; ;iClamp23 = pTex->iClampEnU &~ iClamp23; ;iClamp24 = pTex->iClampEnU &~ iClamp24;
  617. movd mm2, XpTex(iClampEnU)
  618. pandn mm6, mm2
  619. ;iU00 &= ~iClamp21; ;iV00 &= ~iClamp22; ;iU01 &= ~iClamp23; ;iV01 &= ~iClamp24;
  620. pandn mm6, mm1
  621. ;iU00 |= iClampMinT1; ;iV00 |= iClampMinT2; ;iU01 |= iClampMinT3; ;iV01 |= iClampMinT4;
  622. por mm6, mm0
  623. ;iU00 |= iClampMaxT1; ;iV00 |= iClampMaxT2; ;iU01 |= iClampMaxT3; ;iV01 |= iClampMaxT4;
  624. por mm6, mm7
  625. movq mm4, mm6
  626. movq mm0, mm4
  627. pmaddwd mm4, mm5 ; Throw in first address calculation.
  628. ; Just to get it started. Calculate
  629. ; iU0+iV1*iShiftU0 and iU1+iV0*iShiftU0
  630. ; Point needs to be in same format as bilinear for border
  631. packsswb mm0, mm0
  632. mov edi, XpTex(pBits+eax*4)
  633. ; iV0 iU1 address should be done by now.
  634. movd eax, mm4
  635. ;pCtx->SI.TexCol[0] = pCtx->pfnTexRead[0](iU00, iV00, pTex->iShiftU,
  636. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  637. call dword ptr XpCtx(pfnTexRead)
  638. ; TBD Currently have to pack and then unpack later. Should be able
  639. ; to leave the value in some register for a while. I would think.
  640. ;packuswb mm1, mm1
  641. movq XpCtxSI(uBB), mm1
  642. ;----Calc second mip level pixel------------------------------------------------------------------------------
  643. ;****** iLOD0 was saved in mm3 from above.
  644. ;INT16 iLOD1 = (INT16)(min(iLOD0+(pS->iLOD > 0), pTex->cLOD));
  645. pxor mm5, mm5
  646. movd mm2, XpS(iLOD)
  647. pcmpgtw mm2, mm5
  648. psubw mm3, mm2
  649. movd mm1, XpTex(cLOD)
  650. movq mm2, mm3
  651. pcmpgtw mm3, mm1
  652. pand mm1, mm3
  653. pandn mm3, mm2
  654. por mm3, mm1
  655. pand mm3, MMWORD PTR Val0xffff ; Get rid of any data in the high word.
  656. ; Get LOD from mm3 and put in eax.
  657. movd eax, mm3
  658. movq mm1, XpCtxSI(iU`'d_TexNum)
  659. ;INT16 iShiftU0 = pTex->iShiftU - iLOD0;
  660. ;INT16 iShiftV0 = pTex->iShiftV - iLOD0;
  661. movq mm5, MMWORD PTR Val0x000a000a ; This is TEX_FINAL_SHIFT - 6 = 10.
  662. punpcklwd mm3, mm3 ; Make two copys of iLOD to subtract U and V
  663. movd mm4, XpTex(iShiftU)
  664. psubw mm4, mm3
  665. psubw mm5, mm4
  666. movq mm4, mm5
  667. pand mm5, MMWORD PTR Val0xffff
  668. pand mm3, MMWORD PTR Val0xffff ; Make iLOD back to only one copy
  669. psrld mm4, 16
  670. movd mm1, XpCtxSI(iU`'d_TexNum)
  671. psrad mm1, mm5
  672. movd mm2, XpCtxSI(iV`'d_TexNum)
  673. psrad mm2, mm4
  674. punpckldq mm1, mm2
  675. ; This helps in calculating texture address.
  676. movzx edx, word ptr XpTex(iShiftPitch+eax*2)
  677. add edx, 16
  678. movd mm2, edx
  679. movq mm5, MMWORD ptr Makelow16one
  680. pslld mm5, mm2
  681. por mm5, MMWORD ptr Makelow16one
  682. ;iU00 >>= 6;
  683. ;iV00 >>= 6;
  684. psrad mm1, 6
  685. packssdw mm1, mm1 ; Value needs to be packed since all wrap/mirror
  686. ; operations assume UV in low 32 bits.
  687. ;UINT16 uMaskU0 = pTex->uMaskU >> iLOD0; UINT16 uMaskV0 = pTex->uMaskV >> iLOD0;
  688. ; put mask in mm3 and replicate to match location for wrap/mirror/clamp
  689. movd mm0, XpTex(uMaskU) ; Load U and V mask
  690. ; iLOD0 shift value left over from above. TBD. Put this in in mip case
  691. ; Could do this one before or after the unpack also.
  692. psrlw mm0, mm3
  693. ;INT16 iFlip, iClamp1, iClamp2, iClampMinT, iClampMaxT;
  694. ;INT16 iUoWAdj = (INT16)(pS->iUoW`'d_TexNum >> 12); // adjust to match W
  695. ;INT16 iVoWAdj = (INT16)(pS->iVoW`'d_TexNum >> 12);
  696. ;movq mm6, XpS(iUoW`'d_TexNum)
  697. ;movq mm6, MMWORD PTR Zero
  698. pxor mm6, mm6
  699. ; TBD Data in SPANTEX needs to be rearranged to make life simpler.
  700. ; I have rearranged some of it, but there still needs to be some
  701. ; fixes to it.
  702. ;iFlip1 = iU00 & pTex->iFlipMaskU; ;iFlip2 = iV00 & pTex->iFlipMaskV; ;iFlip3 = iU01 & pTex->iFlipMaskU; ;iFlip4 = iV01 & pTex->iFlipMaskV;
  703. movq mm7, mm1
  704. movd mm4, XpTex(iFlipMaskU) ; This should copy U and V mask at the same time.
  705. pand mm7, mm4
  706. ;iFlip1 = MMX_cmpeqw(iFlip1, 0); ;iFlip2 = MMX_cmpeqw(iFlip2, 0); ;iFlip3 = MMX_cmpeqw(iFlip3, 0); ;iFlip4 = MMX_cmpeqw(iFlip4, 0);
  707. pcmpeqw mm7, MMWORD PTR Zero
  708. ;iFlip1 = uMaskU0 &~ iFlip1; ;iFlip2 = uMaskV0 &~ iFlip2; ;iFlip3 = uMaskU0 &~ iFlip3; ;iFlip4 = uMaskV0 &~ iFlip4;
  709. pandn mm7, mm0
  710. ;iU00 &= uMaskU0; ;iV00 &= uMaskV0; ;iU01 &= uMaskU0; ;iV01 &= uMaskV0;
  711. pand mm1, mm0
  712. ;iU00 ^= iFlip1; ;iV00 ^= iFlip2; ;iU01 ^= iFlip3; ;iV01 ^= iFlip4;
  713. pxor mm1, mm7
  714. ;iClamp11 = MMX_cmpgtw(0, iUoWAdj); ;iClamp12 = MMX_cmpgtw(0, iVoWAdj);
  715. pcmpgtd mm6, XpS(iUoW`'d_TexNum)
  716. packssdw mm6, mm6
  717. ;iClamp21 = MMX_cmpgtw(iOoWAdj, iUoWAdj); ;iClamp22 = MMX_cmpgtw(iOoWAdj, iVoWAdj);
  718. movd mm7, XpS(iOoW)
  719. punpckldq mm7, mm7 ; Make a copy of OoW to compare both UoW and VoW.
  720. psrld mm7, 11 ; Make OoWs Precision Match UoWs.
  721. pcmpgtd mm7, XpS(iUoW`'d_TexNum)
  722. packssdw mm7, mm7
  723. ;iClampMinT1 = pTex->iClampMinU & iClamp11; ;iClampMinT2 = pTex->iClampMinV & iClamp12; ;iClampMinT3 = pTex->iClampMinU & iClamp13; ;iClampMinT4 = pTex->iClampMinV & iClamp14;
  724. movd mm0, XpTex(iClampMinU)
  725. pand mm0, mm6
  726. ; Save clamp2 because pandn will destroy the value.
  727. movq mm4, mm7
  728. ;iClampMaxT1 = pTex->iClampMaxU &~ iClamp21; ;iClampMaxT2 = pTex->iClampMaxV &~ iClamp22; ;iClampMaxT3 = pTex->iClampMaxU &~ iClamp23; ;iClampMaxT4 = pTex->iClampMaxV &~ iClamp24;
  729. movd mm2, XpTex(iClampMaxU)
  730. pandn mm7, mm2 ; Since iClamp2 is already negated, I can just do an AND.
  731. ;iClamp21 &= ~iClamp11; ;iClamp22 &= ~iClamp12; ;iClamp23 &= ~iClamp13; ;iClamp24 &= ~iClamp14;
  732. pandn mm6, mm4
  733. ;iClamp21 = pTex->iClampEnU &~ iClamp21; ;iClamp22 = pTex->iClampEnU &~ iClamp22; ;iClamp23 = pTex->iClampEnU &~ iClamp23; ;iClamp24 = pTex->iClampEnU &~ iClamp24;
  734. movd mm2, XpTex(iClampEnU)
  735. pandn mm6, mm2
  736. ;iU00 &= ~iClamp21; ;iV00 &= ~iClamp22; ;iU01 &= ~iClamp23; ;iV01 &= ~iClamp24;
  737. pandn mm6, mm1
  738. ;iU00 |= iClampMinT1; ;iV00 |= iClampMinT2; ;iU01 |= iClampMinT3; ;iV01 |= iClampMinT4;
  739. por mm6, mm0
  740. ;iU00 |= iClampMaxT1; ;iV00 |= iClampMaxT2; ;iU01 |= iClampMaxT3; ;iV01 |= iClampMaxT4;
  741. por mm6, mm7
  742. movq mm4, mm6
  743. movq mm0, mm4
  744. pmaddwd mm4, mm5 ; Throw in first address calculation.
  745. ; Just to get it started. Calculate
  746. ; iU0+iV1*iShiftU0 and iU1+iV0*iShiftU0
  747. ; Point needs to be in same format as bilinear for border
  748. packsswb mm0, mm0
  749. mov edi, XpTex(pBits+eax*4)
  750. ; iV0 iU1 address should be done by now.
  751. movd eax, mm4
  752. ;pCtx->SI.TexCol[0] = pCtx->pfnTexRead[0](iU00, iV00, pTex->iShiftU,
  753. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  754. call dword ptr XpCtx(pfnTexRead)
  755. ; TBD Currently have to pack and then unpack later. Should be able
  756. ; to leave the value in some register for a while. I would think.
  757. ;packuswb mm1, mm1
  758. ;movd XpCtxSI(TexCol), mm1
  759. movq mm4, mm1
  760. mipinterp: ; Blend Tex1 (in mm4 on entry) with tex0 using a weight taken from iLOD, store the result as TexCol, run the update macros, then jump through pfnTex1AddrEnd.
  761. ;INT32 r0, r1;
  762. ;INT32 g0, g1;
  763. ;INT32 b0, b1;
  764. ;INT32 a0, a1;
  765. ;r0 = RGBA_GETRED(uTex0);
  766. ;r1 = RGBA_GETRED(uTex1);
  767. ;g0 = RGBA_GETGREEN(uTex0);
  768. ;g1 = RGBA_GETGREEN(uTex1);
  769. ;b0 = RGBA_GETBLUE(uTex0);
  770. ;b1 = RGBA_GETBLUE(uTex1);
  771. ;a0 = RGBA_GETALPHA(uTex0);
  772. ;a1 = RGBA_GETALPHA(uTex1);
  773. dnl d_bcom()
  774. ;Tex1 in mm4, tex0 will be in mm1
  775. movq mm1, XpCtxSI(uBB) ; mm1 = tex0, read from the uBB slot. The word-wise ops below treat it as four 16-bit lanes.
  776. movq mm2, mm1 ; mm2 = second copy of tex0, used as the base term of the blend
  777. psubw mm4, mm1 ; mm4 = Tex1 - tex0 in each 16-bit word
  778. psllw mm2, 8 ; mm2 = tex0 << 8 in each word, to line up with the 8-bit weight
  779. ;INT32 t = pS->iLOD & 0x7ff; NOTE review: the code below does not use an 11-bit weight. It computes t as iLOD >> 3 masked with 0xff.
  780. mov eax, XpS(iLOD) ; eax = pS->iLOD
  781. shr eax, 3 ; discard the low 3 bits
  782. and eax, 0ffh ; keep 8 bits, so t is 0 to 255 and the upper bits of eax are zero
  783. movd mm3, eax ; mm3 low dword = t
  784. ; Replicate t into all four 16-bit words of mm3.
  785. punpcklwd mm3, mm3 ; low dword of mm3 now holds t in both of its words
  786. punpckldq mm3, mm3 ; copy that dword upward: all four words of mm3 = t
  787. ;INT32 mt = 0x7ff - t;
  788. ;r0 = (mt*r0 + t*r1)>>11;
  789. ;g0 = (mt*g0 + t*g1)>>11;
  790. ;b0 = (mt*b0 + t*b1)>>11;
  791. ;a0 = (mt*a0 + t*a1)>>11;
  792. pmullw mm4, mm3 ; mm4 = low 16 bits of t * difference, per word
  793. paddw mm4, mm2 ; mm4 = tex0 << 8 plus t * difference. This is the difference form of the weighted blend sketched above, with 8 fraction bits instead of 11.
  794. dnl d_ecom()
  795. ;movq mm4, XpCtxSI(uBB)
  796. psrlw mm4, 8 ; logical shift drops the 8 fraction bits
  797. packuswb mm4, mm4 ; pack the four words to bytes with unsigned saturation
  798. movd XpCtxSI(TexCol), mm4 ; store the low 32 bits, the four packed bytes, as TexCol
  799. d_UpdateUoWandVoW(1) ; macro defined outside this view. Presumably steps the UoW and VoW values selected by the argument 1 - TODO confirm.
  800. d_UpdateLOD() ; macro defined outside this view. Presumably steps iLOD - TODO confirm.
  801. d_UpdateOoW() ; macro defined outside this view. Presumably steps iOoW - TODO confirm.
  802. ;pS->iW = 0x00800000/(pS->iOoW>>16); // 9.23/1.15 = 8.8
  803. d_WDivide() ; macro defined outside this view. Presumably performs the divide sketched in the line above - TODO confirm.
  804. d_UoWVoWTimesW(1) ; macro defined outside this view. Presumably multiplies UoW and VoW by W - TODO confirm.
  805. ; load the next bead address.
  806. mov eax, XpCtx(pfnTex1AddrEnd) ; eax = pCtx->pfnTex1AddrEnd
  807. ; pCtx->pfnTex1AddrEnd(pCtx, pP, pS);
  808. jmp eax ; a jump rather than a call, so nothing is pushed and control does not return here
  809. ;}
  810. END