Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1084 lines
37 KiB

  1. ;
  2. ; WARNING WARNING WARNING
  3. ; This asm file is generated from the mas file.
  4. ; EDIT THE MAS FILE.
  5. ; I warned you.
  6. ; WARNING WARNING WARNING
  7. ;
  8. ;-----------------------------------------------------------------------------
  9. include(`m4hdr.mh')dnl
  10. include(`cvars.mh')dnl
  11. INCLUDE iammx.inc
  12. INCLUDE offs_acp.inc
  13. .586
  14. .model flat
  15. .data ; Shared qword constants and scratch storage for the MMX texture address and filter code that follows.
  16. PUBLIC IncHighandLow16
  17. IncHighandLow16 dq 0001000000000001h ; paddw operand: adds 1 to word 0 and word 3 only, so a replicated V U V U becomes iV01, iU00, iV00, iU01.
  18. PUBLIC UFracVFracMask
  19. UFracVFracMask dq 0000003f0000003fh ; Keeps the low 6 fraction bits of each dword (U in the low dword, V in the high dword). Used to be 00000fff00000fffh; changed to 6 bits.
  20. PUBLIC UV32to15Mask
  21. UV32to15Mask dq 0000ffff0000ffffh ; Keeps the low 16 bits of each dword. The original author was unsure whether this should be 0ffffh or 07fffh per dword.
  22. PUBLIC Makelow16one
  23. Makelow16one dq 0000000100000001h ; 1 in each dword. Shifted left by pitch shift + 16 and ORed with itself to build the pmaddwd multiplier: low word 1, high word 1 << pitch shift.
  24. PUBLIC MaskKeepUValues
  25. MaskKeepUValues dq 00000ffff0000ffffh ; Keeps the low word of each dword, which is where the U coordinates sit in the packed bilinear layout.
  26. PUBLIC MaskKeepVValues
  27. MaskKeepVValues dq 0ffff0000ffff0000h ; Keeps the high word of each dword, which is where the V coordinates sit.
  28. PUBLIC UFrac
  29. UFrac dq ? ; Scratch: 8-bit U fraction replicated into all four words; stored before the texel reads and used as a pmullw operand when blending.
  30. PUBLIC VFrac
  31. VFrac dq ? ; Scratch: 8-bit V fraction replicated into all four words; stored before the texel reads and used as a pmullw operand when blending.
  32. PUBLIC Val0x000a000a
  33. Val0x000a000a dq 000000000000a000ah ; 10 in each of the two low words: TEX_FINAL_SHIFT - 6, the base from which the per-axis U and V right-shift counts are derived.
  34. PUBLIC Val0xffff
  35. Val0xffff dq 0ffffh ; Keeps only the lowest word of a qword.
  36. PUBLIC Val0x0000002000000020
  37. Val0x0000002000000020 dq 0000002000000020h ; 20h in each dword; subtracted from U and V while they still carry 6 fraction bits, i.e. half of one texel step, before bilinear sampling.
  38. PUBLIC Val0x0000ffff0000ffff
  39. Val0x0000ffff0000ffff dq 0000ffff0000ffffh ; Low-word-per-dword mask. NOTE(review): only referenced from commented-out pand instructions in the part of the file reviewed.
  40. PUBLIC Zero
  41. Zero dq 0 ; All-zero qword, used as a pcmpeqw memory operand.
  42. PUBLIC memD3DTFG_POINT
  43. memD3DTFG_POINT dq D3DTFG_POINT ; D3DTFG_POINT stored in memory as a qword. NOTE(review): presumably so MMX code can use it as a memory operand - confirm at the use site.
  44. ; Used as a counter inside the SpecialW loop.
  45. PUBLIC GiveUp
  46. GiveUp dd ?
  47. PUBLIC LastW
  48. LastW dd ? ; NOTE(review): uninitialized dword; its use is not visible in the part of the file reviewed.
  49. .code
  50. include(`texaddra.mh')dnl
  ; Nested replace-string expansion. It appears to stamp out the innermost texture
  ; address macro call once for every combination of the placeholder values below:
  ;   AA = TexAddrWrapMirror, TexAddrAll
  ;   BB = NoPersp, Persp
  ;   CC = Point, Bilinear - plus MaybeBilinear unless DD is NoLOD
  ;   DD = NoLOD, LOD
  ; NOTE(review): the replacement macro is not defined in this file - presumably it
  ; lives in one of the .mh headers pulled in above. Confirm the expansion order and
  ; the point at which the DD test for CC is evaluated against that definition.
  51. d_RepStr(`d_RepStr(`d_RepStr(`d_RepStr(`d_TexAddr(0, AA, BB, CC, DD, NotMonolithic)',
  52. `AA', `TexAddrWrapMirror', `TexAddrAll')',
  53. `BB', `NoPersp', `Persp')',
  54. `CC', ifelse(DD, NoLOD, `Point, Bilinear', `Point, Bilinear, MaybeBilinear'))',
  55. `DD', `NoLOD', `LOD')
  56. ;// All singing all dancing mip mapping address calculation and filtering.
  57. ;// No texture filtering code need be called after this bead.
  58. ;void Tex1AddrFilt_All_Mip(PD3DI_RASTCTX pCtx, PD3DI_RASTPRIM pP,
  59. ; PD3DI_RASTSPAN pS)
  60. ;{
  61. PUBLIC _MMX_Tex1Addr_Filt_All_Mip
  62. _MMX_Tex1Addr_Filt_All_Mip:
  63. define(`d_TexNum', 1)dnl
  64. ;PD3DI_SPANTEX pTex = &pCtx->pTexture[0];
  65. mov esi, XpCtx(pTexture + 0*SIZEOF_PSPANTEX)
  66. ;INT16 iLOD0 = min(max(pS->iLOD >> 11, 0), pTex->cLOD);
  67. ;INT32 iU00 = pCtx->SI.iU`'d_TexNum<<(pTex->iShiftU - iLOD0);
  68. ;INT32 iV00 = pCtx->SI.iV`'d_TexNum<<(pTex->iShiftV - iLOD0);
  69. movq mm1, XpCtxSI(iU`'d_TexNum)
  70. movsx eax, word ptr XpS(iLOD)
  71. sar eax, 11
  72. mov edx, eax
  73. sar edx, 31
  74. not edx
  75. ;xor edx, 0ffffffffh
  76. and eax, edx
  77. define(`d_MaxCLODcnt', eval(d_MaxCLODcnt+1))dnl
  78. cmp eax, XpTex(cLOD)
  79. jb NotMax`'d_MaxCLODcnt`'
  80. mov eax, XpTex(cLOD)
  81. NotMax`'d_MaxCLODcnt`':
  82. ; eax is use below so we will keep iLOD0 in mm3 and put it into eax later.
  83. movd mm3, eax
  84. ; ----------------------------------------
  85. ; Doing UV calculation a little more accurate
  86. ; Exactly like C code.
  87. ; I shift iU and iV to the right not by (TEX_FINAL_SHIFT - iShiftU0) but by
  88. ; (TEX_FINAL_SHIFT - iShiftU0 - 6). iShiftU0 = pTex->iShiftU - iLOD0
  89. ; (TEX_FINAL_SHIFT - (pTex->iShiftU - iLOD0))
  90. ; (TEX_FINAL_SHIFT + iLOD0 - pTex->iShiftU)
  91. ; COMMENT1**
  92. ; If textures have a max of 1024 then shiftU0 would be at most 10 which would
  93. ; make (TEXT_FINAL_SHIFT - iShiftU - 6) at most zero. This is why I choose 6
  94. ; It will also give bi-linear 6 bits of precision I think it was said that
  95. ; only five was needed.
  96. ;INT16 iShiftU0 = pTex->iShiftU - iLOD0;
  97. ;INT16 iShiftV0 = pTex->iShiftV - iLOD0;
  98. movq mm5, MMWORD PTR Val0x000a000a ; This is TEX_FINAL_SHIFT - 6 = 10.
  99. punpcklwd mm3, mm3 ; Make two copys of iLOD to subtract U and V
  100. movd mm4, XpTex(iShiftU)
  101. psubw mm4, mm3
  102. psubw mm5, mm4
  103. movq mm4, mm5
  104. pand mm5, MMWORD PTR Val0xffff
  105. pand mm3, MMWORD PTR Val0xffff ; Make iLOD back to only one copy
  106. psrld mm4, 16
  107. movd mm1, XpCtxSI(iU`'d_TexNum)
  108. psrad mm1, mm5
  109. movd mm2, XpCtxSI(iV`'d_TexNum)
  110. psrad mm2, mm4
  111. punpckldq mm1, mm2
  112. ;// select filter based on whether we are minifying or magnifying
  113. ;D3DTEXTUREMINFILTER uFilter;
  114. ;if (pS->iLOD < 0)
  115. ;{
  116. ; // depends on the first two entries (POINT and LINEAR)
  117. ; // being the same for min and mag
  118. ; uFilter = (D3DTEXTUREMINFILTER)pTex->uMagFilter;
  119. ;}
  120. ;else
  121. ;{
  122. ; uFilter = pTex->uMinFilter;
  123. ;}
  124. ; Use edx mask from above to determine if iLOD is less than 0.
  125. mov eax, XpTex(uMinFilter)
  126. and eax, edx
  127. not edx
  128. and edx, XpTex(uMagFilter)
  129. or eax, edx
  130. ;if (uFilter == D3DTFG_LINEAR)
  131. ;{
  132. cmp eax, D3DTFG_LINEAR
  133. jne NotLinear
  134. ; Get LOD from mm3 and put in eax.
  135. movd eax, mm3
  136. ; Save this off because there is no way to keep it in a register until next time.
  137. movd XpCtxSI(TexCol+4), mm3
  138. ; This helps in calculating texture address.
  139. movzx edx, word ptr XpTex(iShiftPitch+eax*2)
  140. add edx, 16
  141. movd mm2, edx
  142. movq mm5, MMWORD ptr Makelow16one
  143. pslld mm5, mm2
  144. por mm5, MMWORD ptr Makelow16one
  145. ;INT32 iHalf = 1<<(TEX_FINAL_SHIFT - iShiftU0 - 1);
  146. ;INT32 iUAlign = pCtx->SI.iU1 - iHalf;
  147. ;iHalf = 1<<(TEX_FINAL_SHIFT - iShiftV0 - 1);
  148. ;INT32 iVAlign = pCtx->SI.iV1 - iHalf;
  149. ;iU00 = iUAlign >> (TEX_FINAL_SHIFT - iShiftU0);
  150. ;iV00 = iVAlign >> (TEX_FINAL_SHIFT - iShiftV0);
  151. ;iUFrac0 = (iUAlign<<iShiftU0) & TEX_FINAL_FRAC_MASK;
  152. ;iVFrac0 = (iVAlign<<iShiftV0) & TEX_FINAL_FRAC_MASK;
  153. psubd mm1, MMWORD PTR Val0x0000002000000020
  154. ;INT32 iUFrac = iU00 & 0x03f;
  155. ;INT32 iVFrac = iV00 & 0x03f;
  156. ;iU00 >>= 6;
  157. ;iV00 >>= 6;
  158. movq mm2, mm1
  159. psrad mm1, 6
  160. ;pand mm1, MMWORD PTR Val0x0000ffff0000ffff
  161. pand mm2, dword ptr UFracVFracMask ; UFracVFracMask = 0x0000003f0000003f
  162. ; Going to use only 8 bits for bi-linear so that I can do a pmullw.
  163. ; Currently at 6 bits so shift up by 2.
  164. psllw mm2, 2
  165. movq mm0, mm2
  166. ; Replicate VFrac value for bilinear
  167. punpckhwd mm2, mm2
  168. punpcklwd mm2, mm2
  169. ; Replicate UFrac Value for bilinear
  170. punpcklwd mm0, mm0
  171. punpcklwd mm0, mm0
  172. movq dword ptr VFrac, mm2
  173. movq dword ptr UFrac, mm0
  174. ;INT32 iU01 = iU00 + 1;
  175. ;INT32 iV01 = iV00 + 1;
  176. packssdw mm1, mm1 ; replicate U and V value to upper 16 bit locations
  177. paddw mm1, dword ptr IncHighandLow16
  178. ; This will make texture values be (High word to low word):
  179. ; iV01, iU00, iV00, iU01
  180. ; Need to do this to make texture look up for bilinear easier.
  181. ; I have to combine to get all combinations anyway. It just
  182. ; happens to be better for me to have iV00, iU01 pair first.
  183. ;UINT16 uMaskU0 = pTex->uMaskU >> iLOD0; UINT16 uMaskV0 = pTex->uMaskV >> iLOD0;
  184. ; put mask in mm3 and replicate to match location for wrap/mirror/clamp
  185. movd mm0, XpTex(uMaskU) ; Load U and V mask
  186. ; replicate mask if doing bilinear
  187. punpckldq mm0, mm0
  188. psrlw mm0, mm3
  189. ;INT16 iFlip, iClamp1, iClamp2, iClampMinT, iClampMaxT;
  190. ;INT16 iUoWAdj = (INT16)(pS->iUoW`'d_TexNum >> 12); // adjust to match W
  191. ;INT16 iVoWAdj = (INT16)(pS->iVoW`'d_TexNum >> 12);
  192. ;movq mm6, XpS(iUoW`'d_TexNum)
  193. ;movq mm6, MMWORD PTR Zero
  194. pxor mm6, mm6
  195. ; TBD Data in SPANTEX needs to be rearange to make life simpler.
  196. ; I have rearranged some of it, but there still needs to be some
  197. ; fixes to it.
  198. ;iFlip1 = iU00 & pTex->iFlipMaskU; ;iFlip2 = iV00 & pTex->iFlipMaskV; ;iFlip3 = iU01 & pTex->iFlipMaskU; ;iFlip4 = iV01 & pTex->iFlipMaskV;
  199. movq mm7, mm1
  200. movd mm4, XpTex(iFlipMaskU) ; This should copy U and V mask at the same time.
  201. punpckldq mm4, mm4 ; copy UV
  202. psrlw mm4, mm3 ; Shifts mirror mask to correct bit location
  203. pand mm7, mm4
  204. ;iFlip1 = MMX_cmpeqw(iFlip1, 0); ;iFlip2 = MMX_cmpeqw(iFlip2, 0); ;iFlip3 = MMX_cmpeqw(iFlip3, 0); ;iFlip4 = MMX_cmpeqw(iFlip4, 0);
  205. pcmpeqw mm7, MMWORD PTR Zero
  206. ;iFlip1 = uMaskU0 &~ iFlip1; ;iFlip2 = uMaskV0 &~ iFlip2; ;iFlip3 = uMaskU0 &~ iFlip3; ;iFlip4 = uMaskV0 &~ iFlip4;
  207. pandn mm7, mm0
  208. ;iU00 &= uMaskU0; ;iV00 &= uMaskV0; ;iU01 &= uMaskU0; ;iV01 &= uMaskV0;
  209. pand mm1, mm0
  210. ;iU00 ^= iFlip1; ;iV00 ^= iFlip2; ;iU01 ^= iFlip3; ;iV01 ^= iFlip4;
  211. pxor mm1, mm7
  212. ;iClamp11 = MMX_cmpgtw(0, iUoWAdj); ;iClamp12 = MMX_cmpgtw(0, iVoWAdj);
  213. pcmpgtd mm6, XpS(iUoW`'d_TexNum)
  214. packssdw mm6, mm6
  215. ;iClamp21 = MMX_cmpgtw(iOoWAdj, iUoWAdj); ;iClamp22 = MMX_cmpgtw(iOoWAdj, iVoWAdj);
  216. movd mm7, XpS(iOoW)
  217. punpckldq mm7, mm7 ; Make a copy of OoW to compare both UoW and VoW.
  218. psrld mm7, 11 ; Make OoWs Precision Match UoWs.
  219. pcmpgtd mm7, XpS(iUoW`'d_TexNum)
  220. packssdw mm7, mm7
  221. ;iClampMinT1 = pTex->iClampMinU & iClamp11; ;iClampMinT2 = pTex->iClampMinV & iClamp12; ;iClampMinT3 = pTex->iClampMinU & iClamp13; ;iClampMinT4 = pTex->iClampMinV & iClamp14;
  222. movd mm0, XpTex(iClampMinU)
  223. punpckldq mm0, mm0
  224. pand mm0, mm6
  225. ; Save clamp2 because pandn will destory value.
  226. movq mm4, mm7
  227. ;iClampMaxT1 = pTex->iClampMaxU &~ iClamp21; ;iClampMaxT2 = pTex->iClampMaxV &~ iClamp22; ;iClampMaxT3 = pTex->iClampMaxU &~ iClamp23; ;iClampMaxT4 = pTex->iClampMaxV &~ iClamp24;
  228. movd mm2, XpTex(iClampMaxU)
  229. punpckldq mm2, mm2
  230. psraw mm2, mm3 ; Shifts clamp max to correct bit location
  231. pandn mm7, mm2 ; Since iClamp2 is already negated, I can just do an AND.
  232. ;iClamp21 &= ~iClamp11; ;iClamp22 &= ~iClamp12; ;iClamp23 &= ~iClamp13; ;iClamp24 &= ~iClamp14;
  233. pandn mm6, mm4
  234. ;iClamp21 = pTex->iClampEnU &~ iClamp21; ;iClamp22 = pTex->iClampEnU &~ iClamp22; ;iClamp23 = pTex->iClampEnU &~ iClamp23; ;iClamp24 = pTex->iClampEnU &~ iClamp24;
  235. movd mm2, XpTex(iClampEnU)
  236. punpckldq mm2, mm2
  237. pandn mm6, mm2
  238. ;iU00 &= ~iClamp21; ;iV00 &= ~iClamp22; ;iU01 &= ~iClamp23; ;iV01 &= ~iClamp24;
  239. pandn mm6, mm1
  240. ;iU00 |= iClampMinT1; ;iV00 |= iClampMinT2; ;iU01 |= iClampMinT3; ;iV01 |= iClampMinT4;
  241. por mm6, mm0
  242. ;iU00 |= iClampMaxT1; ;iV00 |= iClampMaxT2; ;iU01 |= iClampMaxT3; ;iV01 |= iClampMaxT4;
  243. por mm6, mm7
  244. movq mm4, mm6
  245. ; Making other two cases for texture addressing has to be simplier than
  246. ; this and not use so many registers. Puts U1 V0 U0 V1 into mm3.
  247. ; TBD Make this better.
  248. ; values are still stored as iV01, iU00, iV00, iU01
  249. movq mm2, mm4
  250. movq mm3, mm4
  251. movq mm0, mm4
  252. pmaddwd mm4, mm5 ; Throw in first address calculation.
  253. ; Just to get it started. Calculate
  254. ; iU0+iV1*iShiftU0 and iU1+iV0*iShiftU0
  255. ; values are being changed to iV01, iU01, iV00, iU00
  256. ; seven instructions for this seems excessive.
  257. pand mm2, MMWORD ptr MaskKeepUValues
  258. pand mm3, MMWORD ptr MaskKeepVValues
  259. movq mm1, mm2
  260. psllq mm2, 32
  261. psrlq mm1, 32
  262. por mm3, mm2
  263. por mm3, mm1
  264. ; From here until mov edi is code that is needed for border.
  265. ; all sign bits are stored in bytes so that border code can tell if uv went below zero.
  266. ; mm0 = iV01, iU00, iV00, iU01
  267. ; mm3 = iV01, iU01, iV00, iU00
  268. ; Need to rearrange values to be like so v1 u0 v1 u1 v0 u0 v0 u1 in bytes
  269. ; This is really bad. Just doing whatever to get it to work.
  270. movq mm1, mm0
  271. punpckldq mm1, mm3 ; This will make mm1 = v0 u0 v0 u1
  272. movq mm2, mm3
  273. punpckhdq mm2, mm0 ; This will make mm0 = v1 u0 v1 u1
  274. packsswb mm1, mm2
  275. movq mm0, mm1
  276. pmaddwd mm3, mm5 ; Calculates iU1+iV0*iShiftU0 and iU0+iV1*iShiftU0
  277. mov edi, XpTex(pBits+eax*4)
  278. ; was esi. Cant change to esi because it is the pointer to pTex
  279. ; which is used by Border and ColorKey. Use edi for now and
  280. ; call routines through memory. Figure out if this is bad.
  281. ; load the read texture routine address into a register early
  282. ;mov edi, XpCtx(pfnTexRead)
  283. ; iV0 iU1 address should be done by now.
  284. movd eax, mm4
  285. ;UINT32 uTex00 = pCtx->pfnTexRead[0](iU00, iV00, pTex->iShiftU,
  286. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  287. ; Combine U and V values before making call.
  288. ;call edi
  289. call dword ptr XpCtx(pfnTexRead)
  290. movd eax, mm3
  291. movq mm7, mm1 ; Put TColor[iU0, uV0] in mm7
  292. ;UINT32 uTex10 = pCtx->pfnTexRead[0](iU01, iV00, pTex->iShiftU,
  293. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  294. ;call edi
  295. call dword ptr XpCtx(pfnTexRead)
  296. psrlq mm3, 32
  297. psubw mm7, mm1
  298. psllw mm1, 8
  299. pmullw mm7, dword ptr UFrac
  300. paddw mm7, mm1 ; Should I copy mm1 to another variable and do shift/add later?
  301. movd eax, mm3
  302. ;UINT32 uTex01 = pCtx->pfnTexRead[0](iU00, iV01, pTex->iShiftU,
  303. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  304. ;call edi
  305. call dword ptr XpCtx(pfnTexRead)
  306. psrlq mm4, 32
  307. movq mm6, mm1
  308. movd eax, mm4
  309. ;UINT32 uTex11 = pCtx->pfnTexRead[0](iU01, iV01, pTex->iShiftU,
  310. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  311. ;call edi
  312. call dword ptr XpCtx(pfnTexRead)
  313. ;TexFiltBilinear(&pCtx->SI.TexCol[0], iUFrac, iVFrac, uTex00, uTex10, uTex01, uTex11);
  314. ; The amount of shifting instructions for this makes the other approach
  315. ; look pretty good.
  316. psubw mm6, mm1
  317. psllw mm1, 8
  318. pmullw mm6, dword ptr UFrac ; TBD explain this code better.
  319. movq mm4, mm7
  320. paddw mm6, mm1
  321. psrlw mm6, 8
  322. psrlw mm7, 8
  323. psubw mm6, mm7
  324. pmullw mm6, dword ptr VFrac
  325. paddw mm4, mm6
  326. psrlw mm4, 8
  327. ; TBD shouldnt have to pack and then unpack later. Should keep in a register
  328. ;packuswb mm4, mm4
  329. ;movd XpCtxSI(TexCol), mm4
  330. movq MMWORD PTR XpCtxSI(uBB), mm4
  331. ;----Calc second mip level pixel------------------------------------------------------------------------------
  332. ;INT16 iLOD1 = (INT16)(min(iLOD0+(pS->iLOD > 0), pTex->cLOD));
  333. ;****** Need to save iLOD0 from above somehow.
  334. ; Saving it in second texture color for now.
  335. movd mm3, XpCtxSI(TexCol+4)
  336. pxor mm5, mm5
  337. movd mm2, XpS(iLOD)
  338. pcmpgtw mm2, mm5
  339. psubw mm3, mm2
  340. movd mm1, XpTex(cLOD)
  341. movq mm2, mm3
  342. pcmpgtw mm3, mm1
  343. pand mm1, mm3
  344. pandn mm3, mm2
  345. por mm3, mm1
  346. pand mm3, MMWORD PTR Val0xffff ; Get rid of any data in the high word.
  347. ; Get LOD from mm3 and put in eax.
  348. movd eax, mm3
  349. movq mm1, XpCtxSI(iU`'d_TexNum)
  350. ;INT16 iShiftU1 = pTex->iShiftU - iLOD1;
  351. ;INT16 iShiftV1 = pTex->iShiftV - iLOD1;
  352. movq mm5, MMWORD PTR Val0x000a000a ; This is TEX_FINAL_SHIFT - 6 = 10.
  353. punpcklwd mm3, mm3 ; Make two copys of iLOD to subtract U and V
  354. movd mm4, XpTex(iShiftU)
  355. psubw mm4, mm3
  356. psubw mm5, mm4
  357. movq mm4, mm5
  358. pand mm5, MMWORD PTR Val0xffff
  359. pand mm3, MMWORD PTR Val0xffff ; Make iLOD back to only one copy
  360. psrld mm4, 16
  361. movd mm1, XpCtxSI(iU`'d_TexNum)
  362. psrad mm1, mm5
  363. movd mm2, XpCtxSI(iV`'d_TexNum)
  364. psrad mm2, mm4
  365. punpckldq mm1, mm2
  366. ; This helps in calculating texture address.
  367. movzx edx, word ptr XpTex(iShiftPitch+eax*2)
  368. add edx, 16
  369. movd mm2, edx
  370. movq mm5, MMWORD ptr Makelow16one
  371. pslld mm5, mm2
  372. por mm5, MMWORD ptr Makelow16one
  373. ;INT32 iHalf = 1<<(TEX_FINAL_SHIFT - iShiftU1 - 1);
  374. ;INT32 iUAlign = pCtx->SI.iU1 - iHalf;
  375. ;iHalf = 1<<(TEX_FINAL_SHIFT - iShiftV1 - 1);
  376. ;INT32 iVAlign = pCtx->SI.iV1 - iHalf;
  377. ;iU10 = iUAlign >> (TEX_FINAL_SHIFT - iShiftU0);
  378. ;iV10 = iVAlign >> (TEX_FINAL_SHIFT - iShiftV0);
  379. ;iUFrac0 = (iUAlign<<iShiftU0) & TEX_FINAL_FRAC_MASK;
  380. ;iVFrac0 = (iVAlign<<iShiftV0) & TEX_FINAL_FRAC_MASK;
  381. psubd mm1, MMWORD PTR Val0x0000002000000020
  382. ;INT32 iUFrac = iU00 & 0x03f;
  383. ;INT32 iVFrac = iV00 & 0x03f;
  384. ;iU00 >>= 6;
  385. ;iV00 >>= 6;
  386. movq mm2, mm1
  387. psrad mm1, 6
  388. ;pand mm1, MMWORD PTR Val0x0000ffff0000ffff
  389. pand mm2, dword ptr UFracVFracMask ; UFracVFracMask = 0x0000003f0000003f
  390. ; Going to use only 8 bits for bi-linear so that I can do a pmullw.
  391. ; Currently at 6 bits so shift up by 2.
  392. psllw mm2, 2
  393. movq mm0, mm2
  394. ; Replicate VFrac value for bilinear
  395. punpckhwd mm2, mm2
  396. punpcklwd mm2, mm2
  397. ; Replicate UFrac Value for bilinear
  398. punpcklwd mm0, mm0
  399. punpcklwd mm0, mm0
  400. movq dword ptr VFrac, mm2
  401. movq dword ptr UFrac, mm0
  402. ;INT32 iU01 = iU00 + 1;
  403. ;INT32 iV01 = iV00 + 1;
  404. packssdw mm1, mm1 ; replicate U and V value to upper 16 bit locations
  405. paddw mm1, dword ptr IncHighandLow16
  406. ; This will make texture values be (High word to low word):
  407. ; iV01, iU00, iV00, iU01
  408. ; Need to do this to make texture look up for bilinear easier.
  409. ; I have to combine to get all combinations anyway. It just
  410. ; happens to be better for me to have iV00, iU01 pair first.
  411. ;UINT16 uMaskU0 = pTex->uMaskU >> iLOD0; UINT16 uMaskV0 = pTex->uMaskV >> iLOD0;
  412. ; put mask in mm3 and replicate to match location for wrap/mirror/clamp
  413. movd mm0, XpTex(uMaskU) ; Load U and V mask
  414. ; replicate mask if doing bilinear
  415. punpckldq mm0, mm0
  416. psrlw mm0, mm3
  417. ;INT16 iFlip, iClamp1, iClamp2, iClampMinT, iClampMaxT;
  418. ;INT16 iUoWAdj = (INT16)(pS->iUoW`'d_TexNum >> 12); // adjust to match W
  419. ;INT16 iVoWAdj = (INT16)(pS->iVoW`'d_TexNum >> 12);
  420. ;movq mm6, XpS(iUoW`'d_TexNum)
  421. ;movq mm6, MMWORD PTR Zero
  422. pxor mm6, mm6
  423. ; TBD Data in SPANTEX needs to be rearange to make life simpler.
  424. ; I have rearranged some of it, but there still needs to be some
  425. ; fixes to it.
  426. ;iFlip1 = iU00 & pTex->iFlipMaskU; ;iFlip2 = iV00 & pTex->iFlipMaskV; ;iFlip3 = iU01 & pTex->iFlipMaskU; ;iFlip4 = iV01 & pTex->iFlipMaskV;
  427. movq mm7, mm1
  428. movd mm4, XpTex(iFlipMaskU) ; This should copy U and V mask at the same time.
  429. punpckldq mm4, mm4 ; copy UV
  430. psrlw mm4, mm3 ; Shifts mirror mask to correct bit location
  431. pand mm7, mm4
  432. ;iFlip1 = MMX_cmpeqw(iFlip1, 0); ;iFlip2 = MMX_cmpeqw(iFlip2, 0); ;iFlip3 = MMX_cmpeqw(iFlip3, 0); ;iFlip4 = MMX_cmpeqw(iFlip4, 0);
  433. pcmpeqw mm7, MMWORD PTR Zero
  434. ;iFlip1 = uMaskU0 &~ iFlip1; ;iFlip2 = uMaskV0 &~ iFlip2; ;iFlip3 = uMaskU0 &~ iFlip3; ;iFlip4 = uMaskV0 &~ iFlip4;
  435. pandn mm7, mm0
  436. ;iU00 &= uMaskU0; ;iV00 &= uMaskV0; ;iU01 &= uMaskU0; ;iV01 &= uMaskV0;
  437. pand mm1, mm0
  438. ;iU00 ^= iFlip1; ;iV00 ^= iFlip2; ;iU01 ^= iFlip3; ;iV01 ^= iFlip4;
  439. pxor mm1, mm7
  440. ;iClamp11 = MMX_cmpgtw(0, iUoWAdj); ;iClamp12 = MMX_cmpgtw(0, iVoWAdj);
  441. pcmpgtd mm6, XpS(iUoW`'d_TexNum)
  442. packssdw mm6, mm6
  443. ;iClamp21 = MMX_cmpgtw(iOoWAdj, iUoWAdj); ;iClamp22 = MMX_cmpgtw(iOoWAdj, iVoWAdj);
  444. movd mm7, XpS(iOoW)
  445. punpckldq mm7, mm7 ; Make a copy of OoW to compare both UoW and VoW.
  446. psrld mm7, 11 ; Make OoWs Precision Match UoWs.
  447. pcmpgtd mm7, XpS(iUoW`'d_TexNum)
  448. packssdw mm7, mm7
  449. ;iClampMinT1 = pTex->iClampMinU & iClamp11; ;iClampMinT2 = pTex->iClampMinV & iClamp12; ;iClampMinT3 = pTex->iClampMinU & iClamp13; ;iClampMinT4 = pTex->iClampMinV & iClamp14;
  450. movd mm0, XpTex(iClampMinU)
  451. punpckldq mm0, mm0
  452. pand mm0, mm6
  453. ; Save clamp2 because pandn will destory value.
  454. movq mm4, mm7
  455. ;iClampMaxT1 = pTex->iClampMaxU &~ iClamp21; ;iClampMaxT2 = pTex->iClampMaxV &~ iClamp22; ;iClampMaxT3 = pTex->iClampMaxU &~ iClamp23; ;iClampMaxT4 = pTex->iClampMaxV &~ iClamp24;
  456. movd mm2, XpTex(iClampMaxU)
  457. punpckldq mm2, mm2
  458. psraw mm2, mm3 ; Shifts clamp max to correct bit location
  459. pandn mm7, mm2 ; Since iClamp2 is already negated, I can just do an AND.
  460. ;iClamp21 &= ~iClamp11; ;iClamp22 &= ~iClamp12; ;iClamp23 &= ~iClamp13; ;iClamp24 &= ~iClamp14;
  461. pandn mm6, mm4
  462. ;iClamp21 = pTex->iClampEnU &~ iClamp21; ;iClamp22 = pTex->iClampEnU &~ iClamp22; ;iClamp23 = pTex->iClampEnU &~ iClamp23; ;iClamp24 = pTex->iClampEnU &~ iClamp24;
  463. movd mm2, XpTex(iClampEnU)
  464. punpckldq mm2, mm2
  465. pandn mm6, mm2
  466. ;iU00 &= ~iClamp21; ;iV00 &= ~iClamp22; ;iU01 &= ~iClamp23; ;iV01 &= ~iClamp24;
  467. pandn mm6, mm1
  468. ;iU00 |= iClampMinT1; ;iV00 |= iClampMinT2; ;iU01 |= iClampMinT3; ;iV01 |= iClampMinT4;
  469. por mm6, mm0
  470. ;iU00 |= iClampMaxT1; ;iV00 |= iClampMaxT2; ;iU01 |= iClampMaxT3; ;iV01 |= iClampMaxT4;
  471. por mm6, mm7
  472. movq mm4, mm6
  473. ; Making other two cases for texture addressing has to be simplier than
  474. ; this and not use so many registers. Puts U1 V0 U0 V1 into mm3.
  475. ; TBD Make this better.
  476. ; values are still stored as iV01, iU00, iV00, iU01
  477. movq mm2, mm4
  478. movq mm3, mm4
  479. movq mm0, mm4
  480. pmaddwd mm4, mm5 ; Throw in first address calculation.
  481. ; Just to get it started. Calculate
  482. ; iU0+iV1*iShiftU0 and iU1+iV0*iShiftU0
  483. ; values are being changed to iV01, iU01, iV00, iU00
  484. ; seven instructions for this seems excessive.
  485. pand mm2, MMWORD ptr MaskKeepUValues
  486. pand mm3, MMWORD ptr MaskKeepVValues
  487. movq mm1, mm2
  488. psllq mm2, 32
  489. psrlq mm1, 32
  490. por mm3, mm2
  491. por mm3, mm1
  492. ; From here until mov edi is code that is needed for border.
  493. ; all sign bits are stored in bytes so that border code can tell if uv went below zero.
  494. ; mm0 = iV01, iU00, iV00, iU01
  495. ; mm3 = iV01, iU01, iV00, iU00
  496. ; Need to rearrange values to be like so v1 u0 v1 u1 v0 u0 v0 u1 in bytes
  497. ; This is really bad. Just doing whatever to get it to work.
  498. movq mm1, mm0
  499. punpckldq mm1, mm3 ; This will make mm1 = v0 u0 v0 u1
  500. movq mm2, mm3
  501. punpckhdq mm2, mm0 ; This will make mm0 = v1 u0 v1 u1
  502. packsswb mm1, mm2
  503. movq mm0, mm1
  504. pmaddwd mm3, mm5 ; Calculates iU1+iV0*iShiftU0 and iU0+iV1*iShiftU0
  505. mov edi, XpTex(pBits+eax*4)
  506. ; was esi. Cant change to esi because it is the pointer to pTex
  507. ; which is used by Border and ColorKey. Use edi for now and
  508. ; call routines through memory. Figure out if this is bad.
  509. ; load the read texture routine address into a register early
  510. ;mov edi, XpCtx(pfnTexRead)
  511. ; iV0 iU1 address should be done by now.
  512. movd eax, mm4
  513. ;UINT32 uTex00 = pCtx->pfnTexRead[0](iU00, iV00, pTex->iShiftU,
  514. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  515. ; Combine U and V values before making call.
  516. ;call edi
  517. call dword ptr XpCtx(pfnTexRead)
  518. movd eax, mm3
  519. movq mm7, mm1 ; Put TColor[iU0, uV0] in mm7
  520. ;UINT32 uTex10 = pCtx->pfnTexRead[0](iU01, iV00, pTex->iShiftU,
  521. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  522. ;call edi
  523. call dword ptr XpCtx(pfnTexRead)
  524. psrlq mm3, 32
  525. psubw mm7, mm1
  526. psllw mm1, 8
  527. pmullw mm7, dword ptr UFrac
  528. paddw mm7, mm1 ; Should I copy mm1 to another variable and do shift/add later?
  529. movd eax, mm3
  530. ;UINT32 uTex01 = pCtx->pfnTexRead[0](iU00, iV01, pTex->iShiftU,
  531. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  532. ;call edi
  533. call dword ptr XpCtx(pfnTexRead)
  534. psrlq mm4, 32
  535. movq mm6, mm1
  536. movd eax, mm4
  537. ;UINT32 uTex11 = pCtx->pfnTexRead[0](iU01, iV01, pTex->iShiftU,
  538. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  539. ;call edi
  540. call dword ptr XpCtx(pfnTexRead)
  541. ;TexFiltBilinear(&pCtx->SI.TexCol[0], iUFrac, iVFrac, uTex00, uTex10, uTex01, uTex11);
  542. ; The amount of shifting instructions for this makes the other approach
  543. ; look pretty good.
  544. psubw mm6, mm1
  545. psllw mm1, 8
  546. pmullw mm6, dword ptr UFrac ; TBD explain this code better.
  547. movq mm4, mm7
  548. paddw mm6, mm1
  549. psrlw mm6, 8
  550. psrlw mm7, 8
  551. psubw mm6, mm7
  552. pmullw mm6, dword ptr VFrac
  553. paddw mm4, mm6
  554. psrlw mm4, 8
  555. ; TBD shouldnt have to pack and then unpack later. Should keep in a register
  556. ;packuswb mm4, mm4
  557. ;movd XpCtxSI(TexCol), mm4
  558. jmp mipinterp
  559. NotLinear:
  560. ; Get LOD from mm3 and put in eax.
  561. movd eax, mm3
  562. ; This helps in calculating texture address.
  563. movzx edx, word ptr XpTex(iShiftPitch+eax*2)
  564. add edx, 16
  565. movd mm2, edx
  566. movq mm5, MMWORD ptr Makelow16one
  567. pslld mm5, mm2
  568. por mm5, MMWORD ptr Makelow16one
  569. ;iU00 >>= 6;
  570. ;iV00 >>= 6;
  571. psrad mm1, 6
  572. packssdw mm1, mm1 ; Value needs to be packed since all wrap/mirror
  573. ; operations assume UV in low 32 bits.
  574. ;UINT16 uMaskU0 = pTex->uMaskU >> iLOD0; UINT16 uMaskV0 = pTex->uMaskV >> iLOD0;
  575. ; put mask in mm3 and replicate to match location for wrap/mirror/clamp
  576. movd mm0, XpTex(uMaskU) ; Load U and V mask
  577. ; iLOD0 shift value left over from above. TBD. Put this in in mip case
  578. ; Could do this one before or after the unpack also.
  579. psrlw mm0, mm3
  580. ;INT16 iFlip, iClamp1, iClamp2, iClampMinT, iClampMaxT;
  581. ;INT16 iUoWAdj = (INT16)(pS->iUoW`'d_TexNum >> 12); // adjust to match W
  582. ;INT16 iVoWAdj = (INT16)(pS->iVoW`'d_TexNum >> 12);
  583. ;movq mm6, XpS(iUoW`'d_TexNum)
  584. ;movq mm6, MMWORD PTR Zero
  585. pxor mm6, mm6
  586. ; TBD Data in SPANTEX needs to be rearange to make life simpler.
  587. ; I have rearranged some of it, but there still needs to be some
  588. ; fixes to it.
  589. ;iFlip1 = iU00 & pTex->iFlipMaskU; ;iFlip2 = iV00 & pTex->iFlipMaskV; ;iFlip3 = iU01 & pTex->iFlipMaskU; ;iFlip4 = iV01 & pTex->iFlipMaskV;
  590. movq mm7, mm1
  591. movd mm4, XpTex(iFlipMaskU) ; This should copy U and V mask at the same time.
  592. psrlw mm4, mm3 ; Shifts mirror mask to correct bit location
  593. pand mm7, mm4
  594. ;iFlip1 = MMX_cmpeqw(iFlip1, 0); ;iFlip2 = MMX_cmpeqw(iFlip2, 0); ;iFlip3 = MMX_cmpeqw(iFlip3, 0); ;iFlip4 = MMX_cmpeqw(iFlip4, 0);
  595. pcmpeqw mm7, MMWORD PTR Zero
  596. ;iFlip1 = uMaskU0 &~ iFlip1; ;iFlip2 = uMaskV0 &~ iFlip2; ;iFlip3 = uMaskU0 &~ iFlip3; ;iFlip4 = uMaskV0 &~ iFlip4;
  597. pandn mm7, mm0
  598. ;iU00 &= uMaskU0; ;iV00 &= uMaskV0; ;iU01 &= uMaskU0; ;iV01 &= uMaskV0;
  599. pand mm1, mm0
  600. ;iU00 ^= iFlip1; ;iV00 ^= iFlip2; ;iU01 ^= iFlip3; ;iV01 ^= iFlip4;
  601. pxor mm1, mm7
  602. ;iClamp11 = MMX_cmpgtw(0, iUoWAdj); ;iClamp12 = MMX_cmpgtw(0, iVoWAdj);
  603. pcmpgtd mm6, XpS(iUoW`'d_TexNum)
  604. packssdw mm6, mm6
  605. ;iClamp21 = MMX_cmpgtw(iOoWAdj, iUoWAdj); ;iClamp22 = MMX_cmpgtw(iOoWAdj, iVoWAdj);
  606. movd mm7, XpS(iOoW)
  607. punpckldq mm7, mm7 ; Make a copy of OoW to compare both UoW and VoW.
  608. psrld mm7, 11 ; Make OoWs Precision Match UoWs.
  609. pcmpgtd mm7, XpS(iUoW`'d_TexNum)
  610. packssdw mm7, mm7
  611. ;iClampMinT1 = pTex->iClampMinU & iClamp11; ;iClampMinT2 = pTex->iClampMinV & iClamp12; ;iClampMinT3 = pTex->iClampMinU & iClamp13; ;iClampMinT4 = pTex->iClampMinV & iClamp14;
  612. movd mm0, XpTex(iClampMinU)
  613. pand mm0, mm6
  614. ; Save clamp2 because pandn will destory value.
  615. movq mm4, mm7
  616. ;iClampMaxT1 = pTex->iClampMaxU &~ iClamp21; ;iClampMaxT2 = pTex->iClampMaxV &~ iClamp22; ;iClampMaxT3 = pTex->iClampMaxU &~ iClamp23; ;iClampMaxT4 = pTex->iClampMaxV &~ iClamp24;
  617. movd mm2, XpTex(iClampMaxU)
  618. psraw mm2, mm3 ; Shifts clamp max to correct bit location
  619. pandn mm7, mm2 ; Since iClamp2 is already negated, I can just do an AND.
  620. ;iClamp21 &= ~iClamp11; ;iClamp22 &= ~iClamp12; ;iClamp23 &= ~iClamp13; ;iClamp24 &= ~iClamp14;
  621. pandn mm6, mm4
  622. ;iClamp21 = pTex->iClampEnU &~ iClamp21; ;iClamp22 = pTex->iClampEnU &~ iClamp22; ;iClamp23 = pTex->iClampEnU &~ iClamp23; ;iClamp24 = pTex->iClampEnU &~ iClamp24;
  623. movd mm2, XpTex(iClampEnU)
  624. pandn mm6, mm2
  625. ;iU00 &= ~iClamp21; ;iV00 &= ~iClamp22; ;iU01 &= ~iClamp23; ;iV01 &= ~iClamp24;
  626. pandn mm6, mm1
  627. ;iU00 |= iClampMinT1; ;iV00 |= iClampMinT2; ;iU01 |= iClampMinT3; ;iV01 |= iClampMinT4;
  628. por mm6, mm0
  629. ;iU00 |= iClampMaxT1; ;iV00 |= iClampMaxT2; ;iU01 |= iClampMaxT3; ;iV01 |= iClampMaxT4;
  630. por mm6, mm7
  631. movq mm4, mm6
  632. movq mm0, mm4
  633. pmaddwd mm4, mm5 ; Throw in first address calculation.
  634. ; Just to get it started. Calculate
  635. ; iU0+iV1*iShiftU0 and iU1+iV0*iShiftU0
  636. ; Point needs to be in same format as bilinear for border
  637. packsswb mm0, mm0
  638. mov edi, XpTex(pBits+eax*4)
  639. ; iV0 iU1 address should be done by now.
  640. movd eax, mm4
  641. ;pCtx->SI.TexCol[0] = pCtx->pfnTexRead[0](iU00, iV00, pTex->iShiftU,
  642. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  643. call dword ptr XpCtx(pfnTexRead)
  644. ; TBD Currently have to pack and then unpack later. Should be able
  645. ; to leave the value in some register for a while. I would think.
  646. ;packuswb mm1, mm1
  647. movq XpCtxSI(uBB), mm1
  648. ;----Calc second mip level pixel------------------------------------------------------------------------------
  649. ;****** iLOD0 was saved in mm3 from above.
  650. ;INT16 iLOD1 = (INT16)(min(iLOD0+(pS->iLOD > 0), pTex->cLOD));
  651. pxor mm5, mm5
  652. movd mm2, XpS(iLOD)
  653. pcmpgtw mm2, mm5
  654. psubw mm3, mm2
  655. movd mm1, XpTex(cLOD)
  656. movq mm2, mm3
  657. pcmpgtw mm3, mm1
  658. pand mm1, mm3
  659. pandn mm3, mm2
  660. por mm3, mm1
  661. pand mm3, MMWORD PTR Val0xffff ; Get rid of any data in the high word.
  662. ; Get LOD from mm3 and put in eax.
  663. movd eax, mm3
  664. movq mm1, XpCtxSI(iU`'d_TexNum)
  665. ;INT16 iShiftU0 = pTex->iShiftU - iLOD0;
  666. ;INT16 iShiftV0 = pTex->iShiftV - iLOD0;
  667. movq mm5, MMWORD PTR Val0x000a000a ; This is TEX_FINAL_SHIFT - 6 = 10.
  668. punpcklwd mm3, mm3 ; Make two copys of iLOD to subtract U and V
  669. movd mm4, XpTex(iShiftU)
  670. psubw mm4, mm3
  671. psubw mm5, mm4
  672. movq mm4, mm5
  673. pand mm5, MMWORD PTR Val0xffff
  674. pand mm3, MMWORD PTR Val0xffff ; Make iLOD back to only one copy
  675. psrld mm4, 16
  676. movd mm1, XpCtxSI(iU`'d_TexNum)
  677. psrad mm1, mm5
  678. movd mm2, XpCtxSI(iV`'d_TexNum)
  679. psrad mm2, mm4
  680. punpckldq mm1, mm2
  681. ; This helps in calculating texture address.
  682. movzx edx, word ptr XpTex(iShiftPitch+eax*2)
  683. add edx, 16
  684. movd mm2, edx
  685. movq mm5, MMWORD ptr Makelow16one
  686. pslld mm5, mm2
  687. por mm5, MMWORD ptr Makelow16one
  688. ;iU00 >>= 6;
  689. ;iV00 >>= 6;
  690. psrad mm1, 6
  691. packssdw mm1, mm1 ; Value needs to be packed since all wrap/mirror
  692. ; operations assume UV in low 32 bits.
  693. ;UINT16 uMaskU0 = pTex->uMaskU >> iLOD0; UINT16 uMaskV0 = pTex->uMaskV >> iLOD0;
  694. ; put mask in mm3 and replicate to match location for wrap/mirror/clamp
  695. movd mm0, XpTex(uMaskU) ; Load U and V mask
  696. ; iLOD0 shift value left over from above. TBD. Put this in in mip case
  697. ; Could do this one before or after the unpack also.
  698. psrlw mm0, mm3
  699. ;INT16 iFlip, iClamp1, iClamp2, iClampMinT, iClampMaxT;
  700. ;INT16 iUoWAdj = (INT16)(pS->iUoW`'d_TexNum >> 12); // adjust to match W
  701. ;INT16 iVoWAdj = (INT16)(pS->iVoW`'d_TexNum >> 12);
  702. ;movq mm6, XpS(iUoW`'d_TexNum)
  703. ;movq mm6, MMWORD PTR Zero
  704. pxor mm6, mm6
  705. ; TBD Data in SPANTEX needs to be rearranged to make life simpler.
  706. ; I have rearranged some of it, but it still needs some
  707. ; fixes.
  708. ;iFlip1 = iU00 & pTex->iFlipMaskU; ;iFlip2 = iV00 & pTex->iFlipMaskV; ;iFlip3 = iU01 & pTex->iFlipMaskU; ;iFlip4 = iV01 & pTex->iFlipMaskV;
  709. movq mm7, mm1
  710. movd mm4, XpTex(iFlipMaskU) ; This should copy U and V mask at the same time.
  711. psrlw mm4, mm3 ; Shifts mirror mask to correct bit location
  712. pand mm7, mm4
  713. ;iFlip1 = MMX_cmpeqw(iFlip1, 0); ;iFlip2 = MMX_cmpeqw(iFlip2, 0); ;iFlip3 = MMX_cmpeqw(iFlip3, 0); ;iFlip4 = MMX_cmpeqw(iFlip4, 0);
  714. pcmpeqw mm7, MMWORD PTR Zero
  715. ;iFlip1 = uMaskU0 &~ iFlip1; ;iFlip2 = uMaskV0 &~ iFlip2; ;iFlip3 = uMaskU0 &~ iFlip3; ;iFlip4 = uMaskV0 &~ iFlip4;
  716. pandn mm7, mm0
  717. ;iU00 &= uMaskU0; ;iV00 &= uMaskV0; ;iU01 &= uMaskU0; ;iV01 &= uMaskV0;
  718. pand mm1, mm0
  719. ;iU00 ^= iFlip1; ;iV00 ^= iFlip2; ;iU01 ^= iFlip3; ;iV01 ^= iFlip4;
  720. pxor mm1, mm7
  721. ;iClamp11 = MMX_cmpgtw(0, iUoWAdj); ;iClamp12 = MMX_cmpgtw(0, iVoWAdj);
  722. pcmpgtd mm6, XpS(iUoW`'d_TexNum)
  723. packssdw mm6, mm6
  724. ;iClamp21 = MMX_cmpgtw(iOoWAdj, iUoWAdj); ;iClamp22 = MMX_cmpgtw(iOoWAdj, iVoWAdj);
  725. movd mm7, XpS(iOoW)
  726. punpckldq mm7, mm7 ; Make a copy of OoW to compare both UoW and VoW.
  727. psrld mm7, 11 ; Make OoWs Precision Match UoWs.
  728. pcmpgtd mm7, XpS(iUoW`'d_TexNum)
  729. packssdw mm7, mm7
  730. ;iClampMinT1 = pTex->iClampMinU & iClamp11; ;iClampMinT2 = pTex->iClampMinV & iClamp12; ;iClampMinT3 = pTex->iClampMinU & iClamp13; ;iClampMinT4 = pTex->iClampMinV & iClamp14;
  731. movd mm0, XpTex(iClampMinU)
  732. pand mm0, mm6
  733. ; Save clamp2 because pandn will destroy the value.
  734. movq mm4, mm7
  735. ;iClampMaxT1 = pTex->iClampMaxU &~ iClamp21; ;iClampMaxT2 = pTex->iClampMaxV &~ iClamp22; ;iClampMaxT3 = pTex->iClampMaxU &~ iClamp23; ;iClampMaxT4 = pTex->iClampMaxV &~ iClamp24;
  736. movd mm2, XpTex(iClampMaxU)
  737. psraw mm2, mm3 ; Shifts clamp max to correct bit location
  738. pandn mm7, mm2 ; Since iClamp2 is already negated, I can just do an AND.
  739. ;iClamp21 &= ~iClamp11; ;iClamp22 &= ~iClamp12; ;iClamp23 &= ~iClamp13; ;iClamp24 &= ~iClamp14;
  740. pandn mm6, mm4
  741. ;iClamp21 = pTex->iClampEnU &~ iClamp21; ;iClamp22 = pTex->iClampEnU &~ iClamp22; ;iClamp23 = pTex->iClampEnU &~ iClamp23; ;iClamp24 = pTex->iClampEnU &~ iClamp24;
  742. movd mm2, XpTex(iClampEnU)
  743. pandn mm6, mm2
  744. ;iU00 &= ~iClamp21; ;iV00 &= ~iClamp22; ;iU01 &= ~iClamp23; ;iV01 &= ~iClamp24;
  745. pandn mm6, mm1
  746. ;iU00 |= iClampMinT1; ;iV00 |= iClampMinT2; ;iU01 |= iClampMinT3; ;iV01 |= iClampMinT4;
  747. por mm6, mm0
  748. ;iU00 |= iClampMaxT1; ;iV00 |= iClampMaxT2; ;iU01 |= iClampMaxT3; ;iV01 |= iClampMaxT4;
  749. por mm6, mm7
  750. movq mm4, mm6
  751. movq mm0, mm4
  752. pmaddwd mm4, mm5 ; Throw in first address calculation.
  753. ; Just to get it started. Calculate
  754. ; iU0+iV1*iShiftU0 and iU1+iV0*iShiftU0
  755. ; Point needs to be in same format as bilinear for border
  756. packsswb mm0, mm0
  757. mov edi, XpTex(pBits+eax*4)
  758. ; iV0 iU1 address should be done by now.
  759. movd eax, mm4
  760. ;pCtx->SI.TexCol[0] = pCtx->pfnTexRead[0](iU00, iV00, pTex->iShiftU,
  761. ; pTex->pBits[iLOD0], &pCtx->Texture[0]);
  762. call dword ptr XpCtx(pfnTexRead)
  763. ; TBD Currently have to pack and then unpack later. Should be able
  764. ; to leave the value in some register for a while. I would think.
  765. ;packuswb mm1, mm1
  766. ;movd XpCtxSI(TexCol), mm1
  767. movq mm4, mm1
  768. mipinterp: ; Blend the two texel reads by the LOD fraction, store the result in TexCol, step the iterators, then jump to the next bead.
  769. ;INT32 r0, r1;
  770. ;INT32 g0, g1;
  771. ;INT32 b0, b1;
  772. ;INT32 a0, a1;
  773. ;r0 = RGBA_GETRED(uTex0);
  774. ;r1 = RGBA_GETRED(uTex1);
  775. ;g0 = RGBA_GETGREEN(uTex0);
  776. ;g1 = RGBA_GETGREEN(uTex1);
  777. ;b0 = RGBA_GETBLUE(uTex0);
  778. ;b1 = RGBA_GETBLUE(uTex1);
  779. ;a0 = RGBA_GETALPHA(uTex0);
  780. ;a1 = RGBA_GETALPHA(uTex1);
  781. dnl d_bcom()
  782. ;Tex1 is in mm4 on entry; tex0 is loaded into mm1 from the uBB slot just below. NOTE(review): presumably both hold four 16-bit channel lanes - confirm against the texel read routine.
  783. movq mm1, XpCtxSI(uBB) ; mm1 = tex0
  784. movq mm2, mm1 ; mm2 = second copy of tex0
  785. psubw mm4, mm1 ; mm4 = tex1 - tex0 in each 16-bit lane
  786. psllw mm2, 8 ; mm2 = tex0 * 256 in each lane, matching the 8-bit blend factor below
  787. ;INT32 t = pS->iLOD & 0x7ff;  (C reference; the MMX code below keeps only bits 3 to 10 of iLOD, giving an 8-bit t)
  788. mov eax, XpS(iLOD)
  789. shr eax, 3 ; Drop the low 3 bits of iLOD.
  790. and eax, 0ffh ; eax = t, an 8-bit blend factor in the range 0 to 255
  791. movd mm3, eax
  792. ; Replicate t into all four 16-bit lanes of mm3.
  793. punpcklwd mm3, mm3 ; low two lanes = t (upper two lanes get the zero high word of eax)
  794. punpckldq mm3, mm3 ; copy the low dword up: all four lanes = t
  795. ;INT32 mt = 0x7ff - t;
  796. ;r0 = (mt*r0 + t*r1)>>11;
  797. ;g0 = (mt*g0 + t*g1)>>11;
  798. ;b0 = (mt*b0 + t*b1)>>11;
  799. ;a0 = (mt*a0 + t*a1)>>11;
  800. pmullw mm4, mm3 ; mm4 = low 16 bits of t * (tex1 - tex0) in each lane
  801. paddw mm4, mm2 ; mm4 = 256*tex0 + t*(tex1 - tex0), modulo 2^16 in each lane
  802. dnl d_ecom()
  803. ;movq mm4, XpCtxSI(uBB)
  804. psrlw mm4, 8 ; Divide by 256 (logical right move of 8 bits) to finish the blend.
  805. packuswb mm4, mm4 ; Pack the four 16-bit lanes to bytes with unsigned saturation.
  806. movd XpCtxSI(TexCol), mm4 ; Store the low 32 bits as the blended texture color.
  807. d_UpdateUoWandVoW(1)
  808. d_UpdateLOD()
  809. d_UpdateOoW()
  810. ;pS->iW = 0x00800000/(pS->iOoW>>16); // 9.23/1.15 = 8.8
  811. d_WDivide()
  812. d_UoWVoWTimesW(1)
  813. ; Load the address of the next bead.
  814. mov eax, XpCtx(pfnTex1AddrEnd)
  815. ; pCtx->pfnTex1AddrEnd(pCtx, pP, pS);
  816. jmp eax ; Jump (not call) to the next bead, so control does not return here.
  817. ;}
  818. END