Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

884 lines
30 KiB

  1. ;-----------------------------------------------------------------------------
  2. ;
  3. ; This file contains texture blending functions.
  4. ;
  5. ; TODO Unpack TexColor[0] and TexColor[1].
  6. ; TODO Change ONLY the alpha part of the value. Two AND's and a OR
  7. ;
  8. ;-----------------------------------------------------------------------------
  9. include(`m4hdr.mh')dnl
  10. include(`cvars.mh')dnl
  11. include(`texblend.mh')dnl
  12. .586
  13. .model flat
  14. .data
  15. PUBLIC MaskOffAlpha
  16. MaskOffAlpha dq 00000ffffffffffffh ; Low three words set, top word clear. The Op_Color routines load it into mm6 to split color from alpha.
  17. PUBLIC ShiftTA
  18. ShiftTA dq 00100000000000000h ; 0x0100 in the top word. Not referenced by the routines visible in this file.
  19. PUBLIC Val0x00ff00ff00ff00ff
  20. Val0x00ff00ff00ff00ff dq 000ff00ff00ff00ffh ; 0x00ff in each of the four words.
  21. PUBLIC Val0x000000ff00ff00ff
  22. Val0x000000ff00ff00ff dq 0000000ff00ff00ffh ; 0x00ff in the low three words only.
  23. PUBLIC Val0X0000000001000000
  24. Val0X0000000001000000 dq 00000000001000000h ; Only bit 24 set.
  25. PUBLIC Val0x0000400040004000
  26. Val0x0000400040004000 dq 00000400040004000h ; 0x4000 in the low three words. Added with saturation by Op_Color Modulate4.
  27. PUBLIC Val0x4000000000000000
  28. Val0x4000000000000000 dq 04000000000000000h ; 0x4000 in the top word. Added with saturation by Op_Alpha Modulate4.
  29. PUBLIC AlphaVal128
  30. AlphaVal128 dq 04000000000000000h ; 128 shifted left by 7, in the top word. Subtracted by Op_Alpha AddSigned and AddSigned2.
  31. PUBLIC RGBVal128
  32. RGBVal128 dq 00000400040004000h ; 128 shifted left by 7 in each color word, which is 64 in the high byte. The AddSigned ops work
  33. ; one bit down so that the carry out of the add can be seen and used to fake saturation.
  34. .code
  35. INCLUDE iammx.inc
  36. INCLUDE offs_acp.inc
  37. EXTERN Zero:MMWORD ; Defined elsewhere. Used below as the punpcklbw source when bytes are widened to words, so presumably all zero bits.
  38. ;-----------------------------------------------------------------------------
  39. ;
  40. ; TexBlend_Tex1_None
  41. ;
  42. ; cPix = cSrc
  43. ; aPix = aSrc
  44. ; The blend body comes from the macro invoked below, which is not defined in the part of the file shown here (presumably texblend.mh, confirm).
  45. ;-----------------------------------------------------------------------------
  46. ;void TexBlend_Tex1_None(PD3DI_RASTCTX pCtx, PD3DI_RASTPRIM pP,
  47. ; PD3DI_RASTSPAN pS)
  48. ;{
  49. PUBLIC _MMX_TexBlend_Tex1_None
  50. _MMX_TexBlend_Tex1_None:
  51. ; Get ready for next indirect jump. eax is loaded before the blend body, so that body is expected to leave eax alone.
  52. mov eax, XpCtx(pfnTexBlendEnd)
  53. d_TexBlend_Tex1_None(NotMonolithic)
  54. ;pCtx->pfnTexBlendEnd(pCtx, pP, pS); done as a tail jump, so this routine has no ret of its own.
  55. jmp eax
  56. ;}
  57. ;-----------------------------------------------------------------------------
  58. ;
  59. ; TexBlend_Tex1_Decal
  60. ;
  61. ; cPix = cTex
  62. ; aPix = aTex
  63. ; The blend body comes from the macro invoked below, which is not defined in the part of the file shown here (presumably texblend.mh, confirm).
  64. ;-----------------------------------------------------------------------------
  65. ;void TexBlend_Tex1_Decal(PD3DI_RASTCTX pCtx, PD3DI_RASTPRIM pP,
  66. ; PD3DI_RASTSPAN pS)
  67. ;{
  68. PUBLIC _MMX_TexBlend_Tex1_Decal
  69. _MMX_TexBlend_Tex1_Decal:
  70. ; Get ready for next indirect jump. eax is loaded before the blend body, so that body is expected to leave eax alone.
  71. mov eax, XpCtx(pfnTexBlendEnd)
  72. d_TexBlend_Tex1_Decal(NotMonolithic)
  73. ;pCtx->pfnTexBlendEnd(pCtx, pP, pS); done as a tail jump, so this routine has no ret of its own.
  74. jmp eax
  75. ;}
  76. ;-----------------------------------------------------------------------------
  77. ;
  78. ; TexBlend_Tex1_Modulate
  79. ;
  80. ; cPix = cSrc * cTex
  81. ; aPix = aTex
  82. ; The blend body comes from the macro invoked below, which is not defined in the part of the file shown here (presumably texblend.mh, confirm).
  83. ;-----------------------------------------------------------------------------
  84. ;void TexBlend_Tex1_Modulate(PD3DI_RASTCTX pCtx, PD3DI_RASTPRIM pP,
  85. ; PD3DI_RASTSPAN pS)
  86. ;{
  87. PUBLIC _MMX_TexBlend_Tex1_Modulate
  88. _MMX_TexBlend_Tex1_Modulate:
  89. ; Get ready for next indirect jump. eax is loaded before the blend body, so that body is expected to leave eax alone.
  90. mov eax, XpCtx(pfnTexBlendEnd)
  91. d_TexBlend_Tex1_Modulate(NotMonolithic)
  92. ;pCtx->pfnTexBlendEnd(pCtx, pP, pS); done as a tail jump, so this routine has no ret of its own.
  93. jmp eax
  94. ;}
  95. ;-----------------------------------------------------------------------------
  96. ;
  97. ; TexBlend_Tex1_ModulateAlphaOVR
  98. ;
  99. ; cPix = cSrc * cTex
  100. ; aPix = aSrc
  101. ; The blend body comes from the macro invoked below, which is not defined in the part of the file shown here (presumably texblend.mh, confirm).
  102. ;-----------------------------------------------------------------------------
  103. ;void TexBlend_Tex1_ModulateAlphaOVR(PD3DI_RASTCTX pCtx, PD3DI_RASTPRIM pP,
  104. ; PD3DI_RASTSPAN pS)
  105. ;{
  106. PUBLIC _MMX_TexBlend_Tex1_ModulateAlphaOVR
  107. _MMX_TexBlend_Tex1_ModulateAlphaOVR:
  108. ; Get ready for next indirect jump. eax is loaded before the blend body, so that body is expected to leave eax alone.
  109. mov eax, XpCtx(pfnTexBlendEnd)
  110. d_TexBlend_Tex1_ModulateAlphaOVR(NotMonolithic)
  111. ;pCtx->pfnTexBlendEnd(pCtx, pP, pS); done as a tail jump, so this routine has no ret of its own.
  112. jmp eax
  113. ;}
  114. ;-----------------------------------------------------------------------------
  115. ;
  116. ; TexBlend_Tex1_Gen
  117. ;
  118. ; Calls first set of function pointers to do general texture blending.
  119. ; Register protocol as used here: mm7 = input color, edx = byte offset of the stage (0), mm1 and mm2 = Arg1 and Arg2, mm4 = blended result.
  120. ;-----------------------------------------------------------------------------
  121. ;void CMMX_TexBlend_Tex1_Gen(PD3DI_RASTCTX pCtx, PD3DI_RASTPRIM pP,
  122. ; PD3DI_RASTSPAN pS)
  123. ;{
  124. PUBLIC _MMX_TexBlend_Tex1_Gen
  125. _MMX_TexBlend_Tex1_Gen:
  126. ; Initialize input to diffuse color (the default). The quadword at pS->uB is loaded whole into mm7.
  127. ; D3DI_RASTCOLOR Input = *(D3DI_RASTCOLOR*)&pS->uB;
  128. movq mm7, XpS(uB)
  129. ;D3DI_RASTCOLOR Arg1;
  130. ; in MM1
  131. ;D3DI_RASTCOLOR Arg2;
  132. ; in MM2
  133. mov edx, 0 ; iTex offset for the first texture stage. The Get routines add edx to the TexCol address.
  134. ;pCtx->pfnTexBlendGetAlpha[0](&Arg1, &Arg2, &Input, pCtx, pS, 0); fetches the alpha arguments into mm1 and mm2
  135. call dword ptr XpCtx(pfnTexBlendGetAlpha)
  136. ;pCtx->pfnTexBlendOpAlpha[0]((D3DI_RASTCOLOR*)&pCtx->SI.uBB, &Arg1, &Arg2, pCtx, pS, 0); leaves the alpha result in mm3
  137. call dword ptr XpCtx(pfnTexBlendOpAlpha)
  138. ;pCtx->pfnTexBlendGetColor[0](&Arg1, &Arg2, &Input, pCtx, pS, 0); fetches the color arguments into mm1 and mm2
  139. call dword ptr XpCtx(pfnTexBlendGetColor)
  140. ;pCtx->pfnTexBlendOpColor[0]((D3DI_RASTCOLOR*)&pCtx->SI.uBB, &Arg1, &Arg2, pCtx, pS, 0); leaves color merged with the mm3 alpha in mm4
  141. call dword ptr XpCtx(pfnTexBlendOpColor)
  142. ; Get ready for next indirect jump
  143. mov eax, XpCtx(pfnTexBlendEnd)
  144. movq XpCtxSI(uBB), mm4 ; store the blended pixel to pCtx->SI.uBB
  145. ;pCtx->pfnTexBlendEnd(pCtx, pP, pS); done as a tail jump, so this routine has no ret of its own.
  146. jmp eax
  147. ;}
  148. ;-----------------------------------------------------------------------------
  149. ;
  150. ; TexBlend_TexM_Gen
  151. ;
  152. ; Calls all sets of function pointers to do general texture blending.
  153. ; Stage 0 runs straight line, then a loop runs stages 1 up to cActTex - 1, feeding each stage result (mm4) back in as the next input (mm7).
  154. ;-----------------------------------------------------------------------------
  155. ;void CMMX_TexBlend_TexM_Gen(PD3DI_RASTCTX pCtx, PD3DI_RASTPRIM pP,
  156. ; PD3DI_RASTSPAN pS)
  157. ;{
  158. PUBLIC _MMX_TexBlend_TexM_Gen
  159. _MMX_TexBlend_TexM_Gen:
  160. ; Initialize input to diffuse color (the default)
  161. ; D3DI_RASTCOLOR Input0 = *(D3DI_RASTCOLOR*)&pS->uB;
  162. movq mm7, XpS(uB)
  163. ;D3DI_RASTCOLOR Input1;
  164. ;D3DI_RASTCOLOR Arg1;
  165. ;D3DI_RASTCOLOR Arg2;
  166. ; Set iTex to point to first texture color. Last argument of next 4 calls.
  167. mov edx, 0
  168. ;pCtx->pfnTexBlendGetAlpha[0](&Arg1, &Arg2, &Input0, pCtx, pS, 0);
  169. call dword ptr XpCtx(pfnTexBlendGetAlpha)
  170. ;pCtx->pfnTexBlendOpAlpha[0](&Input1, &Arg1, &Arg2, pCtx, pS, 0);
  171. call dword ptr XpCtx(pfnTexBlendOpAlpha)
  172. ;pCtx->pfnTexBlendGetColor[0](&Arg1, &Arg2, &Input0, pCtx, pS, 0);
  173. call dword ptr XpCtx(pfnTexBlendGetColor)
  174. ;pCtx->pfnTexBlendOpColor[0](&Input1, &Arg1, &Arg2, pCtx, pS, 0);
  175. call dword ptr XpCtx(pfnTexBlendOpColor)
  176. ; Set iTex to point to second texture color. edx is the byte offset (4 per stage) and eax is the stage number.
  177. mov edx, 4
  178. mov eax, 1
  179. BlendLoop:
  180. cmp eax, dword ptr XpCtx(cActTex) ; NOTE(review): exit is on equality only, so this assumes cActTex is at least 1. Confirm against the caller.
  181. je DoneBlendLoop
  182. ; Result of pre routines is in mm4 and pInput is passed as mm7
  183. movq mm7, mm4
  184. ;pCtx->pfnTexBlendGetAlpha[iTex](&Arg1, &Arg2, &Input1, pCtx, pS, iTex); the table entry is selected by the edx offset
  185. call dword ptr XpCtx(pfnTexBlendGetAlpha+edx)
  186. ;pCtx->pfnTexBlendOpAlpha[iTex]((D3DI_RASTCOLOR*)&pCtx->SI.uBB, &Arg1, &Arg2, pCtx, pS, iTex);
  187. call dword ptr XpCtx(pfnTexBlendOpAlpha+edx)
  188. ;pCtx->pfnTexBlendGetColor[iTex](&Arg1, &Arg2, &Input1, pCtx, pS, iTex);
  189. call dword ptr XpCtx(pfnTexBlendGetColor+edx)
  190. ;pCtx->pfnTexBlendOpColor[iTex]((D3DI_RASTCOLOR*)&pCtx->SI.uBB, &Arg1, &Arg2, pCtx, pS, iTex);
  191. call dword ptr XpCtx(pfnTexBlendOpColor+edx)
  192. inc eax ; next stage number. eax and edx must survive the four calls above; the Get and Op routines in this file only touch MMX registers.
  193. add edx, 4
  194. jmp BlendLoop
  195. DoneBlendLoop:
  196. ; Get ready for next indirect jump
  197. mov eax, XpCtx(pfnTexBlendEnd)
  198. movq XpCtxSI(uBB), mm4 ; store the last stage result to pCtx->SI.uBB
  199. ;pCtx->pfnTexBlendEnd(pCtx, pP, pS); done as a tail jump, so this routine has no ret of its own.
  200. jmp eax
  201. ;}
  202. define(`d_TexBlendGetAlpha', `; void CMMX_TexBlend_Get_Alpha_$1_$2(PD3DI_RASTCOLOR pArg1, PD3DI_RASTCOLOR pArg2, PD3DI_RASTCOLOR pInput,
  203. ; PD3DI_RASTCTX pCtx, PD3DI_RASTSPAN pS, INT32 iTex)
  204. ; { In: edx = byte offset of the texture stage, mm7 = input color. Out: mm1 = Arg1 and mm2 = Arg2 as four words of 0..255. May use mm5 as scratch.
  205. PUBLIC _MMX_TexBlend_Get_Alpha_$1_$2
  206. _MMX_TexBlend_Get_Alpha_$1_$2:
  207. ifelse(`$1', `TextureAlpha', `
  208. ;pArg1->uA = (UINT8)RGBA_GETALPHA(pCtx->SI.TexCol[iTex]);
  209. pxor mm5, mm5
  210. movd mm1, XpCtxSI(TexCol+edx) ; TexCol entries are 32 bits (edx steps by 4), so load only the dword
  211. punpcklbw mm1, mm5 ; widen the four bytes to four words
  212. ', `$1', `InvTextureAlpha', `
  213. ;pArg1->uA = (UINT8)~(RGBA_GETALPHA(pCtx->SI.TexCol[iTex]));
  214. pcmpeqd mm5, mm5
  215. movd mm1, XpCtxSI(TexCol+edx) ; TexCol entries are 32 bits (edx steps by 4), so load only the dword
  216. pxor mm1, mm5 ; invert every byte. Only the low dword is consumed by the unpack below.
  217. pxor mm5, mm5
  218. punpcklbw mm1, mm5
  219. ')
  220. dnl
  221. ifelse(`$2', `DiffuseAlpha', `
  222. ;pArg2->uA = (UINT8)(pS->uA>>COLOR_SHIFT);
  223. movq mm2, XpS(uB)
  224. psrlw mm2, 8
  225. ', `$2', `InputAlpha', `
  226. ;pArg2->uA = (UINT8)(pInput->uA>>COLOR_SHIFT);
  227. movq mm2, mm7
  228. psrlw mm2, 8
  229. ', `$2', `FactorAlpha', `
  230. ;pArg2->uA = (UINT8)RGBA_GETALPHA(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]);
  231. pxor mm5, mm5
  232. movd mm2, XpCtx(pdwRenderState + RS_TEXTUREFACTOR)
  233. punpcklbw mm2, mm5
  234. ', `$2', `InvDiffuseAlpha', `
  235. ;pArg2->uA = (UINT8)~((pS->uA>>COLOR_SHIFT));
  236. pcmpeqd mm2, mm2
  237. pxor mm2, XpS(uB)
  238. psrlw mm2, 8
  239. ', `$2', `InvInputAlpha', `
  240. ;pArg2->uA = (UINT8)~((pInput->uA>>COLOR_SHIFT));
  241. pcmpeqd mm2, mm2
  242. pxor mm2, mm7
  243. psrlw mm2, 8
  244. ', `$2', `InvFactorAlpha', `
  245. ;pArg2->uA = (UINT8)~((RGBA_GETALPHA(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]));
  246. pcmpeqd mm5, mm5
  247. movd mm2, XpCtx(pdwRenderState + RS_TEXTUREFACTOR)
  248. pxor mm2, mm5
  249. pxor mm5, mm5
  250. punpcklbw mm2, mm5
  251. ', `$2', `SpecularAlpha', `
  252. ;pArg2->uA = (UINT8)(pS->uFog>>COLOR_SHIFT);
  253. movq mm2, XpS(uFog)
  254. psrlw mm2, 8
  255. ', `$2', `InvSpecularAlpha', `
  256. ;pArg2->uA = (UINT8)~((pS->uFog>>COLOR_SHIFT));
  257. pcmpeqd mm2, mm2
  258. pxor mm2, XpS(uFog)
  259. psrlw mm2, 8
  260. ')
  261. ret
  262. ;}
  263. ')
  264. dnl
  265. d_RepStr(`d_RepStr(`d_TexBlendGetAlpha(AA, BB)',
  266. `AA', TextureAlpha, InvTextureAlpha)',
  267. `BB', DiffuseAlpha, InputAlpha, FactorAlpha, InvDiffuseAlpha, InvInputAlpha, InvFactorAlpha,
  268. SpecularAlpha, InvSpecularAlpha)
  269. dnl
  270. dnl
  271. define(`d_TexBlendOpAlpha', `; void CMMX_TexBlend_Op_Alpha_$1(PD3DI_RASTCOLOR pArg1, PD3DI_RASTCOLOR pArg2, PD3DI_RASTCOLOR pInput,
  272. ; PD3DI_RASTCTX pCtx, PD3DI_RASTSPAN pS, INT32 iTex)
  273. ; { In: mm1 = Arg1 and mm2 = Arg2 as four words of 0..255, edx = byte offset of the texture stage. Out: mm3, whose top word is the 16 bit alpha. Scratch: mm4.
  274. PUBLIC _MMX_TexBlend_Op_Alpha_$1
  275. _MMX_TexBlend_Op_Alpha_$1:
  276. ifelse(`$1', `None', `
  277. ;pOut->uA = pS->uA;
  278. movq mm3, XpS(uB)
  279. ', `$1', `CopyArg1', `
  280. ;pOut->uA = pArg1->uA<<COLOR_SHIFT;
  281. movq mm3, mm1
  282. psllw mm3, 8
  283. ', `$1', `CopyArg2', `
  284. ;pOut->uA = pArg2->uA<<COLOR_SHIFT;
  285. movq mm3, mm2
  286. psllw mm3, 8
  287. ', `$1', `Modulate', `
  288. ;pOut->uA = pArg1->uA*pArg2->uA;
  289. movq mm3, mm1
  290. pmullw mm3, mm2
  291. ', `$1', `Modulate2', `
  292. ;pOut->uA = min(((UINT32)pArg1->uA*pArg2->uA)<<1, 0xffff);
  293. movq mm3, mm1
  294. pmullw mm3, mm2
  295. movq mm4, mm3
  296. psllw mm3, 1
  297. psraw mm4, 15 ; Make mask based on high bit. This is used to make value saturate.
  298. paddusw mm3, mm4 ; This could just as well be a por to set all bits.
  299. ', `$1', `Modulate4', `
  300. ;pOut->uA = min(((UINT32)pArg1->uA*pArg2->uA)<<2, 0xffff);
  301. movq mm3, mm1
  302. pmullw mm3, mm2
  303. movq mm4, mm3
  304. psllw mm3, 2
  305. paddusw mm4, MMWORD PTR Val0x4000000000000000 ; If either of the two upper bits are on,
  306. ; this will turn on the sign bit.
  307. psraw mm4, 15
  308. paddusw mm3, mm4 ; This could just as well be a por to set all bits.
  309. ', `$1', `Add', `
  310. ;pOut->uA = min((UINT32)pArg1->uA+pArg2->uA, 0xffff);
  311. movq mm3, mm1
  312. paddusb mm3, mm2 ; byte saturation is enough because both arguments are 0..255 in the low byte of each word
  313. psllw mm3, 8
  314. ', `$1', `AddSigned', `
  315. ;pOut->uA = max((INT32)pArg1->uA+pArg2->uA-128, 0x0);
  316. movq mm3, mm1
  317. movq mm4, mm2
  318. ; Actually only shifting up by 7 instead of 8.
  319. psllw mm3, 7 ; Shift down by one bit to check to see if there is a carry.
  320. psllw mm4, 7 ; Cant use saturate twice since were doing add and sub and it could mess up result.
  321. paddw mm4, mm3
  322. psubusw mm4, MMWORD PTR AlphaVal128 ; subtract 128 from shifted value. It is really 64 in the upper byte.
  323. movq mm3, mm4
  324. psraw mm4, 15
  325. psllw mm3, 1
  326. por mm3, mm4 ; Could have used paddusw here, but por achieves same effect.
  327. ', `$1', `BlendDiffuseAlpha', `
  328. ;INT32 iA = pS->uA;
  329. ;pOut->uA = (UINT16)(iA*(pArg1->uA - pArg2->uA) + (pArg2->uA<<COLOR_SHIFT));
  330. movq mm4, XpS(uB)
  331. movq mm3, mm1
  332. psrlw mm4, 8
  333. psubw mm3, mm2
  334. pmullw mm3, mm4
  335. movq mm4, mm2
  336. psllw mm4, 8
  337. paddw mm3, mm4
  338. ', `$1', `BlendTextureAlpha', `
  339. ;INT32 iA = RGBA_GETALPHA(pCtx->SI.TexCol[iTex]);
  340. ;pOut->uA = (UINT16)(iA*(pArg1->uA - pArg2->uA) + (pArg2->uA<<8));
  341. movd mm4, XpCtxSI(TexCol+edx)
  342. movq mm3, mm1
  343. punpcklbw mm4, Zero
  344. psubw mm3, mm2
  345. pmullw mm3, mm4
  346. movq mm4, mm2
  347. psllw mm4, 8
  348. paddw mm3, mm4
  349. ', `$1', `BlendFactorAlpha', `
  350. ;INT32 iA = RGBA_GETALPHA(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]);
  351. ;pOut->uA = (UINT16)(iA*(pArg1->uA - pArg2->uA) + (pArg2->uA<<8));
  352. movq mm3, mm1
  353. movq mm4, mm2
  354. psubw mm3, mm4
  355. movd mm4, XpCtx(pdwRenderState+RS_TEXTUREFACTOR)
  356. punpcklbw mm4, Zero
  357. pmullw mm3, mm4
  358. movq mm4, mm2
  359. psllw mm4, 8
  360. paddw mm3, mm4
  361. ', `$1', `BlendTextureAlphaPM', `
  362. ;INT32 iA = 255 - RGBA_GETALPHA(pCtx->SI.TexCol[iTex]);
  363. ;pOut->uA = min((UINT32)((pArg1->uA<<COLOR_SHIFT) + iA*pArg2->uA), 0xffff);
  364. movd mm4, XpCtxSI(TexCol+edx)
  365. pcmpeqw mm3, mm3 ; These two lines make 255 - TexColAlpha
  366. pxor mm4, mm3
  367. punpcklbw mm4, Zero
  368. movq mm3, mm2
  369. pmullw mm3, mm4
  370. movq mm4, mm1
  371. psllw mm4, 8
  372. paddusw mm3, mm4
  373. ', `$1', `AddSigned2', `
  374. ;pOut->uA = (max(((INT32)pArg1->uA+pArg2->uA-128)<<1, 0x0))<<COLOR_SHIFT;
  375. movq mm3, mm1
  376. movq mm4, mm2
  377. ; Actually only shifting up by 7 instead of 8.
  378. psllw mm3, 7 ; Shift down by one bit to check to see if there is a carry.
  379. psllw mm4, 7 ; Cant use saturate twice since were doing add and sub and it could mess up result.
  380. paddw mm4, mm3
  381. psubusw mm4, MMWORD PTR AlphaVal128 ; subtract 128 from shifted value. It is really 64 in the upper byte.
  382. movq mm3, mm4
  383. psllw mm3, 1
  384. por mm4, mm3 ; sign bit is now set if either of the two upper bits was set
  385. psraw mm4, 15
  386. psllw mm3, 1
  387. por mm3, mm4 ; Could have used paddusw here, but por achieves same effect.
  388. ', `$1', `Subtract', `
  389. ; pOut->uA = (min(max((~((~(INT32)pArg1->uA) + pArg2->uA)), 0x0), 0xff))<<COLOR_SHIFT;
  390. movq mm3, mm1
  391. movq mm4, mm2
  392. psubusw mm3, mm4
  393. psllw mm3, 8
  394. ', `$1', `AddSmooth', `
  395. ; pOut->uA = min((pArg1->uA<<COLOR_SHIFT) + (255 - pArg1->uA)*pArg2->uA, 0xffff);
  396. movq mm4, MMWORD PTR Val0x00ff00ff00ff00ff
  397. pxor mm4, mm1 ; 255 - Arg1 in every word, valid because Arg1 words are 0..255
  398. pmullw mm4, mm2 ; (255 - Arg1) * Arg2 is at most 0xfe01, so it fits in a word
  399. movq mm3, mm1
  400. psllw mm3, 8
  401. paddusw mm3, mm4 ; Forming (Arg1 + Arg2) shifted left by 8 instead would drop the carry once the sum passes 255.
  402. ')
  403. ret
  404. ;}
  405. ')
  406. dnl
  407. d_RepStr(`d_TexBlendOpAlpha(AA)',
  408. `AA', None, CopyArg1, CopyArg2, Modulate, Modulate2, Modulate4, Add, AddSigned,
  409. BlendDiffuseAlpha, BlendTextureAlpha, BlendFactorAlpha, BlendTextureAlphaPM,
  410. AddSigned2, Subtract, AddSmooth)
  411. dnl
  412. dnl
  413. define(`d_TexBlendGetColor', `; void CMMX_TexBlend_Get_Color_$1_$2(PD3DI_RASTCOLOR pArg1, PD3DI_RASTCOLOR pArg2, PD3DI_RASTCOLOR pInput,
  414. ; PD3DI_RASTCTX pCtx, PD3DI_RASTSPAN pS, INT32 iTex)
  415. ; { In: edx = byte offset of the texture stage, mm7 = input color. Out: mm1 = Arg1 and mm2 = Arg2 as four words of 0..255. May use mm5 as scratch. mm3 is not touched.
  416. PUBLIC _MMX_TexBlend_Get_Color_$1_$2
  417. _MMX_TexBlend_Get_Color_$1_$2:
  418. ; Alpha is already done in mm3
  419. ifelse(`$1', `Texture', `
  420. ;pArg1->uB = (UINT8)RGBA_GETBLUE(pCtx->SI.TexCol[iTex]);
  421. ;pArg1->uG = (UINT8)RGBA_GETGREEN(pCtx->SI.TexCol[iTex]);
  422. ;pArg1->uR = (UINT8)RGBA_GETRED(pCtx->SI.TexCol[iTex]);
  423. pxor mm5, mm5
  424. movd mm1, XpCtxSI(TexCol+edx) ; TexCol entries are 32 bits (edx steps by 4), so load only the dword
  425. punpcklbw mm1, mm5
  426. ', `$1', `InvTexture', `
  427. ;pArg1->uB = (UINT8)~(RGBA_GETBLUE(pCtx->SI.TexCol[iTex]));
  428. ;pArg1->uG = (UINT8)~(RGBA_GETGREEN(pCtx->SI.TexCol[iTex]));
  429. ;pArg1->uR = (UINT8)~(RGBA_GETRED(pCtx->SI.TexCol[iTex]));
  430. pcmpeqd mm5, mm5
  431. movd mm1, XpCtxSI(TexCol+edx)
  432. pxor mm1, mm5
  433. pxor mm5, mm5
  434. punpcklbw mm1, mm5
  435. ', `$1', `TextureAlpha', `
  436. ;pArg1->uB = (UINT8)RGBA_GETALPHA(pCtx->SI.TexCol[iTex]);
  437. ;pArg1->uG = pArg1->uB;
  438. ;pArg1->uR = pArg1->uB;
  439. pxor mm5, mm5
  440. movd mm1, XpCtxSI(TexCol + edx)
  441. punpcklbw mm1, mm5
  442. punpckhwd mm1, mm1 ; copy the alpha word to the high dword
  443. punpckhdq mm1, mm1 ; copy the alpha word to the full register
  444. ', `$1', `InvTextureAlpha', `
  445. ;pArg1->uB = (UINT8)~(RGBA_GETALPHA(pCtx->SI.TexCol[iTex]));
  446. ;pArg1->uG = pArg1->uB;
  447. ;pArg1->uR = pArg1->uB;
  448. pcmpeqd mm5, mm5
  449. movd mm1, XpCtxSI(TexCol + edx) ; TexCol entries are 32 bits (edx steps by 4), so load only the dword
  450. pxor mm1, mm5
  451. pxor mm5, mm5
  452. punpcklbw mm1, mm5
  453. punpckhwd mm1, mm1
  454. punpckhdq mm1, mm1
  455. ')
  456. dnl
  457. ifelse(`$2', `Diffuse', `
  458. ;pArg2->uB = (UINT8)(pS->uB>>COLOR_SHIFT);
  459. ;pArg2->uG = (UINT8)(pS->uG>>COLOR_SHIFT);
  460. ;pArg2->uR = (UINT8)(pS->uR>>COLOR_SHIFT);
  461. movq mm2, XpS(uB)
  462. psrlw mm2, 8
  463. ', `$2', `Specular', `
  464. ;pArg2->uB = (UINT8)(pS->uBS>>COLOR_SHIFT);
  465. ;pArg2->uG = (UINT8)(pS->uGS>>COLOR_SHIFT);
  466. ;pArg2->uR = (UINT8)(pS->uRS>>COLOR_SHIFT);
  467. movq mm2, XpS(uBS)
  468. psrlw mm2, 8
  469. ', `$2', `Input', `
  470. ;pArg2->uB = (UINT8)(pInput->uB>>COLOR_SHIFT);
  471. ;pArg2->uG = (UINT8)(pInput->uG>>COLOR_SHIFT);
  472. ;pArg2->uR = (UINT8)(pInput->uR>>COLOR_SHIFT);
  473. movq mm2, mm7
  474. psrlw mm2, 8
  475. ', `$2', `Factor', `
  476. ;pArg2->uB = (UINT8)RGBA_GETBLUE(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]);
  477. ;pArg2->uG = (UINT8)RGBA_GETGREEN(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]);
  478. ;pArg2->uR = (UINT8)RGBA_GETRED(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]);
  479. pxor mm5, mm5
  480. movd mm2, XpCtx(pdwRenderState + RS_TEXTUREFACTOR)
  481. punpcklbw mm2, mm5
  482. ', `$2', `InvDiffuse', `
  483. ;pArg2->uB = (UINT8)~((pS->uB>>COLOR_SHIFT));
  484. ;pArg2->uG = (UINT8)~((pS->uG>>COLOR_SHIFT));
  485. ;pArg2->uR = (UINT8)~((pS->uR>>COLOR_SHIFT));
  486. pcmpeqd mm2, mm2
  487. pxor mm2, XpS(uB)
  488. pxor mm2, mm5 ; mm5 is zero here (every Arg1 branch above leaves it cleared), so mm2 is unchanged
  489. psrlw mm2, 8
  490. ', `$2', `InvSpecular', `
  491. ;pArg2->uB = (UINT8)~((pS->uBS>>COLOR_SHIFT));
  492. ;pArg2->uG = (UINT8)~((pS->uGS>>COLOR_SHIFT));
  493. ;pArg2->uR = (UINT8)~((pS->uRS>>COLOR_SHIFT));
  494. pcmpeqd mm2, mm2
  495. pxor mm2, XpS(uBS)
  496. pxor mm2, mm5 ; mm5 is zero here (every Arg1 branch above leaves it cleared), so mm2 is unchanged
  497. psrlw mm2, 8
  498. ', `$2', `InvInput', `
  499. ;pArg2->uB = (UINT8)~((pInput->uB>>COLOR_SHIFT));
  500. ;pArg2->uG = (UINT8)~((pInput->uG>>COLOR_SHIFT));
  501. ;pArg2->uR = (UINT8)~((pInput->uR>>COLOR_SHIFT));
  502. movq mm2, mm7
  503. pcmpeqd mm5, mm5
  504. pxor mm2, mm5
  505. psrlw mm2, 8
  506. ', `$2', `InvFactor', `
  507. ;pArg2->uB = (UINT8)~(RGBA_GETBLUE(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]));
  508. ;pArg2->uG = (UINT8)~(RGBA_GETGREEN(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]));
  509. ;pArg2->uR = (UINT8)~(RGBA_GETRED(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]));
  510. pcmpeqd mm5, mm5
  511. movd mm2, XpCtx(pdwRenderState + RS_TEXTUREFACTOR) ; the factor is one dword, so do not read the next render state with it
  512. pxor mm2, mm5
  513. pxor mm5, mm5
  514. punpcklbw mm2, mm5
  515. ', `$2', `DiffuseAlpha', `
  516. ;pArg2->uB = (UINT8)(pS->uA>>COLOR_SHIFT);
  517. ;pArg2->uG = pArg2->uB;
  518. ;pArg2->uR = pArg2->uB;
  519. movq mm2, XpS(uB)
  520. psrlw mm2, 8
  521. punpckhwd mm2, mm2
  522. punpckhdq mm2, mm2
  523. ', `$2', `SpecularAlpha', `
  524. ;pArg2->uB = (UINT8)(pS->uFog>>COLOR_SHIFT);
  525. ;pArg2->uG = pArg2->uB;
  526. ;pArg2->uR = pArg2->uB;
  527. movq mm2, XpS(uFog)
  528. psrlw mm2, 8
  529. punpckhwd mm2, mm2
  530. punpckhdq mm2, mm2
  531. ', `$2', `InputAlpha', `
  532. ;pArg2->uB = (UINT8)(pInput->uA>>COLOR_SHIFT);
  533. ;pArg2->uG = pArg2->uB;
  534. ;pArg2->uR = pArg2->uB;
  535. movq mm2, mm7
  536. punpckhwd mm2, mm2
  537. punpckhdq mm2, mm2
  538. psrlw mm2, 8
  539. ', `$2', `FactorAlpha', `
  540. ;pArg2->uB = (UINT8)RGBA_GETALPHA(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]);
  541. ;pArg2->uG = pArg2->uB;
  542. ;pArg2->uR = pArg2->uB;
  543. pxor mm5, mm5
  544. movd mm2, XpCtx(pdwRenderState + RS_TEXTUREFACTOR) ; the factor is one dword, so do not read the next render state with it
  545. punpcklbw mm2, mm5
  546. punpckhwd mm2, mm2
  547. punpckhdq mm2, mm2
  548. ', `$2', `InvDiffuseAlpha', `
  549. ;pArg2->uB = (UINT8)~((pS->uA>>COLOR_SHIFT));
  550. ;pArg2->uG = pArg2->uB;
  551. ;pArg2->uR = pArg2->uB;
  552. pcmpeqd mm2, mm2
  553. pxor mm2, XpS(uB)
  554. psrlw mm2, 8
  555. punpckhwd mm2, mm2
  556. punpckhdq mm2, mm2
  557. ', `$2', `InvSpecularAlpha', `
  558. ;pArg2->uB = (UINT8)~((pS->uFog>>COLOR_SHIFT));
  559. ;pArg2->uG = pArg2->uB;
  560. ;pArg2->uR = pArg2->uB;
  561. pcmpeqd mm2, mm2
  562. pxor mm2, XpS(uFog)
  563. psrlw mm2, 8
  564. punpckhwd mm2, mm2
  565. punpckhdq mm2, mm2
  566. ', `$2', `InvInputAlpha', `
  567. ;pArg2->uB = (UINT8)~((pInput->uA>>COLOR_SHIFT));
  568. ;pArg2->uG = pArg2->uB;
  569. ;pArg2->uR = pArg2->uB;
  570. pcmpeqd mm2, mm2
  571. pxor mm2, mm7
  572. psrlw mm2, 8
  573. punpckhwd mm2, mm2
  574. punpckhdq mm2, mm2
  575. ', `$2', `InvFactorAlpha', `
  576. ;pArg2->uB = (UINT8)~(RGBA_GETALPHA(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]));
  577. ;pArg2->uG = pArg2->uB;
  578. ;pArg2->uR = pArg2->uB;
  579. pcmpeqd mm5, mm5
  580. movd mm2, XpCtx(pdwRenderState + RS_TEXTUREFACTOR) ; the factor is one dword, so do not read the next render state with it
  581. pxor mm2, mm5
  582. pxor mm5, mm5
  583. punpcklbw mm2, mm5
  584. punpckhwd mm2, mm2
  585. punpckhdq mm2, mm2
  586. ')
  587. ret
  588. ;}
  589. ')
  590. dnl
  591. d_RepStr(`d_RepStr(`d_TexBlendGetColor(AA, BB)',
  592. `AA', Texture, InvTexture, TextureAlpha, InvTextureAlpha)',
  593. `BB', Diffuse, Input, Factor, InvDiffuse, InvInput, InvFactor,
  594. DiffuseAlpha, InputAlpha, FactorAlpha, InvDiffuseAlpha, InvInputAlpha, InvFactorAlpha,
  595. Specular, InvSpecular, SpecularAlpha, InvSpecularAlpha)
  596. dnl
  597. dnl
  598. define(`d_TexBlendOpColor', `; void CMMX_TexBlend_Op_Color_$1(PD3DI_RASTCOLOR pArg1, PD3DI_RASTCOLOR pArg2, PD3DI_RASTCOLOR pInput,
  599. ; PD3DI_RASTCTX pCtx, PD3DI_RASTSPAN pS, INT32 iTex)
  600. ; { In: mm1 = Arg1 and mm2 = Arg2 as four words of 0..255, mm3 = alpha result, edx = byte offset of the texture stage. Out: mm4 = color in the low three words with the mm3 alpha on top. Scratch: mm5 and mm6.
  601. PUBLIC _MMX_TexBlend_Op_Color_$1
  602. _MMX_TexBlend_Op_Color_$1:
  603. movq mm6, MMWORD PTR MaskOffAlpha ; 0x0000ffff ffffffff
  604. ifelse(`$1', `None', `
  605. ;pOut->uB = pS->uB;
  606. ;pOut->uG = pS->uG;
  607. ;pOut->uR = pS->uR;
  608. movq mm4, XpS(uB)
  609. ', `$1', `CopyArg1', `
  610. ;pOut->uB = pArg1->uB<<COLOR_SHIFT;
  611. ;pOut->uG = pArg1->uG<<COLOR_SHIFT;
  612. ;pOut->uR = pArg1->uR<<COLOR_SHIFT;
  613. movq mm4, mm1
  614. psllw mm4, 8
  615. ', `$1', `CopyArg2', `
  616. ;pOut->uB = pArg2->uB<<COLOR_SHIFT;
  617. ;pOut->uG = pArg2->uG<<COLOR_SHIFT;
  618. ;pOut->uR = pArg2->uR<<COLOR_SHIFT;
  619. movq mm4, mm2
  620. psllw mm4, 8
  621. ', `$1', `Modulate', `
  622. ;pOut->uB = pArg1->uB*pArg2->uB;
  623. ;pOut->uG = pArg1->uG*pArg2->uG;
  624. ;pOut->uR = pArg1->uR*pArg2->uR;
  625. movq mm4, mm1
  626. movq mm5, mm2
  627. pmullw mm4, mm5
  628. ', `$1', `Modulate2', `
  629. ;pOut->uB = min(((UINT32)pArg1->uB*pArg2->uB)<<1, 0xffff);
  630. ;pOut->uG = min(((UINT32)pArg1->uG*pArg2->uG)<<1, 0xffff);
  631. ;pOut->uR = min(((UINT32)pArg1->uR*pArg2->uR)<<1, 0xffff);
  632. movq mm4, mm1
  633. movq mm5, mm2
  634. pmullw mm4, mm5
  635. movq mm5, mm4
  636. psllw mm4, 1
  637. psraw mm5, 15 ; Make mask based on high bit. This is used to make value saturate.
  638. paddusw mm4, mm5 ; This could just as well be a por to set all bits.
  639. ', `$1', `Modulate4', `
  640. ;pOut->uB = min(((UINT32)pArg1->uB*pArg2->uB)<<2, 0xffff);
  641. ;pOut->uG = min(((UINT32)pArg1->uG*pArg2->uG)<<2, 0xffff);
  642. ;pOut->uR = min(((UINT32)pArg1->uR*pArg2->uR)<<2, 0xffff);
  643. movq mm4, mm1
  644. movq mm5, mm2
  645. pmullw mm4, mm5
  646. movq mm5, mm4
  647. psllw mm4, 2
  648. paddusw mm5, MMWORD PTR Val0x0000400040004000 ; This will set sign bit if either of the upper bits are set.
  649. psraw mm5, 15 ; Shift sign bit down to all bits.
  650. paddusw mm4, mm5 ; This could just as well be a por to set all bits.
  651. ', `$1', `Add', `
  652. ;pOut->uB = min((UINT32)pArg1->uB+pArg2->uB, 0xffff);
  653. ;pOut->uG = min((UINT32)pArg1->uG+pArg2->uG, 0xffff);
  654. ;pOut->uR = min((UINT32)pArg1->uR+pArg2->uR, 0xffff);
  655. movq mm4, mm1
  656. paddusb mm4, mm2 ; byte saturation is enough because both arguments are 0..255 in the low byte of each word
  657. psllw mm4, 8
  658. ', `$1', `AddSigned', `
  659. ;pOut->uB = min(max((INT32)pArg1->uB+pArg2->uB-128, 0x0), 0xff);
  660. ;pOut->uG = min(max((INT32)pArg1->uG+pArg2->uG-128, 0x0), 0xff);
  661. ;pOut->uR = min(max((INT32)pArg1->uR+pArg2->uR-128, 0x0), 0xff);
  662. movq mm4, mm1
  663. movq mm5, mm2
  664. ; Actually only shifting up by 7 instead of 8.
  665. psllw mm4, 7 ; Shift down by one bit to check to see if there is a carry.
  666. psllw mm5, 7 ; Cant use saturate twice since were doing add and sub and it could mess up result.
  667. paddw mm5, mm4
  668. psubusw mm5, MMWORD PTR RGBVal128 ; subtract 128 from shifted value. It is really 64 in the upper byte.
  669. movq mm4, mm5
  670. psraw mm5, 15
  671. psllw mm4, 1
  672. por mm4, mm5 ; Could have used paddusw here, but por achieves same effect.
  673. ', `$1', `BlendDiffuseAlpha', `
  674. ;INT32 iA = pS->uA>>COLOR_SHIFT;
  675. ;pOut->uB = (UINT16)(iA*(pArg1->uB - pArg2->uB) + (pArg2->uB<<8));
  676. ;pOut->uG = (UINT16)(iA*(pArg1->uG - pArg2->uG) + (pArg2->uG<<8));
  677. ;pOut->uR = (UINT16)(iA*(pArg1->uR - pArg2->uR) + (pArg2->uR<<8));
  678. movq mm4, mm1
  679. movq mm5, mm2
  680. psubw mm4, mm5
  681. movq mm5, XpS(uB) ; Set iA = pS->uA
  682. psrlw mm5, 8
  683. punpckhwd mm5, mm5 ; copy iA to high dword.
  684. punpckhdq mm5, mm5 ; copy iA to full register
  685. pmullw mm4, mm5
  686. movq mm5, mm2
  687. psllw mm5, 8
  688. paddw mm4, mm5
  689. ', `$1', `BlendTextureAlpha', `
  690. ;INT32 iA = RGBA_GETALPHA(pCtx->SI.TexCol[iTex]);
  691. ;pOut->uB = (UINT16)(iA*(pArg1->uB - pArg2->uB) + (pArg2->uB<<8));
  692. ;pOut->uG = (UINT16)(iA*(pArg1->uG - pArg2->uG) + (pArg2->uG<<8));
  693. ;pOut->uR = (UINT16)(iA*(pArg1->uR - pArg2->uR) + (pArg2->uR<<8));
  694. movq mm4, mm1
  695. movq mm5, mm2
  696. psubw mm4, mm5
  697. movd mm5, XpCtxSI(TexCol+edx)
  698. punpcklbw mm5, Zero
  699. punpckhwd mm5, mm5 ; copy iA to high dword.
  700. punpckhdq mm5, mm5 ; copy iA to full register
  701. pmullw mm4, mm5
  702. movq mm5, mm2
  703. psllw mm5, 8
  704. paddw mm4, mm5
  705. ', `$1', `BlendFactorAlpha', `
  706. ;INT32 iA = RGBA_GETALPHA(pCtx->pdwRenderState[D3DRS_TEXTUREFACTOR]);
  707. ;pOut->uB = (UINT16)(iA*(pArg1->uB - pArg2->uB) + (pArg2->uB<<COLOR_SHIFT));
  708. ;pOut->uG = (UINT16)(iA*(pArg1->uG - pArg2->uG) + (pArg2->uG<<COLOR_SHIFT));
  709. ;pOut->uR = (UINT16)(iA*(pArg1->uR - pArg2->uR) + (pArg2->uR<<COLOR_SHIFT));
  710. movq mm4, mm1
  711. movq mm5, mm2
  712. psubw mm4, mm5
  713. movd mm5, XpCtx(pdwRenderState+RS_TEXTUREFACTOR)
  714. punpcklbw mm5, Zero
  715. punpckhwd mm5, mm5 ; copy iA to high dword.
  716. punpckhdq mm5, mm5 ; copy iA to full register
  717. pmullw mm4, mm5
  718. movq mm5, mm2
  719. psllw mm5, 8
  720. paddw mm4, mm5
  721. ', `$1', `BlendTextureAlphaPM', `
  722. ;INT32 iA = 255 - RGBA_GETALPHA(pCtx->SI.TexCol[iTex]);
  723. ;pOut->uB = min((UINT32)((pArg1->uB<<COLOR_SHIFT) + iA*pArg2->uB), 0xffff);
  724. ;pOut->uG = min((UINT32)((pArg1->uG<<COLOR_SHIFT) + iA*pArg2->uG), 0xffff);
  725. ;pOut->uR = min((UINT32)((pArg1->uR<<COLOR_SHIFT) + iA*pArg2->uR), 0xffff);
  726. movd mm5, XpCtxSI(TexCol+edx)
  727. pcmpeqw mm4, mm4 ; These two lines make 255 - TexColAlpha
  728. pxor mm5, mm4
  729. punpcklbw mm5, Zero
  730. punpckhwd mm5, mm5 ; copy iA to high dword.
  731. punpckhdq mm5, mm5 ; copy iA to full register
  732. movq mm4, mm2
  733. pmullw mm4, mm5
  734. movq mm5, mm1
  735. psllw mm5, 8
  736. paddusw mm4, mm5
  737. ', `$1', `AddSigned2', `
  738. ; pOut->uB = (min(max(((INT32)pArg1->uB+pArg2->uB-128) << 1, 0x0), 0xff))<<COLOR_SHIFT;
  739. ; pOut->uG = (min(max(((INT32)pArg1->uG+pArg2->uG-128) << 1, 0x0), 0xff))<<COLOR_SHIFT;
  740. ; pOut->uR = (min(max(((INT32)pArg1->uR+pArg2->uR-128) << 1, 0x0), 0xff))<<COLOR_SHIFT;
  741. movq mm4, mm1
  742. movq mm5, mm2
  743. ; Actually only shifting up by 7 instead of 8.
  744. psllw mm4, 7 ; Shift down by one bit to check to see if there is a carry.
  745. psllw mm5, 7 ; Cant use saturate twice since were doing add and sub and it could mess up result.
  746. paddw mm5, mm4
  747. psubusw mm5, MMWORD PTR RGBVal128 ; subtract 128 from shifted value. It is really 64 in the upper byte.
  748. movq mm4, mm5
  749. psllw mm4, 1
  750. por mm5, mm4 ; sign bit is now set if either of the two upper bits was set
  751. psraw mm5, 15
  752. psllw mm4, 1
  753. por mm4, mm5 ; Could have used paddusw here, but por achieves same effect.
  754. ', `$1', `Subtract', `
  755. ; pOut->uB = max((~((~(UINT32)pArg1->uB) + pArg2->uB)), 0x0)<<COLOR_SHIFT;
  756. ; pOut->uG = max((~((~(UINT32)pArg1->uG) + pArg2->uG)), 0x0)<<COLOR_SHIFT;
  757. ; pOut->uR = max((~((~(UINT32)pArg1->uR) + pArg2->uR)), 0x0)<<COLOR_SHIFT;
  758. movq mm4, mm1
  759. psubusw mm4, mm2
  760. psllw mm4, 8
  761. ', `$1', `AddSmooth', `
  762. ; pOut->uB = min((pArg1->uB<<COLOR_SHIFT) + (255 - pArg1->uB)*pArg2->uB, 0xffff);
  763. ; pOut->uG = min((pArg1->uG<<COLOR_SHIFT) + (255 - pArg1->uG)*pArg2->uG, 0xffff);
  764. ; pOut->uR = min((pArg1->uR<<COLOR_SHIFT) + (255 - pArg1->uR)*pArg2->uR, 0xffff);
  765. movq mm4, MMWORD PTR Val0x00ff00ff00ff00ff
  766. pxor mm4, mm1 ; 255 - Arg1 in every word, valid because Arg1 words are 0..255
  767. pmullw mm4, mm2 ; (255 - Arg1) * Arg2 is at most 0xfe01, so it fits in a word
  768. movq mm5, mm1
  769. psllw mm5, 8
  770. paddusw mm4, mm5 ; Forming (Arg1 + Arg2) shifted left by 8 instead would drop the carry once the sum passes 255.
  771. ', `$1', `ModulateAlphaAddColor', `
  772. ; pOut->uB = min(((pArg1->uB<<COLOR_SHIFT) + pArg1->uA*pArg2->uB), 0xffff);
  773. ; pOut->uG = min(((pArg1->uG<<COLOR_SHIFT) + pArg1->uA*pArg2->uG), 0xffff);
  774. ; pOut->uR = min(((pArg1->uR<<COLOR_SHIFT) + pArg1->uA*pArg2->uR), 0xffff);
  775. ; Get pArg1->uA
  776. movq mm4, mm1
  777. ; pArg1->uA is already the top word of mm1, so no shift is needed. A psrlw count above 15 would clear every word.
  778. punpckhwd mm4, mm4 ; copy pArg1->uA to high dword.
  779. punpckhdq mm4, mm4 ; copy pArg1->uA to full register
  780. pmullw mm4, mm2
  781. movq mm5, mm1
  782. psllw mm5, 8
  783. paddusw mm4, mm5 ; saturating add gives the min with 0xffff. The sum can reach 0xff00 + 0xfe01.
  784. ', `$1', `ModulateColorAddAlpha', `
  785. ; pOut->uB = min((pArg2->uB*pArg1->uB + (pArg1->uA<<COLOR_SHIFT)), 0xffff);
  786. ; pOut->uG = min((pArg2->uG*pArg1->uG + (pArg1->uA<<COLOR_SHIFT)), 0xffff);
  787. ; pOut->uR = min((pArg2->uR*pArg1->uR + (pArg1->uA<<COLOR_SHIFT)), 0xffff);
  788. movq mm4, mm1
  789. movq mm5, mm2
  790. pmullw mm4, mm5
  791. ; Get pArg1->uA
  792. movq mm5, mm1
  793. psllw mm5, 8 ; scale every word, including the top alpha word, up to the 16 bit output range
  794. punpckhwd mm5, mm5 ; copy pArg1->uA to high dword.
  795. punpckhdq mm5, mm5 ; copy pArg1->uA to full register
  796. paddusw mm4, mm5 ; saturating add gives the min with 0xffff
  797. ')
  798. ; Need pOuts alpha value.
  799. pand mm4, mm6 ; keep the three color words just computed
  800. pandn mm6, mm3 ; take only the top word of the alpha result
  801. por mm4, mm6
  802. ret
  803. ;}
  804. ')
  805. dnl
  806. d_RepStr(`d_TexBlendOpColor(AA)',
  807. `AA', None, CopyArg1, CopyArg2, Modulate, Modulate2, Modulate4, Add, AddSigned,
  808. BlendDiffuseAlpha, BlendTextureAlpha, BlendFactorAlpha, BlendTextureAlphaPM,
  809. AddSigned2, Subtract, AddSmooth, ModulateAlphaAddColor, ModulateColorAddAlpha)
  810. dnl
  811. END