Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1232 lines
36 KiB

  1. /**************************************************************************\
  2. *
  3. * Copyright (c) 1999-2000 Microsoft Corporation
  4. *
  5. * Module name:
  6. *
  7. * The "Blend" scan operation.
  8. *
  9. * Abstract:
  10. *
  11. * See Gdiplus\Specs\ScanOperation.doc for an overview.
  12. *
  13. * Notes:
  14. *
  15. * Revision History:
  16. *
  17. * 12/07/1999 agodfrey
  18. * Created it.
  19. *
  20. \**************************************************************************/
  21. #include "precomp.hpp"
  22. /**************************************************************************\
  23. *
  24. * Operation Description:
  25. *
  26. * Blend: Does a SrcOver alpha-blend operation.
  27. *
  28. * Arguments:
  29. *
  30. * dst - The destination scan
  31. * src - The source scan (usually equal to dst).
  32. * count - The length of the scan, in pixels
  33. * otherParams - Additional data. (We use BlendingScan.)
  34. *
  35. * Return Value:
  36. *
  37. * None
  38. *
  39. * Notes:
  40. *
  41. * This is a ternary operation. We take pixels from 'src', blend pixels
  42. * from 'otherParams->BlendingScan' over them, and write the result to 'dst'.
  43. *
  44. * Since the formats of the 'dst' and 'src' scans are the same for all
  45. * the blend functions we implement, the naming is simplified to list just
  46. * the format of BlendingScan, then the format of 'dst'.
  47. *
  48. * src and dst may be equal; otherwise, they must point to scans which do
  49. * not overlap in memory.
  50. *
  51. * The blend operation adheres to the following rule:
  52. * "If the blending alpha value is zero, do not write the destination pixel."
  53. *
  54. * In other words, it is also a 'WriteRMW' operation. This allows us to
  55. * avoid a separate 'WriteRMW' step in some cases. See SOReadRMW.cpp and
  56. * SOWriteRMW.cpp.
  57. *
  58. * The impact of this is that you have to be careful if you want 'blend'
  59. * to be a true ternary operation. Remember, if a blend pixel
  60. * is transparent, NOTHING gets written to the corresponding destination
  61. * pixel. One way to solve this is to make sure that the final operation in
  62. * your pipeline is a WriteRMW operation.
  63. *
  64. * History:
  65. *
  66. * 04/04/1999 andrewgo
  67. * Created it.
  68. * 12/07/1999 agodfrey
  69. * Included the 32bpp blend (moved from Ddi/scan.cpp)
  70. * 01/06/2000 agodfrey
  71. * Added AndrewGo's code for 565, 555, RGB24 and BGR24. Changed the
  72. * blends to be 'almost' ternary operations.
  73. *
  74. \**************************************************************************/
  75. VOID FASTCALL
  76. ScanOperation::BlendLinear_sRGB_32RGB(
  77. VOID *dst,
  78. const VOID *src,
  79. INT count,
  80. const OtherParams *otherParams
  81. )
  82. {
  83. int nRun;
  84. void *buffer0=otherParams->TempBuffers[0];
  85. void *buffer1=otherParams->TempBuffers[1];
  86. void *buffer2=otherParams->TempBuffers[2];
  87. DEFINE_POINTERS(ARGB, ARGB)
  88. DEFINE_BLEND_POINTER(ARGB)
  89. using namespace sRGB;
  90. OtherParams otherParams2=*otherParams;
  91. while (count>0)
  92. {
  93. // Find the run of translucent pixels
  94. nRun=0;
  95. while (isTranslucent(*((ARGB*)(bl+nRun))))
  96. {
  97. nRun++;
  98. if (nRun==count) { break; }
  99. }
  100. if (nRun==0)
  101. {
  102. while ((count>0) && (((*((DWORD*)bl))>>24)==0xFF))
  103. {
  104. *d=*bl;
  105. count--;
  106. d++;
  107. bl++;
  108. s++;
  109. }
  110. while ((count>0) && (((*((DWORD*)bl))>>24)==0x00))
  111. {
  112. count--;
  113. d++;
  114. bl++;
  115. s++;
  116. }
  117. }
  118. else
  119. {
  120. // Source
  121. GammaConvert_sRGB_sRGB64(buffer1,s,nRun,otherParams);
  122. // Surface to blend
  123. AlphaDivide_sRGB(buffer0,bl,nRun,otherParams);
  124. GammaConvert_sRGB_sRGB64(buffer2,buffer0,nRun,otherParams);
  125. AlphaMultiply_sRGB64(buffer0,buffer2,nRun,otherParams);
  126. // Blend to destination.
  127. // Must blend using the previous result as the bl
  128. otherParams2.BlendingScan=buffer0;
  129. Blend_sRGB64_sRGB64(buffer1,buffer1,nRun,&otherParams2);
  130. GammaConvert_sRGB64_sRGB(d,buffer1,nRun,otherParams);
  131. count-=nRun;
  132. d+=nRun;
  133. bl+=nRun;
  134. s+=nRun;
  135. }
  136. }
  137. }
  138. VOID FASTCALL
  139. ScanOperation::BlendLinear_sRGB_32RGB_MMX(
  140. VOID *dst,
  141. const VOID *src,
  142. INT count,
  143. const OtherParams *otherParams
  144. )
  145. {
  146. int nRun;
  147. void *buffer0=otherParams->TempBuffers[0];
  148. void *buffer1=otherParams->TempBuffers[1];
  149. void *buffer2=otherParams->TempBuffers[2];
  150. DEFINE_POINTERS(ARGB, ARGB)
  151. DEFINE_BLEND_POINTER(ARGB)
  152. using namespace sRGB;
  153. OtherParams otherParams2=*otherParams;
  154. while (count>0)
  155. {
  156. // Find the run of translucent pixels
  157. nRun=0;
  158. while (isTranslucent(*((ARGB*)(bl+nRun))))
  159. {
  160. nRun++;
  161. if (nRun==count) { break; }
  162. }
  163. if (nRun==0)
  164. {
  165. while ((count>0) && (((*((DWORD*)bl))>>24)==0xFF))
  166. {
  167. *d=*bl;
  168. count--;
  169. d++;
  170. bl++;
  171. s++;
  172. }
  173. while ((count>0) && (((*((DWORD*)bl))>>24)==0x00))
  174. {
  175. count--;
  176. d++;
  177. bl++;
  178. s++;
  179. }
  180. }
  181. else
  182. {
  183. // Source
  184. GammaConvert_sRGB_sRGB64(buffer1,s,nRun,otherParams);
  185. // Surface to blend
  186. AlphaDivide_sRGB(buffer0,bl,nRun,otherParams);
  187. GammaConvert_sRGB_sRGB64(buffer2,buffer0,nRun,otherParams);
  188. AlphaMultiply_sRGB64(buffer0,buffer2,nRun,otherParams);
  189. // Blend to destination
  190. // Must blend using the previous result as the bl
  191. otherParams2.BlendingScan=buffer0;
  192. Blend_sRGB64_sRGB64_MMX(buffer1,buffer1,nRun,&otherParams2);
  193. GammaConvert_sRGB64_sRGB(d,buffer1,nRun,otherParams);
  194. count-=nRun;
  195. d+=nRun;
  196. bl+=nRun;
  197. s+=nRun;
  198. }
  199. }
  200. }
  201. VOID FASTCALL
  202. ScanOperation::BlendLinear_sRGB_565(
  203. VOID *dst,
  204. const VOID *src,
  205. INT count,
  206. const OtherParams *otherParams
  207. )
  208. {
  209. int nRun;
  210. void *buffer0=otherParams->TempBuffers[0];
  211. void *buffer1=otherParams->TempBuffers[1];
  212. void *buffer2=otherParams->TempBuffers[2];
  213. DEFINE_POINTERS(UINT16,UINT16)
  214. DEFINE_BLEND_POINTER(ARGB)
  215. using namespace sRGB;
  216. OtherParams otherParams2=*otherParams;
  217. while (count>0)
  218. {
  219. // Find the run of translucent pixels
  220. nRun=0;
  221. while (isTranslucent(*((ARGB*)(bl+nRun))))
  222. {
  223. nRun++;
  224. if (nRun==count) { break; }
  225. }
  226. if (nRun==0)
  227. {
  228. while (((*((DWORD*)bl+nRun))>>24)==0xFF)
  229. {
  230. nRun++;
  231. if (nRun==count) { break; }
  232. }
  233. if (nRun>0)
  234. {
  235. Dither_sRGB_565(d,bl,nRun,otherParams);
  236. count-=nRun;
  237. d+=nRun;
  238. bl+=nRun;
  239. s+=nRun;
  240. }
  241. while ((count>0) && (((*((DWORD*)bl))>>24)==0x00))
  242. {
  243. count--;
  244. d++;
  245. bl++;
  246. s++;
  247. }
  248. }
  249. else
  250. {
  251. // Source
  252. Convert_565_sRGB(buffer2,s,nRun,otherParams);
  253. GammaConvert_sRGB_sRGB64(buffer1,buffer2,nRun,otherParams);
  254. // Surface to blend
  255. AlphaDivide_sRGB(buffer0,bl,nRun,otherParams);
  256. GammaConvert_sRGB_sRGB64(buffer2,buffer0,nRun,otherParams);
  257. AlphaMultiply_sRGB64(buffer0,buffer2,nRun,otherParams);
  258. // Blend to destination
  259. otherParams2.BlendingScan=buffer0;
  260. Blend_sRGB64_sRGB64(buffer1,buffer1,nRun,&otherParams2);
  261. GammaConvert_sRGB64_sRGB(buffer2,buffer1,nRun,otherParams);
  262. Dither_sRGB_565(d,buffer2,nRun,otherParams);
  263. count-=nRun;
  264. d+=nRun;
  265. bl+=nRun;
  266. s+=nRun;
  267. }
  268. }
  269. }
  270. VOID FASTCALL
  271. ScanOperation::BlendLinear_sRGB_565_MMX(
  272. VOID *dst,
  273. const VOID *src,
  274. INT count,
  275. const OtherParams *otherParams
  276. )
  277. {
  278. int nRun;
  279. void *buffer0=otherParams->TempBuffers[0];
  280. void *buffer1=otherParams->TempBuffers[1];
  281. void *buffer2=otherParams->TempBuffers[2];
  282. DEFINE_POINTERS(UINT16,UINT16)
  283. DEFINE_BLEND_POINTER(ARGB)
  284. using namespace sRGB;
  285. OtherParams otherParams2=*otherParams;
  286. while (count>0)
  287. {
  288. // Find the run of translucent pixels
  289. nRun=0;
  290. while (isTranslucent(*((ARGB*)(bl+nRun))))
  291. {
  292. nRun++;
  293. if (nRun==count) { break; }
  294. }
  295. if (nRun==0)
  296. {
  297. while (((*((DWORD*)bl+nRun))>>24)==0xFF)
  298. {
  299. nRun++;
  300. if (nRun==count) { break; }
  301. }
  302. if (nRun>0)
  303. {
  304. Dither_sRGB_565_MMX(d,bl,nRun,otherParams);
  305. count-=nRun;
  306. d+=nRun;
  307. bl+=nRun;
  308. s+=nRun;
  309. }
  310. while ((count>0) && (((*((DWORD*)bl))>>24)==0x00))
  311. {
  312. count--;
  313. d++;
  314. bl++;
  315. s++;
  316. }
  317. }
  318. else
  319. {
  320. // Source
  321. Convert_565_sRGB(buffer2,s,nRun,otherParams);
  322. GammaConvert_sRGB_sRGB64(buffer1,buffer2,nRun,otherParams);
  323. // Surface to blend
  324. AlphaDivide_sRGB(buffer0,bl,nRun,otherParams);
  325. GammaConvert_sRGB_sRGB64(buffer2,buffer0,nRun,otherParams);
  326. AlphaMultiply_sRGB64(buffer0,buffer2,nRun,otherParams);
  327. // Blend to destination
  328. otherParams2.BlendingScan=buffer0;
  329. Blend_sRGB64_sRGB64_MMX(buffer1,buffer1,nRun,&otherParams2);
  330. GammaConvert_sRGB64_sRGB(buffer2,buffer1,nRun,otherParams);
  331. Dither_sRGB_565_MMX(d,buffer2,nRun,otherParams);
  332. count-=nRun;
  333. d+=nRun;
  334. bl+=nRun;
  335. s+=nRun;
  336. }
  337. }
  338. }
  339. VOID FASTCALL
  340. ScanOperation::BlendLinear_sRGB_555(
  341. VOID *dst,
  342. const VOID *src,
  343. INT count,
  344. const OtherParams *otherParams
  345. )
  346. {
  347. int nRun;
  348. void *buffer0=otherParams->TempBuffers[0];
  349. void *buffer1=otherParams->TempBuffers[1];
  350. void *buffer2=otherParams->TempBuffers[2];
  351. DEFINE_POINTERS(UINT16,UINT16)
  352. DEFINE_BLEND_POINTER(ARGB)
  353. using namespace sRGB;
  354. OtherParams otherParams2=*otherParams;
  355. while (count>0)
  356. {
  357. // Find the run of translucent pixels
  358. nRun=0;
  359. while (isTranslucent(*((ARGB*)(bl+nRun))))
  360. {
  361. nRun++;
  362. if (nRun==count) { break; }
  363. }
  364. if (nRun==0)
  365. {
  366. while (((*((DWORD*)bl+nRun))>>24)==0xFF)
  367. {
  368. nRun++;
  369. if (nRun==count) { break; }
  370. }
  371. if (nRun>0)
  372. {
  373. Dither_sRGB_555(d,bl,nRun,otherParams);
  374. count-=nRun;
  375. d+=nRun;
  376. bl+=nRun;
  377. s+=nRun;
  378. }
  379. while ((count>0) && (((*((DWORD*)bl))>>24)==0x00))
  380. {
  381. count--;
  382. d++;
  383. bl++;
  384. s++;
  385. }
  386. }
  387. else
  388. {
  389. // Source
  390. Convert_555_sRGB(buffer2,s,nRun,otherParams);
  391. GammaConvert_sRGB_sRGB64(buffer1,buffer2,nRun,otherParams);
  392. // Surface to blend
  393. AlphaDivide_sRGB(buffer0,bl,nRun,otherParams);
  394. GammaConvert_sRGB_sRGB64(buffer2,buffer0,nRun,otherParams);
  395. AlphaMultiply_sRGB64(buffer0,buffer2,nRun,otherParams);
  396. // Blend to destination
  397. otherParams2.BlendingScan=buffer0;
  398. Blend_sRGB64_sRGB64(buffer1,buffer1,nRun,&otherParams2);
  399. GammaConvert_sRGB64_sRGB(buffer2,buffer1,nRun,otherParams);
  400. Dither_sRGB_555(d,buffer2,nRun,otherParams);
  401. count-=nRun;
  402. d+=nRun;
  403. bl+=nRun;
  404. s+=nRun;
  405. }
  406. }
  407. }
  408. VOID FASTCALL
  409. ScanOperation::BlendLinear_sRGB_555_MMX(
  410. VOID *dst,
  411. const VOID *src,
  412. INT count,
  413. const OtherParams *otherParams
  414. )
  415. {
  416. int nRun;
  417. void *buffer0=otherParams->TempBuffers[0];
  418. void *buffer1=otherParams->TempBuffers[1];
  419. void *buffer2=otherParams->TempBuffers[2];
  420. DEFINE_POINTERS(UINT16,UINT16)
  421. DEFINE_BLEND_POINTER(ARGB)
  422. using namespace sRGB;
  423. OtherParams otherParams2=*otherParams;
  424. while (count>0)
  425. {
  426. // Find the run of translucent pixels
  427. nRun=0;
  428. while (isTranslucent(*((ARGB*)(bl+nRun))))
  429. {
  430. nRun++;
  431. if (nRun==count) { break; }
  432. }
  433. if (nRun==0)
  434. {
  435. while (((*((DWORD*)bl+nRun))>>24)==0xFF)
  436. {
  437. nRun++;
  438. if (nRun==count) { break; }
  439. }
  440. if (nRun>0)
  441. {
  442. Dither_sRGB_555_MMX(d,bl,nRun,otherParams);
  443. count-=nRun;
  444. d+=nRun;
  445. bl+=nRun;
  446. s+=nRun;
  447. }
  448. while ((count>0) && (((*((DWORD*)bl))>>24)==0x00))
  449. {
  450. count--;
  451. d++;
  452. bl++;
  453. s++;
  454. }
  455. }
  456. else
  457. {
  458. // Source
  459. Convert_555_sRGB(buffer2,s,nRun,otherParams);
  460. GammaConvert_sRGB_sRGB64(buffer1,buffer2,nRun,otherParams);
  461. // Surface to blend
  462. AlphaDivide_sRGB(buffer0,bl,nRun,otherParams);
  463. GammaConvert_sRGB_sRGB64(buffer2,buffer0,nRun,otherParams);
  464. AlphaMultiply_sRGB64(buffer0,buffer2,nRun,otherParams);
  465. // Blend to destination
  466. otherParams2.BlendingScan=buffer0;
  467. Blend_sRGB64_sRGB64_MMX(buffer1,buffer1,nRun,&otherParams2);
  468. GammaConvert_sRGB64_sRGB(buffer2,buffer1,nRun,otherParams);
  469. Dither_sRGB_555_MMX(d,buffer2,nRun,otherParams);
  470. count-=nRun;
  471. d+=nRun;
  472. bl+=nRun;
  473. s+=nRun;
  474. }
  475. }
  476. }
  477. // Blend sRGB over sRGB, ignoring the non-linear gamma.
  478. VOID FASTCALL
  479. ScanOperation::Blend_sRGB_sRGB(
  480. VOID *dst,
  481. const VOID *src,
  482. INT count,
  483. const OtherParams *otherParams
  484. )
  485. {
  486. DEFINE_POINTERS(ARGB, ARGB)
  487. DEFINE_BLEND_POINTER(ARGB)
  488. ASSERT(count>0);
  489. UINT32 dstPixel;
  490. do {
  491. UINT32 blendPixel = *bl;
  492. UINT32 alpha = blendPixel >> 24;
  493. // If alpha is zero, skip everything, including writing the
  494. // destination pixel. This is needed for the RMW optimization.
  495. if (alpha != 0)
  496. {
  497. if (alpha == 255)
  498. {
  499. dstPixel = blendPixel;
  500. }
  501. else
  502. {
  503. //
  504. // Dst = B + (1-Alpha) * S
  505. //
  506. dstPixel = *s;
  507. ULONG Multa = 255 - alpha;
  508. ULONG _D1_00AA00GG = (dstPixel & 0xff00ff00) >> 8;
  509. ULONG _D1_00RR00BB = (dstPixel & 0x00ff00ff);
  510. ULONG _D2_AAAAGGGG = _D1_00AA00GG * Multa + 0x00800080;
  511. ULONG _D2_RRRRBBBB = _D1_00RR00BB * Multa + 0x00800080;
  512. ULONG _D3_00AA00GG = (_D2_AAAAGGGG & 0xff00ff00) >> 8;
  513. ULONG _D3_00RR00BB = (_D2_RRRRBBBB & 0xff00ff00) >> 8;
  514. ULONG _D4_AA00GG00 = (_D2_AAAAGGGG + _D3_00AA00GG) & 0xFF00FF00;
  515. ULONG _D4_00RR00BB = ((_D2_RRRRBBBB + _D3_00RR00BB) & 0xFF00FF00) >> 8;
  516. dstPixel = blendPixel + _D4_AA00GG00 + _D4_00RR00BB;
  517. }
  518. *d = dstPixel;
  519. }
  520. bl++;
  521. s++;
  522. d++;
  523. } while (--count != 0);
  524. }
  525. VOID FASTCALL
  526. ScanOperation::Blend_sRGB_sRGB_MMX(
  527. VOID *dst,
  528. const VOID *src,
  529. INT count,
  530. const OtherParams *otherParams
  531. )
  532. {
  533. #if defined(_X86_)
  534. using namespace sRGB;
  535. DEFINE_POINTERS(ARGB64, ARGB64)
  536. const void *pbl=otherParams->BlendingScan;
  537. static ULONGLONG halfMask=0x0080008000800080;
  538. DWORD dwBlendPixel;
  539. _asm {
  540. mov ecx,count ; ecx=pixel counter
  541. mov ebx,pbl ; ebx=blend pixel pointer
  542. mov esi,s ; esi=source pixel pointer
  543. mov edi,d ; edi=dest pixel pointer
  544. pxor mm7,mm7 ; mm7=[0|0|0|0]
  545. movq mm3,halfMask
  546. main_loop:
  547. mov eax,DWORD ptr [ebx]
  548. mov edx,eax ; eax=blend pixel
  549. shr edx,24 ; edx=alpha
  550. cmp edx,0 ; For some reason, doing a jz right after a shr stalls
  551. jz alpha_blend_done ; if alpha=0, no blending
  552. cmp edx,0xFF
  553. jne alpha_blend
  554. mov [edi],eax ; if alpha=0xFF, copy bl to dest
  555. jmp alpha_blend_done
  556. alpha_blend:
  557. movd mm4,eax
  558. mov eax,[esi] ; eax=source
  559. movd mm0,eax ; mm0=[0|0|AR|GB]
  560. punpcklbw mm0,mm7 ; mm0=[A|R|G|B]
  561. xor edx,0xFF ; C=255-Alpha
  562. movd mm2,edx ; mm2=[0|0|0|C]
  563. punpcklwd mm2,mm2 ; mm2=[0|0|C|C]
  564. punpckldq mm2,mm2 ; mm2=[C|C|C|C]
  565. pmullw mm0,mm2
  566. paddw mm0,mm3 ; mm0=[AA|RR|GG|BB]
  567. movq mm2,mm0 ; mm2=[AA|RR|GG|BB]
  568. psrlw mm0,8 ; mm0=[A|R|G|B]
  569. paddw mm0,mm2 ; mm0=[AA|RR|GG|BB]
  570. psrlw mm0,8 ; mm0=[A|R|G|B]
  571. packuswb mm0,mm0 ; mm0=[AR|GB|AR|GB]
  572. paddd mm0,mm4 ; Add the blend pixel
  573. movd edx,mm0 ; edx=[ARGB] -> result pixel
  574. mov [edi],edx
  575. alpha_blend_done:
  576. add edi,4
  577. add esi,4
  578. add ebx,4
  579. dec ecx
  580. jg main_loop
  581. emms
  582. }
  583. #endif
  584. }
  585. // Blend from sRGB64 to sRGB64.
  586. VOID FASTCALL
  587. ScanOperation::Blend_sRGB64_sRGB64(
  588. VOID *dst,
  589. const VOID *src,
  590. INT count,
  591. const OtherParams *otherParams
  592. )
  593. {
  594. DEFINE_POINTERS(ARGB64, ARGB64)
  595. DEFINE_BLEND_POINTER(ARGB64)
  596. using namespace sRGB;
  597. while (count--)
  598. {
  599. sRGB64Color blendPixel;
  600. blendPixel.argb = *bl;
  601. INT16 alpha = blendPixel.a;
  602. // If alpha is zero, skip everything, including writing the
  603. // destination pixel. This is needed for the RMW optimization.
  604. if (alpha != 0)
  605. {
  606. sRGB64Color dstPixel;
  607. if (alpha == SRGB_ONE)
  608. {
  609. dstPixel.argb = blendPixel.argb;
  610. }
  611. else
  612. {
  613. //
  614. // Dst = Src + (1-Alpha) * Dst
  615. //
  616. dstPixel.argb = *s;
  617. INT Multa = SRGB_ONE - alpha;
  618. dstPixel.r = ((dstPixel.r * Multa + SRGB_HALF) >> SRGB_FRACTIONBITS) + blendPixel.r;
  619. dstPixel.g = ((dstPixel.g * Multa + SRGB_HALF) >> SRGB_FRACTIONBITS) + blendPixel.g;
  620. dstPixel.b = ((dstPixel.b * Multa + SRGB_HALF) >> SRGB_FRACTIONBITS) + blendPixel.b;
  621. dstPixel.a = ((dstPixel.a * Multa + SRGB_HALF) >> SRGB_FRACTIONBITS) + blendPixel.a;
  622. }
  623. *d = dstPixel.argb;
  624. }
  625. bl++;
  626. s++;
  627. d++;
  628. }
  629. }
  630. // Blend from sRGB64 to sRGB64 MMX.
  631. VOID FASTCALL
  632. ScanOperation::Blend_sRGB64_sRGB64_MMX(
  633. VOID *dst,
  634. const VOID *src,
  635. INT count,
  636. const OtherParams *otherParams
  637. )
  638. {
  639. #if defined(_X86_)
  640. using namespace sRGB;
  641. DEFINE_POINTERS(ARGB64, ARGB64)
  642. const void *pbl=otherParams->BlendingScan;
  643. static ULONGLONG ullSRGBHalfMask=0x1000100010001000;
  644. _asm {
  645. mov ecx,count ; ecx=pixel counter
  646. mov ebx,pbl ; ebx=blend pixel pointer
  647. mov esi,s ; esi=source pixel pointer
  648. mov edi,d ; edi=dest pixel pointer
  649. movq mm4,ullSRGBHalfMask ; mm4=mask with srgb half
  650. main_loop:
  651. movsx eax,word ptr [ebx+3*2] ; eax=alpha
  652. or eax,eax ; eax==0?
  653. jz alpha_blend_done ; if alpha=0, no blending
  654. movq mm0,[ebx] ; mm0=blend pixel
  655. cmp eax,SRGB_ONE ; if alpha=SRGB_ONE, dest=blend
  656. jne alpha_blend
  657. movq [edi],mm0 ; copy blend pixel to dest
  658. jmp alpha_blend_done
  659. alpha_blend:
  660. ; Get SRGB_ONE-Alpha
  661. neg eax
  662. add eax,SRGB_ONE ; C=SRGB_ONE-Alpha
  663. movd mm2, eax ; mm2=[0|0|0|C]
  664. punpcklwd mm2, mm2
  665. punpckldq mm2, mm2 ; mm2=[C|C|C|C]
  666. ; Blend pixels
  667. movq mm1,[esi] ; mm1=[A|R|G|B] source pixel
  668. movq mm3,mm1 ; mm3=[A|R|G|B] source pixel
  669. pmullw mm1,mm2 ; low word of source*C
  670. paddw mm1,mm4 ; add an srgb half for rounding
  671. psrlw mm1,SRGB_FRACTIONBITS ; truncate low SRGB_FRACTIONBITS
  672. pmulhw mm3,mm2 ; high word of source*C
  673. psllw mm3,SRGB_INTEGERBITS ; truncate high SRGB_INTEGERBITS
  674. por mm1,mm3 ; mm1=[A|R|G|B]
  675. paddw mm1,mm0 ; add blend pixel
  676. movq [edi],mm1 ; copy result to dest
  677. alpha_blend_done:
  678. add edi,8
  679. add esi,8
  680. add ebx,8
  681. dec ecx
  682. jg main_loop
  683. emms
  684. }
  685. #endif
  686. }
  687. // Blend from sRGB to 16bpp 565, ignoring sRGB's non-linear gamma.
  688. VOID FASTCALL
  689. ScanOperation::Blend_sRGB_565(
  690. VOID *dst,
  691. const VOID *src,
  692. INT count,
  693. const OtherParams *otherParams
  694. )
  695. {
  696. DEFINE_POINTERS(UINT16, UINT16)
  697. DEFINE_BLEND_POINTER(ARGB)
  698. ASSERT(count>0);
  699. do {
  700. UINT32 blendPixel = *bl;
  701. UINT32 alpha = blendPixel >> 27;
  702. if (alpha != 0)
  703. {
  704. UINT32 dstPixel;
  705. // Blend: S + [ (255 - sA) * D ] / 255
  706. // First, convert the source pixel from 32bpp BGRA to
  707. // 5-5-5 16bpp, pre-multiplied.
  708. //
  709. // Note: No rounding needs to be done on this conversion!
  710. blendPixel = ((blendPixel >> 8) & 0xf800) |
  711. ((blendPixel >> 5) & 0x07e0) |
  712. ((blendPixel >> 3) & 0x001f);
  713. if (alpha == 31)
  714. {
  715. dstPixel = blendPixel;
  716. }
  717. else
  718. {
  719. dstPixel = (UINT32) *s;
  720. UINT32 multA = 31 - alpha;
  721. UINT32 D1_00rr00bb = (dstPixel & 0xf81f);
  722. UINT32 D2_rrrrbbbb = D1_00rr00bb * multA + 0x00008010;
  723. UINT32 D3_00rr00bb = (D2_rrrrbbbb & 0x001f03e0) >> 5;
  724. UINT32 D4_rrxxbbxx = ((D2_rrrrbbbb + D3_00rr00bb) >> 5) & 0xf81f;
  725. UINT32 D1_000000gg = (dstPixel & 0x7e0) >> 5;
  726. UINT32 D2_0000gggg = D1_000000gg * 2 * multA + 0x00000020;
  727. UINT32 D3_000000gg = (D2_0000gggg & 0x00000fc0) >> 6;
  728. UINT32 D4_0000ggxx = ((D2_0000gggg + D3_000000gg) & 0x0fc0) >> 1;
  729. dstPixel = (UINT16) ((D4_rrxxbbxx | D4_0000ggxx) + blendPixel);
  730. }
  731. *d = (UINT16) dstPixel;
  732. }
  733. bl++;
  734. s++;
  735. d++;
  736. } while (--count != 0);
  737. }
  738. // Blend from sRGB to 16bpp 555, ignoring sRGB's non-linear gamma.
  739. VOID FASTCALL
  740. ScanOperation::Blend_sRGB_555(
  741. VOID *dst,
  742. const VOID *src,
  743. INT count,
  744. const OtherParams *otherParams
  745. )
  746. {
  747. DEFINE_POINTERS(UINT16, UINT16)
  748. DEFINE_BLEND_POINTER(ARGB)
  749. ASSERT(count>0);
  750. do {
  751. UINT32 blendPixel = *bl;
  752. UINT32 alpha = blendPixel >> 27;
  753. if (alpha != 0)
  754. {
  755. UINT32 dstPixel;
  756. // Blend: S + [ (255 - sA) * D ] / 255
  757. // First, convert the source pixel from 32bpp BGRA to
  758. // 5-5-5 16bpp, pre-multiplied.
  759. //
  760. // Note: No rounding needs to be done on this conversion!
  761. blendPixel = ((blendPixel & 0x00f80000) >> 9) |
  762. ((blendPixel & 0x0000f800) >> 6) |
  763. ((blendPixel & 0x000000f8) >> 3);
  764. if (alpha == 31)
  765. {
  766. dstPixel = blendPixel;
  767. }
  768. else
  769. {
  770. dstPixel = (UINT32) *s;
  771. UINT32 multA = 31 - alpha;
  772. UINT32 D1_00rr00bb = (dstPixel & 0x7c1f);
  773. UINT32 D2_rrrrbbbb = D1_00rr00bb * multA + 0x00004010;
  774. UINT32 D3_00rr00bb = (D2_rrrrbbbb & 0x000f83e0) >> 5;
  775. UINT32 D4_rrxxbbxx = ((D2_rrrrbbbb + D3_00rr00bb) >> 5) & 0x7c1f;
  776. UINT32 D1_000000gg = (dstPixel & 0x3e0) >> 5;
  777. UINT32 D2_0000gggg = D1_000000gg * multA + 0x00000010;
  778. UINT32 D3_000000gg = (D2_0000gggg & 0x000003e0) >> 5;
  779. UINT32 D4_0000ggxx = (D2_0000gggg + D3_000000gg) & 0x03e0;
  780. dstPixel = (UINT16) ((D4_rrxxbbxx | D4_0000ggxx) + blendPixel);
  781. }
  782. *d = (UINT16) dstPixel;
  783. }
  784. bl++;
  785. s++;
  786. d++;
  787. } while (--count != 0);
  788. }
  789. // Blend from sRGB to RGB24, ignoring sRGB's non-linear gamma.
  790. VOID FASTCALL
  791. ScanOperation::Blend_sRGB_24(
  792. VOID *dst,
  793. const VOID *src,
  794. INT count,
  795. const OtherParams *otherParams
  796. )
  797. {
  798. DEFINE_POINTERS(BYTE, BYTE)
  799. DEFINE_BLEND_POINTER(ARGB)
  800. ASSERT(count>0);
  801. do {
  802. if (((UINT_PTR) d & 0x3) == 0)
  803. {
  804. while (count >= 4)
  805. {
  806. BYTE *bb = (BYTE *) bl;
  807. if ((bb[3] & bb[7] & bb[11] & bb[15]) != 0xFF)
  808. {
  809. break;
  810. }
  811. ((UINT32 *) d)[0] = (bb[4] << 24) | (bb[2] << 16) | (bb[1] << 8) | bb[0];
  812. ((UINT32 *) d)[1] = (bb[9] << 24) | (bb[8] << 16) | (bb[6] << 8) | bb[5];
  813. ((UINT32 *) d)[2] = (bb[14] << 24) | (bb[13] << 16) | (bb[12] << 8) | bb[10];
  814. count -= 4;
  815. bl += 4;
  816. d += 12;
  817. s += 12;
  818. }
  819. }
  820. if (count == 0)
  821. {
  822. break;
  823. }
  824. UINT32 blendPixel = *bl;
  825. UINT32 alpha = blendPixel >> 24;
  826. if (alpha != 0)
  827. {
  828. UINT32 dstPixel;
  829. if (alpha == 255)
  830. {
  831. dstPixel = blendPixel;
  832. }
  833. else
  834. {
  835. // Dst = Src + (1-Alpha) * Dst
  836. UINT32 multA = 255 - alpha;
  837. UINT32 D1_000000GG = *(s + 1);
  838. UINT32 D2_0000GGGG = D1_000000GG * multA + 0x00800080;
  839. UINT32 D3_000000GG = (D2_0000GGGG & 0xff00ff00) >> 8;
  840. UINT32 D4_0000GG00 = (D2_0000GGGG + D3_000000GG) & 0xFF00FF00;
  841. UINT32 D1_00RR00BB = *(s) | (ULONG) *(s + 2) << 16;
  842. UINT32 D2_RRRRBBBB = D1_00RR00BB * multA + 0x00800080;
  843. UINT32 D3_00RR00BB = (D2_RRRRBBBB & 0xff00ff00) >> 8;
  844. UINT32 D4_00RR00BB = ((D2_RRRRBBBB + D3_00RR00BB) & 0xFF00FF00) >> 8;
  845. dstPixel = (D4_0000GG00 | D4_00RR00BB) + blendPixel;
  846. }
  847. *(d) = (BYTE) (dstPixel);
  848. *(d + 1) = (BYTE) (dstPixel >> 8);
  849. *(d + 2) = (BYTE) (dstPixel >> 16);
  850. }
  851. bl++;
  852. d += 3;
  853. s += 3;
  854. } while (--count != 0);
  855. }
  856. // Blend from sRGB to BGR24, ignoring sRGB's non-linear gamma.
  857. VOID FASTCALL
  858. ScanOperation::Blend_sRGB_24BGR(
  859. VOID *dst,
  860. const VOID *src,
  861. INT count,
  862. const OtherParams *otherParams
  863. )
  864. {
  865. DEFINE_POINTERS(BYTE, BYTE)
  866. DEFINE_BLEND_POINTER(ARGB)
  867. ASSERT(count>0);
  868. do {
  869. UINT32 blendPixel = *bl;
  870. UINT32 alpha = blendPixel >> 24;
  871. if (alpha != 0)
  872. {
  873. UINT32 dstPixel;
  874. if (alpha == 255)
  875. {
  876. dstPixel = blendPixel;
  877. }
  878. else
  879. {
  880. // Dst = Src + (1-Alpha) * Dst
  881. UINT32 multA = 255 - alpha;
  882. UINT32 D1_000000GG = *(s + 1);
  883. UINT32 D2_0000GGGG = D1_000000GG * multA + 0x00800080;
  884. UINT32 D3_000000GG = (D2_0000GGGG & 0xff00ff00) >> 8;
  885. UINT32 D4_0000GG00 = (D2_0000GGGG + D3_000000GG) & 0xFF00FF00;
  886. UINT32 D1_00RR00BB = *(s) | (ULONG) *(s + 2) << 16;
  887. UINT32 D2_RRRRBBBB = D1_00RR00BB * multA + 0x00800080;
  888. UINT32 D3_00RR00BB = (D2_RRRRBBBB & 0xff00ff00) >> 8;
  889. UINT32 D4_00RR00BB = ((D2_RRRRBBBB + D3_00RR00BB) & 0xFF00FF00) >> 8;
  890. dstPixel = (D4_0000GG00 | D4_00RR00BB) + blendPixel;
  891. }
  892. *(d) = (BYTE) (dstPixel >> 16);
  893. *(d + 1) = (BYTE) (dstPixel >> 8);
  894. *(d + 2) = (BYTE) (dstPixel);
  895. }
  896. bl++;
  897. d += 3;
  898. s += 3;
  899. } while (--count != 0);
  900. }
  901. /*
  902. !!![agodfrey]
  903. So we're going to move to standardizing on non-premultiplied alpha.
  904. When we do, the above routines will all have to change - but we may
  905. want to keep the above versions around too.
  906. Below, I've implemented the sRGB and sRGB64 versions for a non-premultiplied
  907. source. Now, these really blend from a non-premultiplied source,
  908. to a pre-multiplied destination. You can see this from the fact that they
  909. are equivalent to combining the above pre-multiplied Blends with an
  910. AlphaMultiply step on the source data.
  911. Since pre-multiplied and non-premultiplied formats are identical for alpha==1,
  912. the functions below work fine when the destination has no alpha (i.e. alpha==1).
  913. Otherwise, we can use them when the destination is in premultiplied format.
  914. If we somehow let the user draw to such a destination, they can use an off-screen
  915. premultiplied buffer to accumulate drawing, and then using a
  916. pre-multiplied blend, draw that to the final destination. This gives them
  917. the same functionality that standardizing on pre-multiplied alpha is supposed
  918. to give.
  919. // Blend sRGB over sRGB, ignoring the non-linear gamma.
  920. VOID FASTCALL
  921. ScanOperation::Blend_sRGB_sRGB(
  922. VOID *dst,
  923. const VOID *src,
  924. INT count,
  925. const OtherParams *otherParams
  926. )
  927. {
  928. DEFINE_POINTERS(ARGB, ARGB)
  929. DEFINE_BLEND_POINTER(ARGB)
  930. ASSERT(count>0);
  931. do {
  932. UINT32 blendPixel = *bl;
  933. UINT32 alpha = blendPixel >> 24;
  934. // If alpha is zero, skip everything, including writing the
  935. // destination pixel. This is needed for the RMW optimization.
  936. if (alpha != 0)
  937. {
  938. UINT32 dstPixel;
  939. if (alpha == 255)
  940. {
  941. dstPixel = blendPixel;
  942. }
  943. else
  944. {
  945. // Dst = Dst * (1-Alpha) + Src * Alpha
  946. dstPixel = *s;
  947. ULONG invalpha = 255 - alpha;
  948. ULONG _D1_00AA00GG = (dstPixel & 0xff00ff00) >> 8;
  949. ULONG _D1_00RR00BB = (dstPixel & 0x00ff00ff);
  950. // For the alpha channel, the result we want is this:
  951. //
  952. // Dst = Dst * (1-Alpha) + Src.
  953. //
  954. // Or equivalently:
  955. //
  956. // Dst = Dst * (1-Alpha) + Alpha.
  957. //
  958. // We want to apply the same operations to the alpha channel as
  959. // we do to the others. So, to get the above result from
  960. //
  961. // Dst = Dst * (1-Alpha) + Src * Alpha
  962. //
  963. // we fake a 'Src' value of 1 (represented by 255).
  964. ULONG _S1_00ff00GG = (blendPixel & 0xff00ff00) >> 8 + 0xff0000;
  965. ULONG _S1_00RR00BB = (blendPixel & 0x00ff00ff);
  966. ULONG _D2_AAAAGGGG = _D1_00AA00GG * invalpha +
  967. _S1_00ff00GG * alpha +
  968. 0x00800080;
  969. ULONG _D2_RRRRBBBB = _D1_00RR00BB * invalpha +
  970. _S1_00RR00BB * alpha +
  971. 0x00800080;
  972. ULONG _D3_00AA00GG = (_D2_AAAAGGGG & 0xff00ff00) >> 8;
  973. ULONG _D3_00RR00BB = (_D2_RRRRBBBB & 0xff00ff00) >> 8;
  974. ULONG _D4_AA00GG00 = (_D2_AAAAGGGG + _D3_00AA00GG) & 0xFF00FF00;
  975. ULONG _D4_00RR00BB = ((_D2_RRRRBBBB + _D3_00RR00BB) & 0xFF00FF00) >> 8;
  976. dstPixel = _D4_AA00GG00 + _D4_00RR00BB;
  977. }
  978. *d = dstPixel;
  979. }
  980. bl++;
  981. s++;
  982. d++;
  983. } while (--count != 0);
  984. }
  985. // Blend from sRGB64 to sRGB64.
  986. VOID FASTCALL
  987. ScanOperation::Blend_sRGB64_sRGB64(
  988. VOID *dst,
  989. const VOID *src,
  990. INT count,
  991. const OtherParams *otherParams
  992. )
  993. {
  994. DEFINE_POINTERS(ARGB64, ARGB64)
  995. DEFINE_BLEND_POINTER(ARGB64)
  996. using namespace sRGB;
  997. while (count--)
  998. {
  999. sRGB64Color blendPixel;
  1000. blendPixel.argb = *bl;
  1001. INT alpha = blendPixel.a;
  1002. // If alpha is zero, skip everything, including writing the
  1003. // destination pixel. This is needed for the RMW optimization.
  1004. if (alpha != 0)
  1005. {
  1006. sRGB64Color dstPixel;
  1007. if (alpha == SRGB_ONE)
  1008. {
  1009. dstPixel.argb = blendPixel.argb;
  1010. }
  1011. else
  1012. {
  1013. // Dst = Dst * (1-Alpha) + Src * Alpha
  1014. dstPixel.argb = *s;
  1015. INT invalpha = SRGB_ONE - alpha;
  1016. dstPixel.r = ((dstPixel.r * invalpha) +
  1017. (blendPixel.r * alpha) +
  1018. SRGB_HALF) >>
  1019. SRGB_FRACTIONBITS;
  1020. dstPixel.g = ((dstPixel.g * invalpha) +
  1021. (blendPixel.g * alpha) +
  1022. SRGB_HALF) >>
  1023. SRGB_FRACTIONBITS;
  1024. dstPixel.b = ((dstPixel.b * invalpha) +
  1025. (blendPixel.b * alpha) +
  1026. SRGB_HALF) >>
  1027. SRGB_FRACTIONBITS;
  1028. dstPixel.a = (((dstPixel.a * invalpha) + SRGB_HALF) >>
  1029. SRGB_FRACTIONBITS) +
  1030. blendPixel.a;
  1031. }
  1032. *d = dstPixel.argb;
  1033. }
  1034. bl++;
  1035. s++;
  1036. d++;
  1037. }
  1038. }
  1039. */