/**************************************************************************\
*
* Copyright (c) 1999-2000  Microsoft Corporation
*
* Module name:
*
*   The "Blend" scan operation.
*
* Abstract:
*
*   See Gdiplus\Specs\ScanOperation.doc for an overview.
*
* Notes:
*
* Revision History:
*
*   12/07/1999 agodfrey
*       Created it.
*
\**************************************************************************/

#include "precomp.hpp"

/**************************************************************************\
*
* Operation Description:
*
*   Blend: Does a SrcOver alpha-blend operation.
*
* Arguments:
*
*   dst         - The destination scan
*   src         - The source scan (usually equal to dst).
*   count       - The length of the scan, in pixels
*   otherParams - Additional data. (We use BlendingScan.)
*
* Return Value:
*
*   None
*
* Notes:
*
*   This is a ternary operation. We take pixels from 'src', blend pixels
*   from 'otherParams->BlendingScan' over them, and write the result to 'dst'.
*
*   Since the formats of the 'dst' and 'src' scans are the same for all
*   the blend functions we implement, the naming is simplified to list just
*   the format of BlendingScan, then the format of 'dst'.
*
*   src and dst may be equal; otherwise, they must point to scans which do
*   not overlap in memory.
*
*   The blend operation adheres to the following rule:
*   "If the blending alpha value is zero, do not write the destination pixel."
*
*   In other words, it is also a 'WriteRMW' operation. This allows us to
*   avoid a separate 'WriteRMW' step in some cases. See SOReadRMW.cpp and
*   SOWriteRMW.cpp.
*
*   The impact of this is that you have to be careful if you want 'blend'
*   to be a true ternary operation. Remember, if a blend pixel
*   is transparent, NOTHING gets written to the corresponding destination
*   pixel. One way to solve this is to make sure that the final operation in
*   your pipeline is a WriteRMW operation.
*
* History:
*
*   04/04/1999 andrewgo
*       Created it.
*   12/07/1999 agodfrey
*       Included the 32bpp blend (moved from Ddi/scan.cpp)
*   01/06/2000 agodfrey
*       Added AndrewGo's code for 565, 555, RGB24 and BGR24. Changed the
*       blends to be 'almost' ternary operations.
*
\**************************************************************************/

// BlendLinear_*: blends an sRGB blending scan over the source scan by
// routing translucent pixels through the sRGB64 helpers.
//
// The scan is consumed as a sequence of runs, classified by the alpha byte
// of the blending scan:
//   - translucent run (isTranslucent): the source and the blend pixels are
//     converted to sRGB64 (the blend pixels go through AlphaDivide,
//     GammaConvert and AlphaMultiply first), blended with
//     Blend_sRGB64_sRGB64 and converted back into 'dst'.
//   - alpha == 0xFF: the blend pixel is copied to 'dst'.
//   - alpha == 0x00: the destination pixel is left untouched.
//
// TempBuffers[0..2] are used as scratch space for a whole translucent run.
// NOTE(review): nothing here bounds nRun against the size of those buffers;
// presumably the caller sizes them for 'count' pixels - confirm.

// 32bpp destination/source.

VOID FASTCALL
ScanOperation::BlendLinear_sRGB_32RGB(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
    int nRun;
    void *buffer0=otherParams->TempBuffers[0];
    void *buffer1=otherParams->TempBuffers[1];
    void *buffer2=otherParams->TempBuffers[2];

    DEFINE_POINTERS(ARGB, ARGB)
    DEFINE_BLEND_POINTER(ARGB)
    using namespace sRGB;

    // Private copy so that BlendingScan can be redirected to a temp buffer
    // without touching the caller's parameters.
    OtherParams otherParams2=*otherParams;

    while (count>0)
    {
        // Find the run of translucent pixels
        nRun=0;
        while (isTranslucent(*((ARGB*)(bl+nRun))))
        {
            nRun++;
            if (nRun==count)
            {
                break;
            }
        }

        if (nRun==0)
        {
            // Opaque pixels: straight copy of the blend pixel.
            while ((count>0) && (((*((DWORD*)bl))>>24)==0xFF))
            {
                *d=*bl;
                count--;
                d++;
                bl++;
                s++;
            }

            // Transparent pixels: skip without writing the destination.
            while ((count>0) && (((*((DWORD*)bl))>>24)==0x00))
            {
                count--;
                d++;
                bl++;
                s++;
            }
        }
        else
        {
            // Source
            GammaConvert_sRGB_sRGB64(buffer1,s,nRun,otherParams);

            // Surface to blend
            AlphaDivide_sRGB(buffer0,bl,nRun,otherParams);
            GammaConvert_sRGB_sRGB64(buffer2,buffer0,nRun,otherParams);
            AlphaMultiply_sRGB64(buffer0,buffer2,nRun,otherParams);

            // Blend to destination.
            // Must blend using the previous result as the bl
            otherParams2.BlendingScan=buffer0;
            Blend_sRGB64_sRGB64(buffer1,buffer1,nRun,&otherParams2);
            GammaConvert_sRGB64_sRGB(d,buffer1,nRun,otherParams);

            count-=nRun;
            d+=nRun;
            bl+=nRun;
            s+=nRun;
        }
    }
}

// 32bpp destination/source; same as BlendLinear_sRGB_32RGB except that the
// sRGB64 blend step calls Blend_sRGB64_sRGB64_MMX.

VOID FASTCALL
ScanOperation::BlendLinear_sRGB_32RGB_MMX(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
    int nRun;
    void *buffer0=otherParams->TempBuffers[0];
    void *buffer1=otherParams->TempBuffers[1];
    void *buffer2=otherParams->TempBuffers[2];

    DEFINE_POINTERS(ARGB, ARGB)
    DEFINE_BLEND_POINTER(ARGB)
    using namespace sRGB;

    OtherParams otherParams2=*otherParams;

    while (count>0)
    {
        // Find the run of translucent pixels
        nRun=0;
        while (isTranslucent(*((ARGB*)(bl+nRun))))
        {
            nRun++;
            if (nRun==count)
            {
                break;
            }
        }

        if (nRun==0)
        {
            // Opaque pixels: straight copy of the blend pixel.
            while ((count>0) && (((*((DWORD*)bl))>>24)==0xFF))
            {
                *d=*bl;
                count--;
                d++;
                bl++;
                s++;
            }

            // Transparent pixels: skip without writing the destination.
            while ((count>0) && (((*((DWORD*)bl))>>24)==0x00))
            {
                count--;
                d++;
                bl++;
                s++;
            }
        }
        else
        {
            // Source
            GammaConvert_sRGB_sRGB64(buffer1,s,nRun,otherParams);

            // Surface to blend
            AlphaDivide_sRGB(buffer0,bl,nRun,otherParams);
            GammaConvert_sRGB_sRGB64(buffer2,buffer0,nRun,otherParams);
            AlphaMultiply_sRGB64(buffer0,buffer2,nRun,otherParams);

            // Blend to destination
            // Must blend using the previous result as the bl
            otherParams2.BlendingScan=buffer0;
            Blend_sRGB64_sRGB64_MMX(buffer1,buffer1,nRun,&otherParams2);
            GammaConvert_sRGB64_sRGB(d,buffer1,nRun,otherParams);

            count-=nRun;
            d+=nRun;
            bl+=nRun;
            s+=nRun;
        }
    }
}

// 16bpp 565 destination/source. Opaque runs are written with
// Dither_sRGB_565; translucent runs are converted with Convert_565_sRGB,
// blended in sRGB64 and written back through Dither_sRGB_565.

VOID FASTCALL
ScanOperation::BlendLinear_sRGB_565(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
    int nRun;
    void *buffer0=otherParams->TempBuffers[0];
    void *buffer1=otherParams->TempBuffers[1];
    void *buffer2=otherParams->TempBuffers[2];

    DEFINE_POINTERS(UINT16,UINT16)
    DEFINE_BLEND_POINTER(ARGB)
    using namespace sRGB;

    OtherParams otherParams2=*otherParams;

    while (count>0)
    {
        // Find the run of translucent pixels
        nRun=0;
        while (isTranslucent(*((ARGB*)(bl+nRun))))
        {
            nRun++;
            if (nRun==count)
            {
                break;
            }
        }

        if (nRun==0)
        {
            // Find the run of opaque pixels (count>0 here, so reading the
            // first pixel is safe; the break bounds every later read).
            while (((*((DWORD*)bl+nRun))>>24)==0xFF)
            {
                nRun++;
                if (nRun==count)
                {
                    break;
                }
            }

            if (nRun>0)
            {
                Dither_sRGB_565(d,bl,nRun,otherParams);

                count-=nRun;
                d+=nRun;
                bl+=nRun;
                s+=nRun;
            }

            // Transparent pixels: skip without writing the destination.
            while ((count>0) && (((*((DWORD*)bl))>>24)==0x00))
            {
                count--;
                d++;
                bl++;
                s++;
            }
        }
        else
        {
            // Source
            Convert_565_sRGB(buffer2,s,nRun,otherParams);
            GammaConvert_sRGB_sRGB64(buffer1,buffer2,nRun,otherParams);

            // Surface to blend
            AlphaDivide_sRGB(buffer0,bl,nRun,otherParams);
            GammaConvert_sRGB_sRGB64(buffer2,buffer0,nRun,otherParams);
            AlphaMultiply_sRGB64(buffer0,buffer2,nRun,otherParams);

            // Blend to destination
            otherParams2.BlendingScan=buffer0;
            Blend_sRGB64_sRGB64(buffer1,buffer1,nRun,&otherParams2);
            GammaConvert_sRGB64_sRGB(buffer2,buffer1,nRun,otherParams);
            Dither_sRGB_565(d,buffer2,nRun,otherParams);

            count-=nRun;
            d+=nRun;
            bl+=nRun;
            s+=nRun;
        }
    }
}

// 16bpp 565 destination/source; same as BlendLinear_sRGB_565 except that it
// calls the _MMX variants of the blend and dither operations.

VOID FASTCALL
ScanOperation::BlendLinear_sRGB_565_MMX(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
    int nRun;
    void *buffer0=otherParams->TempBuffers[0];
    void *buffer1=otherParams->TempBuffers[1];
    void *buffer2=otherParams->TempBuffers[2];

    DEFINE_POINTERS(UINT16,UINT16)
    DEFINE_BLEND_POINTER(ARGB)
    using namespace sRGB;

    OtherParams otherParams2=*otherParams;

    while (count>0)
    {
        // Find the run of translucent pixels
        nRun=0;
        while (isTranslucent(*((ARGB*)(bl+nRun))))
        {
            nRun++;
            if (nRun==count)
            {
                break;
            }
        }

        if (nRun==0)
        {
            // Find the run of opaque pixels
            while (((*((DWORD*)bl+nRun))>>24)==0xFF)
            {
                nRun++;
                if (nRun==count)
                {
                    break;
                }
            }

            if (nRun>0)
            {
                Dither_sRGB_565_MMX(d,bl,nRun,otherParams);

                count-=nRun;
                d+=nRun;
                bl+=nRun;
                s+=nRun;
            }

            // Transparent pixels: skip without writing the destination.
            while ((count>0) && (((*((DWORD*)bl))>>24)==0x00))
            {
                count--;
                d++;
                bl++;
                s++;
            }
        }
        else
        {
            // Source
            Convert_565_sRGB(buffer2,s,nRun,otherParams);
            GammaConvert_sRGB_sRGB64(buffer1,buffer2,nRun,otherParams);

            // Surface to blend
            AlphaDivide_sRGB(buffer0,bl,nRun,otherParams);
            GammaConvert_sRGB_sRGB64(buffer2,buffer0,nRun,otherParams);
            AlphaMultiply_sRGB64(buffer0,buffer2,nRun,otherParams);

            // Blend to destination
            otherParams2.BlendingScan=buffer0;
            Blend_sRGB64_sRGB64_MMX(buffer1,buffer1,nRun,&otherParams2);
            GammaConvert_sRGB64_sRGB(buffer2,buffer1,nRun,otherParams);
            Dither_sRGB_565_MMX(d,buffer2,nRun,otherParams);

            count-=nRun;
            d+=nRun;
            bl+=nRun;
            s+=nRun;
        }
    }
}

// 16bpp 555 destination/source. Opaque runs are written with
// Dither_sRGB_555; translucent runs are converted with Convert_555_sRGB,
// blended in sRGB64 and written back through Dither_sRGB_555.

VOID FASTCALL
ScanOperation::BlendLinear_sRGB_555(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
    int nRun;
    void *buffer0=otherParams->TempBuffers[0];
    void *buffer1=otherParams->TempBuffers[1];
    void *buffer2=otherParams->TempBuffers[2];

    DEFINE_POINTERS(UINT16,UINT16)
    DEFINE_BLEND_POINTER(ARGB)
    using namespace sRGB;

    OtherParams otherParams2=*otherParams;

    while (count>0)
    {
        // Find the run of translucent pixels
        nRun=0;
        while (isTranslucent(*((ARGB*)(bl+nRun))))
        {
            nRun++;
            if (nRun==count)
            {
                break;
            }
        }

        if (nRun==0)
        {
            // Find the run of opaque pixels
            while (((*((DWORD*)bl+nRun))>>24)==0xFF)
            {
                nRun++;
                if (nRun==count)
                {
                    break;
                }
            }

            if (nRun>0)
            {
                Dither_sRGB_555(d,bl,nRun,otherParams);

                count-=nRun;
                d+=nRun;
                bl+=nRun;
                s+=nRun;
            }

            // Transparent pixels: skip without writing the destination.
            while ((count>0) && (((*((DWORD*)bl))>>24)==0x00))
            {
                count--;
                d++;
                bl++;
                s++;
            }
        }
        else
        {
            // Source
            Convert_555_sRGB(buffer2,s,nRun,otherParams);
            GammaConvert_sRGB_sRGB64(buffer1,buffer2,nRun,otherParams);

            // Surface to blend
            AlphaDivide_sRGB(buffer0,bl,nRun,otherParams);
            GammaConvert_sRGB_sRGB64(buffer2,buffer0,nRun,otherParams);
            AlphaMultiply_sRGB64(buffer0,buffer2,nRun,otherParams);

            // Blend to destination
            otherParams2.BlendingScan=buffer0;
            Blend_sRGB64_sRGB64(buffer1,buffer1,nRun,&otherParams2);
            GammaConvert_sRGB64_sRGB(buffer2,buffer1,nRun,otherParams);
            Dither_sRGB_555(d,buffer2,nRun,otherParams);

            count-=nRun;
            d+=nRun;
            bl+=nRun;
            s+=nRun;
        }
    }
}

// 16bpp 555 destination/source; same as BlendLinear_sRGB_555 except that it
// calls the _MMX variants of the blend and dither operations.

VOID FASTCALL
ScanOperation::BlendLinear_sRGB_555_MMX(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
    int nRun;
    void *buffer0=otherParams->TempBuffers[0];
    void *buffer1=otherParams->TempBuffers[1];
    void *buffer2=otherParams->TempBuffers[2];

    DEFINE_POINTERS(UINT16,UINT16)
    DEFINE_BLEND_POINTER(ARGB)
    using namespace sRGB;

    OtherParams otherParams2=*otherParams;

    while (count>0)
    {
        // Find the run of translucent pixels
        nRun=0;
        while (isTranslucent(*((ARGB*)(bl+nRun))))
        {
            nRun++;
            if (nRun==count)
            {
                break;
            }
        }

        if (nRun==0)
        {
            // Find the run of opaque pixels
            while (((*((DWORD*)bl+nRun))>>24)==0xFF)
            {
                nRun++;
                if (nRun==count)
                {
                    break;
                }
            }

            if (nRun>0)
            {
                Dither_sRGB_555_MMX(d,bl,nRun,otherParams);

                count-=nRun;
                d+=nRun;
                bl+=nRun;
                s+=nRun;
            }

            // Transparent pixels: skip without writing the destination.
            while ((count>0) && (((*((DWORD*)bl))>>24)==0x00))
            {
                count--;
                d++;
                bl++;
                s++;
            }
        }
        else
        {
            // Source
            Convert_555_sRGB(buffer2,s,nRun,otherParams);
            GammaConvert_sRGB_sRGB64(buffer1,buffer2,nRun,otherParams);

            // Surface to blend
            AlphaDivide_sRGB(buffer0,bl,nRun,otherParams);
            GammaConvert_sRGB_sRGB64(buffer2,buffer0,nRun,otherParams);
            AlphaMultiply_sRGB64(buffer0,buffer2,nRun,otherParams);

            // Blend to destination
            otherParams2.BlendingScan=buffer0;
            Blend_sRGB64_sRGB64_MMX(buffer1,buffer1,nRun,&otherParams2);
            GammaConvert_sRGB64_sRGB(buffer2,buffer1,nRun,otherParams);
            Dither_sRGB_555_MMX(d,buffer2,nRun,otherParams);

            count-=nRun;
            d+=nRun;
            bl+=nRun;
            s+=nRun;
        }
    }
}

// Blend sRGB over sRGB, ignoring the non-linear gamma.
//
// Requires count > 0 (asserted); the loop always processes at least one
// pixel.

VOID FASTCALL
ScanOperation::Blend_sRGB_sRGB(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
    DEFINE_POINTERS(ARGB, ARGB)
    DEFINE_BLEND_POINTER(ARGB)

    ASSERT(count>0);

    UINT32 dstPixel;
    do {
        UINT32 blendPixel = *bl;
        UINT32 alpha = blendPixel >> 24;

        // If alpha is zero, skip everything, including writing the
        // destination pixel. This is needed for the RMW optimization.

        if (alpha != 0)
        {
            if (alpha == 255)
            {
                dstPixel = blendPixel;
            }
            else
            {
                //
                // Dst = B + (1-Alpha) * S
                //
                // Two channels are processed per 32-bit word. For each
                // 8-bit channel x, (t + (t >> 8)) >> 8 with
                // t = x * Multa + 0x80 stands in for x * Multa / 255.
                //

                dstPixel = *s;

                ULONG Multa = 255 - alpha;
                ULONG _D1_00AA00GG = (dstPixel & 0xff00ff00) >> 8;
                ULONG _D1_00RR00BB = (dstPixel & 0x00ff00ff);

                ULONG _D2_AAAAGGGG = _D1_00AA00GG * Multa + 0x00800080;
                ULONG _D2_RRRRBBBB = _D1_00RR00BB * Multa + 0x00800080;

                ULONG _D3_00AA00GG = (_D2_AAAAGGGG & 0xff00ff00) >> 8;
                ULONG _D3_00RR00BB = (_D2_RRRRBBBB & 0xff00ff00) >> 8;

                ULONG _D4_AA00GG00 = (_D2_AAAAGGGG + _D3_00AA00GG) & 0xFF00FF00;
                ULONG _D4_00RR00BB = ((_D2_RRRRBBBB + _D3_00RR00BB) & 0xFF00FF00) >> 8;

                dstPixel = blendPixel + _D4_AA00GG00 + _D4_00RR00BB;
            }

            *d = dstPixel;
        }

        bl++;
        s++;
        d++;
    } while (--count != 0);
}

// Blend sRGB over sRGB, ignoring the non-linear gamma (MMX version).
//
// Like Blend_sRGB_sRGB, the loop always processes at least one pixel.
// On builds without _X86_ the portable Blend_sRGB_sRGB is used instead, so
// the operation is never silently dropped.

VOID FASTCALL
ScanOperation::Blend_sRGB_sRGB_MMX(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
#if defined(_X86_)
    using namespace sRGB;

    // The scans hold 32bpp ARGB pixels (the loop below steps by 4 bytes).
    DEFINE_POINTERS(ARGB, ARGB)
    const void *pbl=otherParams->BlendingScan;

    // Rounding term (0x80) for each of the four 16-bit lanes.
    static ULONGLONG halfMask=0x0080008000800080;

    _asm
    {
        mov        ecx,count                   ; ecx=pixel counter
        mov        ebx,pbl                     ; ebx=blend pixel pointer
        mov        esi,s                       ; esi=source pixel pointer
        mov        edi,d                       ; edi=dest pixel pointer
        pxor       mm7,mm7                     ; mm7=[0|0|0|0]
        movq       mm3,halfMask

main_loop:
        mov        eax,DWORD ptr [ebx]
        mov        edx,eax                     ; eax=blend pixel
        shr        edx,24                      ; edx=alpha
        cmp        edx,0                       ; For some reason, doing a jz right after a shr stalls
        jz         alpha_blend_done            ; if alpha=0, no blending

        cmp        edx,0xFF
        jne        alpha_blend
        mov        [edi],eax                   ; if alpha=0xFF, copy bl to dest
        jmp        alpha_blend_done

alpha_blend:
        movd       mm4,eax

        mov        eax,[esi]                   ; eax=source
        movd       mm0,eax                     ; mm0=[0|0|AR|GB]
        punpcklbw  mm0,mm7                     ; mm0=[A|R|G|B]

        xor        edx,0xFF                    ; C=255-Alpha
        movd       mm2,edx                     ; mm2=[0|0|0|C]
        punpcklwd  mm2,mm2                     ; mm2=[0|0|C|C]
        punpckldq  mm2,mm2                     ; mm2=[C|C|C|C]

        pmullw     mm0,mm2
        paddw      mm0,mm3                     ; mm0=[AA|RR|GG|BB]
        movq       mm2,mm0                     ; mm2=[AA|RR|GG|BB]

        psrlw      mm0,8                       ; mm0=[A|R|G|B]
        paddw      mm0,mm2                     ; mm0=[AA|RR|GG|BB]
        psrlw      mm0,8                       ; mm0=[A|R|G|B]

        packuswb   mm0,mm0                     ; mm0=[AR|GB|AR|GB]
        paddd      mm0,mm4                     ; Add the blend pixel
        movd       edx,mm0                     ; edx=[ARGB] -> result pixel
        mov        [edi],edx

alpha_blend_done:
        add        edi,4
        add        esi,4
        add        ebx,4

        dec        ecx
        jg         main_loop

        emms
    }
#else
    // No inline MMX on this architecture: use the portable implementation.
    Blend_sRGB_sRGB(dst, src, count, otherParams);
#endif
}

// Blend from sRGB64 to sRGB64.
//
// A count of zero or less processes no pixels.

VOID FASTCALL
ScanOperation::Blend_sRGB64_sRGB64(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
    DEFINE_POINTERS(ARGB64, ARGB64)
    DEFINE_BLEND_POINTER(ARGB64)
    using namespace sRGB;

    // 'count-- > 0' rather than 'count--' so that a negative count cannot
    // walk off the end of the scans.
    while (count-- > 0)
    {
        sRGB64Color blendPixel;
        blendPixel.argb = *bl;
        INT16 alpha = blendPixel.a;

        // If alpha is zero, skip everything, including writing the
        // destination pixel. This is needed for the RMW optimization.

        if (alpha != 0)
        {
            sRGB64Color dstPixel;

            if (alpha == SRGB_ONE)
            {
                dstPixel.argb = blendPixel.argb;
            }
            else
            {
                //
                // Dst = B + (1-Alpha) * S
                //

                dstPixel.argb = *s;

                INT Multa = SRGB_ONE - alpha;

                dstPixel.r = ((dstPixel.r * Multa + SRGB_HALF) >> SRGB_FRACTIONBITS) + blendPixel.r;
                dstPixel.g = ((dstPixel.g * Multa + SRGB_HALF) >> SRGB_FRACTIONBITS) + blendPixel.g;
                dstPixel.b = ((dstPixel.b * Multa + SRGB_HALF) >> SRGB_FRACTIONBITS) + blendPixel.b;
                dstPixel.a = ((dstPixel.a * Multa + SRGB_HALF) >> SRGB_FRACTIONBITS) + blendPixel.a;
            }

            *d = dstPixel.argb;
        }

        bl++;
        s++;
        d++;
    }
}

// Blend from sRGB64 to sRGB64 MMX.
//
// The loop always processes at least one pixel, so callers must pass
// count > 0. On builds without _X86_ the portable Blend_sRGB64_sRGB64 is
// used instead, so the operation is never silently dropped.

VOID FASTCALL
ScanOperation::Blend_sRGB64_sRGB64_MMX(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
#if defined(_X86_)
    using namespace sRGB;
    DEFINE_POINTERS(ARGB64, ARGB64)
    const void *pbl=otherParams->BlendingScan;

    // Rounding term for each of the four 16-bit lanes.
    // NOTE(review): 0x1000 presumably equals SRGB_HALF - confirm against
    // the sRGB namespace constants.
    static ULONGLONG ullSRGBHalfMask=0x1000100010001000;

    _asm
    {
        mov        ecx,count                   ; ecx=pixel counter
        mov        ebx,pbl                     ; ebx=blend pixel pointer
        mov        esi,s                       ; esi=source pixel pointer
        mov        edi,d                       ; edi=dest pixel pointer
        movq       mm4,ullSRGBHalfMask         ; mm4=mask with srgb half

main_loop:
        movsx      eax,word ptr [ebx+3*2]      ; eax=alpha
        or         eax,eax                     ; eax==0?
        jz         alpha_blend_done            ; if alpha=0, no blending

        movq       mm0,[ebx]                   ; mm0=blend pixel
        cmp        eax,SRGB_ONE                ; if alpha=SRGB_ONE, dest=blend
        jne        alpha_blend
        movq       [edi],mm0                   ; copy blend pixel to dest
        jmp        alpha_blend_done

alpha_blend:
        ; Get SRGB_ONE-Alpha
        neg        eax
        add        eax,SRGB_ONE                ; C=SRGB_ONE-Alpha
        movd       mm2, eax                    ; mm2=[0|0|0|C]
        punpcklwd  mm2, mm2
        punpckldq  mm2, mm2                    ; mm2=[C|C|C|C]

        ; Blend pixels
        movq       mm1,[esi]                   ; mm1=[A|R|G|B] source pixel
        movq       mm3,mm1                     ; mm3=[A|R|G|B] source pixel
        pmullw     mm1,mm2                     ; low word of source*C
        paddw      mm1,mm4                     ; add an srgb half for rounding
        psrlw      mm1,SRGB_FRACTIONBITS       ; truncate low SRGB_FRACTIONBITS
        pmulhw     mm3,mm2                     ; high word of source*C
        psllw      mm3,SRGB_INTEGERBITS        ; truncate high SRGB_INTEGERBITS
        por        mm1,mm3                     ; mm1=[A|R|G|B]
        paddw      mm1,mm0                     ; add blend pixel
        movq       [edi],mm1                   ; copy result to dest

alpha_blend_done:
        add        edi,8
        add        esi,8
        add        ebx,8

        dec        ecx
        jg         main_loop
        emms
    }
#else
    // No inline MMX on this architecture: use the portable implementation.
    Blend_sRGB64_sRGB64(dst, src, count, otherParams);
#endif
}

// Blend from sRGB to 16bpp 565, ignoring sRGB's non-linear gamma.
//
// Only the top 5 bits of the blend alpha are used. Requires count > 0
// (asserted).

VOID FASTCALL
ScanOperation::Blend_sRGB_565(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
    DEFINE_POINTERS(UINT16, UINT16)
    DEFINE_BLEND_POINTER(ARGB)

    ASSERT(count>0);

    do {
        UINT32 blendPixel = *bl;
        UINT32 alpha = blendPixel >> 27;

        if (alpha != 0)
        {
            UINT32 dstPixel;

            // Blend: B + [ (31 - bA) * S ] / 31, with bA the 5-bit alpha.

            // First, convert the blend pixel from 32bpp BGRA to
            // 5-6-5 16bpp, pre-multiplied.
            //
            // Note: No rounding needs to be done on this conversion!

            blendPixel = ((blendPixel >> 8) & 0xf800) |
                         ((blendPixel >> 5) & 0x07e0) |
                         ((blendPixel >> 3) & 0x001f);

            if (alpha == 31)
            {
                dstPixel = blendPixel;
            }
            else
            {
                dstPixel = (UINT32) *s;

                UINT32 multA = 31 - alpha;

                // Red (bits 11-15) and blue (bits 0-4) share one multiply.
                UINT32 D1_00rr00bb = (dstPixel & 0xf81f);
                UINT32 D2_rrrrbbbb = D1_00rr00bb * multA + 0x00008010;
                UINT32 D3_00rr00bb = (D2_rrrrbbbb & 0x001f03e0) >> 5;
                UINT32 D4_rrxxbbxx = ((D2_rrrrbbbb + D3_00rr00bb) >> 5) & 0xf81f;

                // Green has 6 bits, so the 5-bit multiplier is doubled.
                UINT32 D1_000000gg = (dstPixel & 0x7e0) >> 5;
                UINT32 D2_0000gggg = D1_000000gg * 2 * multA + 0x00000020;
                UINT32 D3_000000gg = (D2_0000gggg & 0x00000fc0) >> 6;
                UINT32 D4_0000ggxx = ((D2_0000gggg + D3_000000gg) & 0x0fc0) >> 1;

                dstPixel = (UINT16) ((D4_rrxxbbxx | D4_0000ggxx) + blendPixel);
            }

            *d = (UINT16) dstPixel;
        }

        bl++;
        s++;
        d++;
    } while (--count != 0);
}

// Blend from sRGB to 16bpp 555, ignoring sRGB's non-linear gamma.
//
// Only the top 5 bits of the blend alpha are used. Requires count > 0
// (asserted).

VOID FASTCALL
ScanOperation::Blend_sRGB_555(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
    DEFINE_POINTERS(UINT16, UINT16)
    DEFINE_BLEND_POINTER(ARGB)

    ASSERT(count>0);

    do {
        UINT32 blendPixel = *bl;
        UINT32 alpha = blendPixel >> 27;

        if (alpha != 0)
        {
            UINT32 dstPixel;

            // Blend: B + [ (31 - bA) * S ] / 31, with bA the 5-bit alpha.

            // First, convert the blend pixel from 32bpp BGRA to
            // 5-5-5 16bpp, pre-multiplied.
            //
            // Note: No rounding needs to be done on this conversion!

            blendPixel = ((blendPixel & 0x00f80000) >> 9) |
                         ((blendPixel & 0x0000f800) >> 6) |
                         ((blendPixel & 0x000000f8) >> 3);

            if (alpha == 31)
            {
                dstPixel = blendPixel;
            }
            else
            {
                dstPixel = (UINT32) *s;

                UINT32 multA = 31 - alpha;

                // Red (bits 10-14) and blue (bits 0-4) share one multiply.
                UINT32 D1_00rr00bb = (dstPixel & 0x7c1f);
                UINT32 D2_rrrrbbbb = D1_00rr00bb * multA + 0x00004010;
                UINT32 D3_00rr00bb = (D2_rrrrbbbb & 0x000f83e0) >> 5;
                UINT32 D4_rrxxbbxx = ((D2_rrrrbbbb + D3_00rr00bb) >> 5) & 0x7c1f;

                UINT32 D1_000000gg = (dstPixel & 0x3e0) >> 5;
                UINT32 D2_0000gggg = D1_000000gg * multA + 0x00000010;
                UINT32 D3_000000gg = (D2_0000gggg & 0x000003e0) >> 5;
                UINT32 D4_0000ggxx = (D2_0000gggg + D3_000000gg) & 0x03e0;

                dstPixel = (UINT16) ((D4_rrxxbbxx | D4_0000ggxx) + blendPixel);
            }

            *d = (UINT16) dstPixel;
        }

        bl++;
        s++;
        d++;
    } while (--count != 0);
}

// Blend from sRGB to RGB24, ignoring sRGB's non-linear gamma.
//
// The destination stores each pixel as the bytes B, G, R (the low byte of
// the result is written first). Requires count > 0 (asserted).

VOID FASTCALL
ScanOperation::Blend_sRGB_24(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
    DEFINE_POINTERS(BYTE, BYTE)
    DEFINE_BLEND_POINTER(ARGB)

    ASSERT(count>0);

    do {

        // Fast path: when the destination is DWORD-aligned, pack four
        // fully opaque blend pixels (16 bytes) into three DWORD writes
        // (12 bytes), dropping the alpha bytes.

        if (((UINT_PTR) d & 0x3) == 0)
        {
            while (count >= 4)
            {
                BYTE *bb = (BYTE *) bl;

                if ((bb[3] & bb[7] & bb[11] & bb[15]) != 0xFF)
                {
                    break;
                }

                ((UINT32 *) d)[0] = (bb[4] << 24)  | (bb[2] << 16)  | (bb[1] << 8)  | bb[0];
                ((UINT32 *) d)[1] = (bb[9] << 24)  | (bb[8] << 16)  | (bb[6] << 8)  | bb[5];
                ((UINT32 *) d)[2] = (bb[14] << 24) | (bb[13] << 16) | (bb[12] << 8) | bb[10];

                count -= 4;
                bl += 4;
                d += 12;
                s += 12;
            }
        }

        if (count == 0)
        {
            break;
        }

        UINT32 blendPixel = *bl;
        UINT32 alpha = blendPixel >> 24;

        if (alpha != 0)
        {
            UINT32 dstPixel;

            if (alpha == 255)
            {
                dstPixel = blendPixel;
            }
            else
            {
                // Dst = B + (1-Alpha) * S

                UINT32 multA = 255 - alpha;

                UINT32 D1_000000GG = *(s + 1);
                UINT32 D2_0000GGGG = D1_000000GG * multA + 0x00800080;
                UINT32 D3_000000GG = (D2_0000GGGG & 0xff00ff00) >> 8;
                UINT32 D4_0000GG00 = (D2_0000GGGG + D3_000000GG) & 0xFF00FF00;

                UINT32 D1_00RR00BB = *(s) | (ULONG) *(s + 2) << 16;
                UINT32 D2_RRRRBBBB = D1_00RR00BB * multA + 0x00800080;
                UINT32 D3_00RR00BB = (D2_RRRRBBBB & 0xff00ff00) >> 8;
                UINT32 D4_00RR00BB = ((D2_RRRRBBBB + D3_00RR00BB) & 0xFF00FF00) >> 8;

                dstPixel = (D4_0000GG00 | D4_00RR00BB) + blendPixel;
            }

            *(d)     = (BYTE) (dstPixel);
            *(d + 1) = (BYTE) (dstPixel >> 8);
            *(d + 2) = (BYTE) (dstPixel >> 16);
        }

        bl++;
        d += 3;
        s += 3;
    } while (--count != 0);
}

// Blend from sRGB to BGR24, ignoring sRGB's non-linear gamma.
//
// The destination stores each pixel as the bytes R, G, B (bits 16-23 of
// the result are written first). Requires count > 0 (asserted).

VOID FASTCALL
ScanOperation::Blend_sRGB_24BGR(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
    DEFINE_POINTERS(BYTE, BYTE)
    DEFINE_BLEND_POINTER(ARGB)

    ASSERT(count>0);

    do {
        UINT32 blendPixel = *bl;
        UINT32 alpha = blendPixel >> 24;

        if (alpha != 0)
        {
            UINT32 dstPixel;

            if (alpha == 255)
            {
                dstPixel = blendPixel;
            }
            else
            {
                // Dst = B + (1-Alpha) * S

                UINT32 multA = 255 - alpha;

                UINT32 D1_000000GG = *(s + 1);
                UINT32 D2_0000GGGG = D1_000000GG * multA + 0x00800080;
                UINT32 D3_000000GG = (D2_0000GGGG & 0xff00ff00) >> 8;
                UINT32 D4_0000GG00 = (D2_0000GGGG + D3_000000GG) & 0xFF00FF00;

                // The source has the same byte order as the destination
                // written below: byte 0 is red and byte 2 is blue. Red
                // must therefore go to bits 16-23 and blue to bits 0-7 so
                // that they line up with the ARGB blend pixel.

                UINT32 D1_00RR00BB = *(s + 2) | (ULONG) *(s) << 16;
                UINT32 D2_RRRRBBBB = D1_00RR00BB * multA + 0x00800080;
                UINT32 D3_00RR00BB = (D2_RRRRBBBB & 0xff00ff00) >> 8;
                UINT32 D4_00RR00BB = ((D2_RRRRBBBB + D3_00RR00BB) & 0xFF00FF00) >> 8;

                dstPixel = (D4_0000GG00 | D4_00RR00BB) + blendPixel;
            }

            *(d)     = (BYTE) (dstPixel >> 16);
            *(d + 1) = (BYTE) (dstPixel >> 8);
            *(d + 2) = (BYTE) (dstPixel);
        }

        bl++;
        d += 3;
        s += 3;
    } while (--count != 0);
}

/*

!!![agodfrey]
So we're going to move to standardizing on non-premultiplied alpha.
When we do, the above routines will all have to change - but we may
want to keep the above versions around too.

Below, I've implemented the sRGB and sRGB64 versions for a non-premultiplied
source. Now, these really blend from a non-premultiplied source,
to a pre-multiplied destination. You can see this from the fact that they
are equivalent to combining the above pre-multiplied Blends with an
AlphaMultiply step on the source data.

Since pre-multiplied and non-premultiplied formats are identical for alpha==1,
the functions below work fine when the destination has no alpha (i.e.
alpha==1).

Otherwise, we can use them when the destination is in premultiplied format.
If we somehow let the user draw to such a destination, they can use an
off-screen premultiplied buffer to accumulate drawing, and then using a
pre-multiplied blend, draw that to the final destination. This gives them
the same functionality that standardizing on pre-multiplied alpha is
supposed to give.

// Blend sRGB over sRGB, ignoring the non-linear gamma.

VOID FASTCALL
ScanOperation::Blend_sRGB_sRGB(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
    DEFINE_POINTERS(ARGB, ARGB)
    DEFINE_BLEND_POINTER(ARGB)

    ASSERT(count>0);

    do {
        UINT32 blendPixel = *bl;
        UINT32 alpha = blendPixel >> 24;

        // If alpha is zero, skip everything, including writing the
        // destination pixel. This is needed for the RMW optimization.

        if (alpha != 0)
        {
            UINT32 dstPixel;

            if (alpha == 255)
            {
                dstPixel = blendPixel;
            }
            else
            {
                // Dst = Dst * (1-Alpha) + Src * Alpha

                dstPixel = *s;

                ULONG invalpha = 255 - alpha;

                ULONG _D1_00AA00GG = (dstPixel & 0xff00ff00) >> 8;
                ULONG _D1_00RR00BB = (dstPixel & 0x00ff00ff);

                // For the alpha channel, the result we want is this:
                //
                //   Dst = Dst * (1-Alpha) + Src.
                //
                // Or equivalently:
                //
                //   Dst = Dst * (1-Alpha) + Alpha.
                //
                // We want to apply the same operations to the alpha channel as
                // we do to the others. So, to get the above result from
                //
                //   Dst = Dst * (1-Alpha) + Src * Alpha
                //
                // we fake a 'Src' value of 1 (represented by 255).
                //
                // [review] The draft read '>> 8 + 0xff0000', which parses as
                // '>> (8 + 0xff0000)'. The shift is parenthesized and the
                // alpha byte is forced to 0xff with an OR, as described above.

                ULONG _S1_00ff00GG = ((blendPixel & 0xff00ff00) >> 8) | 0xff0000;
                ULONG _S1_00RR00BB = (blendPixel & 0x00ff00ff);

                ULONG _D2_AAAAGGGG = _D1_00AA00GG * invalpha +
                                     _S1_00ff00GG * alpha +
                                     0x00800080;
                ULONG _D2_RRRRBBBB = _D1_00RR00BB * invalpha +
                                     _S1_00RR00BB * alpha +
                                     0x00800080;

                ULONG _D3_00AA00GG = (_D2_AAAAGGGG & 0xff00ff00) >> 8;
                ULONG _D3_00RR00BB = (_D2_RRRRBBBB & 0xff00ff00) >> 8;

                ULONG _D4_AA00GG00 = (_D2_AAAAGGGG + _D3_00AA00GG) & 0xFF00FF00;
                ULONG _D4_00RR00BB = ((_D2_RRRRBBBB + _D3_00RR00BB) & 0xFF00FF00) >> 8;

                dstPixel = _D4_AA00GG00 + _D4_00RR00BB;
            }

            *d = dstPixel;
        }

        bl++;
        s++;
        d++;
    } while (--count != 0);
}

// Blend from sRGB64 to sRGB64.

VOID FASTCALL
ScanOperation::Blend_sRGB64_sRGB64(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
    DEFINE_POINTERS(ARGB64, ARGB64)
    DEFINE_BLEND_POINTER(ARGB64)
    using namespace sRGB;

    while (count--)
    {
        sRGB64Color blendPixel;
        blendPixel.argb = *bl;
        INT alpha = blendPixel.a;

        // If alpha is zero, skip everything, including writing the
        // destination pixel. This is needed for the RMW optimization.

        if (alpha != 0)
        {
            sRGB64Color dstPixel;

            if (alpha == SRGB_ONE)
            {
                dstPixel.argb = blendPixel.argb;
            }
            else
            {
                // Dst = Dst * (1-Alpha) + Src * Alpha

                dstPixel.argb = *s;

                INT invalpha = SRGB_ONE - alpha;

                dstPixel.r = ((dstPixel.r * invalpha) +
                              (blendPixel.r * alpha) +
                              SRGB_HALF) >>
                              SRGB_FRACTIONBITS;
                dstPixel.g = ((dstPixel.g * invalpha) +
                              (blendPixel.g * alpha) +
                              SRGB_HALF) >>
                              SRGB_FRACTIONBITS;
                dstPixel.b = ((dstPixel.b * invalpha) +
                              (blendPixel.b * alpha) +
                              SRGB_HALF) >>
                              SRGB_FRACTIONBITS;
                dstPixel.a = (((dstPixel.a * invalpha) + SRGB_HALF) >>
                              SRGB_FRACTIONBITS) +
                              blendPixel.a;
            }

            *d = dstPixel.argb;
        }

        bl++;
        s++;
        d++;
    }
}

*/