You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1232 lines
36 KiB
1232 lines
36 KiB
/**************************************************************************\
|
|
*
|
|
* Copyright (c) 1999-2000 Microsoft Corporation
|
|
*
|
|
* Module name:
|
|
*
|
|
* The "Blend" scan operation.
|
|
*
|
|
* Abstract:
|
|
*
|
|
* See Gdiplus\Specs\ScanOperation.doc for an overview.
|
|
*
|
|
* Notes:
|
|
*
|
|
* Revision History:
|
|
*
|
|
* 12/07/1999 agodfrey
|
|
* Created it.
|
|
*
|
|
\**************************************************************************/
|
|
|
|
#include "precomp.hpp"
|
|
|
|
/**************************************************************************\
|
|
*
|
|
* Operation Description:
|
|
*
|
|
* Blend: Does a SrcOver alpha-blend operation.
|
|
*
|
|
* Arguments:
|
|
*
|
|
* dst - The destination scan
|
|
* src - The source scan (usually equal to dst).
|
|
* count - The length of the scan, in pixels
|
|
* otherParams - Additional data. (We use BlendingScan.)
|
|
*
|
|
* Return Value:
|
|
*
|
|
* None
|
|
*
|
|
* Notes:
|
|
*
|
|
* This is a ternary operation. We take pixels from 'src', blend pixels
|
|
* from 'otherParams->BlendingScan' over them, and write the result to 'dst'.
|
|
*
|
|
* Since the formats of the 'dst' and 'src' scans are the same for all
|
|
* the blend functions we implement, the naming is simplified to list just
|
|
* the format of BlendingScan, then the format of 'dst'.
|
|
*
|
|
* src and dst may be equal; otherwise, they must point to scans which do
|
|
* not overlap in memory.
|
|
*
|
|
* The blend operation adheres to the following rule:
|
|
* "If the blending alpha value is zero, do not write the destination pixel."
|
|
*
|
|
* In other words, it is also a 'WriteRMW' operation. This allows us to
|
|
* avoid a separate 'WriteRMW' step in some cases. See SOReadRMW.cpp and
|
|
* SOWriteRMW.cpp.
|
|
*
|
|
* The impact of this is that you have to be careful if you want 'blend'
|
|
* to be a true ternary operation. Remember, if a blend pixel
|
|
* is transparent, NOTHING gets written to the corresponding destination
|
|
* pixel. One way to solve this is to make sure that the final operation in
|
|
* your pipeline is a WriteRMW operation.
|
|
*
|
|
* History:
|
|
*
|
|
* 04/04/1999 andrewgo
|
|
* Created it.
|
|
* 12/07/1999 agodfrey
|
|
* Included the 32bpp blend (moved from Ddi/scan.cpp)
|
|
* 01/06/2000 agodfrey
|
|
* Added AndrewGo's code for 565, 555, RGB24 and BGR24. Changed the
|
|
* blends to be 'almost' ternary operations.
|
|
*
|
|
\**************************************************************************/
|
|
|
|
|
|
// BlendLinear_sRGB_32RGB
//
// Blends the 32bpp ARGB scan 'otherParams->BlendingScan' over 'src' and
// writes the result to 'dst'. Runs of blend pixels for which
// isTranslucent() holds are pushed through the sRGB64 helpers
// (GammaConvert / AlphaDivide / AlphaMultiply / Blend_sRGB64_sRGB64);
// pixels with alpha 0xFF are copied directly, and pixels with alpha 0x00
// leave the destination untouched.
//
//   dst, src    - destination and source scans (32bpp)
//   count       - number of pixels; the loop does nothing when count <= 0
//   otherParams - supplies BlendingScan and three scratch TempBuffers

VOID FASTCALL
ScanOperation::BlendLinear_sRGB_32RGB(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
    DEFINE_POINTERS(ARGB, ARGB)
    DEFINE_BLEND_POINTER(ARGB)
    using namespace sRGB;

    void *blendBuf = otherParams->TempBuffers[0];
    void *accumBuf = otherParams->TempBuffers[1];
    void *tempBuf  = otherParams->TempBuffers[2];

    // Private copy of the parameters: the sRGB64 blend must take its
    // blending scan from the processed run held in blendBuf.
    OtherParams runParams = *otherParams;
    runParams.BlendingScan = blendBuf;

    while (count > 0)
    {
        // Length of the translucent run starting at the cursor.
        int run = 0;
        while ((run < count) && isTranslucent(bl[run]))
        {
            ++run;
        }

        if (run != 0)
        {
            // Source run -> sRGB64.
            GammaConvert_sRGB_sRGB64(accumBuf, s, run, otherParams);

            // Blend run: AlphaDivide -> sRGB64 -> AlphaMultiply.
            AlphaDivide_sRGB(blendBuf, bl, run, otherParams);
            GammaConvert_sRGB_sRGB64(tempBuf, blendBuf, run, otherParams);
            AlphaMultiply_sRGB64(blendBuf, tempBuf, run, otherParams);

            // Blend in place in accumBuf, then convert into the destination.
            Blend_sRGB64_sRGB64(accumBuf, accumBuf, run, &runParams);
            GammaConvert_sRGB64_sRGB(d, accumBuf, run, otherParams);

            count -= run;
            d += run;
            bl += run;
            s += run;
            continue;
        }

        // Alpha 0xFF: the blend pixel replaces the destination pixel.
        for (; (count > 0) && ((((DWORD) *bl) >> 24) == 0xFF); --count)
        {
            *d++ = *bl++;
            s++;
        }

        // Alpha 0x00: step over the pixel without writing it.
        for (; (count > 0) && ((((DWORD) *bl) >> 24) == 0x00); --count)
        {
            d++;
            bl++;
            s++;
        }
    }
}
|
|
|
|
// BlendLinear_sRGB_32RGB_MMX
//
// Same flow as BlendLinear_sRGB_32RGB, except that the sRGB64 blend step
// calls Blend_sRGB64_sRGB64_MMX. All other helpers are the plain ones.
//
//   dst, src    - destination and source scans (32bpp)
//   count       - number of pixels; the loop does nothing when count <= 0
//   otherParams - supplies BlendingScan and three scratch TempBuffers

VOID FASTCALL
ScanOperation::BlendLinear_sRGB_32RGB_MMX(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
    DEFINE_POINTERS(ARGB, ARGB)
    DEFINE_BLEND_POINTER(ARGB)
    using namespace sRGB;

    void *blendBuf = otherParams->TempBuffers[0];
    void *accumBuf = otherParams->TempBuffers[1];
    void *tempBuf  = otherParams->TempBuffers[2];

    // Private copy of the parameters: the sRGB64 blend must take its
    // blending scan from the processed run held in blendBuf.
    OtherParams runParams = *otherParams;
    runParams.BlendingScan = blendBuf;

    while (count > 0)
    {
        // Length of the translucent run starting at the cursor.
        int run = 0;
        while ((run < count) && isTranslucent(bl[run]))
        {
            ++run;
        }

        if (run != 0)
        {
            // Source run -> sRGB64.
            GammaConvert_sRGB_sRGB64(accumBuf, s, run, otherParams);

            // Blend run: AlphaDivide -> sRGB64 -> AlphaMultiply.
            AlphaDivide_sRGB(blendBuf, bl, run, otherParams);
            GammaConvert_sRGB_sRGB64(tempBuf, blendBuf, run, otherParams);
            AlphaMultiply_sRGB64(blendBuf, tempBuf, run, otherParams);

            // Blend in place in accumBuf (MMX blend), then convert into
            // the destination.
            Blend_sRGB64_sRGB64_MMX(accumBuf, accumBuf, run, &runParams);
            GammaConvert_sRGB64_sRGB(d, accumBuf, run, otherParams);

            count -= run;
            d += run;
            bl += run;
            s += run;
            continue;
        }

        // Alpha 0xFF: the blend pixel replaces the destination pixel.
        for (; (count > 0) && ((((DWORD) *bl) >> 24) == 0xFF); --count)
        {
            *d++ = *bl++;
            s++;
        }

        // Alpha 0x00: step over the pixel without writing it.
        for (; (count > 0) && ((((DWORD) *bl) >> 24) == 0x00); --count)
        {
            d++;
            bl++;
            s++;
        }
    }
}
|
|
|
|
// BlendLinear_sRGB_565
//
// Blends the 32bpp ARGB scan 'otherParams->BlendingScan' over a 16bpp
// 'src' scan and writes 16bpp pixels to 'dst'. Translucent runs go
// through Convert_565_sRGB and the sRGB64 helpers and are written with
// Dither_sRGB_565; runs with alpha 0xFF are dithered straight from the
// blend scan; pixels with alpha 0x00 leave the destination untouched.
//
//   dst, src    - destination and source scans (UINT16 pixels)
//   count       - number of pixels; the loop does nothing when count <= 0
//   otherParams - supplies BlendingScan and three scratch TempBuffers

VOID FASTCALL
ScanOperation::BlendLinear_sRGB_565(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
    DEFINE_POINTERS(UINT16,UINT16)
    DEFINE_BLEND_POINTER(ARGB)
    using namespace sRGB;

    void *blendBuf = otherParams->TempBuffers[0];
    void *accumBuf = otherParams->TempBuffers[1];
    void *tempBuf  = otherParams->TempBuffers[2];

    // Private copy of the parameters: the sRGB64 blend must take its
    // blending scan from the processed run held in blendBuf.
    OtherParams runParams = *otherParams;
    runParams.BlendingScan = blendBuf;

    while (count > 0)
    {
        // Length of the translucent run starting at the cursor.
        int run = 0;
        while ((run < count) && isTranslucent(bl[run]))
        {
            ++run;
        }

        if (run != 0)
        {
            // Source run: 565 -> sRGB -> sRGB64.
            Convert_565_sRGB(tempBuf, s, run, otherParams);
            GammaConvert_sRGB_sRGB64(accumBuf, tempBuf, run, otherParams);

            // Blend run: AlphaDivide -> sRGB64 -> AlphaMultiply.
            AlphaDivide_sRGB(blendBuf, bl, run, otherParams);
            GammaConvert_sRGB_sRGB64(tempBuf, blendBuf, run, otherParams);
            AlphaMultiply_sRGB64(blendBuf, tempBuf, run, otherParams);

            // Blend in place in accumBuf, convert back to sRGB, then
            // dither into the 16bpp destination.
            Blend_sRGB64_sRGB64(accumBuf, accumBuf, run, &runParams);
            GammaConvert_sRGB64_sRGB(tempBuf, accumBuf, run, otherParams);
            Dither_sRGB_565(d, tempBuf, run, otherParams);

            count -= run;
            d += run;
            bl += run;
            s += run;
            continue;
        }

        // Run of alpha 0xFF pixels: dither the blend scan directly.
        while ((run < count) && ((((DWORD) bl[run]) >> 24) == 0xFF))
        {
            ++run;
        }

        if (run > 0)
        {
            Dither_sRGB_565(d, bl, run, otherParams);

            count -= run;
            d += run;
            bl += run;
            s += run;
        }

        // Alpha 0x00: step over the pixel without writing it.
        for (; (count > 0) && ((((DWORD) *bl) >> 24) == 0x00); --count)
        {
            d++;
            bl++;
            s++;
        }
    }
}
|
|
|
|
// BlendLinear_sRGB_565_MMX
//
// Same flow as BlendLinear_sRGB_565, except that the sRGB64 blend and the
// dither steps call Blend_sRGB64_sRGB64_MMX and Dither_sRGB_565_MMX.
//
//   dst, src    - destination and source scans (UINT16 pixels)
//   count       - number of pixels; the loop does nothing when count <= 0
//   otherParams - supplies BlendingScan and three scratch TempBuffers

VOID FASTCALL
ScanOperation::BlendLinear_sRGB_565_MMX(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
    DEFINE_POINTERS(UINT16,UINT16)
    DEFINE_BLEND_POINTER(ARGB)
    using namespace sRGB;

    void *blendBuf = otherParams->TempBuffers[0];
    void *accumBuf = otherParams->TempBuffers[1];
    void *tempBuf  = otherParams->TempBuffers[2];

    // Private copy of the parameters: the sRGB64 blend must take its
    // blending scan from the processed run held in blendBuf.
    OtherParams runParams = *otherParams;
    runParams.BlendingScan = blendBuf;

    while (count > 0)
    {
        // Length of the translucent run starting at the cursor.
        int run = 0;
        while ((run < count) && isTranslucent(bl[run]))
        {
            ++run;
        }

        if (run != 0)
        {
            // Source run: 565 -> sRGB -> sRGB64.
            Convert_565_sRGB(tempBuf, s, run, otherParams);
            GammaConvert_sRGB_sRGB64(accumBuf, tempBuf, run, otherParams);

            // Blend run: AlphaDivide -> sRGB64 -> AlphaMultiply.
            AlphaDivide_sRGB(blendBuf, bl, run, otherParams);
            GammaConvert_sRGB_sRGB64(tempBuf, blendBuf, run, otherParams);
            AlphaMultiply_sRGB64(blendBuf, tempBuf, run, otherParams);

            // Blend in place in accumBuf (MMX blend), convert back to
            // sRGB, then dither into the 16bpp destination.
            Blend_sRGB64_sRGB64_MMX(accumBuf, accumBuf, run, &runParams);
            GammaConvert_sRGB64_sRGB(tempBuf, accumBuf, run, otherParams);
            Dither_sRGB_565_MMX(d, tempBuf, run, otherParams);

            count -= run;
            d += run;
            bl += run;
            s += run;
            continue;
        }

        // Run of alpha 0xFF pixels: dither the blend scan directly.
        while ((run < count) && ((((DWORD) bl[run]) >> 24) == 0xFF))
        {
            ++run;
        }

        if (run > 0)
        {
            Dither_sRGB_565_MMX(d, bl, run, otherParams);

            count -= run;
            d += run;
            bl += run;
            s += run;
        }

        // Alpha 0x00: step over the pixel without writing it.
        for (; (count > 0) && ((((DWORD) *bl) >> 24) == 0x00); --count)
        {
            d++;
            bl++;
            s++;
        }
    }
}
|
|
|
|
// BlendLinear_sRGB_555
//
// Blends the 32bpp ARGB scan 'otherParams->BlendingScan' over a 16bpp
// 'src' scan and writes 16bpp pixels to 'dst'. Translucent runs go
// through Convert_555_sRGB and the sRGB64 helpers and are written with
// Dither_sRGB_555; runs with alpha 0xFF are dithered straight from the
// blend scan; pixels with alpha 0x00 leave the destination untouched.
//
//   dst, src    - destination and source scans (UINT16 pixels)
//   count       - number of pixels; the loop does nothing when count <= 0
//   otherParams - supplies BlendingScan and three scratch TempBuffers

VOID FASTCALL
ScanOperation::BlendLinear_sRGB_555(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
    DEFINE_POINTERS(UINT16,UINT16)
    DEFINE_BLEND_POINTER(ARGB)
    using namespace sRGB;

    void *blendBuf = otherParams->TempBuffers[0];
    void *accumBuf = otherParams->TempBuffers[1];
    void *tempBuf  = otherParams->TempBuffers[2];

    // Private copy of the parameters: the sRGB64 blend must take its
    // blending scan from the processed run held in blendBuf.
    OtherParams runParams = *otherParams;
    runParams.BlendingScan = blendBuf;

    while (count > 0)
    {
        // Length of the translucent run starting at the cursor.
        int run = 0;
        while ((run < count) && isTranslucent(bl[run]))
        {
            ++run;
        }

        if (run != 0)
        {
            // Source run: 555 -> sRGB -> sRGB64.
            Convert_555_sRGB(tempBuf, s, run, otherParams);
            GammaConvert_sRGB_sRGB64(accumBuf, tempBuf, run, otherParams);

            // Blend run: AlphaDivide -> sRGB64 -> AlphaMultiply.
            AlphaDivide_sRGB(blendBuf, bl, run, otherParams);
            GammaConvert_sRGB_sRGB64(tempBuf, blendBuf, run, otherParams);
            AlphaMultiply_sRGB64(blendBuf, tempBuf, run, otherParams);

            // Blend in place in accumBuf, convert back to sRGB, then
            // dither into the 16bpp destination.
            Blend_sRGB64_sRGB64(accumBuf, accumBuf, run, &runParams);
            GammaConvert_sRGB64_sRGB(tempBuf, accumBuf, run, otherParams);
            Dither_sRGB_555(d, tempBuf, run, otherParams);

            count -= run;
            d += run;
            bl += run;
            s += run;
            continue;
        }

        // Run of alpha 0xFF pixels: dither the blend scan directly.
        while ((run < count) && ((((DWORD) bl[run]) >> 24) == 0xFF))
        {
            ++run;
        }

        if (run > 0)
        {
            Dither_sRGB_555(d, bl, run, otherParams);

            count -= run;
            d += run;
            bl += run;
            s += run;
        }

        // Alpha 0x00: step over the pixel without writing it.
        for (; (count > 0) && ((((DWORD) *bl) >> 24) == 0x00); --count)
        {
            d++;
            bl++;
            s++;
        }
    }
}
|
|
|
|
// BlendLinear_sRGB_555_MMX
//
// Same flow as BlendLinear_sRGB_555, except that the sRGB64 blend and the
// dither steps call Blend_sRGB64_sRGB64_MMX and Dither_sRGB_555_MMX.
//
//   dst, src    - destination and source scans (UINT16 pixels)
//   count       - number of pixels; the loop does nothing when count <= 0
//   otherParams - supplies BlendingScan and three scratch TempBuffers

VOID FASTCALL
ScanOperation::BlendLinear_sRGB_555_MMX(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
    DEFINE_POINTERS(UINT16,UINT16)
    DEFINE_BLEND_POINTER(ARGB)
    using namespace sRGB;

    void *blendBuf = otherParams->TempBuffers[0];
    void *accumBuf = otherParams->TempBuffers[1];
    void *tempBuf  = otherParams->TempBuffers[2];

    // Private copy of the parameters: the sRGB64 blend must take its
    // blending scan from the processed run held in blendBuf.
    OtherParams runParams = *otherParams;
    runParams.BlendingScan = blendBuf;

    while (count > 0)
    {
        // Length of the translucent run starting at the cursor.
        int run = 0;
        while ((run < count) && isTranslucent(bl[run]))
        {
            ++run;
        }

        if (run != 0)
        {
            // Source run: 555 -> sRGB -> sRGB64.
            Convert_555_sRGB(tempBuf, s, run, otherParams);
            GammaConvert_sRGB_sRGB64(accumBuf, tempBuf, run, otherParams);

            // Blend run: AlphaDivide -> sRGB64 -> AlphaMultiply.
            AlphaDivide_sRGB(blendBuf, bl, run, otherParams);
            GammaConvert_sRGB_sRGB64(tempBuf, blendBuf, run, otherParams);
            AlphaMultiply_sRGB64(blendBuf, tempBuf, run, otherParams);

            // Blend in place in accumBuf (MMX blend), convert back to
            // sRGB, then dither into the 16bpp destination.
            Blend_sRGB64_sRGB64_MMX(accumBuf, accumBuf, run, &runParams);
            GammaConvert_sRGB64_sRGB(tempBuf, accumBuf, run, otherParams);
            Dither_sRGB_555_MMX(d, tempBuf, run, otherParams);

            count -= run;
            d += run;
            bl += run;
            s += run;
            continue;
        }

        // Run of alpha 0xFF pixels: dither the blend scan directly.
        while ((run < count) && ((((DWORD) bl[run]) >> 24) == 0xFF))
        {
            ++run;
        }

        if (run > 0)
        {
            Dither_sRGB_555_MMX(d, bl, run, otherParams);

            count -= run;
            d += run;
            bl += run;
            s += run;
        }

        // Alpha 0x00: step over the pixel without writing it.
        for (; (count > 0) && ((((DWORD) *bl) >> 24) == 0x00); --count)
        {
            d++;
            bl++;
            s++;
        }
    }
}
|
|
|
|
// Blend sRGB over sRGB, ignoring the non-linear gamma.
|
|
|
|
VOID FASTCALL
|
|
ScanOperation::Blend_sRGB_sRGB(
|
|
VOID *dst,
|
|
const VOID *src,
|
|
INT count,
|
|
const OtherParams *otherParams
|
|
)
|
|
{
|
|
DEFINE_POINTERS(ARGB, ARGB)
|
|
DEFINE_BLEND_POINTER(ARGB)
|
|
|
|
ASSERT(count>0);
|
|
|
|
UINT32 dstPixel;
|
|
do {
|
|
UINT32 blendPixel = *bl;
|
|
UINT32 alpha = blendPixel >> 24;
|
|
|
|
// If alpha is zero, skip everything, including writing the
|
|
// destination pixel. This is needed for the RMW optimization.
|
|
|
|
if (alpha != 0)
|
|
{
|
|
|
|
if (alpha == 255)
|
|
{
|
|
dstPixel = blendPixel;
|
|
}
|
|
else
|
|
{
|
|
//
|
|
// Dst = B + (1-Alpha) * S
|
|
//
|
|
|
|
dstPixel = *s;
|
|
|
|
ULONG Multa = 255 - alpha;
|
|
ULONG _D1_00AA00GG = (dstPixel & 0xff00ff00) >> 8;
|
|
ULONG _D1_00RR00BB = (dstPixel & 0x00ff00ff);
|
|
|
|
ULONG _D2_AAAAGGGG = _D1_00AA00GG * Multa + 0x00800080;
|
|
ULONG _D2_RRRRBBBB = _D1_00RR00BB * Multa + 0x00800080;
|
|
|
|
ULONG _D3_00AA00GG = (_D2_AAAAGGGG & 0xff00ff00) >> 8;
|
|
ULONG _D3_00RR00BB = (_D2_RRRRBBBB & 0xff00ff00) >> 8;
|
|
|
|
ULONG _D4_AA00GG00 = (_D2_AAAAGGGG + _D3_00AA00GG) & 0xFF00FF00;
|
|
ULONG _D4_00RR00BB = ((_D2_RRRRBBBB + _D3_00RR00BB) & 0xFF00FF00) >> 8;
|
|
|
|
dstPixel = blendPixel + _D4_AA00GG00 + _D4_00RR00BB;
|
|
}
|
|
|
|
*d = dstPixel;
|
|
}
|
|
|
|
bl++;
|
|
s++;
|
|
d++;
|
|
} while (--count != 0);
|
|
}
|
|
|
|
// Blend sRGB over sRGB with MMX, ignoring the non-linear gamma.
//
// For each pixel, with B = blend pixel, S = source pixel, A = alpha of B:
//   A == 0     nothing is written
//   A == 0xFF  dst = B
//   otherwise  dst = B + (255 - A) * S / 255, per channel
//
//   dst, src    - 32bpp ARGB scans (the loop advances 4 bytes per pixel)
//   count       - number of pixels; the counter is only tested after a
//                 pixel has been processed, so count must be > 0
//   otherParams - supplies BlendingScan
//
// The body exists only when _X86_ is defined; on any other target this
// function compiles to an empty body and does nothing.

VOID FASTCALL
ScanOperation::Blend_sRGB_sRGB_MMX(
    VOID *dst,
    const VOID *src,
    INT count,
    const OtherParams *otherParams
    )
{
#if defined(_X86_)
    using namespace sRGB;

    // 's' and 'd' are only handed to the assembly below as raw addresses;
    // they are typed as ARGB to match the 4-byte pixel stride used there.
    DEFINE_POINTERS(ARGB, ARGB)
    const void *pbl=otherParams->BlendingScan;

    // Rounding bias (0x80) for each of the four 16-bit lanes.
    static ULONGLONG halfMask=0x0080008000800080;

    _asm {
        mov        ecx,count          ; ecx=pixel counter
        mov        ebx,pbl            ; ebx=blend pixel pointer
        mov        esi,s              ; esi=source pixel pointer
        mov        edi,d              ; edi=dest pixel pointer
        pxor       mm7,mm7            ; mm7=[0|0|0|0]
        movq       mm3,halfMask       ; mm3=[80|80|80|80] rounding bias

main_loop:
        mov        eax,DWORD ptr [ebx] ; eax=blend pixel
        mov        edx,eax
        shr        edx,24             ; edx=alpha
        cmp        edx,0              ; For some reason, doing a jz right after a shr stalls
        jz         alpha_blend_done   ; if alpha=0, no blending

        cmp        edx,0xFF
        jne        alpha_blend
        mov        [edi],eax          ; if alpha=0xFF, copy bl to dest
        jmp        alpha_blend_done

alpha_blend:
        movd       mm4,eax            ; mm4=blend pixel, added back at the end

        mov        eax,[esi]          ; eax=source
        movd       mm0,eax            ; mm0=[0|0|AR|GB]
        punpcklbw  mm0,mm7            ; mm0=[A|R|G|B]

        xor        edx,0xFF           ; C=255-Alpha
        movd       mm2,edx            ; mm2=[0|0|0|C]
        punpcklwd  mm2,mm2            ; mm2=[0|0|C|C]
        punpckldq  mm2,mm2            ; mm2=[C|C|C|C]

        pmullw     mm0,mm2            ; each channel * C
        paddw      mm0,mm3            ; mm0=[AA|RR|GG|BB] with rounding bias
        movq       mm2,mm0            ; mm2=[AA|RR|GG|BB]

        psrlw      mm0,8              ; mm0=[A|R|G|B]
        paddw      mm0,mm2            ; mm0=[AA|RR|GG|BB]  (x + (x>>8))
        psrlw      mm0,8              ; mm0=[A|R|G|B]      divide-by-255 step

        packuswb   mm0,mm0            ; mm0=[AR|GB|AR|GB]
        paddd      mm0,mm4            ; Add the blend pixel
        movd       edx,mm0            ; edx=[ARGB] -> result pixel
        mov        [edi],edx

alpha_blend_done:
        add        edi,4
        add        esi,4
        add        ebx,4
        dec        ecx
        jg         main_loop

        emms
    }
#endif
}
|
|
|
|
// Blend from sRGB64 to sRGB64.
|
|
|
|
VOID FASTCALL
|
|
ScanOperation::Blend_sRGB64_sRGB64(
|
|
VOID *dst,
|
|
const VOID *src,
|
|
INT count,
|
|
const OtherParams *otherParams
|
|
)
|
|
{
|
|
DEFINE_POINTERS(ARGB64, ARGB64)
|
|
DEFINE_BLEND_POINTER(ARGB64)
|
|
using namespace sRGB;
|
|
|
|
while (count--)
|
|
{
|
|
sRGB64Color blendPixel;
|
|
blendPixel.argb = *bl;
|
|
INT16 alpha = blendPixel.a;
|
|
|
|
// If alpha is zero, skip everything, including writing the
|
|
// destination pixel. This is needed for the RMW optimization.
|
|
|
|
if (alpha != 0)
|
|
{
|
|
sRGB64Color dstPixel;
|
|
|
|
if (alpha == SRGB_ONE)
|
|
{
|
|
dstPixel.argb = blendPixel.argb;
|
|
}
|
|
else
|
|
{
|
|
//
|
|
// Dst = Src + (1-Alpha) * Dst
|
|
//
|
|
|
|
dstPixel.argb = *s;
|
|
|
|
INT Multa = SRGB_ONE - alpha;
|
|
|
|
dstPixel.r = ((dstPixel.r * Multa + SRGB_HALF) >> SRGB_FRACTIONBITS) + blendPixel.r;
|
|
dstPixel.g = ((dstPixel.g * Multa + SRGB_HALF) >> SRGB_FRACTIONBITS) + blendPixel.g;
|
|
dstPixel.b = ((dstPixel.b * Multa + SRGB_HALF) >> SRGB_FRACTIONBITS) + blendPixel.b;
|
|
dstPixel.a = ((dstPixel.a * Multa + SRGB_HALF) >> SRGB_FRACTIONBITS) + blendPixel.a;
|
|
}
|
|
|
|
*d = dstPixel.argb;
|
|
}
|
|
|
|
bl++;
|
|
s++;
|
|
d++;
|
|
}
|
|
}
|
|
|
|
// Blend from sRGB64 to sRGB64 MMX.
|
|
|
|
VOID FASTCALL
|
|
ScanOperation::Blend_sRGB64_sRGB64_MMX(
|
|
VOID *dst,
|
|
const VOID *src,
|
|
INT count,
|
|
const OtherParams *otherParams
|
|
)
|
|
{
|
|
#if defined(_X86_)
|
|
using namespace sRGB;
|
|
DEFINE_POINTERS(ARGB64, ARGB64)
|
|
const void *pbl=otherParams->BlendingScan;
|
|
static ULONGLONG ullSRGBHalfMask=0x1000100010001000;
|
|
|
|
_asm {
|
|
mov ecx,count ; ecx=pixel counter
|
|
mov ebx,pbl ; ebx=blend pixel pointer
|
|
mov esi,s ; esi=source pixel pointer
|
|
mov edi,d ; edi=dest pixel pointer
|
|
movq mm4,ullSRGBHalfMask ; mm4=mask with srgb half
|
|
|
|
main_loop:
|
|
movsx eax,word ptr [ebx+3*2] ; eax=alpha
|
|
or eax,eax ; eax==0?
|
|
jz alpha_blend_done ; if alpha=0, no blending
|
|
|
|
movq mm0,[ebx] ; mm0=blend pixel
|
|
cmp eax,SRGB_ONE ; if alpha=SRGB_ONE, dest=blend
|
|
jne alpha_blend
|
|
movq [edi],mm0 ; copy blend pixel to dest
|
|
jmp alpha_blend_done
|
|
|
|
alpha_blend:
|
|
; Get SRGB_ONE-Alpha
|
|
neg eax
|
|
add eax,SRGB_ONE ; C=SRGB_ONE-Alpha
|
|
movd mm2, eax ; mm2=[0|0|0|C]
|
|
punpcklwd mm2, mm2
|
|
punpckldq mm2, mm2 ; mm2=[C|C|C|C]
|
|
|
|
; Blend pixels
|
|
movq mm1,[esi] ; mm1=[A|R|G|B] source pixel
|
|
movq mm3,mm1 ; mm3=[A|R|G|B] source pixel
|
|
pmullw mm1,mm2 ; low word of source*C
|
|
paddw mm1,mm4 ; add an srgb half for rounding
|
|
psrlw mm1,SRGB_FRACTIONBITS ; truncate low SRGB_FRACTIONBITS
|
|
pmulhw mm3,mm2 ; high word of source*C
|
|
psllw mm3,SRGB_INTEGERBITS ; truncate high SRGB_INTEGERBITS
|
|
por mm1,mm3 ; mm1=[A|R|G|B]
|
|
paddw mm1,mm0 ; add blend pixel
|
|
movq [edi],mm1 ; copy result to dest
|
|
|
|
alpha_blend_done:
|
|
add edi,8
|
|
add esi,8
|
|
add ebx,8
|
|
|
|
dec ecx
|
|
jg main_loop
|
|
emms
|
|
}
|
|
#endif
|
|
}
|
|
|
|
|
|
// Blend from sRGB to 16bpp 565, ignoring sRGB's non-linear gamma.
|
|
|
|
VOID FASTCALL
|
|
ScanOperation::Blend_sRGB_565(
|
|
VOID *dst,
|
|
const VOID *src,
|
|
INT count,
|
|
const OtherParams *otherParams
|
|
)
|
|
{
|
|
DEFINE_POINTERS(UINT16, UINT16)
|
|
DEFINE_BLEND_POINTER(ARGB)
|
|
|
|
ASSERT(count>0);
|
|
|
|
do {
|
|
UINT32 blendPixel = *bl;
|
|
UINT32 alpha = blendPixel >> 27;
|
|
|
|
if (alpha != 0)
|
|
{
|
|
UINT32 dstPixel;
|
|
|
|
// Blend: S + [ (255 - sA) * D ] / 255
|
|
|
|
// First, convert the source pixel from 32bpp BGRA to
|
|
// 5-5-5 16bpp, pre-multiplied.
|
|
//
|
|
// Note: No rounding needs to be done on this conversion!
|
|
|
|
blendPixel = ((blendPixel >> 8) & 0xf800) |
|
|
((blendPixel >> 5) & 0x07e0) |
|
|
((blendPixel >> 3) & 0x001f);
|
|
|
|
if (alpha == 31)
|
|
{
|
|
dstPixel = blendPixel;
|
|
}
|
|
else
|
|
{
|
|
dstPixel = (UINT32) *s;
|
|
|
|
UINT32 multA = 31 - alpha;
|
|
|
|
UINT32 D1_00rr00bb = (dstPixel & 0xf81f);
|
|
UINT32 D2_rrrrbbbb = D1_00rr00bb * multA + 0x00008010;
|
|
UINT32 D3_00rr00bb = (D2_rrrrbbbb & 0x001f03e0) >> 5;
|
|
UINT32 D4_rrxxbbxx = ((D2_rrrrbbbb + D3_00rr00bb) >> 5) & 0xf81f;
|
|
|
|
UINT32 D1_000000gg = (dstPixel & 0x7e0) >> 5;
|
|
UINT32 D2_0000gggg = D1_000000gg * 2 * multA + 0x00000020;
|
|
UINT32 D3_000000gg = (D2_0000gggg & 0x00000fc0) >> 6;
|
|
UINT32 D4_0000ggxx = ((D2_0000gggg + D3_000000gg) & 0x0fc0) >> 1;
|
|
|
|
dstPixel = (UINT16) ((D4_rrxxbbxx | D4_0000ggxx) + blendPixel);
|
|
}
|
|
|
|
*d = (UINT16) dstPixel;
|
|
}
|
|
|
|
bl++;
|
|
s++;
|
|
d++;
|
|
} while (--count != 0);
|
|
}
|
|
|
|
// Blend from sRGB to 16bpp 555, ignoring sRGB's non-linear gamma.
|
|
|
|
VOID FASTCALL
|
|
ScanOperation::Blend_sRGB_555(
|
|
VOID *dst,
|
|
const VOID *src,
|
|
INT count,
|
|
const OtherParams *otherParams
|
|
)
|
|
{
|
|
DEFINE_POINTERS(UINT16, UINT16)
|
|
DEFINE_BLEND_POINTER(ARGB)
|
|
|
|
ASSERT(count>0);
|
|
|
|
do {
|
|
UINT32 blendPixel = *bl;
|
|
UINT32 alpha = blendPixel >> 27;
|
|
|
|
if (alpha != 0)
|
|
{
|
|
UINT32 dstPixel;
|
|
|
|
// Blend: S + [ (255 - sA) * D ] / 255
|
|
|
|
// First, convert the source pixel from 32bpp BGRA to
|
|
// 5-5-5 16bpp, pre-multiplied.
|
|
//
|
|
// Note: No rounding needs to be done on this conversion!
|
|
|
|
blendPixel = ((blendPixel & 0x00f80000) >> 9) |
|
|
((blendPixel & 0x0000f800) >> 6) |
|
|
((blendPixel & 0x000000f8) >> 3);
|
|
|
|
if (alpha == 31)
|
|
{
|
|
dstPixel = blendPixel;
|
|
}
|
|
else
|
|
{
|
|
dstPixel = (UINT32) *s;
|
|
|
|
UINT32 multA = 31 - alpha;
|
|
|
|
UINT32 D1_00rr00bb = (dstPixel & 0x7c1f);
|
|
UINT32 D2_rrrrbbbb = D1_00rr00bb * multA + 0x00004010;
|
|
UINT32 D3_00rr00bb = (D2_rrrrbbbb & 0x000f83e0) >> 5;
|
|
UINT32 D4_rrxxbbxx = ((D2_rrrrbbbb + D3_00rr00bb) >> 5) & 0x7c1f;
|
|
|
|
UINT32 D1_000000gg = (dstPixel & 0x3e0) >> 5;
|
|
UINT32 D2_0000gggg = D1_000000gg * multA + 0x00000010;
|
|
UINT32 D3_000000gg = (D2_0000gggg & 0x000003e0) >> 5;
|
|
UINT32 D4_0000ggxx = (D2_0000gggg + D3_000000gg) & 0x03e0;
|
|
|
|
dstPixel = (UINT16) ((D4_rrxxbbxx | D4_0000ggxx) + blendPixel);
|
|
}
|
|
|
|
*d = (UINT16) dstPixel;
|
|
}
|
|
|
|
bl++;
|
|
s++;
|
|
d++;
|
|
} while (--count != 0);
|
|
}
|
|
|
|
// Blend from sRGB to RGB24, ignoring sRGB's non-linear gamma.
|
|
|
|
VOID FASTCALL
|
|
ScanOperation::Blend_sRGB_24(
|
|
VOID *dst,
|
|
const VOID *src,
|
|
INT count,
|
|
const OtherParams *otherParams
|
|
)
|
|
{
|
|
DEFINE_POINTERS(BYTE, BYTE)
|
|
DEFINE_BLEND_POINTER(ARGB)
|
|
|
|
ASSERT(count>0);
|
|
|
|
do {
|
|
|
|
if (((UINT_PTR) d & 0x3) == 0)
|
|
{
|
|
while (count >= 4)
|
|
{
|
|
BYTE *bb = (BYTE *) bl;
|
|
|
|
if ((bb[3] & bb[7] & bb[11] & bb[15]) != 0xFF)
|
|
{
|
|
break;
|
|
}
|
|
|
|
((UINT32 *) d)[0] = (bb[4] << 24) | (bb[2] << 16) | (bb[1] << 8) | bb[0];
|
|
((UINT32 *) d)[1] = (bb[9] << 24) | (bb[8] << 16) | (bb[6] << 8) | bb[5];
|
|
((UINT32 *) d)[2] = (bb[14] << 24) | (bb[13] << 16) | (bb[12] << 8) | bb[10];
|
|
|
|
count -= 4;
|
|
bl += 4;
|
|
d += 12;
|
|
s += 12;
|
|
}
|
|
}
|
|
|
|
if (count == 0)
|
|
{
|
|
break;
|
|
}
|
|
|
|
UINT32 blendPixel = *bl;
|
|
UINT32 alpha = blendPixel >> 24;
|
|
|
|
if (alpha != 0)
|
|
{
|
|
UINT32 dstPixel;
|
|
|
|
if (alpha == 255)
|
|
{
|
|
dstPixel = blendPixel;
|
|
}
|
|
else
|
|
{
|
|
// Dst = Src + (1-Alpha) * Dst
|
|
|
|
UINT32 multA = 255 - alpha;
|
|
|
|
UINT32 D1_000000GG = *(s + 1);
|
|
UINT32 D2_0000GGGG = D1_000000GG * multA + 0x00800080;
|
|
UINT32 D3_000000GG = (D2_0000GGGG & 0xff00ff00) >> 8;
|
|
UINT32 D4_0000GG00 = (D2_0000GGGG + D3_000000GG) & 0xFF00FF00;
|
|
|
|
UINT32 D1_00RR00BB = *(s) | (ULONG) *(s + 2) << 16;
|
|
UINT32 D2_RRRRBBBB = D1_00RR00BB * multA + 0x00800080;
|
|
UINT32 D3_00RR00BB = (D2_RRRRBBBB & 0xff00ff00) >> 8;
|
|
UINT32 D4_00RR00BB = ((D2_RRRRBBBB + D3_00RR00BB) & 0xFF00FF00) >> 8;
|
|
|
|
dstPixel = (D4_0000GG00 | D4_00RR00BB) + blendPixel;
|
|
}
|
|
|
|
*(d) = (BYTE) (dstPixel);
|
|
*(d + 1) = (BYTE) (dstPixel >> 8);
|
|
*(d + 2) = (BYTE) (dstPixel >> 16);
|
|
}
|
|
|
|
bl++;
|
|
d += 3;
|
|
s += 3;
|
|
} while (--count != 0);
|
|
}
|
|
|
|
// Blend from sRGB to BGR24, ignoring sRGB's non-linear gamma.
|
|
|
|
VOID FASTCALL
|
|
ScanOperation::Blend_sRGB_24BGR(
|
|
VOID *dst,
|
|
const VOID *src,
|
|
INT count,
|
|
const OtherParams *otherParams
|
|
)
|
|
{
|
|
DEFINE_POINTERS(BYTE, BYTE)
|
|
DEFINE_BLEND_POINTER(ARGB)
|
|
|
|
ASSERT(count>0);
|
|
|
|
do {
|
|
UINT32 blendPixel = *bl;
|
|
UINT32 alpha = blendPixel >> 24;
|
|
|
|
if (alpha != 0)
|
|
{
|
|
UINT32 dstPixel;
|
|
|
|
if (alpha == 255)
|
|
{
|
|
dstPixel = blendPixel;
|
|
}
|
|
else
|
|
{
|
|
// Dst = Src + (1-Alpha) * Dst
|
|
|
|
UINT32 multA = 255 - alpha;
|
|
|
|
UINT32 D1_000000GG = *(s + 1);
|
|
UINT32 D2_0000GGGG = D1_000000GG * multA + 0x00800080;
|
|
UINT32 D3_000000GG = (D2_0000GGGG & 0xff00ff00) >> 8;
|
|
UINT32 D4_0000GG00 = (D2_0000GGGG + D3_000000GG) & 0xFF00FF00;
|
|
|
|
UINT32 D1_00RR00BB = *(s) | (ULONG) *(s + 2) << 16;
|
|
UINT32 D2_RRRRBBBB = D1_00RR00BB * multA + 0x00800080;
|
|
UINT32 D3_00RR00BB = (D2_RRRRBBBB & 0xff00ff00) >> 8;
|
|
UINT32 D4_00RR00BB = ((D2_RRRRBBBB + D3_00RR00BB) & 0xFF00FF00) >> 8;
|
|
|
|
dstPixel = (D4_0000GG00 | D4_00RR00BB) + blendPixel;
|
|
}
|
|
|
|
*(d) = (BYTE) (dstPixel >> 16);
|
|
*(d + 1) = (BYTE) (dstPixel >> 8);
|
|
*(d + 2) = (BYTE) (dstPixel);
|
|
}
|
|
|
|
bl++;
|
|
d += 3;
|
|
s += 3;
|
|
} while (--count != 0);
|
|
}
|
|
|
|
/*
|
|
|
|
!!![agodfrey]
|
|
So we're going to move to standardizing on non-premultiplied alpha.
|
|
When we do, the above routines will all have to change - but we may
|
|
want to keep the above versions around too.
|
|
|
|
Below, I've implemented the sRGB and sRGB64 versions for a non-premultiplied
|
|
source. Now, these really blend from a non-premultiplied source,
|
|
to a pre-multiplied destination. You can see this from the fact that they
|
|
are equivalent to combining the above pre-multiplied Blends with an
|
|
AlphaMultiply step on the source data.
|
|
|
|
Since pre-multiplied and non-premultiplied formats are identical for alpha==1,
|
|
the functions below work fine when the destination has no alpha (i.e. alpha==1).
|
|
|
|
Otherwise, we can use them when the destination is in premultiplied format.
|
|
If we somehow let the user draw to such a destination, they can use an off-screen
|
|
premultiplied buffer to accumulate drawing, and then using a
|
|
pre-multiplied blend, draw that to the final destination. This gives them
|
|
the same functionality that standardizing on pre-multiplied alpha is supposed
|
|
to give.
|
|
|
|
// Blend sRGB over sRGB, ignoring the non-linear gamma.
|
|
|
|
VOID FASTCALL
|
|
ScanOperation::Blend_sRGB_sRGB(
|
|
VOID *dst,
|
|
const VOID *src,
|
|
INT count,
|
|
const OtherParams *otherParams
|
|
)
|
|
{
|
|
DEFINE_POINTERS(ARGB, ARGB)
|
|
DEFINE_BLEND_POINTER(ARGB)
|
|
|
|
ASSERT(count>0);
|
|
|
|
do {
|
|
UINT32 blendPixel = *bl;
|
|
UINT32 alpha = blendPixel >> 24;
|
|
|
|
// If alpha is zero, skip everything, including writing the
|
|
// destination pixel. This is needed for the RMW optimization.
|
|
|
|
if (alpha != 0)
|
|
{
|
|
UINT32 dstPixel;
|
|
|
|
if (alpha == 255)
|
|
{
|
|
dstPixel = blendPixel;
|
|
}
|
|
else
|
|
{
|
|
// Dst = Dst * (1-Alpha) + Src * Alpha
|
|
|
|
dstPixel = *s;
|
|
|
|
ULONG invalpha = 255 - alpha;
|
|
|
|
ULONG _D1_00AA00GG = (dstPixel & 0xff00ff00) >> 8;
|
|
ULONG _D1_00RR00BB = (dstPixel & 0x00ff00ff);
|
|
|
|
// For the alpha channel, the result we want is this:
|
|
//
|
|
// Dst = Dst * (1-Alpha) + Src.
|
|
//
|
|
// Or equivalently:
|
|
//
|
|
// Dst = Dst * (1-Alpha) + Alpha.
|
|
//
|
|
// We want to apply the same operations to the alpha channel as
|
|
// we do to the others. So, to get the above result from
|
|
//
|
|
// Dst = Dst * (1-Alpha) + Src * Alpha
|
|
//
|
|
// we fake a 'Src' value of 1 (represented by 255).
|
|
|
|
ULONG _S1_00ff00GG = (blendPixel & 0xff00ff00) >> 8 + 0xff0000;
|
|
ULONG _S1_00RR00BB = (blendPixel & 0x00ff00ff);
|
|
|
|
ULONG _D2_AAAAGGGG = _D1_00AA00GG * invalpha +
|
|
_S1_00ff00GG * alpha +
|
|
0x00800080;
|
|
ULONG _D2_RRRRBBBB = _D1_00RR00BB * invalpha +
|
|
_S1_00RR00BB * alpha +
|
|
0x00800080;
|
|
|
|
ULONG _D3_00AA00GG = (_D2_AAAAGGGG & 0xff00ff00) >> 8;
|
|
ULONG _D3_00RR00BB = (_D2_RRRRBBBB & 0xff00ff00) >> 8;
|
|
|
|
ULONG _D4_AA00GG00 = (_D2_AAAAGGGG + _D3_00AA00GG) & 0xFF00FF00;
|
|
ULONG _D4_00RR00BB = ((_D2_RRRRBBBB + _D3_00RR00BB) & 0xFF00FF00) >> 8;
|
|
|
|
|
|
dstPixel = _D4_AA00GG00 + _D4_00RR00BB;
|
|
}
|
|
|
|
*d = dstPixel;
|
|
}
|
|
|
|
bl++;
|
|
s++;
|
|
d++;
|
|
} while (--count != 0);
|
|
}
|
|
|
|
// Blend from sRGB64 to sRGB64.
|
|
|
|
VOID FASTCALL
|
|
ScanOperation::Blend_sRGB64_sRGB64(
|
|
VOID *dst,
|
|
const VOID *src,
|
|
INT count,
|
|
const OtherParams *otherParams
|
|
)
|
|
{
|
|
DEFINE_POINTERS(ARGB64, ARGB64)
|
|
DEFINE_BLEND_POINTER(ARGB64)
|
|
using namespace sRGB;
|
|
|
|
while (count--)
|
|
{
|
|
sRGB64Color blendPixel;
|
|
blendPixel.argb = *bl;
|
|
INT alpha = blendPixel.a;
|
|
|
|
// If alpha is zero, skip everything, including writing the
|
|
// destination pixel. This is needed for the RMW optimization.
|
|
|
|
if (alpha != 0)
|
|
{
|
|
sRGB64Color dstPixel;
|
|
|
|
if (alpha == SRGB_ONE)
|
|
{
|
|
dstPixel.argb = blendPixel.argb;
|
|
}
|
|
else
|
|
{
|
|
// Dst = Dst * (1-Alpha) + Src * Alpha
|
|
|
|
dstPixel.argb = *s;
|
|
|
|
INT invalpha = SRGB_ONE - alpha;
|
|
|
|
dstPixel.r = ((dstPixel.r * invalpha) +
|
|
(blendPixel.r * alpha) +
|
|
SRGB_HALF) >>
|
|
SRGB_FRACTIONBITS;
|
|
dstPixel.g = ((dstPixel.g * invalpha) +
|
|
(blendPixel.g * alpha) +
|
|
SRGB_HALF) >>
|
|
SRGB_FRACTIONBITS;
|
|
dstPixel.b = ((dstPixel.b * invalpha) +
|
|
(blendPixel.b * alpha) +
|
|
SRGB_HALF) >>
|
|
SRGB_FRACTIONBITS;
|
|
dstPixel.a = (((dstPixel.a * invalpha) + SRGB_HALF) >>
|
|
SRGB_FRACTIONBITS) +
|
|
blendPixel.a;
|
|
}
|
|
|
|
*d = dstPixel.argb;
|
|
}
|
|
|
|
bl++;
|
|
s++;
|
|
d++;
|
|
}
|
|
}
|
|
|
|
*/
|
|
|
|
|