Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

481 lines
15 KiB

/**************************************************************************\
*
* Copyright (c) 1999 Microsoft Corporation
*
* Module Name:
*
* bicubic.cpp
*
* Abstract:
*
* Bicubic Resampling code
*
* Created:
*
* 11/03/1999 ASecchia
\**************************************************************************/
#include "precomp.hpp"
// Sets up a bicubic image span generator.
//
// bitmap          - source bitmap; its bits are addressed as 32bpp PARGB.
// scan            - scan buffer that receives the generated pixels.
// context         - supplies the world-to-device transform.
// imageAttributes - wrap mode, clamp color and source-rect clamp flag.
// numPoints       - 0, 1, 3 or 4 destination points; any other count means
//                   no image transform is inferred.
// dstPoints       - destination points (read when numPoints is 1, 3 or 4).
// srcRect         - source rectangle, or NULL for the whole bitmap.
//
// If the bitmap is NULL or invalid, dBitmap is left NULL and nothing else
// is initialized.
DpOutputBicubicImageSpan::DpOutputBicubicImageSpan(
    DpBitmap* bitmap,
    DpScanBuffer * scan,
    DpContext* context,
    DpImageAttributes imageAttributes,
    INT numPoints,
    const GpPointF *dstPoints,
    const GpRectF *srcRect
    )
{
    // Remember the output buffer and the edge-handling attributes.
    Scan         = scan;
    BWrapMode    = imageAttributes.wrapMode;
    ClampColor   = imageAttributes.clampColor;
    SrcRectClamp = imageAttributes.srcRectClamp;
    dBitmap      = bitmap;

    ASSERT(dBitmap != NULL);
    ASSERT(dBitmap->IsValid());

    // Bail out on a bad bitmap, leaving dBitmap NULL as the marker.
    if ((dBitmap == NULL) || !dBitmap->IsValid())
    {
        dBitmap = NULL;
        return;
    }

    // Describe the source bits.
    BmpData.Width       = dBitmap->Width;
    BmpData.Height      = dBitmap->Height;
    BmpData.PixelFormat = PIXFMT_32BPP_PARGB;
    BmpData.Stride      = dBitmap->Delta;
    BmpData.Scan0       = dBitmap->Bits;

    // Start from the context transforms. WorldToDevice is rebuilt below
    // once the image transform is known; DeviceToWorld keeps this value if
    // the combined transform turns out not to be invertible.
    WorldToDevice = context->WorldToDevice;
    context->GetDeviceToWorld(&DeviceToWorld);

    // Source rectangle: the caller's, or the full bitmap.
    if (srcRect != NULL)
    {
        SrcRect = *srcRect;
    }
    else
    {
        SrcRect.X      = 0;
        SrcRect.Y      = 0;
        SrcRect.Width  = static_cast<REAL>(dBitmap->Width);
        SrcRect.Height = static_cast<REAL>(dBitmap->Height);
    }

    // Build the destination points that the source rectangle maps to.
    GpPointF points[4];
    GpMatrix xForm;
    BOOL existsTransform = TRUE;

    if (numPoints == 0)
    {
        // No destination given: place the image at the origin, unscaled.
        points[0].X = 0;
        points[0].Y = 0;
        points[1].X = static_cast<REAL>(SrcRect.Width);
        points[1].Y = 0;
        points[2].X = 0;
        points[2].Y = static_cast<REAL>(SrcRect.Height);
    }
    else if (numPoints == 1)
    {
        // Only the top-left corner given: place the image there, unscaled.
        points[0]   = dstPoints[0];
        points[1].X = static_cast<REAL>(points[0].X + SrcRect.Width);
        points[1].Y = points[0].Y;
        points[2].X = points[0].X;
        points[2].Y = static_cast<REAL>(points[0].Y + SrcRect.Height);
    }
    else if ((numPoints == 3) || (numPoints == 4))
    {
        // Caller supplied the destination points: use them as given.
        GpMemcpy(&points[0], dstPoints, numPoints*sizeof(GpPointF));
    }
    else
    {
        // Unsupported point count: xForm stays as default-constructed.
        existsTransform = FALSE;
    }

    if (existsTransform)
    {
        xForm.InferAffineMatrix(points, SrcRect);
    }

    // Combine the image transform with the context's world-to-device
    // transform, and cache the inverse when one exists.
    WorldToDevice = context->WorldToDevice;
    WorldToDevice.Prepend(xForm);

    if (WorldToDevice.IsInvertible())
    {
        DeviceToWorld = WorldToDevice;
        DeviceToWorld.Invert();
    }
}
namespace DpOutputBicubicImageSpanNS {
// Resolution of the precomputed kernel table: one unit of source distance
// is divided into Oversample = 2^KernShift = 64 steps.
const INT KernShift = 6;
const INT Oversample = 1 << KernShift;
// Precomputed bicubic filter weights.
// kern[i] is the weight at distance i/Oversample from the sample point, for
// distances 0 through 2 inclusive (2*Oversample+1 = 129 entries).
// Entries are scaled so that 65536 represents 1.0.
// The values match the cubic convolution kernel with a = -0.5:
// kern[32] = 36864 (0.5625), kern[64] = 0, kern[96] = -4096 (-0.0625),
// kern[128] = 0.
const FIX16 kern[2*Oversample+1] =
{
65536, 65496, 65379, 65186, 64920, 64583, 64177, 63705,
63168, 62569, 61911, 61195, 60424, 59600, 58725, 57802,
56832, 55818, 54763, 53668, 52536, 51369, 50169, 48939,
47680, 46395, 45087, 43757, 42408, 41042, 39661, 38268,
36864, 35452, 34035, 32614, 31192, 29771, 28353, 26941,
25536, 24141, 22759, 21391, 20040, 18708, 17397, 16110,
14848, 13614, 12411, 11240, 10104, 9005, 7945, 6927,
5952, 5023, 4143, 3313, 2536, 1814, 1149, 544,
0, -496, -961, -1395, -1800, -2176, -2523, -2843,
-3136, -3403, -3645, -3862, -4056, -4227, -4375, -4502,
-4608, -4694, -4761, -4809, -4840, -4854, -4851, -4833,
-4800, -4753, -4693, -4620, -4536, -4441, -4335, -4220,
-4096, -3964, -3825, -3679, -3528, -3372, -3211, -3047,
-2880, -2711, -2541, -2370, -2200, -2031, -1863, -1698,
-1536, -1378, -1225, -1077, -936, -802, -675, -557,
-448, -349, -261, -184, -120, -69, -31, -8,
0
};
#ifdef _X86_
// The same filter weights as kern, stored as signed 16-bit values with 14
// fractional bits (16384 represents 1.0) for use by Do1DBicubicMMX.
// Indexing is identical to kern: entry i is the weight at distance
// i/Oversample. Spot checks: kern14[32] = 9216 (0.5625),
// kern14[96] = -1024 (-0.0625).
const short kern14[2*Oversample+1] =
{
16384, 16374, 16345, 16297, 16230, 16146, 16044, 15926,
15792, 15642, 15478, 15299, 15106, 14900, 14681, 14451,
14208, 13955, 13691, 13417, 13134, 12842, 12542, 12235,
11920, 11599, 11272, 10939, 10602, 10261, 9915, 9567,
9216, 8863, 8509, 8154, 7798, 7443, 7088, 6735,
6384, 6035, 5690, 5348, 5010, 4677, 4349, 4028,
3712, 3404, 3103, 2810, 2526, 2251, 1986, 1732,
1488, 1256, 1036, 828, 634, 454, 287, 136,
0, -124, -240, -349, -450, -544, -631, -711,
-784, -851, -911, -966, -1014, -1057, -1094, -1126,
-1152, -1174, -1190, -1202, -1210, -1214, -1213, -1208,
-1200, -1188, -1173, -1155, -1134, -1110, -1084, -1055,
-1024, -991, -956, -920, -882, -843, -803, -762,
-720, -678, -635, -593, -550, -508, -466, -425,
-384, -345, -306, -269, -234, -201, -169, -139,
-112, -87, -65, -46, -30, -17, -8, -2,
0
};
// C4799 reports a function that uses MMX without executing EMMS. It is
// disabled because Do1DBicubicMMX deliberately leaves EMMS to its caller.
#pragma warning(disable : 4799)
// MMX version of one 1-D bicubic filter pass.
//
// filter - four packed ARGB pixels along the filter axis.
// w      - four signed filter weights with 14 fractional bits (kern14
//          entries), one per pixel.
//
// Returns the weighted sum as a packed ARGB pixel. Each channel is rounded
// and saturated to [0, 0xff], then clamped so that no channel exceeds the
// resulting alpha.
//
// The MMX state is NOT cleared here; the caller must execute EMMS once it
// has finished calling this function.
ARGB FASTCALL Do1DBicubicMMX(ARGB filter[4], short w[4])
{
ARGB result;
// 0.5 in each 16-bit lane at 3 fractional bits, added before the final
// shift so the result is rounded rather than truncated.
static ULONGLONG HalfFix3 = 0x0004000400040004;
// really should do this function without any preamble.
_asm
{
mov eax, filter ; eax = address of the four pixels
mov ebx, w ; ebx = address of the four weights
pxor mm0, mm0 ; zero
movq mm1, [ebx] ; w
movd mm4, [eax] ; filter[0]
movd mm5, [eax+4] ; filter[1]
movd mm6, [eax+8] ; filter[2]
movd mm7, [eax+0xc] ; filter[3]
punpcklbw mm4, mm0 ; 0a0r0g0b (interleave zeros)
punpcklbw mm5, mm0 ;
punpcklbw mm6, mm0 ;
punpcklbw mm7, mm0 ;
psllw mm4, 5 ; 2 to compensate for the kernel resolution +
psllw mm5, 5 ; 3 to support some fractional bits for the add.
psllw mm6, 5 ;
psllw mm7, 5 ;
movq mm2, mm1 ;
punpcklwd mm2, mm2 ; w1 w1 w0 w0
movq mm3, mm2 ;
punpckldq mm2, mm2 ; w0
punpckhdq mm3, mm3 ; w1
pmulhw mm4, mm2 ; filter[0]*w0
pmulhw mm5, mm3 ; filter[1]*w1
punpckhwd mm1, mm1 ; w3 w3 w2 w2
movq mm2, mm1 ;
punpckldq mm1, mm1 ; w2
punpckhdq mm2, mm2 ; w3
pmulhw mm6, mm1 ; filter[2]*w2
pmulhw mm7, mm2 ; filter[3]*w3
paddsw mm4, mm5 ; add
paddsw mm6, mm7 ; add
paddsw mm4, mm6 ; add
movq mm3, HalfFix3 ;
paddsw mm4, mm3 ; add half
psraw mm4, 3 ; round the fractional bits away.
packuswb mm4, mm4 ; saturate between [0, 0xff]
; need to saturate the r, g, b components to range 0..a
movq mm0, mm4 ;
punpcklbw mm0, mm0 ; aarrggbb
punpckhwd mm0, mm0 ; aaaarrrr
psrlq mm0, 32 ; 0000aaaa
mov eax, 0xffffffff ;
movd mm1, eax ;
psubb mm1, mm0 ; 255-a
paddusb mm4, mm1 ; saturate against 255
psubusb mm4, mm1 ; drop it back to the right range
movd result, mm4 ;
//emms; this instruction is done by the caller.
}
return result;
}
#endif
// Runs one 1-D pass of the four-tap bicubic filter over four premultiplied
// ARGB pixels and returns the filtered pixel in packed form.
//
// filter - the four neighbouring pixels along the filter axis.
// x      - sub-pixel offset in kernel steps, used to index kern; it must
//          lie in the range 0..Oversample.
inline ARGB Do1DBicubic(ARGB filter[4], const FIX16 x)
{
    // Kernel weight for each of the four taps.
    const FIX16 weight[4] =
    {
        kern[Oversample+x],
        kern[x],
        kern[Oversample-x],
        kern[2*Oversample-x]
    };

    // The pixels are read through a ULONG pointer and every extracted
    // channel is cast to LONG to work around a compiler sign extension bug:
    // the '& 0xff' was being dropped from the green channel expression,
    // which made it negative and therefore clamped to zero.
    // When the bug is fixed, the pointer should go back to LONG and the
    // LONG casts below should be removed.
    ULONG *pixel = (ULONG *)filter;

    // The sums are signed because the bicubic filter will often produce
    // negative intermediate color components.
    LONG alpha = 0;
    LONG red   = 0;
    LONG green = 0;
    LONG blue  = 0;

    for (INT tap = 0; tap < 4; tap++)
    {
        alpha += weight[tap] * (LONG)((pixel[tap] >> 24) & 0xff);
        red   += weight[tap] * (LONG)((pixel[tap] >> 16) & 0xff);
        green += weight[tap] * (LONG)((pixel[tap] >> 8) & 0xff);
        blue  += weight[tap] * (LONG)(pixel[tap] & 0xff);
    }

    // Drop the fixed point fraction.
    alpha >>= FIX16_SHIFT;
    red   >>= FIX16_SHIFT;
    green >>= FIX16_SHIFT;
    blue  >>= FIX16_SHIFT;

    // Alpha is limited to a byte.
    if (alpha < 0)
    {
        alpha = 0;
    }
    else if (alpha > 255)
    {
        alpha = 255;
    }

    // The values are premultiplied, so R, G and B are limited to alpha.
    if (red < 0)
    {
        red = 0;
    }
    else if (red > alpha)
    {
        red = alpha;
    }

    if (green < 0)
    {
        green = 0;
    }
    else if (green > alpha)
    {
        green = alpha;
    }

    if (blue < 0)
    {
        blue = 0;
    }
    else if (blue > alpha)
    {
        blue = alpha;
    }

    return ((alpha << 24) | (red << 16) | (green << 8) | blue);
}
} // end DpOutputBicubicImageSpanNS
// Generates the pixels for one destination scanline span.
//
// The span end points are mapped back to source space with DeviceToWorld,
// converted to fixed point with 16 fractional bits, and handed to
// OutputSpanIncremental together with the per-pixel source step.
//
// y    - destination scanline.
// xMin - first destination x of the span.
// xMax - one past the last destination x of the span.
//
// Returns Ok for an empty span, otherwise the result of
// OutputSpanIncremental.
GpStatus
DpOutputBicubicImageSpan::OutputSpan(
    INT y,
    INT xMin,
    INT xMax     // xMax is exclusive
    )
{
    // An empty span produces no output.
    if (xMin == xMax)
    {
        return Ok;
    }

    ASSERT(xMin < xMax);

    const INT spanWidth = xMax - xMin;

    // Span end points in device space...
    GpPointF spanStart;
    GpPointF spanEnd;

    spanStart.X = (REAL) xMin;
    spanEnd.X   = (REAL) xMax;
    spanStart.Y = spanEnd.Y = (REAL) y;

    // ...mapped back into source space.
    DeviceToWorld.Transform(&spanStart);
    DeviceToWorld.Transform(&spanEnd);

    // Fixed point source position of the first pixel.
    const FIX16 srcX = GpRound(spanStart.X*FIX16_ONE);
    const FIX16 srcY = GpRound(spanStart.Y*FIX16_ONE);

    // Fixed point source step per destination pixel.
    const FIX16 stepX = GpRound(((spanEnd.X - spanStart.X)*FIX16_ONE)/spanWidth);
    const FIX16 stepY = GpRound(((spanEnd.Y - spanStart.Y)*FIX16_ONE)/spanWidth);

    return OutputSpanIncremental(y, xMin, xMax, srcX, srcY, stepX, stepY);
}
// Generates one destination span by stepping through source space in
// fixed point and filtering a 4x4 source neighbourhood per pixel.
//
// y, xMin, xMax - destination scanline and span; xMax is exclusive.
// x0, y0        - source position of the first destination pixel, in
//                 fixed point.
// dx, dy        - source step per destination pixel, in fixed point.
//
// Source taps that fall outside the bitmap (after the wrap mode has been
// applied, when it is not WrapModeClamp) read as ClampColor.
//
// Always returns Ok.
GpStatus
DpOutputBicubicImageSpan::OutputSpanIncremental(
    INT y,
    INT xMin,
    INT xMax,
    FIX16 x0,
    FIX16 y0,
    FIX16 dx,
    FIX16 dy
    )
{
    using namespace DpOutputBicubicImageSpanNS;

    INT width = xMax - xMin;
    ARGB *buffer = Scan->NextBuffer(xMin, y, width);
    ARGB *srcPtr0 = static_cast<ARGB*> (BmpData.Scan0);

    // Stride in pixels. The divisor is cast to INT because sizeof yields an
    // unsigned value: dividing by it directly converts a negative stride
    // to unsigned before the division and loses its sign.
    INT stride = BmpData.Stride / static_cast<INT>(sizeof(ARGB));

    INT ix;
    INT iy;
    FIX16 fracx;         // kernel step for the fractional part of x0
    FIX16 fracy;         // kernel step for the fractional part of y0
    ARGB filter[4][4];   // 4x4 filter array, indexed [x][y].
    INT xstep, ystep;    // loop variables in x and y
    INT wx[4];
    INT wy[4];           // wrapped coordinates

    // For all pixels in the destination span...
    for(int i=0; i<width; i++)
    {
        // .. compute the position in source space.

        // floor
        ix = x0 >> FIX16_SHIFT;
        iy = y0 >> FIX16_SHIFT;

        // The kernel covers source columns ix-1..ix+2 and rows iy-1..iy+2.
        for(xstep=0;xstep<4;xstep++)
        {
            wx[xstep] = ix+xstep-1;
            wy[xstep] = iy+xstep-1;
        }

        // Apply the wrapmode to all possible kernel combinations, but only
        // when the neighbourhood comes close enough to a bitmap edge. The
        // unsigned compare also catches negative coordinates.
        if(BWrapMode != WrapModeClamp)
        {
            if( ( (UINT)(ix-1) >= (UINT)( max(((INT)BmpData.Width)-4,0))) ||
                ( (UINT)(iy-1) >= (UINT)( max(((INT)BmpData.Height)-4,0))) )
            {
                for(xstep=0;xstep<4;xstep++)
                {
                    ApplyWrapMode(BWrapMode, wx[xstep], wy[xstep], BmpData.Width, BmpData.Height);
                }
            }
        }

        // Reduce the fractional position to a kernel table step.
        fracx = (x0 & FIX16_MASK) >> (FIX16_SHIFT-KernShift);
        fracy = (y0 & FIX16_MASK) >> (FIX16_SHIFT-KernShift);

        // Build up the filter domain surrounding the current pixel.
        // Technically the loops below should go from -2 to 2 to correctly
        // handle the case of fracx or fracy == 0, but our convolution kernel
        // has zero at that point anyway, so we optimize it away.
        for(ystep=0;ystep<4;ystep++) for(xstep=0;xstep<4;xstep++)
        {
            // !!! PERF: check the y step outside
            //     of the x loop and use memset to fill the entire line.
            //     This should reduce the complexity of the inner loop
            //     comparison.

            // Make sure the pixel is within the bounds of the source before
            // accessing it.
            if( ((wx[xstep]) >=0) &&
                ((wy[ystep]) >=0) &&
                ((wx[xstep]) < (INT)(BmpData.Width)) &&
                ((wy[ystep]) < (INT)(BmpData.Height)) )
            {
                filter[xstep][ystep] =
                    *(srcPtr0+stride*(wy[ystep])+(wx[xstep]));
            }
            else
            {
                // This means that this source pixel is outside of the valid
                // bits in the source. (edge condition)
                filter[xstep][ystep] = (ARGB) ClampColor;
            }
        }

#ifdef _X86_
        if(OSInfo::HasMMX)
        {
            // Lookup the convolution kernel.
            short w[4];
            w[0] = kern14[Oversample+fracy];
            w[1] = kern14[fracy];
            w[2] = kern14[Oversample-fracy];
            w[3] = kern14[2*Oversample-fracy];

            // Filter the 4 vertical pixel columns
            // Reuse filter[0] to store the intermediate result
            for(xstep=0;xstep<4;xstep++)
            {
                filter[0][xstep] = Do1DBicubicMMX(filter[xstep], w);
            }

            // Lookup the convolution kernel.
            w[0] = kern14[Oversample+fracx];
            w[1] = kern14[fracx];
            w[2] = kern14[Oversample-fracx];
            w[3] = kern14[2*Oversample-fracx];

            // Filter horizontally.
            *buffer++ = Do1DBicubicMMX(filter[0], w);
        }
        else
#endif
        {
            // Filter the 4 vertical pixel columns
            // Reuse filter[0] to store the intermediate result
            for(xstep=0;xstep<4;xstep++)
            {
                filter[0][xstep] = Do1DBicubic(filter[xstep], fracy);
            }

            // Filter horizontally.
            *buffer++ = Do1DBicubic(filter[0], fracx);
        }

        // Update source position
        x0 += dx;
        y0 += dy;
    }

    // Clear the MMX state
#ifdef _X86_
    if(OSInfo::HasMMX)
    {
        _asm emms;
    }
#endif

    return Ok;
}