Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

481 lines
15 KiB

  1. /**************************************************************************\
  2. *
  3. * Copyright (c) 1999 Microsoft Corporation
  4. *
  5. * Module Name:
  6. *
  7. * bicubic.cpp
  8. *
  9. * Abstract:
  10. *
  11. * Bicubic Resampling code
  12. *
  13. * Created:
  14. *
  15. * 11/03/1999 ASecchia
  16. \**************************************************************************/
  17. #include "precomp.hpp"
  18. DpOutputBicubicImageSpan::DpOutputBicubicImageSpan(
  19. DpBitmap* bitmap,
  20. DpScanBuffer * scan,
  21. DpContext* context,
  22. DpImageAttributes imageAttributes,
  23. INT numPoints,
  24. const GpPointF *dstPoints,
  25. const GpRectF *srcRect
  26. )
  27. {
  28. Scan = scan;
  29. BWrapMode = imageAttributes.wrapMode;
  30. ClampColor = imageAttributes.clampColor;
  31. SrcRectClamp = imageAttributes.srcRectClamp;
  32. dBitmap = bitmap;
  33. ASSERT(dBitmap != NULL);
  34. ASSERT(dBitmap->IsValid());
  35. // on bad bitmap, we return with Valid = FALSE
  36. if (dBitmap == NULL ||
  37. !dBitmap->IsValid() )
  38. {
  39. dBitmap = NULL;
  40. return;
  41. } else {
  42. BmpData.Width = dBitmap->Width;
  43. BmpData.Height = dBitmap->Height;
  44. BmpData.PixelFormat = PIXFMT_32BPP_PARGB;
  45. BmpData.Stride = dBitmap->Delta;
  46. BmpData.Scan0 = dBitmap->Bits;
  47. }
  48. WorldToDevice = context->WorldToDevice;
  49. context->GetDeviceToWorld(&DeviceToWorld);
  50. if(srcRect)
  51. SrcRect = *srcRect;
  52. else
  53. {
  54. SrcRect.X = 0;
  55. SrcRect.Y = 0;
  56. SrcRect.Width = (REAL) dBitmap->Width;
  57. SrcRect.Height = (REAL) dBitmap->Height;
  58. }
  59. GpPointF points[4];
  60. GpMatrix xForm;
  61. BOOL existsTransform = TRUE;
  62. switch(numPoints)
  63. {
  64. case 0:
  65. points[0].X = 0;
  66. points[0].Y = 0;
  67. points[1].X = (REAL) SrcRect.Width;
  68. points[1].Y = 0;
  69. points[2].X = 0;
  70. points[2].Y = (REAL) SrcRect.Height;
  71. break;
  72. case 1:
  73. points[0] = dstPoints[0];
  74. points[1].X = (REAL) (points[0].X + SrcRect.Width);
  75. points[1].Y = points[0].Y;
  76. points[2].X = points[0].X;
  77. points[2].Y = (REAL) (points[0].Y + SrcRect.Height);
  78. break;
  79. case 3:
  80. case 4:
  81. GpMemcpy(&points[0], dstPoints, numPoints*sizeof(GpPointF));
  82. break;
  83. default:
  84. existsTransform = FALSE;
  85. }
  86. if(existsTransform)
  87. {
  88. xForm.InferAffineMatrix(points, SrcRect);
  89. }
  90. WorldToDevice = context->WorldToDevice;
  91. WorldToDevice.Prepend(xForm);
  92. if(WorldToDevice.IsInvertible())
  93. {
  94. DeviceToWorld = WorldToDevice;
  95. DeviceToWorld.Invert();
  96. }
  97. }
  98. namespace DpOutputBicubicImageSpanNS {
  99. const INT KernShift = 6;
  100. const INT Oversample = 1 << KernShift;
  101. const FIX16 kern[2*Oversample+1] =
  102. {
  103. 65536, 65496, 65379, 65186, 64920, 64583, 64177, 63705,
  104. 63168, 62569, 61911, 61195, 60424, 59600, 58725, 57802,
  105. 56832, 55818, 54763, 53668, 52536, 51369, 50169, 48939,
  106. 47680, 46395, 45087, 43757, 42408, 41042, 39661, 38268,
  107. 36864, 35452, 34035, 32614, 31192, 29771, 28353, 26941,
  108. 25536, 24141, 22759, 21391, 20040, 18708, 17397, 16110,
  109. 14848, 13614, 12411, 11240, 10104, 9005, 7945, 6927,
  110. 5952, 5023, 4143, 3313, 2536, 1814, 1149, 544,
  111. 0, -496, -961, -1395, -1800, -2176, -2523, -2843,
  112. -3136, -3403, -3645, -3862, -4056, -4227, -4375, -4502,
  113. -4608, -4694, -4761, -4809, -4840, -4854, -4851, -4833,
  114. -4800, -4753, -4693, -4620, -4536, -4441, -4335, -4220,
  115. -4096, -3964, -3825, -3679, -3528, -3372, -3211, -3047,
  116. -2880, -2711, -2541, -2370, -2200, -2031, -1863, -1698,
  117. -1536, -1378, -1225, -1077, -936, -802, -675, -557,
  118. -448, -349, -261, -184, -120, -69, -31, -8,
  119. 0
  120. };
  #ifdef _X86_

  // Same kernel as kern[], stored as 16-bit words with 14 fractional bits
  // (16384 == 1.0) so that four weights fit in one MMX register.
  const short kern14[2*Oversample+1] =
  {
      // distance 0 .. 1: positive lobe
      16384, 16374, 16345, 16297, 16230, 16146, 16044, 15926,   //   0..  7
      15792, 15642, 15478, 15299, 15106, 14900, 14681, 14451,   //   8.. 15
      14208, 13955, 13691, 13417, 13134, 12842, 12542, 12235,   //  16.. 23
      11920, 11599, 11272, 10939, 10602, 10261,  9915,  9567,   //  24.. 31
       9216,  8863,  8509,  8154,  7798,  7443,  7088,  6735,   //  32.. 39
       6384,  6035,  5690,  5348,  5010,  4677,  4349,  4028,   //  40.. 47
       3712,  3404,  3103,  2810,  2526,  2251,  1986,  1732,   //  48.. 55
       1488,  1256,  1036,   828,   634,   454,   287,   136,   //  56.. 63

      // distance 1 .. 2: negative lobe
          0,  -124,  -240,  -349,  -450,  -544,  -631,  -711,   //  64.. 71
       -784,  -851,  -911,  -966, -1014, -1057, -1094, -1126,   //  72.. 79
      -1152, -1174, -1190, -1202, -1210, -1214, -1213, -1208,   //  80.. 87
      -1200, -1188, -1173, -1155, -1134, -1110, -1084, -1055,   //  88.. 95
      -1024,  -991,  -956,  -920,  -882,  -843,  -803,  -762,   //  96..103
       -720,  -678,  -635,  -593,  -550,  -508,  -466,  -425,   // 104..111
       -384,  -345,  -306,  -269,  -234,  -201,  -169,  -139,   // 112..119
       -112,   -87,   -65,   -46,   -30,   -17,    -8,    -2,   // 120..127

      // distance 2
          0                                                     // 128
  };

  // Warning 4799 is disabled because the routine below leaves the emms to
  // its caller.
  #pragma warning(disable : 4799)

  // MMX version of the 1D bicubic filter.
  //
  //   filter - four premultiplied ARGB pixels along the filtered axis.
  //   w      - the four matching kernel weights taken from kern14[].
  //
  // Returns the weighted sum with every channel saturated to [0, 255] and
  // R, G, B additionally limited to the resulting alpha.
  //
  // The MMX state is NOT cleared here; the caller must execute emms.
  ARGB FASTCALL Do1DBicubicMMX(ARGB filter[4], short w[4])
  {
      // 0.5 expressed with 3 fractional bits, replicated into all four words.
      static ULONGLONG HalfFix3 = 0x0004000400040004;
      ARGB blended;

      // really should do this function without any preamble.
      _asm
      {
          mov         eax, filter
          mov         ebx, w

          pxor        mm0, mm0            ; mm0 = 0, used for byte unpacking
          movq        mm1, [ebx]          ; mm1 = w3 w2 w1 w0

          movd        mm4, [eax]          ; filter[0]
          movd        mm5, [eax+4]        ; filter[1]
          movd        mm6, [eax+8]        ; filter[2]
          movd        mm7, [eax+0xc]      ; filter[3]

          punpcklbw   mm4, mm0            ; widen each pixel to 0a0r0g0b
          punpcklbw   mm5, mm0
          punpcklbw   mm6, mm0
          punpcklbw   mm7, mm0

          psllw       mm4, 5              ; shift by 2 to make up for the 14 bit
          psllw       mm5, 5              ; kernel under pmulhw, plus 3 bits of
          psllw       mm6, 5              ; fraction kept for the additions
          psllw       mm7, 5

          movq        mm2, mm1
          punpcklwd   mm2, mm2            ; w1 w1 w0 w0
          movq        mm3, mm2
          punpckldq   mm2, mm2            ; w0 in all four words
          punpckhdq   mm3, mm3            ; w1 in all four words
          pmulhw      mm4, mm2            ; filter[0] * w0
          pmulhw      mm5, mm3            ; filter[1] * w1

          punpckhwd   mm1, mm1            ; w3 w3 w2 w2
          movq        mm2, mm1
          punpckldq   mm1, mm1            ; w2 in all four words
          punpckhdq   mm2, mm2            ; w3 in all four words
          pmulhw      mm6, mm1            ; filter[2] * w2
          pmulhw      mm7, mm2            ; filter[3] * w3

          paddsw      mm4, mm5            ; sum the four weighted pixels
          paddsw      mm6, mm7
          paddsw      mm4, mm6

          movq        mm3, HalfFix3
          paddsw      mm4, mm3            ; add one half for rounding
          psraw       mm4, 3              ; drop the 3 fractional bits
          packuswb    mm4, mm4            ; saturate each channel to [0, 0xff]

          ; premultiplied alpha: limit r, g, b to the range 0..a
          movq        mm0, mm4
          punpcklbw   mm0, mm0            ; aarrggbb
          punpckhwd   mm0, mm0            ; aaaarrrr
          psrlq       mm0, 32             ; 0000aaaa
          mov         eax, 0xffffffff
          movd        mm1, eax
          psubb       mm1, mm0            ; 255-a in every byte
          paddusb     mm4, mm1            ; push up, saturating at 255
          psubusb     mm4, mm1            ; pull back down: min(channel, a)

          movd        blended, mm4
          // emms is issued by the caller.
      }

      return blended;
  }

  #endif
  202. inline ARGB Do1DBicubic(ARGB filter[4], const FIX16 x)
  203. {
  204. // Lookup the convolution kernel.
  205. FIX16 w0 = kern[Oversample+x];
  206. FIX16 w1 = kern[x];
  207. FIX16 w2 = kern[Oversample-x];
  208. FIX16 w3 = kern[2*Oversample-x];
  209. // Cast to LONG so that we preserve the sign when we start
  210. // shifting values around - the bicubic filter will often
  211. // have negative intermediate color components.
  212. ULONG *p = (ULONG *)filter;
  213. LONG a, r, g, b;
  214. // Casting of p to ULONG and then having the LONG casts in the expressions
  215. // below is to work around a compiler sign extension bug.
  216. // In this particular case, the bug was dropping the '& 0xff' from the
  217. // green component expression causing it to become negative
  218. // which gets clamped to zero.
  219. // When the bug is fixed, p should be reverted to LONG and casted to LONG
  220. // and the LONG casts should be removed from the expressions below.
  221. // Alpha component
  222. a = (w0 * (LONG)((p[0] >> 24) & 0xff) +
  223. w1 * (LONG)((p[1] >> 24) & 0xff) +
  224. w2 * (LONG)((p[2] >> 24) & 0xff) +
  225. w3 * (LONG)((p[3] >> 24) & 0xff)) >> FIX16_SHIFT;
  226. a = (a < 0) ? 0 : (a > 255) ? 255 : a;
  227. // We have premultiplied alpha values - clamp R, G, B to alpha
  228. // Red component
  229. r = (w0 * (LONG)((p[0] >> 16) & 0xff) +
  230. w1 * (LONG)((p[1] >> 16) & 0xff) +
  231. w2 * (LONG)((p[2] >> 16) & 0xff) +
  232. w3 * (LONG)((p[3] >> 16) & 0xff)) >> FIX16_SHIFT;
  233. r = (r < 0) ? 0 : (r > a) ? a : r;
  234. // Green component
  235. g = (w0 * (LONG)((p[0] >> 8) & 0xff) +
  236. w1 * (LONG)((p[1] >> 8) & 0xff) +
  237. w2 * (LONG)((p[2] >> 8) & 0xff) +
  238. w3 * (LONG)((p[3] >> 8) & 0xff)) >> FIX16_SHIFT;
  239. g = (g < 0) ? 0 : (g > a) ? a : g;
  240. // Blue component
  241. b = (w0 * (LONG)(p[0] & 0xff) +
  242. w1 * (LONG)(p[1] & 0xff) +
  243. w2 * (LONG)(p[2] & 0xff) +
  244. w3 * (LONG)(p[3] & 0xff)) >> FIX16_SHIFT;
  245. b = (b < 0) ? 0 : (b > a) ? a : b;
  246. return ((a << 24) | (r << 16) | (g << 8) | b);
  247. }
  248. } // end DpOutputBicubicImageSpanNS
  249. GpStatus
  250. DpOutputBicubicImageSpan::OutputSpan(
  251. INT y,
  252. INT xMin,
  253. INT xMax // xMax is exclusive
  254. )
  255. {
  256. // Nothing to do.
  257. if(xMin==xMax)
  258. {
  259. return Ok;
  260. }
  261. ASSERT(xMin < xMax);
  262. GpPointF p1, p2;
  263. p1.X = (REAL) xMin;
  264. p1.Y = p2.Y = (REAL) y;
  265. p2.X = (REAL) xMax;
  266. DeviceToWorld.Transform(&p1);
  267. DeviceToWorld.Transform(&p2);
  268. // Convert to Fixed point notation - 16 bits of fractional precision.
  269. FIX16 dx, dy, x0, y0;
  270. x0 = GpRound(p1.X*FIX16_ONE);
  271. y0 = GpRound(p1.Y*FIX16_ONE);
  272. ASSERT(xMin < xMax);
  273. dx = GpRound(((p2.X - p1.X)*FIX16_ONE)/(xMax-xMin));
  274. dy = GpRound(((p2.Y - p1.Y)*FIX16_ONE)/(xMax-xMin));
  275. return OutputSpanIncremental(y, xMin, xMax, x0, y0, dx, dy);
  276. }
  277. GpStatus
  278. DpOutputBicubicImageSpan::OutputSpanIncremental(
  279. INT y,
  280. INT xMin,
  281. INT xMax,
  282. FIX16 x0,
  283. FIX16 y0,
  284. FIX16 dx,
  285. FIX16 dy
  286. )
  287. {
  288. using namespace DpOutputBicubicImageSpanNS;
  289. INT width = xMax - xMin;
  290. ARGB *buffer = Scan->NextBuffer(xMin, y, width);
  291. ARGB *srcPtr0 = static_cast<ARGB*> (BmpData.Scan0);
  292. INT stride = BmpData.Stride/sizeof(ARGB);
  293. INT ix;
  294. INT iy;
  295. FIX16 fracx; // hold the fractional increment for ix
  296. FIX16 fracy; // hold the fractional increment for iy
  297. ARGB filter[4][4]; // 4x4 filter array.
  298. INT xstep, ystep; // loop variables in x and y
  299. INT wx[4];
  300. INT wy[4]; // wrapped coordinates
  301. // For all pixels in the destination span...
  302. for(int i=0; i<width; i++)
  303. {
  304. // .. compute the position in source space.
  305. // floor
  306. ix = x0 >> FIX16_SHIFT;
  307. iy = y0 >> FIX16_SHIFT;
  308. // Apply the wrapmode to all possible kernel combinations.
  309. for(xstep=0;xstep<4;xstep++) {
  310. wx[xstep] = ix+xstep-1;
  311. wy[xstep] = iy+xstep-1;
  312. }
  313. if(BWrapMode != WrapModeClamp) {
  314. if( ( (UINT)(ix-1) >= (UINT)( max(((INT)BmpData.Width)-4,0))) ||
  315. ( (UINT)(iy-1) >= (UINT)( max(((INT)BmpData.Height)-4,0))) )
  316. {
  317. for(xstep=0;xstep<4;xstep++) {
  318. ApplyWrapMode(BWrapMode, wx[xstep], wy[xstep], BmpData.Width, BmpData.Height);
  319. }
  320. }
  321. }
  322. // Check to see if we're outside of the valid drawing range specified
  323. // in the DpBitmap.
  324. fracx = (x0 & FIX16_MASK) >> (FIX16_SHIFT-KernShift);
  325. fracy = (y0 & FIX16_MASK) >> (FIX16_SHIFT-KernShift);
  326. // Build up the filter domain surrounding the current pixel.
  327. // Technically the loops below should go from -2 to 2 to correctly
  328. // handle the case of fracx or fracy == 0, but our convolution kernel
  329. // has zero at that point anyway, so we optimize it away.
  330. for(ystep=0;ystep<4;ystep++) for(xstep=0;xstep<4;xstep++)
  331. {
  332. // !!! PERF: check the y step outside
  333. // of the x loop and use memset to fill the entire line.
  334. // This should reduce the complexity of the inner loop
  335. // comparison.
  336. // Make sure the pixel is within the bounds of the source before
  337. // accessing it.
  338. if( ((wx[xstep]) >=0) &&
  339. ((wy[ystep]) >=0) &&
  340. ((wx[xstep]) < (INT)(BmpData.Width)) &&
  341. ((wy[ystep]) < (INT)(BmpData.Height)) )
  342. {
  343. filter[xstep][ystep] =
  344. *(srcPtr0+stride*(wy[ystep])+(wx[xstep]));
  345. } else {
  346. // This means that this source pixel is outside of the valid
  347. // bits in the source. (edge condition)
  348. filter[xstep][ystep] = (ARGB) ClampColor;
  349. }
  350. }
  351. #ifdef _X86_
  352. if(OSInfo::HasMMX)
  353. {
  354. // Lookup the convolution kernel.
  355. short w[4];
  356. w[0] = kern14[Oversample+fracy];
  357. w[1] = kern14[fracy];
  358. w[2] = kern14[Oversample-fracy];
  359. w[3] = kern14[2*Oversample-fracy];
  360. // Filter the 4 vertical pixel columns
  361. // Reuse filter[0] to store the intermediate result
  362. for(xstep=0;xstep<4;xstep++)
  363. {
  364. filter[0][xstep] = Do1DBicubicMMX(filter[xstep], w);
  365. }
  366. // Lookup the convolution kernel.
  367. w[0] = kern14[Oversample+fracx];
  368. w[1] = kern14[fracx];
  369. w[2] = kern14[Oversample-fracx];
  370. w[3] = kern14[2*Oversample-fracx];
  371. // Filter horizontally.
  372. *buffer++ = Do1DBicubicMMX(filter[0], w);
  373. // Update source position
  374. x0 += dx;
  375. y0 += dy;
  376. }
  377. else
  378. #endif
  379. {
  380. // Filter the 4 vertical pixel columns
  381. // Reuse filter[0] to store the intermediate result
  382. for(xstep=0;xstep<4;xstep++)
  383. {
  384. filter[0][xstep] = Do1DBicubic(filter[xstep], fracy);
  385. }
  386. // Filter horizontally.
  387. *buffer++ = Do1DBicubic(filter[0], fracx);
  388. // Update source position
  389. x0 += dx;
  390. y0 += dy;
  391. }
  392. }
  393. // Clear the MMX state
  394. #ifdef _X86_
  395. if(OSInfo::HasMMX)
  396. {
  397. _asm emms;
  398. }
  399. #endif
  400. return Ok;
  401. }