Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1233 lines
42 KiB

  1. /******************************Module*Header*******************************\
  2. *
  3. * *******************
  4. * * GDI SAMPLE CODE *
  5. * *******************
  6. *
  7. * Module Name: bitblt.c
  8. *
  9. * Contains the high-level DrvBitBlt and DrvCopyBits functions. The low-
  10. * level stuff lives in the 'blt??.c' files.
  11. *
  12. * Note: The way we've implemented device-bitmaps has changed in NT5, with
  13. * the advent of 'EngModifySurface' and 'DrvDeriveSurface'. Now,
  14. * off-screen bitmaps will always have an iType of STYPE_BITMAP
  15. * (meaning that GDI can draw directly on the bits if it needs to).
  16. * Additionally, former off-screen bitmaps that have been converted
  17. * by us to system-memory DIBs will still have an iType of STYPE_BITMAP.
  18. *
  19. * Copyright (c) 1992-1998 Microsoft Corporation
  20. \**************************************************************************/
  21. #include "precomp.h"
  22. /******************************Public*Routine******************************\
  23. * VOID vXferNativeSrccopy
  24. *
  25. * Does a SRCCOPY transfer of a bitmap to the screen using the frame
  26. * buffer, because with USWC write-combining it's significantly faster
  27. * than using the data transfer register.
  28. *
  29. \**************************************************************************/
  30. VOID vXferNativeSrccopy( // Type FNXFER
  31. PDEV* ppdev,
  32. LONG c, // Count of rectangles, can't be zero
  33. RECTL* prcl, // List of destination rectangles, in relative
  34. // coordinates
  35. ULONG rop4, // Not used
  36. SURFOBJ* psoSrc, // Source surface
  37. POINTL* pptlSrc, // Original unclipped source point
  38. RECTL* prclDst, // Original unclipped destination rectangle
  39. XLATEOBJ* pxlo) // Not used
  40. {
  41. LONG xOffset;
  42. LONG yOffset;
  43. LONG dx;
  44. LONG dy;
  45. RECTL rclDst;
  46. POINTL ptlSrc;
  47. ASSERTDD((pxlo == NULL) || (pxlo->flXlate & XO_TRIVIAL),
  48. "Can handle trivial xlate only");
  49. ASSERTDD(psoSrc->iBitmapFormat == ppdev->iBitmapFormat,
  50. "Source must be same colour depth as screen");
  51. ASSERTDD(c > 0, "Can't handle zero rectangles");
  52. ASSERTDD(rop4 == 0xcccc, "Must be a SRCCOPY rop");
  53. xOffset = ppdev->xOffset;
  54. yOffset = ppdev->yOffset;
  55. dx = pptlSrc->x - prclDst->left;
  56. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  57. while (TRUE)
  58. {
  59. ptlSrc.x = prcl->left + dx;
  60. ptlSrc.y = prcl->top + dy;
  61. // 'vPutBits' takes only absolute coordinates, so add in the
  62. // off-screen bitmap offset here:
  63. rclDst.left = prcl->left + xOffset;
  64. rclDst.right = prcl->right + xOffset;
  65. rclDst.top = prcl->top + yOffset;
  66. rclDst.bottom = prcl->bottom + yOffset;
  67. vPutBits(ppdev, psoSrc, &rclDst, &ptlSrc);
  68. if (--c == 0)
  69. return;
  70. prcl++;
  71. }
  72. }
  73. /******************************Public*Routine******************************\
  74. * VOID vReadNativeSrccopy
  75. *
  76. * Does a SRCCOPY read from the screen to a system-memory bitmap. The only
  77. * reason we do this here instead of punting to GDI is to ensure that we
  78. * do dword reads that are aligned to the video-memory source and not the
  79. * system-memory destination.
  80. *
  81. \**************************************************************************/
  82. VOID vReadNativeSrccopy( // Type FNXFER
  83. PDEV* ppdev,
  84. LONG c, // Count of rectangles, can't be zero
  85. RECTL* prcl, // List of destination rectangles, in relative
  86. // coordinates
  87. ULONG rop4, // Not used
  88. SURFOBJ* psoDst, // Destination surface
  89. POINTL* pptlSrc, // Original unclipped source point
  90. RECTL* prclDst, // Original unclipped destination rectangle
  91. XLATEOBJ* pxlo) // Not used
  92. {
  93. LONG xOffset;
  94. LONG yOffset;
  95. LONG dx;
  96. LONG dy;
  97. RECTL rclDst;
  98. POINTL ptlSrc;
  99. ASSERTDD((pxlo == NULL) || (pxlo->flXlate & XO_TRIVIAL),
  100. "Can handle trivial xlate only");
  101. ASSERTDD(psoDst->iBitmapFormat == ppdev->iBitmapFormat,
  102. "Source must be same colour depth as screen");
  103. ASSERTDD(c > 0, "Can't handle zero rectangles");
  104. ASSERTDD(rop4 == 0xcccc, "Must be a SRCCOPY rop");
  105. xOffset = ppdev->xOffset;
  106. yOffset = ppdev->yOffset;
  107. dx = pptlSrc->x - prclDst->left;
  108. dy = pptlSrc->y - prclDst->top; // Add to destination to get source
  109. while (TRUE)
  110. {
  111. // 'vGetBits' takes only absolute coordinates, so add in the
  112. // off-screen bitmap offset here:
  113. ptlSrc.x = prcl->left + dx + xOffset;
  114. ptlSrc.y = prcl->top + dy + yOffset;
  115. vGetBits(ppdev, psoDst, prcl, &ptlSrc);
  116. if (--c == 0)
  117. return;
  118. prcl++;
  119. }
  120. }
  121. /******************************Public*Routine******************************\
  122. * BOOL bPuntBlt
  123. *
  124. * Has GDI do any drawing operations that we don't specifically handle
  125. * in the driver.
  126. *
  127. \**************************************************************************/
  128. BOOL bPuntBlt(
  129. SURFOBJ* psoDst,
  130. SURFOBJ* psoSrc,
  131. SURFOBJ* psoMsk,
  132. CLIPOBJ* pco,
  133. XLATEOBJ* pxlo,
  134. RECTL* prclDst,
  135. POINTL* pptlSrc,
  136. POINTL* pptlMsk,
  137. BRUSHOBJ* pbo,
  138. POINTL* pptlBrush,
  139. ROP4 rop4)
  140. {
  141. PDEV* ppdev;
  142. if (psoDst->dhsurf != NULL)
  143. ppdev = (PDEV*) psoDst->dhpdev;
  144. else
  145. ppdev = (PDEV*) psoSrc->dhpdev;
  146. #if DBG
  147. {
  148. //////////////////////////////////////////////////////////////////////
  149. // Diagnostics
  150. //
  151. // Since calling the engine to do any drawing can be rather painful,
  152. // particularly when the source is an off-screen DFB (since GDI will
  153. // have to allocate a DIB and call us to make a temporary copy before
  154. // it can even start drawing), we'll try to avoid it as much as
  155. // possible.
  156. //
  157. // Here we simply spew out information describing the blt whenever
  158. // this routine gets called (checked builds only, of course):
  159. ULONG ulClip;
  160. PDEV* ppdev;
  161. if (psoDst->dhsurf != NULL)
  162. ppdev = (PDEV*) psoDst->dhpdev;
  163. else
  164. ppdev = (PDEV*) psoSrc->dhpdev;
  165. ulClip = (pco == NULL) ? DC_TRIVIAL : pco->iDComplexity;
  166. DISPDBG((2, ">> Punt << Dst format: %li Dst type: %li Clip: %li Rop: %lx",
  167. psoDst->iBitmapFormat, psoDst->iType, ulClip, rop4));
  168. if (psoSrc != NULL)
  169. {
  170. DISPDBG((2, " << Src format: %li Src type: %li",
  171. psoSrc->iBitmapFormat, psoSrc->iType));
  172. if (psoSrc->iBitmapFormat == BMF_1BPP)
  173. {
  174. DISPDBG((2, " << Foreground: %lx Background: %lx",
  175. pxlo->pulXlate[1], pxlo->pulXlate[0]));
  176. }
  177. }
  178. if ((pxlo != NULL) && !(pxlo->flXlate & XO_TRIVIAL) && (psoSrc != NULL))
  179. {
  180. if (((psoSrc->dhsurf == NULL) &&
  181. (psoSrc->iBitmapFormat != ppdev->iBitmapFormat)) ||
  182. ((psoDst->dhsurf == NULL) &&
  183. (psoDst->iBitmapFormat != ppdev->iBitmapFormat)))
  184. {
  185. // Don't bother printing the 'xlate' message when the source
  186. // is a different bitmap format from the destination -- in
  187. // those cases we know there always has to be a translate.
  188. }
  189. else
  190. {
  191. DISPDBG((2, " << With xlate"));
  192. }
  193. }
  194. // If the rop4 requires a pattern, and it's a non-solid brush...
  195. if (((((rop4 >> 4) ^ (rop4)) & 0x0f0f) != 0) &&
  196. (pbo->iSolidColor == -1))
  197. {
  198. if (pbo->pvRbrush == NULL)
  199. DISPDBG((2, " << With brush -- Not created"));
  200. else
  201. DISPDBG((2, " << With brush -- Created Ok"));
  202. }
  203. }
  204. #endif
  205. if (DIRECT_ACCESS(ppdev))
  206. {
  207. //////////////////////////////////////////////////////////////////////
  208. // Banked Framebuffer bPuntBlt
  209. //
  210. // This section of code handles a PuntBlt when GDI can directly draw
  211. // on the framebuffer, but the drawing has to be done in banks:
  212. BANK bnk;
  213. BOOL b;
  214. HSURF hsurfTmp;
  215. SURFOBJ* psoTmp;
  216. SIZEL sizl;
  217. POINTL ptlSrc;
  218. RECTL rclTmp;
  219. RECTL rclDst;
  220. DSURF* pdsurfDst;
  221. DSURF* pdsurfSrc;
  222. // We copy the original destination rectangle, and use that in every
  223. // GDI call-back instead of the original because sometimes GDI is
  224. // sneaky and points 'prclDst' to '&pco->rclBounds'. Because we
  225. // modify 'rclBounds', that would affect 'prclDst', which we don't
  226. // want to happen:
  227. rclDst = *prclDst;
  228. pdsurfDst = (DSURF*) psoDst->dhsurf;
  229. pdsurfSrc = (psoSrc == NULL) ? NULL : (DSURF*) psoSrc->dhsurf;
  230. if ((pdsurfSrc == NULL) || (pdsurfSrc->dt & DT_DIB))
  231. {
  232. // Do a memory-to-screen blt:
  233. vBankStart(ppdev, &rclDst, pco, &bnk);
  234. b = TRUE;
  235. do {
  236. b &= EngBitBlt(bnk.pso, psoSrc, psoMsk, bnk.pco, pxlo,
  237. &rclDst, pptlSrc, pptlMsk, pbo, pptlBrush,
  238. rop4);
  239. } while (bBankEnum(&bnk));
  240. }
  241. else
  242. {
  243. b = FALSE; // Assume failure
  244. // The screen is the source (it may be the destination too...)
  245. ptlSrc.x = pptlSrc->x + ppdev->xOffset;
  246. ptlSrc.y = pptlSrc->y + ppdev->yOffset;
  247. if ((pco != NULL) && (pco->iDComplexity != DC_TRIVIAL))
  248. {
  249. // We have to intersect the destination rectangle with
  250. // the clip bounds if there is one (consider the case
  251. // where the app asked to blt a really, really big
  252. // rectangle from the screen -- prclDst would be really,
  253. // really big but pco->rclBounds would be the actual
  254. // area of interest):
  255. rclDst.left = max(rclDst.left, pco->rclBounds.left);
  256. rclDst.top = max(rclDst.top, pco->rclBounds.top);
  257. rclDst.right = min(rclDst.right, pco->rclBounds.right);
  258. rclDst.bottom = min(rclDst.bottom, pco->rclBounds.bottom);
  259. // Correspondingly, we have to offset the source point:
  260. ptlSrc.x += (rclDst.left - prclDst->left);
  261. ptlSrc.y += (rclDst.top - prclDst->top);
  262. }
  263. // We're now either going to do a screen-to-screen or screen-to-DIB
  264. // blt. In either case, we're going to create a temporary copy of
  265. // the source. (Why do we do this when GDI could do it for us?
  266. // GDI would create a temporary copy of the DIB for every bank
  267. // call-back!)
  268. sizl.cx = rclDst.right - rclDst.left;
  269. sizl.cy = rclDst.bottom - rclDst.top;
  270. // Don't forget to convert from relative to absolute coordinates
  271. // on the source! (vBankStart takes care of that for the
  272. // destination.)
  273. rclTmp.right = sizl.cx;
  274. rclTmp.bottom = sizl.cy;
  275. rclTmp.left = 0;
  276. rclTmp.top = 0;
  277. // GDI does guarantee us that the blt extents have already been
  278. // clipped to the surface boundaries (we don't have to worry
  279. // here about trying to read where there isn't video memory).
  280. // Let's just assert to make sure:
  281. ASSERTDD((ptlSrc.x >= 0) &&
  282. (ptlSrc.y >= 0) &&
  283. (ptlSrc.x + sizl.cx <= ppdev->cxMemory) &&
  284. (ptlSrc.y + sizl.cy <= ppdev->cyMemory),
  285. "Source rectangle out of bounds!");
  286. hsurfTmp = (HSURF) EngCreateBitmap(sizl,
  287. 0, // Let GDI choose ulWidth
  288. ppdev->iBitmapFormat,
  289. 0, // Don't need any options
  290. NULL);// Let GDI allocate
  291. if (hsurfTmp != 0)
  292. {
  293. psoTmp = EngLockSurface(hsurfTmp);
  294. if (psoTmp != NULL)
  295. {
  296. vGetBits(ppdev, psoTmp, &rclTmp, &ptlSrc);
  297. if ((pdsurfDst == NULL) || (pdsurfDst->dt & DT_DIB))
  298. {
  299. // It was a Screen-to-DIB blt; now it's a DIB-to-DIB
  300. // blt. Note that the source point is (0, 0) in our
  301. // temporary surface:
  302. b = EngBitBlt(psoDst, psoTmp, psoMsk, pco, pxlo,
  303. &rclDst, (POINTL*) &rclTmp, pptlMsk,
  304. pbo, pptlBrush, rop4);
  305. }
  306. else
  307. {
  308. // It was a Screen-to-Screen blt; now it's a DIB-to-
  309. // screen blt. Note that the source point is (0, 0)
  310. // in our temporary surface:
  311. vBankStart(ppdev, &rclDst, pco, &bnk);
  312. b = TRUE;
  313. do {
  314. b &= EngBitBlt(bnk.pso, psoTmp, psoMsk, bnk.pco,
  315. pxlo, &rclDst, (POINTL*) &rclTmp,
  316. pptlMsk, pbo, pptlBrush, rop4);
  317. } while (bBankEnum(&bnk));
  318. }
  319. EngUnlockSurface(psoTmp);
  320. }
  321. EngDeleteSurface(hsurfTmp);
  322. }
  323. }
  324. return(b);
  325. }
  326. #if !defined(_X86_)
  327. else
  328. {
  329. //////////////////////////////////////////////////////////////////////
  330. // Really Slow bPuntBlt
  331. //
  332. // Here we handle a PuntBlt when GDI can't draw directly on the
  333. // framebuffer (as on the Alpha, which can't do it because of its
  334. // 32 bit bus). If you thought the banked version was slow, just
  335. // look at this one. Guaranteed, there will be at least one bitmap
  336. // allocation and extra copy involved; there could be two if it's a
  337. // screen-to-screen operation.
  338. POINTL ptlSrc;
  339. RECTL rclDst;
  340. SIZEL sizl;
  341. BOOL bSrcIsScreen;
  342. HSURF hsurfSrc;
  343. RECTL rclTmp;
  344. BOOL b;
  345. LONG lDelta;
  346. BYTE* pjBits;
  347. BYTE* pjScan0;
  348. HSURF hsurfDst;
  349. RECTL rclScreen;
  350. b = FALSE; // For error cases, assume we'll fail
  351. rclDst = *prclDst;
  352. if (pptlSrc != NULL)
  353. ptlSrc = *pptlSrc;
  354. if ((pco != NULL) && (pco->iDComplexity != DC_TRIVIAL))
  355. {
  356. // We have to intersect the destination rectangle with
  357. // the clip bounds if there is one (consider the case
  358. // where the app asked to blt a really, really big
  359. // rectangle from the screen -- prclDst would be really,
  360. // really big but pco->rclBounds would be the actual
  361. // area of interest):
  362. rclDst.left = max(rclDst.left, pco->rclBounds.left);
  363. rclDst.top = max(rclDst.top, pco->rclBounds.top);
  364. rclDst.right = min(rclDst.right, pco->rclBounds.right);
  365. rclDst.bottom = min(rclDst.bottom, pco->rclBounds.bottom);
  366. ptlSrc.x += (rclDst.left - prclDst->left);
  367. ptlSrc.y += (rclDst.top - prclDst->top);
  368. }
  369. sizl.cx = rclDst.right - rclDst.left;
  370. sizl.cy = rclDst.bottom - rclDst.top;
  371. // We only need to make a copy from the screen if the source is
  372. // the screen, and the source is involved in the rop. Note that
  373. // we have to check the rop before dereferencing 'psoSrc'
  374. // (because 'psoSrc' may be NULL if the source isn't involved):
  375. bSrcIsScreen = (((((rop4 >> 2) ^ (rop4)) & 0x3333) != 0) &&
  376. (psoSrc->dhsurf != NULL));
  377. if (bSrcIsScreen)
  378. {
  379. // We need to create a copy of the source rectangle:
  380. hsurfSrc = (HSURF) EngCreateBitmap(sizl, 0, ppdev->iBitmapFormat,
  381. 0, NULL);
  382. if (hsurfSrc == 0)
  383. goto Error_0;
  384. psoSrc = EngLockSurface(hsurfSrc);
  385. if (psoSrc == NULL)
  386. goto Error_1;
  387. rclTmp.left = 0;
  388. rclTmp.top = 0;
  389. rclTmp.right = sizl.cx;
  390. rclTmp.bottom = sizl.cy;
  391. // vGetBits takes absolute coordinates for the source point:
  392. ptlSrc.x += ppdev->xOffset;
  393. ptlSrc.y += ppdev->yOffset;
  394. vGetBits(ppdev, psoSrc, &rclTmp, &ptlSrc);
  395. // The source will now come from (0, 0) of our temporary source
  396. // surface:
  397. ptlSrc.x = 0;
  398. ptlSrc.y = 0;
  399. }
  400. if (psoDst->dhsurf == NULL)
  401. {
  402. b = EngBitBlt(psoDst, psoSrc, psoMsk, pco, pxlo, &rclDst, &ptlSrc,
  403. pptlMsk, pbo, pptlBrush, rop4);
  404. }
  405. else
  406. {
  407. // We need to create a temporary work buffer. We have to do
  408. // some fudging with the offsets so that the upper-left corner
  409. // of the (relative coordinates) clip object bounds passed to
  410. // GDI will be transformed to the upper-left corner of our
  411. // temporary bitmap.
  412. // The alignment doesn't have to be as tight as this at 16bpp
  413. // and 32bpp, but it won't hurt:
  414. lDelta = CONVERT_TO_BYTES((((rclDst.right + 3) & ~3L) -
  415. (rclDst.left & ~3L)),
  416. ppdev);
  417. // We're actually only allocating a bitmap that is 'sizl.cx' x
  418. // 'sizl.cy' in size:
  419. pjBits = EngAllocMem(0, lDelta * sizl.cy, ALLOC_TAG);
  420. if (pjBits == NULL)
  421. goto Error_2;
  422. // We now adjust the surface's 'pvScan0' so that when GDI thinks
  423. // it's writing to pixel (rclDst.top, rclDst.left), it will
  424. // actually be writing to the upper-left pixel of our temporary
  425. // bitmap:
  426. pjScan0 = pjBits - (rclDst.top * lDelta)
  427. - CONVERT_TO_BYTES((rclDst.left & ~3L), ppdev);
  428. ASSERTDD((((ULONG_PTR) pjScan0) & 3) == 0,
  429. "pvScan0 must be dword aligned!");
  430. // The checked build of GDI sometimes checks on blts that
  431. // prclDst->right <= pso->sizl.cx, so we lie to it about
  432. // the size of our bitmap:
  433. sizl.cx = rclDst.right;
  434. sizl.cy = rclDst.bottom;
  435. hsurfDst = (HSURF) EngCreateBitmap(
  436. sizl, // Bitmap covers rectangle
  437. lDelta, // Use this delta
  438. ppdev->iBitmapFormat, // Same colour depth
  439. BMF_TOPDOWN, // Must have a positive delta
  440. pjScan0); // Where (0, 0) would be
  441. if ((hsurfDst == 0) ||
  442. (!EngAssociateSurface(hsurfDst, ppdev->hdevEng, 0)))
  443. goto Error_3;
  444. psoDst = EngLockSurface(hsurfDst);
  445. if (psoDst == NULL)
  446. goto Error_4;
  447. // Make sure that the rectangle we Get/Put from/to the screen
  448. // is in absolute coordinates:
  449. rclScreen.left = rclDst.left + ppdev->xOffset;
  450. rclScreen.right = rclDst.right + ppdev->xOffset;
  451. rclScreen.top = rclDst.top + ppdev->yOffset;
  452. rclScreen.bottom = rclDst.bottom + ppdev->yOffset;
  453. // It would be nice to get a copy of the destination rectangle
  454. // only when the ROP involves the destination (or when the source
  455. // is an RLE), but we can't do that. If the brush is truly NULL,
  456. // GDI will immediately return TRUE from EngBitBlt, without
  457. // modifying the temporary bitmap -- and we would proceed to
  458. // copy the uninitialized temporary bitmap back to the screen.
  459. vGetBits(ppdev, psoDst, &rclDst, (POINTL*) &rclScreen);
  460. b = EngBitBlt(psoDst, psoSrc, psoMsk, pco, pxlo, &rclDst, &ptlSrc,
  461. pptlMsk, pbo, pptlBrush, rop4);
  462. vPutBits(ppdev, psoDst, &rclScreen, (POINTL*) &rclDst);
  463. EngUnlockSurface(psoDst);
  464. Error_4:
  465. EngDeleteSurface(hsurfDst);
  466. Error_3:
  467. EngFreeMem(pjBits);
  468. }
  469. Error_2:
  470. if (bSrcIsScreen)
  471. {
  472. EngUnlockSurface(psoSrc);
  473. Error_1:
  474. EngDeleteSurface(hsurfSrc);
  475. }
  476. Error_0:
  477. return(b);
  478. }
  479. #endif
  480. }
  481. /******************************Public*Routine******************************\
  482. * BOOL DrvBitBlt
  483. *
  484. * Implements the workhorse routine of a display driver.
  485. *
  486. \**************************************************************************/
  487. BOOL DrvBitBlt(
  488. SURFOBJ* psoDst,
  489. SURFOBJ* psoSrc,
  490. SURFOBJ* psoMsk,
  491. CLIPOBJ* pco,
  492. XLATEOBJ* pxlo,
  493. RECTL* prclDst,
  494. POINTL* pptlSrc,
  495. POINTL* pptlMsk,
  496. BRUSHOBJ* pbo,
  497. POINTL* pptlBrush,
  498. ROP4 rop4)
  499. {
  500. PDEV* ppdev;
  501. DSURF* pdsurfDst;
  502. DSURF* pdsurfSrc;
  503. POINTL ptlSrc;
  504. BOOL bMore;
  505. CLIPENUM ce;
  506. LONG c;
  507. RECTL rcl;
  508. BYTE rop3;
  509. FNFILL* pfnFill;
  510. RBRUSH_COLOR rbc; // Realized brush or solid colour
  511. FNXFER* pfnXfer;
  512. ULONG iSrcBitmapFormat;
  513. ULONG iDir;
  514. BOOL bRet;
  515. bRet = TRUE; // Assume success
  516. pdsurfDst = (DSURF*) psoDst->dhsurf; // May be NULL
  517. if (psoSrc == NULL)
  518. {
  519. pdsurfSrc = NULL;
  520. if (!(pdsurfDst->dt & DT_DIB))
  521. {
  522. ///////////////////////////////////////////////////////////////////
  523. // Fills
  524. ///////////////////////////////////////////////////////////////////
  525. // Fills are this function's "raison d'etre", so we handle them
  526. // as quickly as possible:
  527. ppdev = (PDEV*) psoDst->dhpdev;
  528. ppdev->xOffset = pdsurfDst->x;
  529. ppdev->yOffset = pdsurfDst->y;
  530. // Make sure it doesn't involve a mask (i.e., it's really a
  531. // Rop3):
  532. rop3 = (BYTE) rop4;
  533. if ((BYTE) (rop4 >> 8) == rop3)
  534. {
  535. // Since 'psoSrc' is NULL, the rop3 had better not indicate
  536. // that we need a source.
  537. ASSERTDD((((rop4 >> 2) ^ (rop4)) & 0x33) == 0,
  538. "Need source but GDI gave us a NULL 'psoSrc'");
  539. Fill_It:
  540. pfnFill = ppdev->pfnFillSolid; // Default to solid fill
  541. if ((((rop3 >> 4) ^ (rop3)) & 0xf) != 0)
  542. {
  543. // The rop says that a pattern is truly required
  544. // (blackness, for instance, doesn't need one):
  545. rbc.iSolidColor = pbo->iSolidColor;
  546. if (rbc.iSolidColor == -1)
  547. {
  548. // Try and realize the pattern brush; by doing
  549. // this call-back, GDI will eventually call us
  550. // again through DrvRealizeBrush:
  551. rbc.prb = pbo->pvRbrush;
  552. if (rbc.prb == NULL)
  553. {
  554. rbc.prb = BRUSHOBJ_pvGetRbrush(pbo);
  555. if (rbc.prb == NULL)
  556. {
  557. // If we couldn't realize the brush, punt
  558. // the call (it may have been a non 8x8
  559. // brush or something, which we can't be
  560. // bothered to handle, so let GDI do the
  561. // drawing):
  562. goto Punt_It;
  563. }
  564. }
  565. if ((ppdev->iBitmapFormat == BMF_24BPP) && ((BYTE) (rop4 >> 8) != rop3)) {
  566. goto Punt_It;
  567. }
  568. pfnFill = ppdev->pfnFillPat;
  569. }
  570. }
  571. // Note that these 2 'if's are more efficient than
  572. // a switch statement:
  573. if ((pco == NULL) || (pco->iDComplexity == DC_TRIVIAL))
  574. {
  575. pfnFill(ppdev, 1, prclDst, rop4, rbc, pptlBrush);
  576. goto All_Done;
  577. }
  578. else if (pco->iDComplexity == DC_RECT)
  579. {
  580. if (bIntersect(prclDst, &pco->rclBounds, &rcl))
  581. pfnFill(ppdev, 1, &rcl, rop4, rbc, pptlBrush);
  582. goto All_Done;
  583. }
  584. else
  585. {
  586. CLIPOBJ_cEnumStart(pco, FALSE, CT_RECTANGLES, CD_ANY, 0);
  587. do {
  588. bMore = CLIPOBJ_bEnum(pco, sizeof(ce), (ULONG*) &ce);
  589. c = cIntersect(prclDst, ce.arcl, ce.c);
  590. if (c != 0)
  591. pfnFill(ppdev, c, ce.arcl, rop4, rbc, pptlBrush);
  592. } while (bMore);
  593. goto All_Done;
  594. }
  595. }
  596. }
  597. else
  598. {
  599. // Thanks to EngModifySurface, the destination is really a
  600. // plain old DIB, so we can forget about our DSURF structure
  601. // (this will simplify checks later in this routine):
  602. pdsurfDst = NULL;
  603. }
  604. }
  605. else
  606. {
  607. pdsurfDst = (DSURF*) psoDst->dhsurf;
  608. if ((pdsurfDst != NULL) && (pdsurfDst->dt & DT_DIB))
  609. {
  610. // The destination is really a plain old DIB.
  611. pdsurfDst = NULL;
  612. }
  613. pdsurfSrc = (DSURF*) psoSrc->dhsurf;
  614. if ((pdsurfSrc != NULL) && (pdsurfSrc->dt & DT_DIB))
  615. {
  616. // Here we consider putting a DIB DFB back into off-screen
  617. // memory. If there's a translate, it's probably not worth
  618. // moving since we won't be able to use the hardware to do
  619. // the blt (a similar argument could be made for weird rops
  620. // and stuff that we'll only end up having GDI simulate, but
  621. // those should happen infrequently enough that I don't care).
  622. //
  623. // This is only worth doing if the destination is in off-
  624. // screen memory, though!
  625. if ((pdsurfDst != NULL) &&
  626. ((pxlo == NULL) || (pxlo->flXlate & XO_TRIVIAL)))
  627. {
  628. ppdev = pdsurfSrc->ppdev;
  629. // See 'DrvCopyBits' for some more comments on how this
  630. // moving-it-back-into-off-screen-memory thing works:
  631. if (pdsurfSrc->iUniq == ppdev->iHeapUniq)
  632. {
  633. if (--pdsurfSrc->cBlt == 0)
  634. {
  635. if (bMoveDibToOffscreenDfbIfRoom(ppdev, pdsurfSrc))
  636. goto Continue_It;
  637. }
  638. }
  639. else
  640. {
  641. // Some space was freed up in off-screen memory,
  642. // so reset the counter for this DFB:
  643. pdsurfSrc->iUniq = ppdev->iHeapUniq;
  644. pdsurfSrc->cBlt = HEAP_COUNT_DOWN;
  645. }
  646. }
  647. // The source is really a plain old DIB.
  648. pdsurfSrc = NULL;
  649. }
  650. }
  651. Continue_It:
  652. ASSERTDD((pdsurfSrc == NULL) || !(pdsurfSrc->dt & DT_DIB),
  653. "pdsurfSrc should be non-NULL only if in off-screen memory");
  654. ASSERTDD((pdsurfDst == NULL) || !(pdsurfDst->dt & DT_DIB),
  655. "pdsurfDst should be non-NULL only if in off-screen memory");
  656. if (pdsurfDst != NULL)
  657. {
  658. // The destination is in video memory.
  659. if (pdsurfSrc != NULL)
  660. {
  661. // The source is also in video memory. This is effectively
  662. // a screen-to-screen blt, so adjust the source point:
  663. ptlSrc.x = pptlSrc->x - (pdsurfDst->x - pdsurfSrc->x);
  664. ptlSrc.y = pptlSrc->y - (pdsurfDst->y - pdsurfSrc->y);
  665. pptlSrc = &ptlSrc;
  666. }
  667. ppdev = pdsurfDst->ppdev;
  668. ppdev->xOffset = pdsurfDst->x;
  669. ppdev->yOffset = pdsurfDst->y;
  670. }
  671. else
  672. {
  673. // The destination is a DIB.
  674. if (pdsurfSrc == NULL)
  675. {
  676. // The source is a DIB, too. Let GDI handle it.
  677. goto EngBitBlt_It;
  678. }
  679. ppdev = pdsurfSrc->ppdev;
  680. ppdev->xOffset = pdsurfSrc->x;
  681. ppdev->yOffset = pdsurfSrc->y;
  682. }
  683. if (((rop4 >> 8) & 0xff) == (rop4 & 0xff))
  684. {
  685. // Since we've already handled the cases where the ROP4 is really
  686. // a ROP3 and no source is required, we can assert...
  687. ASSERTDD((psoSrc != NULL) && (pptlSrc != NULL),
  688. "Expected no-source case to already have been handled");
  689. ///////////////////////////////////////////////////////////////////
  690. // Bitmap transfers
  691. ///////////////////////////////////////////////////////////////////
  692. // Since the foreground and background ROPs are the same, we
  693. // don't have to worry about no stinking masks (it's a simple
  694. // Rop3).
  695. rop3 = (BYTE) rop4; // Make it into a Rop3 (we keep the rop4
  696. // around in case we decide to punt)
  697. if (pdsurfDst != NULL)
  698. {
  699. // The destination is the screen. See if the ROP3 requires a
  700. // pattern:
  701. if ((rop3 >> 4) == (rop3 & 0xf))
  702. {
  703. // Nope, the ROP3 doesn't require a pattern.
  704. if (pdsurfSrc == NULL)
  705. {
  706. //////////////////////////////////////////////////
  707. // DIB-to-screen blt
  708. iSrcBitmapFormat = psoSrc->iBitmapFormat;
  709. if (iSrcBitmapFormat == BMF_1BPP)
  710. {
  711. pfnXfer = ppdev->pfnXfer1bpp;
  712. goto Xfer_It;
  713. }
  714. else if ((iSrcBitmapFormat == ppdev->iBitmapFormat) &&
  715. ((pxlo == NULL) || (pxlo->flXlate & XO_TRIVIAL)))
  716. {
  717. if ((rop3 & 0xf) != 0xc)
  718. {
  719. pfnXfer = ppdev->pfnXferNative;
  720. }
  721. else
  722. {
  723. // Thanks to USWC write-combining, for SRCCOPY
  724. // blts it will be stupendously faster to copy
  725. // directly to the frame buffer than to use the
  726. // transfer register. Note that this is true for
  727. // almost any video adapter (including yours).
  728. pfnXfer = vXferNativeSrccopy;
  729. }
  730. goto Xfer_It;
  731. }
  732. // Expansions from 4bpp are pretty frequent with a ROP, and
  733. // should really be done for all color depths, not just 4bpp.
  734. //
  735. // Note, though, that USWC means it's faster to punt to GDI
  736. // for all SRCCOPY cases.
  737. else if ((iSrcBitmapFormat == BMF_4BPP) &&
  738. (ppdev->iBitmapFormat == BMF_8BPP) &&
  739. (rop4 != 0xcccc))
  740. {
  741. pfnXfer = ppdev->pfnXfer4bpp;
  742. goto Xfer_It;
  743. }
  744. }
  745. else // pdsurfSrc != NULL
  746. {
  747. if ((pxlo == NULL) || (pxlo->flXlate & XO_TRIVIAL))
  748. {
  749. //////////////////////////////////////////////////
  750. // Screen-to-screen blt with no translate
  751. if ((pco == NULL) || (pco->iDComplexity == DC_TRIVIAL))
  752. {
  753. (ppdev->pfnCopyBlt)(ppdev, 1, prclDst, rop4,
  754. pptlSrc, prclDst);
  755. goto All_Done;
  756. }
  757. else if (pco->iDComplexity == DC_RECT)
  758. {
  759. if (bIntersect(prclDst, &pco->rclBounds, &rcl))
  760. {
  761. (ppdev->pfnCopyBlt)(ppdev, 1, &rcl, rop4,
  762. pptlSrc, prclDst);
  763. }
  764. goto All_Done;
  765. }
  766. else
  767. {
  768. // Don't forget that we'll have to draw the
  769. // rectangles in the correct direction:
  770. if (pptlSrc->y >= prclDst->top)
  771. {
  772. if (pptlSrc->x >= prclDst->left)
  773. iDir = CD_RIGHTDOWN;
  774. else
  775. iDir = CD_LEFTDOWN;
  776. }
  777. else
  778. {
  779. if (pptlSrc->x >= prclDst->left)
  780. iDir = CD_RIGHTUP;
  781. else
  782. iDir = CD_LEFTUP;
  783. }
  784. CLIPOBJ_cEnumStart(pco, FALSE, CT_RECTANGLES,
  785. iDir, 0);
  786. do {
  787. bMore = CLIPOBJ_bEnum(pco, sizeof(ce),
  788. (ULONG*) &ce);
  789. c = cIntersect(prclDst, ce.arcl, ce.c);
  790. if (c != 0)
  791. {
  792. (ppdev->pfnCopyBlt)(ppdev, c, ce.arcl,
  793. rop4, pptlSrc, prclDst);
  794. }
  795. } while (bMore);
  796. goto All_Done;
  797. }
  798. }
  799. }
  800. }
  801. }
  802. else if (((pxlo == NULL) || (pxlo->flXlate & XO_TRIVIAL)) &&
  803. (rop4 == 0xcccc) &&
  804. (psoDst->iBitmapFormat == ppdev->iBitmapFormat))
  805. {
  806. //////////////////////////////////////////////////
  807. // Screen-to-DIB SRCCOPY blt with no translate
  808. //
  809. // The only way to read bits from video memory on the S3 is to
  810. // have the CPU read directly from the frame-buffer. Unfortunately,
  811. // reads from video memory are pathetically slow.
  812. //
  813. // Have you ever benchmarked reads? On a Pentium II with USWC
  814. // enabled, consecutive writes to the frame buffer via PCI on a
  815. // typical video card is typically on the order of 80 to 100 MB/s.
  816. // Dword reads max out at 6 MB/s! Byte reads (or worse, unaligned
  817. // Dword reads) max out at a very small 1.5 MB/s!
  818. //
  819. // The problem is that if we just punt to GDI, GDI doesn't realize
  820. // that the source is video-memory and the destination is system-
  821. // memory. It will proceed to align its copy to the destination,
  822. // which means that it may do misaligned dword reads from video
  823. // memory. So we just dropped our throughput by a factor of 4!
  824. //
  825. // So the net-result is that we special-case reads here simply so
  826. // that we can do aligned dword reads from video memory.
  827. pfnXfer = vReadNativeSrccopy;
  828. // The Xfer_It routine expects the system-memory surface to come
  829. // in as 'psoSrc'.
  830. psoSrc = psoDst;
  831. // It might also be a thought to convert an off-screen DFB to a
  832. // DIB at this point.
  833. goto Xfer_It;
  834. }
  835. }
  836. else if ((psoMsk == NULL) && (rop4 == 0xaaf0))
  837. {
  838. // The only time GDI will ask us to do a true rop4 using the brush
  839. // mask is when the brush is 1bpp, and the background rop is AA
  840. // (meaning it's a NOP):
  841. rop3 = (BYTE) rop4;
  842. goto Fill_It;
  843. }
  844. // Just fall through to Punt_It...
  845. Punt_It:
  846. bRet = bPuntBlt(psoDst,
  847. psoSrc,
  848. psoMsk,
  849. pco,
  850. pxlo,
  851. prclDst,
  852. pptlSrc,
  853. pptlMsk,
  854. pbo,
  855. pptlBrush,
  856. rop4);
  857. goto All_Done;
  858. //////////////////////////////////////////////////////////////////////
  859. // Common bitmap transfer
  860. Xfer_It:
  861. if ((pco == NULL) || (pco->iDComplexity == DC_TRIVIAL))
  862. {
  863. pfnXfer(ppdev, 1, prclDst, rop4, psoSrc, pptlSrc, prclDst, pxlo);
  864. goto All_Done;
  865. }
  866. else if (pco->iDComplexity == DC_RECT)
  867. {
  868. if (bIntersect(prclDst, &pco->rclBounds, &rcl))
  869. pfnXfer(ppdev, 1, &rcl, rop4, psoSrc, pptlSrc, prclDst, pxlo);
  870. goto All_Done;
  871. }
  872. else
  873. {
  874. CLIPOBJ_cEnumStart(pco, FALSE, CT_RECTANGLES,
  875. CD_ANY, 0);
  876. do {
  877. bMore = CLIPOBJ_bEnum(pco, sizeof(ce),
  878. (ULONG*) &ce);
  879. c = cIntersect(prclDst, ce.arcl, ce.c);
  880. if (c != 0)
  881. {
  882. pfnXfer(ppdev, c, ce.arcl, rop4, psoSrc,
  883. pptlSrc, prclDst, pxlo);
  884. }
  885. } while (bMore);
  886. goto All_Done;
  887. }
  888. ////////////////////////////////////////////////////////////////////////
  889. // Common DIB blt
  890. EngBitBlt_It:
  891. // Our driver doesn't handle any blt's between two DIBs. Normally
  892. // a driver doesn't have to worry about this, but we do because
  893. // we have DFBs that may get moved from off-screen memory to a DIB,
  894. // where we have GDI do all the drawing. GDI does DIB drawing at
  895. // a reasonable speed (unless one of the surfaces is a device-
  896. // managed surface...)
  897. //
  898. // If either the source or destination surface in an EngBitBlt
  899. // call-back is a device-managed surface (meaning it's not a DIB
  900. // that GDI can draw with), GDI will automatically allocate memory
  901. // and call the driver's DrvCopyBits routine to create a DIB copy
  902. // that it can use. So this means that this could handle all 'punts',
  903. // and we could conceivably get rid of bPuntBlt. But this would have
  904. // a bad performance impact because of the extra memory allocations
  905. // and bitmap copies -- you really don't want to do this unless you
  906. // have to (or your surface was created such that GDI can draw
  907. // directly onto it) -- I've been burned by this because it's not
  908. // obvious that the performance impact is so bad.
  909. //
  910. // That being said, we only call EngBitBlt when all the surfaces
  911. // are DIBs:
  912. bRet = EngBitBlt(psoDst, psoSrc, psoMsk, pco, pxlo, prclDst,
  913. pptlSrc, pptlMsk, pbo, pptlBrush, rop4);
  914. All_Done:
  915. return(bRet);
  916. }
  917. /******************************Public*Routine******************************\
  918. * BOOL DrvCopyBits
  919. *
  920. * Do fast bitmap copies.
  921. *
  922. * DrvCopyBits is just a special-case of DrvBitBlt. Since DrvBitBlt is
  923. * plenty fast, we let DrvBitBlt handle all the cases.
  924. *
  925. * (I used to have a bunch of extra code here to optimize the SRCCOPY
  926. * cases, but the performance win was immeasurable. There's no point in
  927. * the adding code complexity or the working set hit.)
  928. *
  929. \**************************************************************************/
  930. BOOL DrvCopyBits(
  931. SURFOBJ* psoDst,
  932. SURFOBJ* psoSrc,
  933. CLIPOBJ* pco,
  934. XLATEOBJ* pxlo,
  935. RECTL* prclDst,
  936. POINTL* pptlSrc)
  937. {
  938. return(DrvBitBlt(psoDst, psoSrc, NULL, pco, pxlo, prclDst, pptlSrc, NULL,
  939. NULL, NULL, 0x0000CCCC));
  940. }
  941. /******************************Public*Routine******************************\
  942. * BOOL DrvTransparentBlt
  943. *
  944. * Do blt using a source color-key.
  945. *
  946. \**************************************************************************/
  947. BOOL DrvTransparentBlt(
  948. SURFOBJ* psoDst,
  949. SURFOBJ* psoSrc,
  950. CLIPOBJ* pco,
  951. XLATEOBJ* pxlo,
  952. RECTL* prclDst,
  953. RECTL* prclSrc,
  954. ULONG iTransparentColor,
  955. ULONG ulReserved)
  956. {
  957. DSURF* pdsurfSrc;
  958. DSURF* pdsurfDst;
  959. PDEV* ppdev;
  960. ULONG c;
  961. BOOL bMore;
  962. POINTL ptlSrc;
  963. RECTL rcl;
  964. CLIPENUM ce;
  965. pdsurfSrc = (DSURF*) psoSrc->dhsurf;
  966. pdsurfDst = (DSURF*) psoDst->dhsurf;
  967. // We only handle the case when both surfaces are in video memory
  968. // and when no stretching is involved. (GDI using USWC write-
  969. // combining is perfectly fast for the case where the source is
  970. // a DIB and the destination is video memory.)
  971. if (((pdsurfSrc == NULL) || (pdsurfSrc->dt & DT_DIB)) ||
  972. ((pdsurfDst == NULL) || (pdsurfDst->dt & DT_DIB)) ||
  973. ((pxlo != NULL) && !(pxlo->flXlate & XO_TRIVIAL)) ||
  974. ((prclSrc->right - prclSrc->left) != (prclDst->right - prclDst->left)) ||
  975. ((prclSrc->bottom - prclSrc->top) != (prclDst->bottom - prclDst->top)))
  976. {
  977. return(EngTransparentBlt(psoDst, psoSrc, pco, pxlo, prclDst, prclSrc,
  978. iTransparentColor, ulReserved));
  979. }
  980. ppdev = (PDEV*) psoDst->dhpdev;
  981. ppdev->xOffset = pdsurfDst->x;
  982. ppdev->yOffset = pdsurfDst->y;
  983. ptlSrc.x = prclSrc->left - (pdsurfDst->x - pdsurfSrc->x);
  984. ptlSrc.y = prclSrc->top - (pdsurfDst->y - pdsurfSrc->y);
  985. if ((pco == NULL) || (pco->iDComplexity == DC_TRIVIAL))
  986. {
  987. ppdev->pfnCopyTransparent(ppdev, 1, prclDst, &ptlSrc,
  988. prclDst, iTransparentColor);
  989. }
  990. else if (pco->iDComplexity == DC_RECT)
  991. {
  992. if (bIntersect(prclDst, &pco->rclBounds, &rcl))
  993. ppdev->pfnCopyTransparent(ppdev, 1, &rcl, &ptlSrc,
  994. prclDst, iTransparentColor);
  995. }
  996. else
  997. {
  998. CLIPOBJ_cEnumStart(pco, FALSE, CT_RECTANGLES, CD_ANY, 0);
  999. do {
  1000. bMore = CLIPOBJ_bEnum(pco, sizeof(ce), (ULONG*) &ce);
  1001. c = cIntersect(prclDst, ce.arcl, ce.c);
  1002. if (c != 0)
  1003. {
  1004. ppdev->pfnCopyTransparent(ppdev, c, ce.arcl, &ptlSrc,
  1005. prclDst, iTransparentColor);
  1006. }
  1007. } while (bMore);
  1008. }
  1009. return(TRUE);
  1010. }