Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2267 lines
78 KiB

  1. /******************************Module*Header*******************************\
  2. *
  3. * **************************
  4. * * DirectDraw SAMPLE CODE *
  5. * **************************
  6. *
  7. * Module Name: ddraw.c
  8. *
  9. * Implements all the DirectDraw components for the driver.
  10. *
  11. * Copyright (c) 1995-1998 Microsoft Corporation
  12. \**************************************************************************/
  13. #include "precomp.h"
  // Defines we'll use in the surface's 'dwReserved1' field:

  #define DD_RESERVED_DIFFERENTPIXELFORMAT    0x0001

  // Worst-case possible number of FIFO entries we'll have to wait for in
  // DdBlt for any operation:

  #define DDBLT_FIFO_COUNT    9

  // NT is kind enough to pre-calculate the 2-d surface offset as a 'hint' so
  // that we don't have to do the following, which would be 6 DIVs per blt:
  //
  //     y += (offset / pitch)
  //     x += (offset % pitch) / bytes_per_pixel
  //
  // convertToGlobalCord adds the surface's 'xHint'/'yHint' to the caller's
  // 'x' and 'y' lvalues in place.  'surf' must be a pointer to something with
  // 'xHint' and 'yHint' members.
  //
  // The body is wrapped in do/while(0) so that the macro behaves as a single
  // statement (safe in an unbraced if/else), and every argument is
  // parenthesized so that expressions such as 'p + 1' expand correctly.
  // The invocation must be followed by a semicolon.

  #define convertToGlobalCord(x, y, surf)     \
      do {                                    \
          (y) += (surf)->yHint;               \
          (x) += (surf)->xHint;               \
      } while (0)
  29. /******************************Public*Routine******************************\
  30. * VOID vFixMissingPixels
  31. *
  32. * Trio64V+ work-around.
  33. *
  34. * On 1024x768x8 and 800x600x8 modes, switching from K2 to stream processor
  35. * results in 1 character clock pixels on the right handed side of the screen
  36. * missing. This problem can be worked-around by adjusting CR2 register.
  37. *
  38. \**************************************************************************/
  39. VOID vFixMissingPixels(
  40. PDEV* ppdev)
  41. {
  42. BYTE* pjIoBase;
  43. BYTE jVerticalRetraceEnd;
  44. ASSERTDD(ppdev->flCaps & CAPS_STREAMS_CAPABLE, "Must be streams capable");
  45. pjIoBase = ppdev->pjIoBase;
  46. // Unlock CRTC control registers:
  47. OUTP(pjIoBase, CRTC_INDEX, 0x11);
  48. jVerticalRetraceEnd = INP(pjIoBase, CRTC_DATA);
  49. OUTP(pjIoBase, CRTC_DATA, jVerticalRetraceEnd & 0x7f);
  50. // Add one character clock:
  51. OUTP(pjIoBase, CRTC_INDEX, 0x2);
  52. ppdev->jSavedCR2 = INP(pjIoBase, CRTC_DATA);
  53. OUTP(pjIoBase, CRTC_DATA, ppdev->jSavedCR2 + 1);
  54. // Lock CRTC control registers again:
  55. OUTP(pjIoBase, CRTC_INDEX, 0x11);
  56. OUTP(pjIoBase, CRTC_DATA, jVerticalRetraceEnd | 0x80);
  57. }
  58. /******************************Public*Routine******************************\
  59. * VOID vUnfixMissingPixels
  60. *
  61. * Trio64V+ work-around.
  62. *
  63. \**************************************************************************/
  64. VOID vUnfixMissingPixels(
  65. PDEV* ppdev)
  66. {
  67. BYTE* pjIoBase;
  68. BYTE jVerticalRetraceEnd;
  69. pjIoBase = ppdev->pjIoBase;
  70. // Unlock CRTC control registers:
  71. OUTP(pjIoBase, CRTC_INDEX, 0x11);
  72. jVerticalRetraceEnd = INP(pjIoBase, CRTC_DATA);
  73. OUTP(pjIoBase, CRTC_DATA, jVerticalRetraceEnd & 0x7f);
  74. // Restore original register value:
  75. OUTP(pjIoBase, CRTC_INDEX, 0x2);
  76. OUTP(pjIoBase, CRTC_DATA, ppdev->jSavedCR2);
  77. // Lock CRTC control registers again:
  78. OUTP(pjIoBase, CRTC_INDEX, 0x11);
  79. OUTP(pjIoBase, CRTC_DATA, jVerticalRetraceEnd | 0x80);
  80. }
  81. /******************************Public*Routine******************************\
  82. * VOID vStreamsDelay()
  83. *
  84. * This tries to work around a hardware timing bug. Supposedly, consecutive
  85. * writes to the streams processor in fast CPUs such as P120 and P133's
  86. * have problems. I haven't seen this problem, but this work-around exists
  87. * in the Windows 95 driver, and at this point don't want to chance not
  88. * having it. Note that writes to the streams processor are not performance
  89. * critical, so this is not a performance hit.
  90. *
  91. \**************************************************************************/
  92. VOID vStreamsDelay()
  93. {
  94. volatile LONG i;
  95. for (i = 32; i != 0; i--)
  96. ;
  97. }
  98. /******************************Public*Routine******************************\
  99. * VOID vTurnOnStreamsProcessorMode
  100. *
  101. \**************************************************************************/
  102. VOID vTurnOnStreamsProcessorMode(
  103. PDEV* ppdev)
  104. {
  105. BYTE* pjMmBase;
  106. BYTE* pjIoBase;
  107. BYTE jStreamsProcessorModeSelect;
  108. DWORD dwPFormat;
  109. ASSERTDD(ppdev->flCaps & CAPS_STREAMS_CAPABLE, "Must be streams capable");
  110. ACQUIRE_CRTC_CRITICAL_SECTION(ppdev);
  111. pjMmBase = ppdev->pjMmBase;
  112. pjIoBase = ppdev->pjIoBase;
  113. NW_GP_WAIT(ppdev, pjMmBase);
  114. while (!(VBLANK_IS_ACTIVE(pjIoBase)))
  115. ;
  116. // Full streams processor operation:
  117. OUTP(pjIoBase, CRTC_INDEX, 0x67);
  118. jStreamsProcessorModeSelect = INP(pjIoBase, CRTC_DATA);
  119. OUTP(pjIoBase, CRTC_DATA, jStreamsProcessorModeSelect | 0x0c);
  120. if (ppdev->iBitmapFormat == BMF_8BPP)
  121. {
  122. vFixMissingPixels(ppdev);
  123. }
  124. switch(ppdev->iBitmapFormat)
  125. {
  126. case BMF_8BPP:
  127. dwPFormat = P_RGB8;
  128. break;
  129. case BMF_16BPP:
  130. if (IS_RGB15_R(ppdev->flRed))
  131. dwPFormat = P_RGB15;
  132. else
  133. dwPFormat = P_RGB16;
  134. break;
  135. case BMF_32BPP:
  136. dwPFormat = P_RGB32;
  137. break;
  138. default:
  139. dwPFormat = -1;
  140. RIP("Unexpected bitmap format");
  141. }
  142. WRITE_STREAM_D(pjMmBase, P_CONTROL, dwPFormat );
  143. WRITE_STREAM_D(pjMmBase, FIFO_CONTROL, ((0xcL << FifoAlloc_Shift)|
  144. (4L << P_FifoThresh_Shift) |
  145. (4L << S_FifoThresh_Shift)));
  146. WRITE_STREAM_D(pjMmBase, P_0, 0);
  147. WRITE_STREAM_D(pjMmBase, P_STRIDE, ppdev->lDelta);
  148. WRITE_STREAM_D(pjMmBase, P_XY, 0x010001L);
  149. WRITE_STREAM_D(pjMmBase, P_WH, WH(ppdev->cxScreen, ppdev->cyScreen));
  150. WRITE_STREAM_D(pjMmBase, S_WH, WH(10, 2));
  151. WRITE_STREAM_D(pjMmBase, CKEY_LOW, ppdev->ulColorKey |
  152. CompareBits0t7 |
  153. KeyFromCompare);
  154. WRITE_STREAM_D(pjMmBase, CKEY_HI, ppdev->ulColorKey);
  155. WRITE_STREAM_D(pjMmBase, BLEND_CONTROL, POnS);
  156. WRITE_STREAM_D(pjMmBase, OPAQUE_CONTROL, 0);
  157. WRITE_STREAM_D(pjMmBase, FIFO_CONTROL, ppdev->ulFifoValue);
  158. RELEASE_CRTC_CRITICAL_SECTION(ppdev);
  159. }
  160. /******************************Public*Routine******************************\
  161. * VOID vTurnOffStreamsProcessorMode
  162. *
  163. \**************************************************************************/
  164. VOID vTurnOffStreamsProcessorMode(
  165. PDEV* ppdev)
  166. {
  167. BYTE* pjMmBase;
  168. BYTE* pjIoBase;
  169. BYTE jStreamsProcessorModeSelect;
  170. ACQUIRE_CRTC_CRITICAL_SECTION(ppdev);
  171. pjMmBase = ppdev->pjMmBase;
  172. pjIoBase = ppdev->pjIoBase;
  173. NW_GP_WAIT(ppdev, pjMmBase);
  174. while (!(VBLANK_IS_ACTIVE(pjIoBase)))
  175. ;
  176. WRITE_STREAM_D(pjMmBase, FIFO_CONTROL, 0x3000L);
  177. OUTP(pjIoBase, CRTC_INDEX, 0x67);
  178. jStreamsProcessorModeSelect = INP(pjIoBase, CRTC_DATA);
  179. OUTP(pjIoBase, CRTC_DATA, jStreamsProcessorModeSelect & ~0x0C);
  180. if (ppdev->iBitmapFormat == BMF_8BPP)
  181. {
  182. vUnfixMissingPixels(ppdev);
  183. }
  184. RELEASE_CRTC_CRITICAL_SECTION(ppdev);
  185. }
  186. /******************************Public*Routine******************************\
  187. * DWORD dwGetPaletteEntry
  188. *
  189. \**************************************************************************/
  190. DWORD dwGetPaletteEntry(
  191. PDEV* ppdev,
  192. DWORD iIndex)
  193. {
  194. BYTE* pjIoBase;
  195. DWORD dwRed;
  196. DWORD dwGreen;
  197. DWORD dwBlue;
  198. pjIoBase = ppdev->pjIoBase;
  199. OUTP(pjIoBase, 0x3c7, iIndex);
  200. dwRed = INP(pjIoBase, 0x3c9) << 2;
  201. dwGreen = INP(pjIoBase, 0x3c9) << 2;
  202. dwBlue = INP(pjIoBase, 0x3c9) << 2;
  203. return((dwRed << 16) | (dwGreen << 8) | (dwBlue));
  204. }
  205. /******************************Public*Routine******************************\
  206. * VOID vGetDisplayDuration
  207. *
  208. * Get the length, in EngQueryPerformanceCounter() ticks, of a refresh cycle.
  209. *
  210. * If we could trust the miniport to return back and accurate value for
  211. * the refresh rate, we could use that. Unfortunately, our miniport doesn't
  212. * ensure that it's an accurate value.
  213. *
  214. \**************************************************************************/
  215. #define NUM_VBLANKS_TO_MEASURE 1
  216. #define NUM_MEASUREMENTS_TO_TAKE 8
  217. VOID vGetDisplayDuration(
  218. PDEV* ppdev)
  219. {
  220. BYTE* pjIoBase;
  221. LONG i;
  222. LONG j;
  223. LONGLONG li;
  224. LONGLONG liFrequency;
  225. LONGLONG liMin;
  226. LONGLONG aliMeasurement[NUM_MEASUREMENTS_TO_TAKE + 1];
  227. pjIoBase = ppdev->pjIoBase;
  228. memset(&ppdev->flipRecord, 0, sizeof(ppdev->flipRecord));
  229. // Warm up EngQUeryPerformanceCounter to make sure it's in the working
  230. // set:
  231. EngQueryPerformanceCounter(&li);
  232. // Unfortunately, since NT is a proper multitasking system, we can't
  233. // just disable interrupts to take an accurate reading. We also can't
  234. // do anything so goofy as dynamically change our thread's priority to
  235. // real-time.
  236. //
  237. // So we just do a bunch of short measurements and take the minimum.
  238. //
  239. // It would be 'okay' if we got a result that's longer than the actual
  240. // VBlank cycle time -- nothing bad would happen except that the app
  241. // would run a little slower. We don't want to get a result that's
  242. // shorter than the actual VBlank cycle time -- that could cause us
  243. // to start drawing over a frame before the Flip has occured.
  244. //
  245. // Skip a couple of vertical blanks to allow the hardware to settle
  246. // down after the mode change, to make our readings accurate:
  247. for (i = 2; i != 0; i--)
  248. {
  249. while (VBLANK_IS_ACTIVE(pjIoBase))
  250. ;
  251. while (!(VBLANK_IS_ACTIVE(pjIoBase)))
  252. ;
  253. }
  254. for (i = 0; i < NUM_MEASUREMENTS_TO_TAKE; i++)
  255. {
  256. // We're at the start of the VBlank active cycle!
  257. EngQueryPerformanceCounter(&aliMeasurement[i]);
  258. // Okay, so life in a multi-tasking environment isn't all that
  259. // simple. What if we had taken a context switch just before
  260. // the above EngQueryPerformanceCounter call, and now were half
  261. // way through the VBlank inactive cycle? Then we would measure
  262. // only half a VBlank cycle, which is obviously bad. The worst
  263. // thing we can do is get a time shorter than the actual VBlank
  264. // cycle time.
  265. //
  266. // So we solve this by making sure we're in the VBlank active
  267. // time before and after we query the time. If it's not, we'll
  268. // sync up to the next VBlank (it's okay to measure this period --
  269. // it will be guaranteed to be longer than the VBlank cycle and
  270. // will likely be thrown out when we select the minimum sample).
  271. // There's a chance that we'll take a context switch and return
  272. // just before the end of the active VBlank time -- meaning that
  273. // the actual measured time would be less than the true amount --
  274. // but since the VBlank is active less than 1% of the time, this
  275. // means that we would have a maximum of 1% error approximately
  276. // 1% of the times we take a context switch. An acceptable risk.
  277. //
  278. // This next line will cause us wait if we're no longer in the
  279. // VBlank active cycle as we should be at this point:
  280. while (!(VBLANK_IS_ACTIVE(pjIoBase)))
  281. ;
  282. for (j = 0; j < NUM_VBLANKS_TO_MEASURE; j++)
  283. {
  284. while (VBLANK_IS_ACTIVE(pjIoBase))
  285. ;
  286. while (!(VBLANK_IS_ACTIVE(pjIoBase)))
  287. ;
  288. }
  289. }
  290. EngQueryPerformanceCounter(&aliMeasurement[NUM_MEASUREMENTS_TO_TAKE]);
  291. // Use the minimum:
  292. liMin = aliMeasurement[1] - aliMeasurement[0];
  293. DISPDBG((1, "Refresh count: %li - %li", 1, (ULONG) liMin));
  294. for (i = 2; i <= NUM_MEASUREMENTS_TO_TAKE; i++)
  295. {
  296. li = aliMeasurement[i] - aliMeasurement[i - 1];
  297. DISPDBG((1, " %li - %li", i, (ULONG) li));
  298. if (li < liMin)
  299. liMin = li;
  300. }
  301. // Round the result:
  302. ppdev->flipRecord.liFlipDuration
  303. = (DWORD) (liMin + (NUM_VBLANKS_TO_MEASURE / 2)) / NUM_VBLANKS_TO_MEASURE;
  304. ppdev->flipRecord.bFlipFlag = FALSE;
  305. ppdev->flipRecord.fpFlipFrom = 0;
  306. // We need the refresh rate in Hz to query the S3 miniport about the
  307. // streams parameters:
  308. EngQueryPerformanceFrequency(&liFrequency);
  309. ppdev->ulRefreshRate
  310. = (ULONG) ((liFrequency + (ppdev->flipRecord.liFlipDuration / 2))
  311. / ppdev->flipRecord.liFlipDuration);
  312. DISPDBG((1, "Frequency: %li Hz", ppdev->ulRefreshRate));
  313. }
  314. /******************************Public*Routine******************************\
  315. * HRESULT ddrvalUpdateFlipStatus
  316. *
  317. * Checks and sees if the most recent flip has occurred.
  318. *
  319. * Unfortunately, the hardware has no ability to tell us whether a vertical
  320. * retrace has occured since the flip command was given other than by
  321. * sampling the vertical-blank-active and display-active status bits.
  322. *
  323. \**************************************************************************/
  324. HRESULT ddrvalUpdateFlipStatus(
  325. PDEV* ppdev,
  326. FLATPTR fpVidMem)
  327. {
  328. BYTE* pjIoBase;
  329. LONGLONG liTime;
  330. pjIoBase = ppdev->pjIoBase;
  331. if ((ppdev->flipRecord.bFlipFlag) &&
  332. ((fpVidMem == (FLATPTR) -1) ||
  333. (fpVidMem == ppdev->flipRecord.fpFlipFrom)))
  334. {
  335. if (VBLANK_IS_ACTIVE(pjIoBase))
  336. {
  337. if (ppdev->flipRecord.bWasEverInDisplay)
  338. {
  339. ppdev->flipRecord.bHaveEverCrossedVBlank = TRUE;
  340. }
  341. }
  342. else if (DISPLAY_IS_ACTIVE(pjIoBase))
  343. {
  344. if (ppdev->flipRecord.bHaveEverCrossedVBlank)
  345. {
  346. ppdev->flipRecord.bFlipFlag = FALSE;
  347. return(DD_OK);
  348. }
  349. ppdev->flipRecord.bWasEverInDisplay = TRUE;
  350. }
  351. // It's pretty unlikely that we'll happen to sample the vertical-
  352. // blank-active at the first vertical blank after the flip command
  353. // has been given. So to provide better results, we also check the
  354. // time elapsed since the flip. If it's more than the duration of
  355. // one entire refresh of the display, then we know for sure it has
  356. // happened:
  357. EngQueryPerformanceCounter(&liTime);
  358. if (liTime - ppdev->flipRecord.liFlipTime
  359. <= ppdev->flipRecord.liFlipDuration)
  360. {
  361. return(DDERR_WASSTILLDRAWING);
  362. }
  363. ppdev->flipRecord.bFlipFlag = FALSE;
  364. }
  365. return(DD_OK);
  366. }
  367. /******************************Public*Routine******************************\
  368. * DWORD DdBlt
       *
       * DirectDraw blt entry point.  After checking that no flip is pending on
       * the destination surface (and, for DDBLT_ASYNC, that the FIFO has room),
       * the request is dispatched as follows:
       *
       *   - DDBLT_COLORFILL: solid rectangle fill with the graphics engine.
       *     Declined if the destination has a different pixel format than the
       *     display.
       *
       *   - Source and destination rectangles of equal size, neither surface
       *     flagged DD_RESERVED_DIFFERENTPIXELFORMAT: screen-to-screen bitblt,
       *     optionally source colour keyed (DDBLT_KEYSRCOVERRIDE).  Drawn
       *     bottom-to-top, right-to-left when the rectangles overlap on the same
       *     surface and the destination follows the source.
       *
       *   - Otherwise, if the chip has a pixel formatter and the blt is not a
       *     shrink: stretch and/or pixel format conversion, one destination scan
       *     at a time, with the vertical DDA run in software.  Destinations
       *     narrower than 40 pixels go through a scratch area and are then copied
       *     into place with the graphics engine.
       *
       *   - Otherwise: only a full-surface copy between two surfaces of equal
       *     pitch and bit count is supported.
       *
       * Returns DDHAL_DRIVER_HANDLED with the result code in lpBlt->ddRVal, or
       * DDHAL_DRIVER_NOTHANDLED for requests this routine declines (ddRVal is
       * not written on those paths).
  369. *
  370. \**************************************************************************/
  371. DWORD DdBlt(
  372. PDD_BLTDATA lpBlt)
  373. {
  374. PDD_SURFACE_GLOBAL srcSurf;
  375. PDD_SURFACE_LOCAL dstSurfx;
  376. PDD_SURFACE_GLOBAL dstSurf;
  377. PDEV* ppdev;
  378. BYTE* pjMmBase;
  379. HRESULT ddrval;
  380. DWORD dstX;
  381. DWORD dstY;
  382. DWORD dwFlags;
  383. DWORD dstWidth;
  384. DWORD dstHeight;
  385. DWORD srcWidth;
  386. DWORD srcHeight;
  387. DWORD dwError;
  388. LONG dstPitch;
  389. LONG srcPitch;
  390. DWORD srcX;
  391. DWORD srcY;
  392. ULONG ulBltCmd;
  393. DWORD dwVEctrl;
  394. DWORD dwVEdda;
  395. DWORD dwVEcrop;
  396. DWORD dwVEdstAddr;
  397. DWORD dwVEsrcAddr;
  398. DWORD dwDstByteCount;
  399. DWORD dwSrcByteCount;
  400. DWORD dwSrcBytes;
  401. DWORD dwCropSkip;
  402. LONG i;
  403. FLATPTR fp;
  404. ppdev = (PDEV*) lpBlt->lpDD->dhpdev;
  405. pjMmBase = ppdev->pjMmBase;
  406. dstSurfx = lpBlt->lpDDDestSurface;
  407. dstSurf = dstSurfx->lpGbl;
  408. // Is a flip in progress?
       // (Only a flip away from this destination surface matters here; any
       // non-DD_OK status is handed straight back to DirectDraw.)
  409. ddrval = ddrvalUpdateFlipStatus(ppdev, dstSurf->fpVidMem);
  410. if (ddrval != DD_OK)
  411. {
  412. lpBlt->ddRVal = ddrval;
  413. return(DDHAL_DRIVER_HANDLED);
  414. }
  415. dwFlags = lpBlt->dwFlags;
  416. if (dwFlags & DDBLT_ASYNC)
  417. {
  418. // If async, then only work if we won't have to wait on the
  419. // accelerator to start the command.
  420. //
  421. // The FIFO wait should account for the worst-case possible
  422. // blt that we would do:
  423. if (MM_FIFO_BUSY(ppdev, pjMmBase, DDBLT_FIFO_COUNT))
  424. {
  425. lpBlt->ddRVal = DDERR_WASSTILLDRAWING;
  426. return(DDHAL_DRIVER_HANDLED);
  427. }
  428. }
  429. // Copy src/dst rects:
       // NOTE(review): the widths and heights are unsigned and are later used
       // as 'n - 1' and as do/while loop counts, so this code assumes the
       // caller never passes an empty rectangle -- confirm.
  430. dstX = lpBlt->rDest.left;
  431. dstY = lpBlt->rDest.top;
  432. dstWidth = lpBlt->rDest.right - lpBlt->rDest.left;
  433. dstHeight = lpBlt->rDest.bottom - lpBlt->rDest.top;
  434. if (dwFlags & DDBLT_COLORFILL)
  435. {
  436. // The S3 can't easily do colour fills for off-screen surfaces that
  437. // are a different pixel format than that of the primary display:
  438. if (dstSurf->dwReserved1 & DD_RESERVED_DIFFERENTPIXELFORMAT)
  439. {
  440. DISPDBG((0, "Can't do colorfill to odd pixel format"));
  441. return(DDHAL_DRIVER_NOTHANDLED);
  442. }
  443. else
  444. {
  445. convertToGlobalCord(dstX, dstY, dstSurf);
  446. NW_FIFO_WAIT(ppdev, pjMmBase, 6);
  447. NW_PIX_CNTL(ppdev, pjMmBase, ALL_ONES);
  448. NW_ALT_MIX(ppdev, pjMmBase, FOREGROUND_COLOR | OVERPAINT, 0);
  449. NW_FRGD_COLOR(ppdev, pjMmBase, lpBlt->bltFX.dwFillColor);
  450. NW_ABS_CURXY_FAST(ppdev, pjMmBase, dstX, dstY);
  451. NW_ALT_PCNT(ppdev, pjMmBase, dstWidth - 1, dstHeight - 1);
  452. NW_ALT_CMD(ppdev, pjMmBase, RECTANGLE_FILL | DRAWING_DIR_TBLRXM |
  453. DRAW | DIR_TYPE_XY |
  454. LAST_PIXEL_ON | MULTIPLE_PIXELS |
  455. WRITE);
  456. lpBlt->ddRVal = DD_OK;
  457. return(DDHAL_DRIVER_HANDLED);
  458. }
  459. }
  460. // We specified with Our ddCaps.dwCaps that we handle a limited number
  461. // of commands, and by this point in our routine we've handled everything
  462. // except DDBLT_ROP. DirectDraw and GDI shouldn't pass us anything
  463. // else; we'll assert on debug builds to prove this:
  464. ASSERTDD((dwFlags & DDBLT_ROP) && (lpBlt->lpDDSrcSurface),
  465. "Expected dwFlags commands of only DDBLT_ASYNC and DDBLT_COLORFILL");
  466. // Get offset, dstWidth, and dstHeight for source:
  467. srcSurf = lpBlt->lpDDSrcSurface->lpGbl;
  468. srcX = lpBlt->rSrc.left;
  469. srcY = lpBlt->rSrc.top;
  470. srcWidth = lpBlt->rSrc.right - lpBlt->rSrc.left;
  471. srcHeight = lpBlt->rSrc.bottom - lpBlt->rSrc.top;
  472. // If a stretch or a funky pixel format blt are involved, we'll have to
  473. // defer to the overlay or pixel formatter routines:
  474. if ((srcWidth == dstWidth) &&
  475. (srcHeight == dstHeight) &&
  476. !(srcSurf->dwReserved1 & DD_RESERVED_DIFFERENTPIXELFORMAT) &&
  477. !(dstSurf->dwReserved1 & DD_RESERVED_DIFFERENTPIXELFORMAT))
  478. {
  479. // Assume we can do the blt top-to-bottom, left-to-right:
  480. ulBltCmd = BITBLT | DRAW | DIR_TYPE_XY | WRITE | DRAWING_DIR_TBLRXM;
       // Same surface, the rectangles intersect, and the destination comes
       // after the source (to the right on the same row, or on a lower row):
  481. if ((dstSurf == srcSurf) && (srcX + dstWidth > dstX) &&
  482. (srcY + dstHeight > dstY) && (dstX + dstWidth > srcX) &&
  483. (dstY + dstHeight > srcY) &&
  484. (((srcY == dstY) && (dstX > srcX) )
  485. || ((srcY != dstY) && (dstY > srcY))))
  486. {
  487. // Okay, we have to do the blt bottom-to-top, right-to-left:
  488. ulBltCmd = BITBLT | DRAW | DIR_TYPE_XY | WRITE | DRAWING_DIR_BTRLXM;
  489. srcX = lpBlt->rSrc.right - 1;
  490. srcY = lpBlt->rSrc.bottom - 1;
  491. dstX = lpBlt->rDest.right - 1;
  492. dstY = lpBlt->rDest.bottom - 1;
  493. }
  494. // NT only ever gives us SRCCOPY rops, so don't even bother checking
  495. // for anything else.
  496. convertToGlobalCord(srcX, srcY, srcSurf);
  497. convertToGlobalCord(dstX, dstY, dstSurf);
  498. if (dwFlags & DDBLT_KEYSRCOVERRIDE)
  499. {
       // Colour-keyed copy: colour compare is switched on for this one
       // command and the saved 'ulMiscState' is written back afterwards:
  500. NW_FIFO_WAIT(ppdev, pjMmBase, 9);
  501. NW_MULT_MISC_READ_SEL(ppdev, pjMmBase, ppdev->ulMiscState
  502. | MULT_MISC_COLOR_COMPARE, 0);
  503. NW_COLOR_CMP(ppdev, pjMmBase,
  504. lpBlt->bltFX.ddckSrcColorkey.dwColorSpaceLowValue);
  505. NW_ALT_MIX(ppdev, pjMmBase, SRC_DISPLAY_MEMORY | OVERPAINT, 0);
  506. NW_PIX_CNTL(ppdev, pjMmBase, ALL_ONES);
  507. NW_ABS_CURXY_FAST(ppdev, pjMmBase, srcX, srcY);
  508. NW_ABS_DESTXY_FAST(ppdev, pjMmBase, dstX, dstY);
  509. NW_ALT_PCNT(ppdev, pjMmBase, dstWidth - 1, dstHeight - 1);
  510. NW_ALT_CMD(ppdev, pjMmBase, ulBltCmd);
  511. NW_MULT_MISC_READ_SEL(ppdev, pjMmBase, ppdev->ulMiscState, 0);
  512. }
  513. else
  514. {
  515. NW_FIFO_WAIT(ppdev, pjMmBase, 6);
  516. NW_ALT_MIX(ppdev, pjMmBase, SRC_DISPLAY_MEMORY | OVERPAINT, 0);
  517. NW_PIX_CNTL(ppdev, pjMmBase, ALL_ONES);
  518. NW_ABS_CURXY_FAST(ppdev, pjMmBase, srcX, srcY);
  519. NW_ABS_DESTXY_FAST(ppdev, pjMmBase, dstX, dstY);
  520. NW_ALT_PCNT(ppdev, pjMmBase, dstWidth - 1, dstHeight - 1);
  521. NW_ALT_CMD(ppdev, pjMmBase, ulBltCmd);
  522. }
  523. }
  524. //////////////////////////////////////////////////////////////////////
  525. // Pixel Formatter Blts
  526. //
  527. // We can do stretches or funky pixel format blts only if a pixel
  528. // formatter is present. Plus, we set our 'ddCaps' such that we
  529. // shouldn't have to handle any shrinks.
  530. //
  531. // (We check to make sure we weren't asked to do a shrink, because we
  532. // would probably hang if the application ignored what we told them
  533. // and asked for a shrink):
  534. else if ((ppdev->flCaps & CAPS_PIXEL_FORMATTER) &&
  535. (srcWidth <= dstWidth) &&
  536. (srcHeight <= dstHeight))
  537. {
  538. if ((dwFlags & DDBLT_KEYSRCOVERRIDE) ||
  539. (dstWidth >= 4 * srcWidth))
  540. {
  541. // Contrary to what we're indicating in our capabilities, we
  542. // can't colour key on stretches or pixel format conversions.
  543. // The S3 hardware also can't do stretches of four times or
  544. // more.
  545. return(DDHAL_DRIVER_NOTHANDLED);
  546. }
  547. dwVEctrl = ~dstWidth & 0x00000FFF; // Initial accumulator
  548. dwVEdda = 0x10000000 // Some reserved bit?
  549. | (STRETCH | SCREEN) // Scale from video memory
  550. | (srcWidth << 16) // K1
  551. | ((srcWidth - dstWidth) & 0x7FF); // K2
  552. // We'll be doing the vertical stretching in software, so calculate
  553. // the DDA terms here. We have the luxury of not worrying about
  554. // overflow because DirectDraw limits our coordinate space to 15
  555. // bits.
  556. //
  557. // Note that dwRGBBitCount is overloaded with dwYUVBitCount:
  558. dwSrcByteCount = srcSurf->ddpfSurface.dwRGBBitCount >> 3;
  559. if (srcSurf->ddpfSurface.dwFlags & DDPF_FOURCC)
  560. {
  561. dwVEctrl |= INPUT_YCrCb422 | CSCENABLE; // Not INPUT_YUV422!
  562. }
  563. else if (srcSurf->ddpfSurface.dwFlags & DDPF_RGB)
  564. {
  565. switch (dwSrcByteCount)
  566. {
  567. case 1:
  568. dwVEctrl |= INPUT_RGB8;
  569. break;
  570. case 2:
  571. if (IS_RGB15_R(srcSurf->ddpfSurface.dwRBitMask))
  572. dwVEctrl |= INPUT_RGB15;
  573. else
  574. dwVEctrl |= INPUT_RGB16;
  575. break;
  576. default:
  577. dwVEctrl |= INPUT_RGB32;
  578. break;
  579. }
  580. }
  581. dwDstByteCount = dstSurf->ddpfSurface.dwRGBBitCount >> 3;
  582. switch (dwDstByteCount)
  583. {
  584. case 1:
  585. dwVEctrl |= OUTPUT_RGB8;
  586. break;
  587. case 2:
  588. if (IS_RGB15_R(dstSurf->ddpfSurface.dwRBitMask))
  589. dwVEctrl |= OUTPUT_RGB15;
  590. else
  591. dwVEctrl |= OUTPUT_RGB16;
  592. break;
  593. default:
  594. dwVEctrl |=OUTPUT_RGB32;
  595. break;
  596. }
       // Filtering is only enabled for destinations deeper than 8bpp; the
       // filter kernel is chosen from the horizontal stretch factor:
  597. if (dwDstByteCount > 1)
  598. {
  599. dwVEctrl |= FILTERENABLE;
  600. if (dstWidth > 2 * srcWidth)
  601. dwVEdda |= LINEAR12221; // linear, 1-2-2-2-1, >2X stretch
  602. else if (dstWidth > srcWidth)
  603. dwVEdda |= LINEAR02420; // linear, 0-2-4-2-0, 1-2X stretch
  604. else
  605. dwVEdda |= BILINEAR; // bi-linear, <1X stretch
  606. }
       // The pixel formatter is given linear byte addresses rather than x/y
       // coordinates, so compute them from the surface start, pitch and
       // bytes per pixel:
  607. dwVEsrcAddr = (DWORD)(srcSurf->fpVidMem + (srcY * srcSurf->lPitch)
  608. + (srcX * dwSrcByteCount));
  609. dwVEdstAddr = (DWORD)(dstSurf->fpVidMem + (dstY * dstSurf->lPitch)
  610. + (dstX * dwDstByteCount));
  611. srcPitch = srcSurf->lPitch;
  612. dstPitch = dstSurf->lPitch;
  613. // The S3's source alignment within the dword must be done using the
  614. // crop register:
  615. dwVEcrop = dstWidth;
  616. if (dwVEsrcAddr & 3)
  617. {
       // NOTE(review): 'dwSrcBytes' is used as a divisor below; it is zero
       // if the source bit count is under 8 -- presumably such surfaces
       // never reach this path, confirm.
  618. dwSrcBytes = (srcWidth * dwSrcByteCount);
  619. // Transform the number of source pixels to the number of
  620. // corresponding destination pixels, and round the result:
  621. dwCropSkip = ((dwVEsrcAddr & 3) * dstWidth + (dwSrcBytes >> 1))
  622. / dwSrcBytes;
  623. dwVEcrop += (dwCropSkip << 16);
  624. dwVEsrcAddr &= ~3;
  625. }
  626. // We have to run the vertical DDA ourselves:
       // 'dwError' gains srcHeight for every destination scan written; each
       // time it reaches dstHeight the source advances by one scan.
  627. dwError = srcHeight >> 1;
  628. i = dstHeight;
  629. // Watch out for a hardware bug when the destination will be 32 pixels
  630. // or less:
  631. //
  632. // We'll use 40 as our minimum width to guarantee we shouldn't
  633. // crash.
  634. if (dstWidth >= 40)
  635. {
  636. // The S3 will sometimes hang when using the video engine with
  637. // certain end-byte alignments. We'll simply lengthen the blt in
  638. // this case and hope that no-one notices:
  639. if (((dwVEdstAddr + (dstWidth * dwDstByteCount)) & 7) == 4)
  640. {
  641. dwVEcrop++;
  642. }
  643. // We have to execute a graphics engine NOP before using the
  644. // pixel formatter video engine:
  645. NW_FIFO_WAIT(ppdev, pjMmBase, 1);
  646. NW_ALT_CMD(ppdev, pjMmBase, 0);
  647. NW_GP_WAIT(ppdev, pjMmBase);
  648. // Set up some non-variant registers:
  649. NW_FIFO_WAIT(ppdev, pjMmBase, 4);
  650. WRITE_FORMATTER_D(pjMmBase, PF_CONTROL, dwVEctrl);
  651. WRITE_FORMATTER_D(pjMmBase, PF_DDA, dwVEdda);
  652. WRITE_FORMATTER_D(pjMmBase, PF_STEP, ppdev->dwVEstep);
  653. WRITE_FORMATTER_D(pjMmBase, PF_CROP, dwVEcrop);
       // One pass of this loop produces one destination scan:
  654. do {
  655. NW_FIFO_WAIT(ppdev, pjMmBase, 3);
  656. WRITE_FORMATTER_D(pjMmBase, PF_SRCADDR, dwVEsrcAddr);
  657. WRITE_FORMATTER_D(pjMmBase, PF_DSTADDR, dwVEdstAddr);
  658. WRITE_FORMATTER_D(pjMmBase, PF_NOP, 0);
  659. NW_FORMATTER_WAIT(ppdev, pjMmBase);
  660. dwVEdstAddr += dstPitch;
  661. dwError += srcHeight;
  662. if (dwError >= dstHeight)
  663. {
  664. dwError -= dstHeight;
  665. dwVEsrcAddr += srcPitch;
  666. }
  667. } while (--i != 0);
  668. }
  669. else if (dwDstByteCount != (DWORD) ppdev->cjPelSize)
  670. {
  671. // Because for narrow video engine blts we have to copy the
  672. // result using the normal graphics accelerator on a pixel
  673. // basis, we can't handle funky destination colour depths.
  674. // I expect zero applications to ask for narrow blts that
  675. // hit this case, so we will simply fail the call should it
  676. // ever actually occur:
  677. return(DDHAL_DRIVER_NOTHANDLED);
  678. }
  679. else
  680. {
  681. // The S3 will hang if we blt less than 32 pixels via the
  682. // pixel formatter. Unfortunately, we can't simply return
  683. // DDHAL_DRIVER_NOTHANDLED for this case. We said we'd do
  684. // hardware stretches, so we have to handle all hardware
  685. // stretches.
  686. //
  687. // We work around the problem by doing a 32 pixel stretch to
  688. // a piece of off-screen memory, then blting the appropriate
  689. // subset to the correct position on the screen.
  690. //
  691. // 32 isn't big enough. We still hang. Lets make it 40.
       // NOTE(review): this assignment replaces the crop value computed
       // above, including any source-alignment skip placed in its upper
       // 16 bits, while 'dwVEsrcAddr' stays rounded down to a dword --
       // confirm that dropping the skip here is intended.
  692. dwVEcrop = 32 + 8;
  693. convertToGlobalCord(dstX, dstY, dstSurf);
       // From here on srcX/srcY refer to the scratch area, which is both the
       // pixel formatter's destination and the source of the final copy:
  694. srcX = ppdev->pdsurfVideoEngineScratch->x;
  695. srcY = ppdev->pdsurfVideoEngineScratch->y;
  696. dwVEdstAddr = (srcY * ppdev->lDelta) + (srcX * ppdev->cjPelSize);
  697. ASSERTDD(((dwVEdstAddr + (dwVEcrop * dwDstByteCount)) & 7) != 4,
  698. "Must account for S3 end-alignment bug");
  699. do {
  700. // Use the pixel formatter to blt to our scratch area:
  701. NW_FIFO_WAIT(ppdev, pjMmBase, 1);
  702. NW_ALT_CMD(ppdev, pjMmBase, 0);
  703. NW_GP_WAIT(ppdev, pjMmBase);
  704. NW_FIFO_WAIT(ppdev, pjMmBase, 7);
  705. WRITE_FORMATTER_D(pjMmBase, PF_CONTROL, dwVEctrl);
  706. WRITE_FORMATTER_D(pjMmBase, PF_DDA, dwVEdda);
  707. WRITE_FORMATTER_D(pjMmBase, PF_STEP, ppdev->dwVEstep);
  708. WRITE_FORMATTER_D(pjMmBase, PF_CROP, dwVEcrop);
  709. WRITE_FORMATTER_D(pjMmBase, PF_SRCADDR, dwVEsrcAddr);
  710. WRITE_FORMATTER_D(pjMmBase, PF_DSTADDR, dwVEdstAddr);
  711. WRITE_FORMATTER_D(pjMmBase, PF_NOP, 0);
  712. NW_FORMATTER_WAIT(ppdev, pjMmBase);
  713. dwError += srcHeight;
  714. if (dwError >= dstHeight)
  715. {
  716. dwError -= dstHeight;
  717. dwVEsrcAddr += srcPitch;
  718. }
  719. // Now copy from the scratch area to the final destination:
       // (a single scan, 'dstWidth' pixels wide, per pass)
  720. NW_FIFO_WAIT(ppdev, pjMmBase, 6);
  721. NW_ALT_MIX(ppdev, pjMmBase, SRC_DISPLAY_MEMORY | OVERPAINT, 0);
  722. NW_PIX_CNTL(ppdev, pjMmBase, ALL_ONES);
  723. NW_ABS_CURXY_FAST(ppdev, pjMmBase, srcX, srcY);
  724. NW_ABS_DESTXY_FAST(ppdev, pjMmBase, dstX, dstY);
  725. NW_ALT_PCNT(ppdev, pjMmBase, dstWidth - 1, 0);
  726. NW_ALT_CMD(ppdev, pjMmBase, BITBLT | DRAW | DIR_TYPE_XY |
  727. WRITE | DRAWING_DIR_TBLRXM);
  728. dstY++;
  729. } while (--i != 0);
  730. }
  731. }
  732. else
  733. {
  734. //////////////////////////////////////////////////////////////////////
  735. // Overlay Blts
  736. //
  737. // Here we have to take care of cases where the destination is a
  738. // funky pixel format.
  739. // In order to make ActiveMovie and DirectVideo work, we have
  740. // to support blting between funky pixel format surfaces of the
  741. // same type. This is used to copy the current frame to the
  742. // next overlay surface in line.
  743. //
  744. // Unfortunately, it's not easy to switch the S3 graphics
  745. // processor out of its current pixel depth, so we'll only support
  746. // the minimal functionality required:
  747. if (!(dwFlags & DDBLT_ROP) ||
  748. (srcX != 0) ||
  749. (srcY != 0) ||
  750. (dstX != 0) ||
  751. (dstY != 0) ||
  752. (dstWidth != dstSurf->wWidth) ||
  753. (dstHeight != dstSurf->wHeight) ||
  754. (dstSurf->lPitch != srcSurf->lPitch) ||
  755. (dstSurf->ddpfSurface.dwRGBBitCount
  756. != srcSurf->ddpfSurface.dwRGBBitCount))
  757. {
  758. DISPDBG((0, "Sorry, we do only full-surface blts between same-type"));
  759. DISPDBG((0, "surfaces that have a funky pixel format."));
  760. return(DDHAL_DRIVER_NOTHANDLED);
  761. }
  762. else
  763. {
  764. // Convert the dimensions to the current pixel format. This
  765. // is pretty easy because we created the bitmap linearly, so
  766. // it takes the entire width of the screen:
  767. dstWidth = ppdev->cxMemory;
  768. dstHeight = dstSurf->dwBlockSizeY;
  769. convertToGlobalCord(dstX, dstY, dstSurf);
  770. convertToGlobalCord(srcX, srcY, srcSurf);
  771. NW_FIFO_WAIT(ppdev, pjMmBase, 6);
  772. NW_ALT_MIX(ppdev, pjMmBase, SRC_DISPLAY_MEMORY | OVERPAINT, 0);
  773. NW_PIX_CNTL(ppdev, pjMmBase, ALL_ONES);
  774. NW_ABS_CURXY_FAST(ppdev, pjMmBase, srcX, srcY);
  775. NW_ABS_DESTXY_FAST(ppdev, pjMmBase, dstX, dstY);
  776. NW_ALT_PCNT(ppdev, pjMmBase, dstWidth - 1, dstHeight - 1);
  777. NW_ALT_CMD(ppdev, pjMmBase, BITBLT | DRAW | DIR_TYPE_XY |
  778. WRITE | DRAWING_DIR_TBLRXM);
  779. }
  780. }
       // Every path that falls through to here has queued its blt:
  781. lpBlt->ddRVal = DD_OK;
  782. return(DDHAL_DRIVER_HANDLED);
  783. }
  784. /******************************Public*Routine******************************\
  785. * DWORD DdFlip
  786. *
  787. * Note that lpSurfCurr may not necessarily be valid.
  788. *
  789. \**************************************************************************/
  790. DWORD DdFlip(
  791. PDD_FLIPDATA lpFlip)
  792. {
  793. PDEV* ppdev;
  794. BYTE* pjIoBase;
  795. BYTE* pjMmBase;
  796. HRESULT ddrval;
  797. ULONG ulMemoryOffset;
  798. ULONG ulLowOffset;
  799. ULONG ulMiddleOffset;
  800. ULONG ulHighOffset;
  801. ppdev = (PDEV*) lpFlip->lpDD->dhpdev;
  802. pjIoBase = ppdev->pjIoBase;
  803. pjMmBase = ppdev->pjMmBase;
  804. // Is the current flip still in progress?
  805. //
  806. // Don't want a flip to work until after the last flip is done,
  807. // so we ask for the general flip status and ignore the vmem.
  808. ddrval = ddrvalUpdateFlipStatus(ppdev, (FLATPTR) -1);
  809. if ((ddrval != DD_OK) || (NW_GP_BUSY(ppdev, pjMmBase)))
  810. {
  811. lpFlip->ddRVal = DDERR_WASSTILLDRAWING;
  812. return(DDHAL_DRIVER_HANDLED);
  813. }
  814. ulMemoryOffset = (ULONG)(lpFlip->lpSurfTarg->lpGbl->fpVidMem);
  815. // Make sure that the border/blanking period isn't active; wait if
  816. // it is. We could return DDERR_WASSTILLDRAWING in this case, but
  817. // that will increase the odds that we can't flip the next time:
  818. while (!(DISPLAY_IS_ACTIVE(pjIoBase)))
  819. ;
  820. if (ppdev->flStatus & STAT_STREAMS_ENABLED)
  821. {
  822. // When using the streams processor, we have to do the flip via the
  823. // streams registers:
  824. if (lpFlip->lpSurfCurr->ddsCaps.dwCaps & DDSCAPS_PRIMARYSURFACE)
  825. {
  826. WRITE_STREAM_D(pjMmBase, P_0, ulMemoryOffset);
  827. }
  828. else if (lpFlip->lpSurfCurr->ddsCaps.dwCaps & DDSCAPS_OVERLAY)
  829. {
  830. // Make sure that the overlay surface we're flipping from is
  831. // currently visible. If you don't do this check, you'll get
  832. // really weird results when someone starts up two ActiveMovie
  833. // or DirectVideo movies simultaneously!
  834. if (lpFlip->lpSurfCurr->lpGbl->fpVidMem == ppdev->fpVisibleOverlay)
  835. {
  836. ppdev->fpVisibleOverlay = ulMemoryOffset;
  837. WRITE_STREAM_D(pjMmBase, S_0, ulMemoryOffset +
  838. ppdev->dwOverlayFlipOffset);
  839. }
  840. }
  841. }
  842. else
  843. {
  844. // Do the old way, via the CRTC registers:
  845. ulMemoryOffset >>= 2;
  846. ulLowOffset = 0x0d | ((ulMemoryOffset & 0x0000ff) << 8);
  847. ulMiddleOffset = 0x0c | ((ulMemoryOffset & 0x00ff00));
  848. ulHighOffset = 0x69 | ((ulMemoryOffset & 0x1f0000) >> 8)
  849. | ppdev->ulExtendedSystemControl3Register_69;
  850. // Don't let the cursor thread touch the CRT registers while we're
  851. // using them:
  852. ACQUIRE_CRTC_CRITICAL_SECTION(ppdev);
  853. // Too bad that the S3's flip can't be done in a single atomic register
  854. // write; as it is, we stand a small chance of being context-switched
  855. // out and exactly hitting the vertical blank in the middle of doing
  856. // these outs, possibly causing the screen to momentarily jump.
  857. //
  858. // There are some hoops we could jump through to minimize the chances
  859. // of this happening; we could try to align the flip buffer such that
  860. // the minor registers are ensured to be identical for either flip
  861. // position, and so that only the high address need be written, an
  862. // obviously atomic operation.
  863. //
  864. // However, I'm simply not going to worry about it.
  865. OUTPW(pjIoBase, CRTC_INDEX, ulLowOffset);
  866. OUTPW(pjIoBase, CRTC_INDEX, ulMiddleOffset);
  867. OUTPW(pjIoBase, CRTC_INDEX, ulHighOffset);
  868. RELEASE_CRTC_CRITICAL_SECTION(ppdev);
  869. }
  870. // Remember where and when we were when we did the flip:
  871. EngQueryPerformanceCounter(&ppdev->flipRecord.liFlipTime);
  872. ppdev->flipRecord.bFlipFlag = TRUE;
  873. ppdev->flipRecord.bHaveEverCrossedVBlank = FALSE;
  874. ppdev->flipRecord.bWasEverInDisplay = FALSE;
  875. ppdev->flipRecord.fpFlipFrom = lpFlip->lpSurfCurr->lpGbl->fpVidMem;
  876. lpFlip->ddRVal = DD_OK;
  877. return(DDHAL_DRIVER_HANDLED);
  878. }
  879. /******************************Public*Routine******************************\
  880. * DWORD DdLock
  881. *
  882. \**************************************************************************/
  883. DWORD DdLock(
  884. PDD_LOCKDATA lpLock)
  885. {
  886. PDEV* ppdev;
  887. BYTE* pjMmBase;
  888. HRESULT ddrval;
  889. ppdev = (PDEV*) lpLock->lpDD->dhpdev;
  890. pjMmBase = ppdev->pjMmBase;
  891. // Check to see if any pending physical flip has occurred. Don't allow
  892. // a lock if a blt is in progress:
  893. ddrval = ddrvalUpdateFlipStatus(ppdev, lpLock->lpDDSurface->lpGbl->fpVidMem);
  894. if (ddrval != DD_OK)
  895. {
  896. lpLock->ddRVal = DDERR_WASSTILLDRAWING;
  897. return(DDHAL_DRIVER_HANDLED);
  898. }
  899. // Here's one of the places where the Windows 95 and Windows NT DirectDraw
  900. // implementations differ: on Windows NT, you should watch for
  901. // DDLOCK_WAIT and loop in the driver while the accelerator is busy.
  902. // On Windows 95, it doesn't really matter.
  903. //
  904. // (The reason is that Windows NT allows applications to draw directly
  905. // to the frame buffer even while the accelerator is running, and does
  906. // not synchronize everything on the Win16Lock. Note that on Windows NT,
  907. // it is even possible for multiple threads to be holding different
  908. // DirectDraw surface locks at the same time.)
  909. if (lpLock->dwFlags & DDLOCK_WAIT)
  910. {
  911. NW_GP_WAIT(ppdev, pjMmBase);
  912. }
  913. else if (NW_GP_BUSY(ppdev, pjMmBase))
  914. {
  915. lpLock->ddRVal = DDERR_WASSTILLDRAWING;
  916. return(DDHAL_DRIVER_HANDLED);
  917. }
  918. return(DDHAL_DRIVER_NOTHANDLED);
  919. }
  920. /******************************Public*Routine******************************\
  921. * DWORD DdGetBltStatus
  922. *
  923. * Doesn't currently really care what surface is specified, just checks
  924. * and goes.
  925. *
  926. \**************************************************************************/
  927. DWORD DdGetBltStatus(
  928. PDD_GETBLTSTATUSDATA lpGetBltStatus)
  929. {
  930. PDEV* ppdev;
  931. BYTE* pjMmBase;
  932. HRESULT ddRVal;
  933. ppdev = (PDEV*) lpGetBltStatus->lpDD->dhpdev;
  934. pjMmBase = ppdev->pjMmBase;
  935. ddRVal = DD_OK;
  936. if (lpGetBltStatus->dwFlags == DDGBS_CANBLT)
  937. {
  938. // DDGBS_CANBLT case: can we add a blt?
  939. ddRVal = ddrvalUpdateFlipStatus(ppdev,
  940. lpGetBltStatus->lpDDSurface->lpGbl->fpVidMem);
  941. if (ddRVal == DD_OK)
  942. {
  943. // There was no flip going on, so is there room in the FIFO
  944. // to add a blt?
  945. if (MM_FIFO_BUSY(ppdev, pjMmBase, DDBLT_FIFO_COUNT))
  946. {
  947. ddRVal = DDERR_WASSTILLDRAWING;
  948. }
  949. }
  950. }
  951. else
  952. {
  953. // DDGBS_ISBLTDONE case: is a blt in progress?
  954. if (NW_GP_BUSY(ppdev, pjMmBase))
  955. {
  956. ddRVal = DDERR_WASSTILLDRAWING;
  957. }
  958. }
  959. lpGetBltStatus->ddRVal = ddRVal;
  960. return(DDHAL_DRIVER_HANDLED);
  961. }
  962. /******************************Public*Routine******************************\
  963. * DWORD DdMapMemory
  964. *
  965. * This is a new DDI call specific to Windows NT that is used to map
  966. * or unmap all the application modifiable portions of the frame buffer
  967. * into the specified process's address space.
  968. *
  969. \**************************************************************************/
  970. DWORD DdMapMemory(
  971. PDD_MAPMEMORYDATA lpMapMemory)
  972. {
  973. PDEV* ppdev;
  974. VIDEO_SHARE_MEMORY ShareMemory;
  975. VIDEO_SHARE_MEMORY_INFORMATION ShareMemoryInformation;
  976. DWORD ReturnedDataLength;
  977. ppdev = (PDEV*) lpMapMemory->lpDD->dhpdev;
  978. if (lpMapMemory->bMap)
  979. {
  980. ShareMemory.ProcessHandle = lpMapMemory->hProcess;
  981. // 'RequestedVirtualAddress' isn't actually used for the SHARE IOCTL:
  982. ShareMemory.RequestedVirtualAddress = 0;
  983. // We map in starting at the top of the frame buffer:
  984. ShareMemory.ViewOffset = 0;
  985. // We map down to the end of the frame buffer.
  986. //
  987. // Note: There is a 64k granularity on the mapping (meaning that
  988. // we have to round up to 64k).
  989. //
  990. // Note: If there is any portion of the frame buffer that must
  991. // not be modified by an application, that portion of memory
  992. // MUST NOT be mapped in by this call. This would include
  993. // any data that, if modified by a malicious application,
  994. // would cause the driver to crash. This could include, for
  995. // example, any DSP code that is kept in off-screen memory.
  996. ShareMemory.ViewSize
  997. = ROUND_UP_TO_64K(ppdev->cyMemory * ppdev->lDelta);
  998. if (EngDeviceIoControl(ppdev->hDriver,
  999. IOCTL_VIDEO_SHARE_VIDEO_MEMORY,
  1000. &ShareMemory,
  1001. sizeof(VIDEO_SHARE_MEMORY),
  1002. &ShareMemoryInformation,
  1003. sizeof(VIDEO_SHARE_MEMORY_INFORMATION),
  1004. &ReturnedDataLength))
  1005. {
  1006. DISPDBG((0, "Failed IOCTL_VIDEO_SHARE_MEMORY"));
  1007. lpMapMemory->ddRVal = DDERR_GENERIC;
  1008. return(DDHAL_DRIVER_HANDLED);
  1009. }
  1010. lpMapMemory->fpProcess = (FLATPTR)ShareMemoryInformation.VirtualAddress;
  1011. }
  1012. else
  1013. {
  1014. ShareMemory.ProcessHandle = lpMapMemory->hProcess;
  1015. ShareMemory.ViewOffset = 0;
  1016. ShareMemory.ViewSize = 0;
  1017. ShareMemory.RequestedVirtualAddress = (VOID*) lpMapMemory->fpProcess;
  1018. if (EngDeviceIoControl(ppdev->hDriver,
  1019. IOCTL_VIDEO_UNSHARE_VIDEO_MEMORY,
  1020. &ShareMemory,
  1021. sizeof(VIDEO_SHARE_MEMORY),
  1022. NULL,
  1023. 0,
  1024. &ReturnedDataLength))
  1025. {
  1026. RIP("Failed IOCTL_VIDEO_UNSHARE_MEMORY");
  1027. }
  1028. }
  1029. lpMapMemory->ddRVal = DD_OK;
  1030. return(DDHAL_DRIVER_HANDLED);
  1031. }
  1032. /******************************Public*Routine******************************\
  1033. * DWORD DdGetFlipStatus
  1034. *
  1035. * If the display has gone through one refresh cycle since the flip
  1036. * occurred, we return DD_OK. If it has not gone through one refresh
  1037. * cycle we return DDERR_WASSTILLDRAWING to indicate that this surface
  1038. * is still busy "drawing" the flipped page. We also return
  1039. * DDERR_WASSTILLDRAWING if the bltter is busy and the caller wanted
  1040. * to know if they could flip yet.
  1041. *
  1042. \**************************************************************************/
  1043. DWORD DdGetFlipStatus(
  1044. PDD_GETFLIPSTATUSDATA lpGetFlipStatus)
  1045. {
  1046. PDEV* ppdev;
  1047. BYTE* pjMmBase;
  1048. ppdev = (PDEV*) lpGetFlipStatus->lpDD->dhpdev;
  1049. pjMmBase = ppdev->pjMmBase;
  1050. // We don't want a flip to work until after the last flip is done,
  1051. // so we ask for the general flip status and ignore the vmem:
  1052. lpGetFlipStatus->ddRVal = ddrvalUpdateFlipStatus(ppdev, (FLATPTR) -1);
  1053. // Check if the bltter is busy if someone wants to know if they can
  1054. // flip:
  1055. if (lpGetFlipStatus->dwFlags == DDGFS_CANFLIP)
  1056. {
  1057. if ((lpGetFlipStatus->ddRVal == DD_OK) && (NW_GP_BUSY(ppdev, pjMmBase)))
  1058. {
  1059. lpGetFlipStatus->ddRVal = DDERR_WASSTILLDRAWING;
  1060. }
  1061. }
  1062. return(DDHAL_DRIVER_HANDLED);
  1063. }
  1064. /******************************Public*Routine******************************\
  1065. * DWORD DdWaitForVerticalBlank
  1066. *
  1067. \**************************************************************************/
  1068. DWORD DdWaitForVerticalBlank(
  1069. PDD_WAITFORVERTICALBLANKDATA lpWaitForVerticalBlank)
  1070. {
  1071. PDEV* ppdev;
  1072. BYTE* pjIoBase;
  1073. ppdev = (PDEV*) lpWaitForVerticalBlank->lpDD->dhpdev;
  1074. pjIoBase = ppdev->pjIoBase;
  1075. switch (lpWaitForVerticalBlank->dwFlags)
  1076. {
  1077. case DDWAITVB_I_TESTVB:
  1078. // If TESTVB, it's just a request for the current vertical blank
  1079. // status:
  1080. if (VBLANK_IS_ACTIVE(pjIoBase))
  1081. lpWaitForVerticalBlank->bIsInVB = TRUE;
  1082. else
  1083. lpWaitForVerticalBlank->bIsInVB = FALSE;
  1084. lpWaitForVerticalBlank->ddRVal = DD_OK;
  1085. return(DDHAL_DRIVER_HANDLED);
  1086. case DDWAITVB_BLOCKBEGIN:
  1087. // If BLOCKBEGIN is requested, we wait until the vertical blank
  1088. // is over, and then wait for the display period to end:
  1089. while (VBLANK_IS_ACTIVE(pjIoBase))
  1090. ;
  1091. while (!(VBLANK_IS_ACTIVE(pjIoBase)))
  1092. ;
  1093. lpWaitForVerticalBlank->ddRVal = DD_OK;
  1094. return(DDHAL_DRIVER_HANDLED);
  1095. case DDWAITVB_BLOCKEND:
  1096. // If BLOCKEND is requested, we wait for the vblank interval to end:
  1097. while (!(VBLANK_IS_ACTIVE(pjIoBase)))
  1098. ;
  1099. while (VBLANK_IS_ACTIVE(pjIoBase))
  1100. ;
  1101. lpWaitForVerticalBlank->ddRVal = DD_OK;
  1102. return(DDHAL_DRIVER_HANDLED);
  1103. }
  1104. return(DDHAL_DRIVER_NOTHANDLED);
  1105. }
  1106. /******************************Public*Routine******************************\
  1107. * DWORD DdCanCreateSurface
  1108. *
  1109. \**************************************************************************/
  1110. DWORD DdCanCreateSurface(
  1111. PDD_CANCREATESURFACEDATA lpCanCreateSurface)
  1112. {
  1113. PDEV* ppdev;
  1114. DWORD dwRet;
  1115. LPDDSURFACEDESC lpSurfaceDesc;
  1116. ppdev = (PDEV*) lpCanCreateSurface->lpDD->dhpdev;
  1117. lpSurfaceDesc = lpCanCreateSurface->lpDDSurfaceDesc;
  1118. dwRet = DDHAL_DRIVER_NOTHANDLED;
  1119. if (!lpCanCreateSurface->bIsDifferentPixelFormat)
  1120. {
  1121. // It's trivially easy to create plain surfaces that are the same
  1122. // type as the primary surface:
  1123. dwRet = DDHAL_DRIVER_HANDLED;
  1124. }
  1125. // If the streams processor is capable, we can handle overlays:
  1126. else if (ppdev->flCaps & CAPS_STREAMS_CAPABLE)
  1127. {
  1128. // When using the Streams processor, we handle only overlays of
  1129. // different pixel formats -- not any off-screen memory:
  1130. if (lpSurfaceDesc->ddsCaps.dwCaps & DDSCAPS_OVERLAY)
  1131. {
  1132. // We handle two types of YUV overlay surfaces:
  1133. if (lpSurfaceDesc->ddpfPixelFormat.dwFlags & DDPF_FOURCC)
  1134. {
  1135. // Check first for a supported YUV type:
  1136. if (lpSurfaceDesc->ddpfPixelFormat.dwFourCC == FOURCC_YUY2)
  1137. {
  1138. lpSurfaceDesc->ddpfPixelFormat.dwYUVBitCount = 16;
  1139. dwRet = DDHAL_DRIVER_HANDLED;
  1140. }
  1141. }
  1142. // We handle 16bpp and 32bpp RGB overlay surfaces:
  1143. else if ((lpSurfaceDesc->ddpfPixelFormat.dwFlags & DDPF_RGB) &&
  1144. !(lpSurfaceDesc->ddpfPixelFormat.dwFlags & DDPF_PALETTEINDEXED8))
  1145. {
  1146. if (lpSurfaceDesc->ddpfPixelFormat.dwRGBBitCount == 16)
  1147. {
  1148. if (IS_RGB15(&lpSurfaceDesc->ddpfPixelFormat) ||
  1149. IS_RGB16(&lpSurfaceDesc->ddpfPixelFormat))
  1150. {
  1151. dwRet = DDHAL_DRIVER_HANDLED;
  1152. }
  1153. }
  1154. // We don't handle 24bpp overlay surfaces because they are
  1155. // undocumented and don't seem to work on the Trio64V+.
  1156. //
  1157. // We don't handle 32bpp overlay surfaces because our streams
  1158. // minimum-stretch-ratio tables were obviously created for
  1159. // 16bpp overlay surfaces; 32bpp overlay surfaces create a lot
  1160. // of noise when close to the minimum stretch ratio.
  1161. }
  1162. }
  1163. }
  1164. // If the pixel formatter is enabled, we can handle funky format off-
  1165. // screen surfaces, but not at 8bpp because of palette issues:
  1166. else if ((ppdev->flCaps & CAPS_PIXEL_FORMATTER) &&
  1167. (ppdev->iBitmapFormat > BMF_8BPP))
  1168. {
  1169. if (lpSurfaceDesc->ddpfPixelFormat.dwFlags & DDPF_FOURCC)
  1170. {
  1171. if (lpSurfaceDesc->ddpfPixelFormat.dwFourCC == FOURCC_YUY2)
  1172. {
  1173. lpSurfaceDesc->ddpfPixelFormat.dwYUVBitCount = 16;
  1174. dwRet = DDHAL_DRIVER_HANDLED;
  1175. }
  1176. }
  1177. // We handle 16bpp and 32bpp RGB off-screen surfaces:
  1178. else if ((lpSurfaceDesc->ddpfPixelFormat.dwFlags & DDPF_RGB) &&
  1179. !(lpSurfaceDesc->ddpfPixelFormat.dwFlags & DDPF_PALETTEINDEXED8))
  1180. {
  1181. if (lpSurfaceDesc->ddpfPixelFormat.dwRGBBitCount == 16)
  1182. {
  1183. if (IS_RGB15(&lpSurfaceDesc->ddpfPixelFormat) ||
  1184. IS_RGB16(&lpSurfaceDesc->ddpfPixelFormat))
  1185. {
  1186. dwRet = DDHAL_DRIVER_HANDLED;
  1187. }
  1188. }
  1189. else if (lpSurfaceDesc->ddpfPixelFormat.dwRGBBitCount == 32)
  1190. {
  1191. if (IS_RGB32(&lpSurfaceDesc->ddpfPixelFormat))
  1192. {
  1193. dwRet = DDHAL_DRIVER_HANDLED;
  1194. }
  1195. }
  1196. }
  1197. }
  1198. // Print some spew if this was a surface we refused to create:
  1199. if (dwRet == DDHAL_DRIVER_NOTHANDLED)
  1200. {
  1201. if (lpSurfaceDesc->ddpfPixelFormat.dwFlags & DDPF_RGB)
  1202. {
  1203. DISPDBG((0, "Failed creation of %libpp RGB surface %lx %lx %lx",
  1204. lpSurfaceDesc->ddpfPixelFormat.dwRGBBitCount,
  1205. lpSurfaceDesc->ddpfPixelFormat.dwRBitMask,
  1206. lpSurfaceDesc->ddpfPixelFormat.dwGBitMask,
  1207. lpSurfaceDesc->ddpfPixelFormat.dwBBitMask));
  1208. }
  1209. else
  1210. {
  1211. DISPDBG((0, "Failed creation of type 0x%lx YUV 0x%lx surface",
  1212. lpSurfaceDesc->ddpfPixelFormat.dwFlags,
  1213. lpSurfaceDesc->ddpfPixelFormat.dwFourCC));
  1214. }
  1215. }
  1216. lpCanCreateSurface->ddRVal = DD_OK;
  1217. return(dwRet);
  1218. }
  1219. /******************************Public*Routine******************************\
  1220. * DWORD DdCreateSurface
  1221. *
  1222. \**************************************************************************/
  1223. DWORD DdCreateSurface(
  1224. PDD_CREATESURFACEDATA lpCreateSurface)
  1225. {
  1226. PDEV* ppdev;
  1227. DD_SURFACE_LOCAL* lpSurfaceLocal;
  1228. DD_SURFACE_GLOBAL* lpSurfaceGlobal;
  1229. LPDDSURFACEDESC lpSurfaceDesc;
  1230. DWORD dwByteCount;
  1231. LONG lLinearPitch;
  1232. DWORD dwHeight;
  1233. ppdev = (PDEV*) lpCreateSurface->lpDD->dhpdev;
  1234. // On Windows NT, dwSCnt will always be 1, so there will only ever
  1235. // be one entry in the 'lplpSList' array:
  1236. lpSurfaceLocal = lpCreateSurface->lplpSList[0];
  1237. lpSurfaceGlobal = lpSurfaceLocal->lpGbl;
  1238. lpSurfaceDesc = lpCreateSurface->lpDDSurfaceDesc;
  1239. // We repeat the same checks we did in 'DdCanCreateSurface' because
  1240. // it's possible that an application doesn't call 'DdCanCreateSurface'
  1241. // before calling 'DdCreateSurface'.
  1242. ASSERTDD(lpSurfaceGlobal->ddpfSurface.dwSize == sizeof(DDPIXELFORMAT),
  1243. "NT is supposed to guarantee that ddpfSurface.dwSize is valid");
  1244. // DdCanCreateSurface already validated whether the hardware supports
  1245. // the surface, so we don't need to do any validation here. We'll
  1246. // just go ahead and allocate it.
  1247. //
  1248. // Note that we don't do anything special for RGB surfaces that are
  1249. // the same pixel format as the display -- by returning DDHAL_DRIVER_
  1250. // NOTHANDLED, DirectDraw will automatically handle the allocation
  1251. // for us.
  1252. //
  1253. // Also, since we'll be making linear surfaces, make sure the width
  1254. // isn't unreasonably large.
  1255. //
  1256. // Note that on NT, an overlay can be created only if the driver
  1257. // okay's it here in this routine. Under Win95, the overlay will be
  1258. // created automatically if it's the same pixel format as the primary
  1259. // display.
  1260. if ((lpSurfaceLocal->ddsCaps.dwCaps & DDSCAPS_OVERLAY) ||
  1261. (lpSurfaceGlobal->ddpfSurface.dwFlags & DDPF_FOURCC) ||
  1262. (lpSurfaceGlobal->ddpfSurface.dwYUVBitCount
  1263. != (DWORD) 8 * ppdev->cjPelSize) ||
  1264. (lpSurfaceGlobal->ddpfSurface.dwRBitMask != ppdev->flRed))
  1265. {
  1266. if (lpSurfaceGlobal->wWidth <= (DWORD) ppdev->cxMemory)
  1267. {
  1268. // The S3 cannot easily draw to YUV surfaces or surfaces that are
  1269. // a different RGB format than the display. So we'll make them
  1270. // linear surfaces to save some space:
  1271. if (lpSurfaceGlobal->ddpfSurface.dwFlags & DDPF_FOURCC)
  1272. {
  1273. ASSERTDD((lpSurfaceGlobal->ddpfSurface.dwFourCC == FOURCC_YUY2),
  1274. "Expected our DdCanCreateSurface to allow only YUY2 or Y211");
  1275. dwByteCount = (lpSurfaceGlobal->ddpfSurface.dwFourCC == FOURCC_YUY2)
  1276. ? 2 : 1;
  1277. // We have to fill in the bit-count for FourCC surfaces:
  1278. lpSurfaceGlobal->ddpfSurface.dwYUVBitCount = 8 * dwByteCount;
  1279. DISPDBG((0, "Created YUV: %li x %li",
  1280. lpSurfaceGlobal->wWidth, lpSurfaceGlobal->wHeight));
  1281. }
  1282. else
  1283. {
  1284. dwByteCount = lpSurfaceGlobal->ddpfSurface.dwRGBBitCount >> 3;
  1285. DISPDBG((0, "Created RGB %libpp: %li x %li Red: %lx",
  1286. 8 * dwByteCount, lpSurfaceGlobal->wWidth, lpSurfaceGlobal->wHeight,
  1287. lpSurfaceGlobal->ddpfSurface.dwRBitMask));
  1288. // The S3 can't handle palettized or 32bpp overlays. Note that
  1289. // we sometimes don't get a chance to say no to these surfaces
  1290. // in CanCreateSurface, because DirectDraw won't call
  1291. // CanCreateSurface if the surface to be created is the same
  1292. // pixel format as the primary display:
  1293. if ((dwByteCount != 2) &&
  1294. (lpSurfaceLocal->ddsCaps.dwCaps & DDSCAPS_OVERLAY))
  1295. {
  1296. lpCreateSurface->ddRVal = DDERR_INVALIDPIXELFORMAT;
  1297. return(DDHAL_DRIVER_HANDLED);
  1298. }
  1299. }
  1300. // We want to allocate a linear surface to store the FourCC
  1301. // surface, but DirectDraw is using a 2-D heap-manager because
  1302. // the rest of our surfaces have to be 2-D. So here we have to
  1303. // convert the linear size to a 2-D size.
  1304. //
  1305. // The stride has to be a dword multiple:
  1306. lLinearPitch = (lpSurfaceGlobal->wWidth * dwByteCount + 3) & ~3;
  1307. dwHeight = (lpSurfaceGlobal->wHeight * lLinearPitch
  1308. + ppdev->lDelta - 1) / ppdev->lDelta;
  1309. // Now fill in enough stuff to have the DirectDraw heap-manager
  1310. // do the allocation for us:
  1311. lpSurfaceGlobal->fpVidMem = DDHAL_PLEASEALLOC_BLOCKSIZE;
  1312. lpSurfaceGlobal->dwBlockSizeX = ppdev->lDelta; // Specified in bytes
  1313. lpSurfaceGlobal->dwBlockSizeY = dwHeight;
  1314. lpSurfaceGlobal->lPitch = lLinearPitch;
  1315. lpSurfaceGlobal->dwReserved1 = DD_RESERVED_DIFFERENTPIXELFORMAT;
  1316. lpSurfaceDesc->lPitch = lLinearPitch;
  1317. lpSurfaceDesc->dwFlags |= DDSD_PITCH;
  1318. }
  1319. else
  1320. {
  1321. DISPDBG((0, "Refused to create surface with large width"));
  1322. }
  1323. }
  1324. return(DDHAL_DRIVER_NOTHANDLED);
  1325. }
  1326. /******************************Public*Routine******************************\
  1327. * DWORD DdFreeDriverMemory
  1328. *
  1329. * This function called by DirectDraw when it's running low on memory in
  1330. * our heap. You only need to implement this function if you use the
  1331. * DirectDraw 'HeapVidMemAllocAligned' function in your driver, and you
  1332. * can boot those allocations out of memory to make room for DirectDraw.
  1333. *
  1334. * We implement this function in the S3 driver because we have DirectDraw
  1335. * entirely manage our off-screen heap, and we use HeapVidMemAllocAligned
  1336. * to put GDI device-bitmaps in off-screen memory. DirectDraw applications
  1337. * have a higher priority for getting stuff into video memory, though, and
  1338. * so this function is used to boot those GDI surfaces out of memory in
  1339. * order to make room for DirectDraw.
  1340. *
  1341. \**************************************************************************/
  1342. DWORD DdFreeDriverMemory(
  1343. PDD_FREEDRIVERMEMORYDATA lpFreeDriverMemory)
  1344. {
  1345. PDEV* ppdev;
  1346. ppdev = (PDEV*) lpFreeDriverMemory->lpDD->dhpdev;
  1347. lpFreeDriverMemory->ddRVal = DDERR_OUTOFMEMORY;
  1348. // If we successfully freed up some memory, set the return value to
  1349. // 'DD_OK'. DirectDraw will try again to do its allocation, and
  1350. // will call us again if there's still not enough room. (It will
  1351. // call us until either there's enough room for its alocation to
  1352. // succeed, or until we return something other than DD_OK.)
  1353. if (bMoveOldestOffscreenDfbToDib(ppdev))
  1354. {
  1355. lpFreeDriverMemory->ddRVal = DD_OK;
  1356. }
  1357. return(DDHAL_DRIVER_HANDLED);
  1358. }
  1359. /******************************Public*Routine******************************\
  1360. * DWORD DdSetColorKey
  1361. *
  1362. \**************************************************************************/
  1363. DWORD DdSetColorKey(
  1364. PDD_SETCOLORKEYDATA lpSetColorKey)
  1365. {
  1366. PDEV* ppdev;
  1367. BYTE* pjIoBase;
  1368. BYTE* pjMmBase;
  1369. DD_SURFACE_GLOBAL* lpSurface;
  1370. DWORD dwKeyLow;
  1371. DWORD dwKeyHigh;
  1372. ppdev = (PDEV*) lpSetColorKey->lpDD->dhpdev;
  1373. ASSERTDD(ppdev->flCaps & CAPS_STREAMS_CAPABLE, "Shouldn't have hooked call");
  1374. pjIoBase = ppdev->pjIoBase;
  1375. pjMmBase = ppdev->pjMmBase;
  1376. lpSurface = lpSetColorKey->lpDDSurface->lpGbl;
  1377. // We don't have to do anything for normal blt source colour keys:
  1378. if (lpSetColorKey->dwFlags & DDCKEY_SRCBLT)
  1379. {
  1380. lpSetColorKey->ddRVal = DD_OK;
  1381. return(DDHAL_DRIVER_HANDLED);
  1382. }
  1383. else if (lpSetColorKey->dwFlags & DDCKEY_DESTOVERLAY)
  1384. {
  1385. dwKeyLow = lpSetColorKey->ckNew.dwColorSpaceLowValue;
  1386. if (lpSurface->ddpfSurface.dwFlags & DDPF_PALETTEINDEXED8)
  1387. {
  1388. dwKeyLow = dwGetPaletteEntry(ppdev, dwKeyLow);
  1389. }
  1390. else
  1391. {
  1392. ASSERTDD(lpSurface->ddpfSurface.dwFlags & DDPF_RGB,
  1393. "Expected only RGB cases here");
  1394. // We have to transform the colour key from its native format
  1395. // to 8-8-8:
  1396. if (lpSurface->ddpfSurface.dwRGBBitCount == 16)
  1397. {
  1398. if (IS_RGB15_R(lpSurface->ddpfSurface.dwRBitMask))
  1399. dwKeyLow = RGB15to32(dwKeyLow);
  1400. else
  1401. dwKeyLow = RGB16to32(dwKeyLow);
  1402. }
  1403. else
  1404. {
  1405. ASSERTDD((lpSurface->ddpfSurface.dwRGBBitCount == 32),
  1406. "Expected the primary surface to be either 8, 16, or 32bpp");
  1407. }
  1408. }
  1409. dwKeyHigh = dwKeyLow;
  1410. dwKeyLow |= CompareBits0t7 | KeyFromCompare;
  1411. // Check for stream processor enabled before setting registers
  1412. if(ppdev->flStatus & STAT_STREAMS_ENABLED)
  1413. {
  1414. WAIT_FOR_VBLANK(pjIoBase);
  1415. WRITE_STREAM_D(pjMmBase, CKEY_LOW, dwKeyLow);
  1416. WRITE_STREAM_D(pjMmBase, CKEY_HI, dwKeyHigh);
  1417. }
  1418. else
  1419. {
  1420. // Save away the color key to be set when streams
  1421. // processor is turned on.
  1422. ppdev->ulColorKey = dwKeyHigh;
  1423. }
  1424. lpSetColorKey->ddRVal = DD_OK;
  1425. return(DDHAL_DRIVER_HANDLED);
  1426. }
  1427. DISPDBG((0, "DdSetColorKey: Invalid command"));
  1428. return(DDHAL_DRIVER_NOTHANDLED);
  1429. }
  1430. /******************************Public*Routine******************************\
  1431. * DWORD DdUpdateOverlay
  1432. *
  1433. \**************************************************************************/
  1434. DWORD DdUpdateOverlay(
  1435. PDD_UPDATEOVERLAYDATA lpUpdateOverlay)
  1436. {
  1437. PDEV* ppdev;
  1438. BYTE* pjIoBase;
  1439. BYTE* pjMmBase;
  1440. DD_SURFACE_GLOBAL* lpSource;
  1441. DD_SURFACE_GLOBAL* lpDestination;
  1442. DWORD dwStride;
  1443. LONG srcWidth;
  1444. LONG srcHeight;
  1445. LONG dstWidth;
  1446. LONG dstHeight;
  1447. DWORD dwBitCount;
  1448. DWORD dwStart;
  1449. DWORD dwTmp;
  1450. BOOL bColorKey;
  1451. DWORD dwKeyLow;
  1452. DWORD dwKeyHigh;
  1453. DWORD dwBytesPerPixel;
  1454. DWORD dwSecCtrl;
  1455. DWORD dwBlendCtrl;
  1456. ppdev = (PDEV*) lpUpdateOverlay->lpDD->dhpdev;
  1457. ASSERTDD(ppdev->flCaps & CAPS_STREAMS_CAPABLE, "Shouldn't have hooked call");
  1458. pjIoBase = ppdev->pjIoBase;
  1459. pjMmBase = ppdev->pjMmBase;
  1460. // 'Source' is the overlay surface, 'destination' is the surface to
  1461. // be overlayed:
  1462. lpSource = lpUpdateOverlay->lpDDSrcSurface->lpGbl;
  1463. if (lpUpdateOverlay->dwFlags & DDOVER_HIDE)
  1464. {
  1465. if (lpSource->fpVidMem == ppdev->fpVisibleOverlay)
  1466. {
  1467. WAIT_FOR_VBLANK(pjIoBase);
  1468. WRITE_STREAM_D(pjMmBase, BLEND_CONTROL, POnS);
  1469. WRITE_STREAM_D(pjMmBase, S_WH, WH(10, 2)); // Set to 10x2 rectangle
  1470. WRITE_STREAM_D(pjMmBase, OPAQUE_CONTROL, 0);// Disable opaque control
  1471. ppdev->fpVisibleOverlay = 0;
  1472. ASSERTDD(ppdev->flStatus & STAT_STREAMS_ENABLED,
  1473. "Expected streams to be enabled");
  1474. ppdev->flStatus &= ~STAT_STREAMS_ENABLED;
  1475. vTurnOffStreamsProcessorMode(ppdev);
  1476. }
  1477. lpUpdateOverlay->ddRVal = DD_OK;
  1478. return(DDHAL_DRIVER_HANDLED);
  1479. }
  1480. // Dereference 'lpDDDestSurface' only after checking for the DDOVER_HIDE
  1481. // case:
  1482. lpDestination = lpUpdateOverlay->lpDDDestSurface->lpGbl;
  1483. if (lpSource->fpVidMem != ppdev->fpVisibleOverlay)
  1484. {
  1485. if (lpUpdateOverlay->dwFlags & DDOVER_SHOW)
  1486. {
  1487. if (ppdev->fpVisibleOverlay != 0)
  1488. {
  1489. // Some other overlay is already visible:
  1490. DISPDBG((0, "DdUpdateOverlay: An overlay is already visible"));
  1491. lpUpdateOverlay->ddRVal = DDERR_OUTOFCAPS;
  1492. return(DDHAL_DRIVER_HANDLED);
  1493. }
  1494. else
  1495. {
  1496. // We're going to make the overlay visible, so mark it as
  1497. // such:
  1498. ppdev->fpVisibleOverlay = lpSource->fpVidMem;
  1499. }
  1500. }
  1501. else
  1502. {
  1503. // The overlay isn't visible, and we haven't been asked to make
  1504. // it visible, so this call is trivially easy:
  1505. lpUpdateOverlay->ddRVal = DD_OK;
  1506. return(DDHAL_DRIVER_HANDLED);
  1507. }
  1508. }
  1509. if (!(ppdev->flStatus & STAT_STREAMS_ENABLED))
  1510. {
  1511. ppdev->flStatus |= STAT_STREAMS_ENABLED;
  1512. vTurnOnStreamsProcessorMode(ppdev);
  1513. }
  1514. dwStride = lpSource->lPitch;
  1515. srcWidth = lpUpdateOverlay->rSrc.right - lpUpdateOverlay->rSrc.left;
  1516. srcHeight = lpUpdateOverlay->rSrc.bottom - lpUpdateOverlay->rSrc.top;
  1517. dstWidth = lpUpdateOverlay->rDest.right - lpUpdateOverlay->rDest.left;
  1518. dstHeight = lpUpdateOverlay->rDest.bottom - lpUpdateOverlay->rDest.top;
  1519. // Calculate DDA horizontal accumulator initial value:
  1520. dwSecCtrl = HDDA(srcWidth, dstWidth);
  1521. // Overlay input data format:
  1522. if (lpSource->ddpfSurface.dwFlags & DDPF_FOURCC)
  1523. {
  1524. dwBitCount = lpSource->ddpfSurface.dwYUVBitCount;
  1525. switch (lpSource->ddpfSurface.dwFourCC)
  1526. {
  1527. case FOURCC_YUY2:
  1528. dwSecCtrl |= S_YCrCb422; // Not S_YUV422! Dunno why...
  1529. break;
  1530. default:
  1531. RIP("Unexpected FourCC");
  1532. }
  1533. }
  1534. else
  1535. {
  1536. ASSERTDD(lpSource->ddpfSurface.dwFlags & DDPF_RGB,
  1537. "Expected us to have created only RGB or YUV overlays");
  1538. // The overlay surface is in RGB format:
  1539. dwBitCount = lpSource->ddpfSurface.dwRGBBitCount;
  1540. ASSERTDD(dwBitCount == 16,
  1541. "Expected us to have created 16bpp RGB surfaces only");
  1542. if (IS_RGB15_R(lpSource->ddpfSurface.dwRBitMask))
  1543. dwSecCtrl |= S_RGB15;
  1544. else
  1545. dwSecCtrl |= S_RGB16;
  1546. }
  1547. // Calculate start of video memory in QWORD boundary
  1548. dwBytesPerPixel = dwBitCount >> 3;
  1549. dwStart = (lpUpdateOverlay->rSrc.top * dwStride)
  1550. + (lpUpdateOverlay->rSrc.left * dwBytesPerPixel);
  1551. // Note that since we're shifting the source's edge to the left, we
  1552. // should really increase the source width to compensate. However,
  1553. // doing so when running at 1 to 1 would cause us to request a
  1554. // shrinking overlay -- something the S3 can't do.
  1555. dwStart = dwStart - (dwStart & 0x7);
  1556. ppdev->dwOverlayFlipOffset = dwStart; // Save for flip
  1557. dwStart += (DWORD)lpSource->fpVidMem;
  1558. // Set overlay filter characteristics:
  1559. if ((dstWidth != srcWidth) || (dstHeight != srcHeight))
  1560. {
  1561. if (dstWidth >= (srcWidth << 2))
  1562. {
  1563. dwSecCtrl |= S_Beyond4x; // Linear, 1-2-2-2-1, for >4X stretch
  1564. }
  1565. else if (dstWidth >= (srcWidth << 1))
  1566. {
  1567. dwSecCtrl |= S_2xTo4x; // Bi-linear, for 2X to 4X stretch
  1568. }
  1569. else
  1570. {
  1571. dwSecCtrl |= S_Upto2x; // Linear, 0-2-4-2-0, for X stretch
  1572. }
  1573. }
  1574. // Extract colour key:
  1575. bColorKey = FALSE;
  1576. dwBlendCtrl = 0;
  1577. if (lpUpdateOverlay->dwFlags & DDOVER_KEYDEST)
  1578. {
  1579. bColorKey = TRUE;
  1580. dwKeyLow = lpUpdateOverlay->lpDDDestSurface->ddckCKDestOverlay.dwColorSpaceLowValue;
  1581. dwBlendCtrl |= KeyOnP;
  1582. }
  1583. else if (lpUpdateOverlay->dwFlags & DDOVER_KEYDESTOVERRIDE)
  1584. {
  1585. bColorKey = TRUE;
  1586. dwKeyLow = lpUpdateOverlay->overlayFX.dckDestColorkey.dwColorSpaceLowValue;
  1587. dwBlendCtrl |= KeyOnP;
  1588. }
  1589. if (bColorKey)
  1590. {
  1591. // We support only destination colour keys:
  1592. if (lpDestination->ddpfSurface.dwFlags & DDPF_PALETTEINDEXED8)
  1593. {
  1594. dwKeyLow = dwGetPaletteEntry(ppdev, dwKeyLow);
  1595. }
  1596. else if (lpDestination->ddpfSurface.dwFlags & DDPF_RGB)
  1597. {
  1598. ASSERTDD(lpDestination->ddpfSurface.dwFlags & DDPF_RGB,
  1599. "Expected only RGB cases here");
  1600. // We have to transform the colour key from its native format
  1601. // to 8-8-8:
  1602. if (lpDestination->ddpfSurface.dwRGBBitCount == 16)
  1603. {
  1604. if (IS_RGB15_R(lpDestination->ddpfSurface.dwRBitMask))
  1605. dwKeyLow = RGB15to32(dwKeyLow);
  1606. else
  1607. dwKeyLow = RGB16to32(dwKeyLow);
  1608. }
  1609. else
  1610. {
  1611. ASSERTDD((lpDestination->ddpfSurface.dwRGBBitCount == 32),
  1612. "Expected the primary surface to be either 8, 16, or 32bpp");
  1613. }
  1614. }
  1615. dwKeyHigh = dwKeyLow;
  1616. dwKeyLow |= CompareBits0t7 | KeyFromCompare;
  1617. }
  1618. // Update and show:
  1619. NW_GP_WAIT(ppdev, pjMmBase);
  1620. WAIT_FOR_VBLANK(pjIoBase);
  1621. WRITE_STREAM_D(pjMmBase, S_0, dwStart);
  1622. WRITE_STREAM_D(pjMmBase, S_XY, XY(lpUpdateOverlay->rDest.left,
  1623. lpUpdateOverlay->rDest.top));
  1624. WRITE_STREAM_D(pjMmBase, S_WH, WH(dstWidth, dstHeight));
  1625. WRITE_STREAM_D(pjMmBase, S_STRIDE, dwStride);
  1626. WRITE_STREAM_D(pjMmBase, S_CONTROL, dwSecCtrl);
  1627. WRITE_STREAM_D(pjMmBase, S_HK1K2, HK1K2(srcWidth, dstWidth));
  1628. WRITE_STREAM_D(pjMmBase, S_VK1, VK1(srcHeight));
  1629. WRITE_STREAM_D(pjMmBase, S_VK2, VK2(srcHeight, dstHeight));
  1630. WRITE_STREAM_D(pjMmBase, S_VDDA, VDDA(dstHeight));
  1631. if (bColorKey)
  1632. {
  1633. WRITE_STREAM_D(pjMmBase, CKEY_LOW, dwKeyLow);
  1634. WRITE_STREAM_D(pjMmBase, CKEY_HI, dwKeyHigh);
  1635. }
  1636. WRITE_STREAM_D(pjMmBase, BLEND_CONTROL, dwBlendCtrl);
  1637. WRITE_STREAM_D(pjMmBase, FIFO_CONTROL, ppdev->ulFifoValue);
  1638. lpUpdateOverlay->ddRVal = DD_OK;
  1639. return(DDHAL_DRIVER_HANDLED);
  1640. }
  1641. /******************************Public*Routine******************************\
  1642. * DWORD DdSetOverlayPosition
  1643. *
  1644. \**************************************************************************/
  1645. DWORD DdSetOverlayPosition(
  1646. PDD_SETOVERLAYPOSITIONDATA lpSetOverlayPosition)
  1647. {
  1648. PDEV* ppdev;
  1649. BYTE* pjIoBase;
  1650. BYTE* pjMmBase;
  1651. ppdev = (PDEV*) lpSetOverlayPosition->lpDD->dhpdev;
  1652. pjIoBase = ppdev->pjIoBase;
  1653. pjMmBase = ppdev->pjMmBase;
  1654. ASSERTDD(ppdev->flCaps & CAPS_STREAMS_CAPABLE, "Shouldn't have hooked call");
  1655. // Check that streams processor is enabled before settting registers
  1656. if(ppdev->flStatus & STAT_STREAMS_ENABLED)
  1657. {
  1658. WAIT_FOR_VBLANK(pjIoBase);
  1659. WRITE_STREAM_D(pjMmBase, S_XY, XY(lpSetOverlayPosition->lXPos,
  1660. lpSetOverlayPosition->lYPos));
  1661. }
  1662. lpSetOverlayPosition->ddRVal = DD_OK;
  1663. return(DDHAL_DRIVER_HANDLED);
  1664. }
  1665. /******************************Public*Routine******************************\
  1666. * DWORD DdGetDriverInfo
  1667. *
  1668. * This function is an extensible method for returning DirectDraw
  1669. * capabilities and methods.
  1670. *
  1671. \**************************************************************************/
  1672. DWORD DdGetDriverInfo(
  1673. PDD_GETDRIVERINFODATA lpGetDriverInfo)
  1674. {
  1675. DWORD dwSize;
  1676. lpGetDriverInfo->ddRVal = DDERR_CURRENTLYNOTAVAIL;
  1677. if (IsEqualIID(&lpGetDriverInfo->guidInfo, &GUID_NTCallbacks))
  1678. {
  1679. DD_NTCALLBACKS NtCallbacks;
  1680. memset(&NtCallbacks, 0, sizeof(NtCallbacks));
  1681. dwSize = min(lpGetDriverInfo->dwExpectedSize, sizeof(DD_NTCALLBACKS));
  1682. NtCallbacks.dwSize = dwSize;
  1683. NtCallbacks.dwFlags = DDHAL_NTCB32_FREEDRIVERMEMORY;
  1684. NtCallbacks.FreeDriverMemory = DdFreeDriverMemory;
  1685. memcpy(lpGetDriverInfo->lpvData, &NtCallbacks, dwSize);
  1686. lpGetDriverInfo->ddRVal = DD_OK;
  1687. }
  1688. return(DDHAL_DRIVER_HANDLED);
  1689. }
  1690. /******************************Public*Routine******************************\
  1691. * VOID vAssertModeDirectDraw
  1692. *
  1693. * This function is called by enable.c when entering or leaving the
  1694. * DOS full-screen character mode.
  1695. *
  1696. \**************************************************************************/
  1697. VOID vAssertModeDirectDraw(
  1698. PDEV* ppdev,
  1699. BOOL bEnable)
  1700. {
  1701. }
  1702. /******************************Public*Routine******************************\
  1703. * BOOL bEnableDirectDraw
  1704. *
  1705. * This function is called by enable.c when the mode is first initialized,
  1706. * right after the miniport does the mode-set.
  1707. *
  1708. \**************************************************************************/
  1709. BOOL bEnableDirectDraw(
  1710. PDEV* ppdev)
  1711. {
  1712. BYTE* pjIoBase;
  1713. VIDEO_QUERY_STREAMS_MODE VideoQueryStreamsMode;
  1714. VIDEO_QUERY_STREAMS_PARAMETERS VideoQueryStreamsParameters;
  1715. DWORD ReturnedDataLength;
  1716. BOOL bDDrawEnabled=TRUE;
  1717. // We're not going to bother to support accelerated DirectDraw on
  1718. // those S3s that can't support memory-mapped I/O, simply because
  1719. // they're old cards and it's not worth the effort. We also
  1720. // require DIRECT_ACCESS to the frame buffer.
  1721. //
  1722. // We also don't support 864/964 cards because writing to the frame
  1723. // buffer can hang the entire system if an accelerated operation is
  1724. // going on at the same time.
  1725. //
  1726. // The 765 (Trio64V+) has a bug such that writing to the frame
  1727. // buffer during an accelerator operation may cause a hang if
  1728. // you do the write soon enough after starting the blt. (There is
  1729. // a small window of opportunity.) On UP machines, the context
  1730. // switch time seems to be enough to avoid the problem. However,
  1731. // on MP machines, we'll have to disable direct draw.
  1732. //
  1733. // NOTE: We can identify the 765 since it is the only chip with
  1734. // the CAPS_STREAMS_CAPABLE flag.
  1735. if (ppdev->flCaps & CAPS_STREAMS_CAPABLE)
  1736. {
  1737. DWORD numProcessors;
  1738. if (EngQuerySystemAttribute(EngNumberOfProcessors, &numProcessors))
  1739. {
  1740. if (numProcessors != 1)
  1741. {
  1742. DISPDBG((1, "Disabling DDraw for MP 765 box.\n"));
  1743. bDDrawEnabled = FALSE;
  1744. }
  1745. }
  1746. else
  1747. {
  1748. DISPDBG((1, "Can't determine number of processors, so play it "
  1749. "safe and disable DDraw for 765.\n"));
  1750. bDDrawEnabled = FALSE;
  1751. }
  1752. }
  1753. // The stretch and YUV bltter capabilities of the S3 868 and 968 were
  1754. // disabled to account for bug 135541.
  1755. ppdev->flCaps &= ~CAPS_PIXEL_FORMATTER;
  1756. if ((ppdev->flCaps & CAPS_NEW_MMIO) &&
  1757. !(ppdev->flCaps & CAPS_NO_DIRECT_ACCESS) &&
  1758. (bDDrawEnabled))
  1759. {
  1760. pjIoBase = ppdev->pjIoBase;
  1761. // We have to preserve the contents of register 0x69 on the S3's page
  1762. // flip:
  1763. ACQUIRE_CRTC_CRITICAL_SECTION(ppdev);
  1764. OUTP(pjIoBase, CRTC_INDEX, 0x69);
  1765. ppdev->ulExtendedSystemControl3Register_69
  1766. = (INP(pjIoBase, CRTC_DATA) & 0xe0) << 8;
  1767. RELEASE_CRTC_CRITICAL_SECTION(ppdev);
  1768. // Accurately measure the refresh rate for later:
  1769. vGetDisplayDuration(ppdev);
  1770. if (ppdev->flCaps & CAPS_STREAMS_CAPABLE)
  1771. {
  1772. // Query the miniport to get the correct streams parameters
  1773. // for this mode:
  1774. VideoQueryStreamsMode.ScreenWidth = ppdev->cxScreen;
  1775. VideoQueryStreamsMode.BitsPerPel = ppdev->cBitsPerPel;
  1776. VideoQueryStreamsMode.RefreshRate = ppdev->ulRefreshRate;
  1777. if (EngDeviceIoControl(ppdev->hDriver,
  1778. IOCTL_VIDEO_S3_QUERY_STREAMS_PARAMETERS,
  1779. &VideoQueryStreamsMode,
  1780. sizeof(VideoQueryStreamsMode),
  1781. &VideoQueryStreamsParameters,
  1782. sizeof(VideoQueryStreamsParameters),
  1783. &ReturnedDataLength))
  1784. {
  1785. DISPDBG((0, "Miniport reported no streams parameters"));
  1786. ppdev->flCaps &= ~CAPS_STREAMS_CAPABLE;
  1787. }
  1788. else
  1789. {
  1790. ppdev->ulMinOverlayStretch
  1791. = VideoQueryStreamsParameters.MinOverlayStretch;
  1792. ppdev->ulFifoValue
  1793. = VideoQueryStreamsParameters.FifoValue;
  1794. DISPDBG((0, "Refresh rate: %li Minimum overlay stretch: %li.%03li Fifo value: %lx",
  1795. ppdev->ulRefreshRate,
  1796. ppdev->ulMinOverlayStretch / 1000,
  1797. ppdev->ulMinOverlayStretch % 1000,
  1798. ppdev->ulFifoValue));
  1799. }
  1800. }
  1801. else if (ppdev->flCaps & CAPS_PIXEL_FORMATTER)
  1802. {
  1803. // The pixel formatter doesn't work at 24bpp:
  1804. if (ppdev->iBitmapFormat != BMF_24BPP)
  1805. {
  1806. // We'll need a pixel-high scratch area to work around a
  1807. // hardware bug for thin stretches:
  1808. ppdev->pdsurfVideoEngineScratch = pVidMemAllocate(ppdev,
  1809. ppdev->cxMemory,
  1810. 1);
  1811. if (ppdev->pdsurfVideoEngineScratch)
  1812. {
  1813. if (ppdev->cyMemory * ppdev->lDelta <= 0x100000)
  1814. ppdev->dwVEstep = 0x00040004; // If 1MB, 4 bytes/write
  1815. else
  1816. ppdev->dwVEstep = 0x00080008; // If 2MB, 8 bytes/write
  1817. ppdev->flCaps |= CAPS_PIXEL_FORMATTER;
  1818. }
  1819. }
  1820. }
  1821. }
  1822. return(TRUE);
  1823. }
  1824. /******************************Public*Routine******************************\
  1825. * VOID vDisableDirectDraw
  1826. *
  1827. * This function is called by enable.c when the driver is shutting down.
  1828. *
  1829. \**************************************************************************/
  1830. VOID vDisableDirectDraw(
  1831. PDEV* ppdev)
  1832. {
  1833. if (ppdev->pdsurfVideoEngineScratch)
  1834. {
  1835. vVidMemFree(ppdev->pdsurfVideoEngineScratch);
  1836. }
  1837. }