Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2266 lines
75 KiB

  1. /******************************Module*Header*******************************\
  2. *
  3. * **************************
  4. * * DirectDraw SAMPLE CODE *
  5. * **************************
  6. *
  7. * Module Name: ddraw.c
  8. *
  9. * Implements all the DirectDraw components for the driver.
  10. *
  11. * Copyright (c) 1995-1998 Microsoft Corporation
  12. \**************************************************************************/
  13. #include "precomp.h"
  14. // Defines we'll use in the surface's 'dwReserved1' field:
  15. #define DD_RESERVED_DIFFERENTPIXELFORMAT 0x0001
  16. // Worst-case possible number of FIFO entries we'll have to wait for in
  17. // DdBlt for any operation:
  18. #define DDBLT_FIFO_COUNT 9
  19. // NT is kind enough to pre-calculate the 2-d surface offset as a 'hint' so
  20. // that we don't have to do the following, which would be 6 DIVs per blt:
  21. //
  22. // y += (offset / pitch)
  23. // x += (offset % pitch) / bytes_per_pixel
  24. #define convertToGlobalCord(x, y, surf) \
  25. { \
  26. y += surf->yHint; \
  27. x += surf->xHint; \
  28. }
  29. /******************************Public*Routine******************************\
  30. * VOID vFixMissingPixels
  31. *
  32. * Trio64V+ work-around.
  33. *
  34. * On 1024x768x8 and 800x600x8 modes, switching from K2 to stream processor
  35. * results in 1 character clock pixels on the right handed side of the screen
  36. * missing. This problem can be worked-around by adjusting CR2 register.
  37. *
  38. \**************************************************************************/
  39. VOID vFixMissingPixels(
  40. PDEV* ppdev)
  41. {
  42. BYTE* pjIoBase;
  43. BYTE jVerticalRetraceEnd;
  44. ASSERTDD(ppdev->flCaps & CAPS_STREAMS_CAPABLE, "Must be streams capable");
  45. pjIoBase = ppdev->pjIoBase;
  46. // Unlock CRTC control registers:
  47. OUTP(pjIoBase, CRTC_INDEX, 0x11);
  48. jVerticalRetraceEnd = INP(pjIoBase, CRTC_DATA);
  49. OUTP(pjIoBase, CRTC_DATA, jVerticalRetraceEnd & 0x7f);
  50. // Add one character clock:
  51. OUTP(pjIoBase, CRTC_INDEX, 0x2);
  52. ppdev->jSavedCR2 = INP(pjIoBase, CRTC_DATA);
  53. OUTP(pjIoBase, CRTC_DATA, ppdev->jSavedCR2 + 1);
  54. // Lock CRTC control registers again:
  55. OUTP(pjIoBase, CRTC_INDEX, 0x11);
  56. OUTP(pjIoBase, CRTC_DATA, jVerticalRetraceEnd | 0x80);
  57. }
  58. /******************************Public*Routine******************************\
  59. * VOID vUnfixMissingPixels
  60. *
  61. * Trio64V+ work-around.
  62. *
  63. \**************************************************************************/
  64. VOID vUnfixMissingPixels(
  65. PDEV* ppdev)
  66. {
  67. BYTE* pjIoBase;
  68. BYTE jVerticalRetraceEnd;
  69. pjIoBase = ppdev->pjIoBase;
  70. // Unlock CRTC control registers:
  71. OUTP(pjIoBase, CRTC_INDEX, 0x11);
  72. jVerticalRetraceEnd = INP(pjIoBase, CRTC_DATA);
  73. OUTP(pjIoBase, CRTC_DATA, jVerticalRetraceEnd & 0x7f);
  74. // Restore original register value:
  75. OUTP(pjIoBase, CRTC_INDEX, 0x2);
  76. OUTP(pjIoBase, CRTC_DATA, ppdev->jSavedCR2);
  77. // Lock CRTC control registers again:
  78. OUTP(pjIoBase, CRTC_INDEX, 0x11);
  79. OUTP(pjIoBase, CRTC_DATA, jVerticalRetraceEnd | 0x80);
  80. }
  81. /******************************Public*Routine******************************\
  82. * VOID vStreamsDelay()
  83. *
  84. * This tries to work around a hardware timing bug. Supposedly, consecutive
  85. * writes to the streams processor in fast CPUs such as P120 and P133's
  86. * have problems. I haven't seen this problem, but this work-around exists
  87. * in the Windows 95 driver, and at this point don't want to chance not
  88. * having it. Note that writes to the streams processor are not performance
  89. * critical, so this is not a performance hit.
  90. *
  91. \**************************************************************************/
  92. VOID vStreamsDelay()
  93. {
  94. volatile LONG i;
  95. for (i = 32; i != 0; i--)
  96. ;
  97. }
  98. /******************************Public*Routine******************************\
  99. * VOID vTurnOnStreamsProcessorMode
  100. *
  101. \**************************************************************************/
  102. VOID vTurnOnStreamsProcessorMode(
  103. PDEV* ppdev)
  104. {
  105. BYTE* pjMmBase;
  106. BYTE* pjIoBase;
  107. BYTE jStreamsProcessorModeSelect;
  108. DWORD dwPFormat;
  109. ASSERTDD(ppdev->flCaps & CAPS_STREAMS_CAPABLE, "Must be streams capable");
  110. ACQUIRE_CRTC_CRITICAL_SECTION(ppdev);
  111. pjMmBase = ppdev->pjMmBase;
  112. pjIoBase = ppdev->pjIoBase;
  113. NW_GP_WAIT(ppdev, pjMmBase);
  114. while (!(VBLANK_IS_ACTIVE(pjIoBase)))
  115. ;
  116. // Full streams processor operation:
  117. OUTP(pjIoBase, CRTC_INDEX, 0x67);
  118. jStreamsProcessorModeSelect = INP(pjIoBase, CRTC_DATA);
  119. OUTP(pjIoBase, CRTC_DATA, jStreamsProcessorModeSelect | 0x0c);
  120. if (ppdev->iBitmapFormat == BMF_8BPP)
  121. {
  122. vFixMissingPixels(ppdev);
  123. }
  124. switch(ppdev->iBitmapFormat)
  125. {
  126. case BMF_8BPP:
  127. dwPFormat = P_RGB8;
  128. break;
  129. case BMF_16BPP:
  130. if (IS_RGB15_R(ppdev->flRed))
  131. dwPFormat = P_RGB15;
  132. else
  133. dwPFormat = P_RGB16;
  134. break;
  135. case BMF_32BPP:
  136. dwPFormat = P_RGB32;
  137. break;
  138. default:
  139. RIP("Unexpected bitmap format");
  140. }
  141. WRITE_STREAM_D(pjMmBase, P_CONTROL, dwPFormat );
  142. WRITE_STREAM_D(pjMmBase, FIFO_CONTROL, ((0xcL << FifoAlloc_Shift)|
  143. (4L << P_FifoThresh_Shift) |
  144. (4L << S_FifoThresh_Shift)));
  145. WRITE_STREAM_D(pjMmBase, P_0, 0);
  146. WRITE_STREAM_D(pjMmBase, P_STRIDE, ppdev->lDelta);
  147. WRITE_STREAM_D(pjMmBase, P_XY, 0x010001L);
  148. WRITE_STREAM_D(pjMmBase, P_WH, WH(ppdev->cxScreen, ppdev->cyScreen));
  149. WRITE_STREAM_D(pjMmBase, S_WH, WH(10, 2));
  150. WRITE_STREAM_D(pjMmBase, CKEY_LOW, ppdev->ulColorKey |
  151. CompareBits0t7 |
  152. KeyFromCompare);
  153. WRITE_STREAM_D(pjMmBase, CKEY_HI, ppdev->ulColorKey);
  154. WRITE_STREAM_D(pjMmBase, BLEND_CONTROL, POnS);
  155. WRITE_STREAM_D(pjMmBase, OPAQUE_CONTROL, 0);
  156. WRITE_STREAM_D(pjMmBase, FIFO_CONTROL, ppdev->ulFifoValue);
  157. RELEASE_CRTC_CRITICAL_SECTION(ppdev);
  158. }
  159. /******************************Public*Routine******************************\
  160. * VOID vTurnOffStreamsProcessorMode
  161. *
  162. \**************************************************************************/
  163. VOID vTurnOffStreamsProcessorMode(
  164. PDEV* ppdev)
  165. {
  166. BYTE* pjMmBase;
  167. BYTE* pjIoBase;
  168. BYTE jStreamsProcessorModeSelect;
  169. ACQUIRE_CRTC_CRITICAL_SECTION(ppdev);
  170. pjMmBase = ppdev->pjMmBase;
  171. pjIoBase = ppdev->pjIoBase;
  172. NW_GP_WAIT(ppdev, pjMmBase);
  173. while (!(VBLANK_IS_ACTIVE(pjIoBase)))
  174. ;
  175. WRITE_STREAM_D(pjMmBase, FIFO_CONTROL, 0x3000L);
  176. OUTP(pjIoBase, CRTC_INDEX, 0x67);
  177. jStreamsProcessorModeSelect = INP(pjIoBase, CRTC_DATA);
  178. OUTP(pjIoBase, CRTC_DATA, jStreamsProcessorModeSelect & ~0x0C);
  179. if (ppdev->iBitmapFormat == BMF_8BPP)
  180. {
  181. vUnfixMissingPixels(ppdev);
  182. }
  183. RELEASE_CRTC_CRITICAL_SECTION(ppdev);
  184. }
  185. /******************************Public*Routine******************************\
  186. * DWORD dwGetPaletteEntry
  187. *
  188. \**************************************************************************/
  189. DWORD dwGetPaletteEntry(
  190. PDEV* ppdev,
  191. DWORD iIndex)
  192. {
  193. BYTE* pjIoBase;
  194. DWORD dwRed;
  195. DWORD dwGreen;
  196. DWORD dwBlue;
  197. pjIoBase = ppdev->pjIoBase;
  198. OUTP(pjIoBase, 0x3c7, iIndex);
  199. dwRed = INP(pjIoBase, 0x3c9) << 2;
  200. dwGreen = INP(pjIoBase, 0x3c9) << 2;
  201. dwBlue = INP(pjIoBase, 0x3c9) << 2;
  202. return((dwRed << 16) | (dwGreen << 8) | (dwBlue));
  203. }
  204. /******************************Public*Routine******************************\
  205. * VOID vGetDisplayDuration
  206. *
  207. * Get the length, in EngQueryPerformanceCounter() ticks, of a refresh cycle.
  208. *
  209. * If we could trust the miniport to return back and accurate value for
  210. * the refresh rate, we could use that. Unfortunately, our miniport doesn't
  211. * ensure that it's an accurate value.
  212. *
  213. \**************************************************************************/
  214. #define NUM_VBLANKS_TO_MEASURE 1
  215. #define NUM_MEASUREMENTS_TO_TAKE 8
  216. VOID vGetDisplayDuration(
  217. PDEV* ppdev)
  218. {
  219. BYTE* pjIoBase;
  220. LONG i;
  221. LONG j;
  222. LONGLONG li;
  223. LONGLONG liFrequency;
  224. LONGLONG liMin;
  225. LONGLONG aliMeasurement[NUM_MEASUREMENTS_TO_TAKE + 1];
  226. pjIoBase = ppdev->pjIoBase;
  227. memset(&ppdev->flipRecord, 0, sizeof(ppdev->flipRecord));
  228. // Warm up EngQUeryPerformanceCounter to make sure it's in the working
  229. // set:
  230. EngQueryPerformanceCounter(&li);
  231. // Unfortunately, since NT is a proper multitasking system, we can't
  232. // just disable interrupts to take an accurate reading. We also can't
  233. // do anything so goofy as dynamically change our thread's priority to
  234. // real-time.
  235. //
  236. // So we just do a bunch of short measurements and take the minimum.
  237. //
  238. // It would be 'okay' if we got a result that's longer than the actual
  239. // VBlank cycle time -- nothing bad would happen except that the app
  240. // would run a little slower. We don't want to get a result that's
  241. // shorter than the actual VBlank cycle time -- that could cause us
  242. // to start drawing over a frame before the Flip has occured.
  243. //
  244. // Skip a couple of vertical blanks to allow the hardware to settle
  245. // down after the mode change, to make our readings accurate:
  246. for (i = 2; i != 0; i--)
  247. {
  248. while (VBLANK_IS_ACTIVE(pjIoBase))
  249. ;
  250. while (!(VBLANK_IS_ACTIVE(pjIoBase)))
  251. ;
  252. }
  253. for (i = 0; i < NUM_MEASUREMENTS_TO_TAKE; i++)
  254. {
  255. // We're at the start of the VBlank active cycle!
  256. EngQueryPerformanceCounter(&aliMeasurement[i]);
  257. // Okay, so life in a multi-tasking environment isn't all that
  258. // simple. What if we had taken a context switch just before
  259. // the above EngQueryPerformanceCounter call, and now were half
  260. // way through the VBlank inactive cycle? Then we would measure
  261. // only half a VBlank cycle, which is obviously bad. The worst
  262. // thing we can do is get a time shorter than the actual VBlank
  263. // cycle time.
  264. //
  265. // So we solve this by making sure we're in the VBlank active
  266. // time before and after we query the time. If it's not, we'll
  267. // sync up to the next VBlank (it's okay to measure this period --
  268. // it will be guaranteed to be longer than the VBlank cycle and
  269. // will likely be thrown out when we select the minimum sample).
  270. // There's a chance that we'll take a context switch and return
  271. // just before the end of the active VBlank time -- meaning that
  272. // the actual measured time would be less than the true amount --
  273. // but since the VBlank is active less than 1% of the time, this
  274. // means that we would have a maximum of 1% error approximately
  275. // 1% of the times we take a context switch. An acceptable risk.
  276. //
  277. // This next line will cause us wait if we're no longer in the
  278. // VBlank active cycle as we should be at this point:
  279. while (!(VBLANK_IS_ACTIVE(pjIoBase)))
  280. ;
  281. for (j = 0; j < NUM_VBLANKS_TO_MEASURE; j++)
  282. {
  283. while (VBLANK_IS_ACTIVE(pjIoBase))
  284. ;
  285. while (!(VBLANK_IS_ACTIVE(pjIoBase)))
  286. ;
  287. }
  288. }
  289. EngQueryPerformanceCounter(&aliMeasurement[NUM_MEASUREMENTS_TO_TAKE]);
  290. // Use the minimum:
  291. liMin = aliMeasurement[1] - aliMeasurement[0];
  292. DISPDBG((1, "Refresh count: %li - %li", 1, (ULONG) liMin));
  293. for (i = 2; i <= NUM_MEASUREMENTS_TO_TAKE; i++)
  294. {
  295. li = aliMeasurement[i] - aliMeasurement[i - 1];
  296. DISPDBG((1, " %li - %li", i, (ULONG) li));
  297. if (li < liMin)
  298. liMin = li;
  299. }
  300. // Round the result:
  301. ppdev->flipRecord.liFlipDuration
  302. = (DWORD) (liMin + (NUM_VBLANKS_TO_MEASURE / 2)) / NUM_VBLANKS_TO_MEASURE;
  303. ppdev->flipRecord.bFlipFlag = FALSE;
  304. ppdev->flipRecord.fpFlipFrom = 0;
  305. // We need the refresh rate in Hz to query the S3 miniport about the
  306. // streams parameters:
  307. EngQueryPerformanceFrequency(&liFrequency);
  308. ppdev->ulRefreshRate
  309. = (ULONG) ((liFrequency + (ppdev->flipRecord.liFlipDuration / 2))
  310. / ppdev->flipRecord.liFlipDuration);
  311. DISPDBG((1, "Frequency: %li Hz", ppdev->ulRefreshRate));
  312. }
  313. /******************************Public*Routine******************************\
  314. * HRESULT ddrvalUpdateFlipStatus
  315. *
  316. * Checks and sees if the most recent flip has occurred.
  317. *
  318. * Unfortunately, the hardware has no ability to tell us whether a vertical
  319. * retrace has occured since the flip command was given other than by
  320. * sampling the vertical-blank-active and display-active status bits.
  321. *
  322. \**************************************************************************/
  323. HRESULT ddrvalUpdateFlipStatus(
  324. PDEV* ppdev,
  325. FLATPTR fpVidMem)
  326. {
  327. BYTE* pjIoBase;
  328. LONGLONG liTime;
  329. pjIoBase = ppdev->pjIoBase;
  330. if ((ppdev->flipRecord.bFlipFlag) &&
  331. ((fpVidMem == (FLATPTR) -1) ||
  332. (fpVidMem == ppdev->flipRecord.fpFlipFrom)))
  333. {
  334. if (VBLANK_IS_ACTIVE(pjIoBase))
  335. {
  336. if (ppdev->flipRecord.bWasEverInDisplay)
  337. {
  338. ppdev->flipRecord.bHaveEverCrossedVBlank = TRUE;
  339. }
  340. }
  341. else if (DISPLAY_IS_ACTIVE(pjIoBase))
  342. {
  343. if (ppdev->flipRecord.bHaveEverCrossedVBlank)
  344. {
  345. ppdev->flipRecord.bFlipFlag = FALSE;
  346. return(DD_OK);
  347. }
  348. ppdev->flipRecord.bWasEverInDisplay = TRUE;
  349. }
  350. // It's pretty unlikely that we'll happen to sample the vertical-
  351. // blank-active at the first vertical blank after the flip command
  352. // has been given. So to provide better results, we also check the
  353. // time elapsed since the flip. If it's more than the duration of
  354. // one entire refresh of the display, then we know for sure it has
  355. // happened:
  356. EngQueryPerformanceCounter(&liTime);
  357. if (liTime - ppdev->flipRecord.liFlipTime
  358. <= ppdev->flipRecord.liFlipDuration)
  359. {
  360. return(DDERR_WASSTILLDRAWING);
  361. }
  362. ppdev->flipRecord.bFlipFlag = FALSE;
  363. }
  364. return(DD_OK);
  365. }
  366. /******************************Public*Routine******************************\
  367. * DWORD DdBlt
  368. *
  369. \**************************************************************************/
  370. DWORD DdBlt(
  371. PDD_BLTDATA lpBlt)
  372. {
  373. PDD_SURFACE_GLOBAL srcSurf;
  374. PDD_SURFACE_LOCAL dstSurfx;
  375. PDD_SURFACE_GLOBAL dstSurf;
  376. PDEV* ppdev;
  377. BYTE* pjMmBase;
  378. HRESULT ddrval;
  379. DWORD dstX;
  380. DWORD dstY;
  381. DWORD dwFlags;
  382. DWORD dstWidth;
  383. DWORD dstHeight;
  384. DWORD srcWidth;
  385. DWORD srcHeight;
  386. DWORD dwError;
  387. LONG dstPitch;
  388. LONG srcPitch;
  389. DWORD srcX;
  390. DWORD srcY;
  391. ULONG ulBltCmd;
  392. DWORD dwVEctrl;
  393. DWORD dwVEdda;
  394. DWORD dwVEcrop;
  395. DWORD dwVEdstAddr;
  396. DWORD dwVEsrcAddr;
  397. DWORD dwDstByteCount;
  398. DWORD dwSrcByteCount;
  399. DWORD dwSrcBytes;
  400. DWORD dwCropSkip;
  401. LONG i;
  402. FLATPTR fp;
  403. ppdev = (PDEV*) lpBlt->lpDD->dhpdev;
  404. pjMmBase = ppdev->pjMmBase;
  405. dstSurfx = lpBlt->lpDDDestSurface;
  406. dstSurf = dstSurfx->lpGbl;
  407. // Is a flip in progress?
  408. ddrval = ddrvalUpdateFlipStatus(ppdev, dstSurf->fpVidMem);
  409. if (ddrval != DD_OK)
  410. {
  411. lpBlt->ddRVal = ddrval;
  412. return(DDHAL_DRIVER_HANDLED);
  413. }
  414. dwFlags = lpBlt->dwFlags;
  415. if (dwFlags & DDBLT_ASYNC)
  416. {
  417. // If async, then only work if we won't have to wait on the
  418. // accelerator to start the command.
  419. //
  420. // The FIFO wait should account for the worst-case possible
  421. // blt that we would do:
  422. if (MM_FIFO_BUSY(ppdev, pjMmBase, DDBLT_FIFO_COUNT))
  423. {
  424. lpBlt->ddRVal = DDERR_WASSTILLDRAWING;
  425. return(DDHAL_DRIVER_HANDLED);
  426. }
  427. }
  428. // Copy src/dst rects:
  429. dstX = lpBlt->rDest.left;
  430. dstY = lpBlt->rDest.top;
  431. dstWidth = lpBlt->rDest.right - lpBlt->rDest.left;
  432. dstHeight = lpBlt->rDest.bottom - lpBlt->rDest.top;
  433. if (dwFlags & DDBLT_COLORFILL)
  434. {
  435. // The S3 can't easily do colour fills for off-screen surfaces that
  436. // are a different pixel format than that of the primary display:
  437. if (dstSurf->dwReserved1 & DD_RESERVED_DIFFERENTPIXELFORMAT)
  438. {
  439. DISPDBG((0, "Can't do colorfill to odd pixel format"));
  440. return(DDHAL_DRIVER_NOTHANDLED);
  441. }
  442. else
  443. {
  444. convertToGlobalCord(dstX, dstY, dstSurf);
  445. NW_FIFO_WAIT(ppdev, pjMmBase, 6);
  446. NW_PIX_CNTL(ppdev, pjMmBase, ALL_ONES);
  447. NW_ALT_MIX(ppdev, pjMmBase, FOREGROUND_COLOR | OVERPAINT, 0);
  448. NW_FRGD_COLOR(ppdev, pjMmBase, lpBlt->bltFX.dwFillColor);
  449. NW_ABS_CURXY_FAST(ppdev, pjMmBase, dstX, dstY);
  450. NW_ALT_PCNT(ppdev, pjMmBase, dstWidth - 1, dstHeight - 1);
  451. NW_ALT_CMD(ppdev, pjMmBase, RECTANGLE_FILL | DRAWING_DIR_TBLRXM |
  452. DRAW | DIR_TYPE_XY |
  453. LAST_PIXEL_ON | MULTIPLE_PIXELS |
  454. WRITE);
  455. lpBlt->ddRVal = DD_OK;
  456. return(DDHAL_DRIVER_HANDLED);
  457. }
  458. }
  459. // We specified with Our ddCaps.dwCaps that we handle a limited number
  460. // of commands, and by this point in our routine we've handled everything
  461. // except DDBLT_ROP. DirectDraw and GDI shouldn't pass us anything
  462. // else; we'll assert on debug builds to prove this:
  463. ASSERTDD((dwFlags & DDBLT_ROP) && (lpBlt->lpDDSrcSurface),
  464. "Expected dwFlags commands of only DDBLT_ASYNC and DDBLT_COLORFILL");
  465. // Get offset, dstWidth, and dstHeight for source:
  466. srcSurf = lpBlt->lpDDSrcSurface->lpGbl;
  467. srcX = lpBlt->rSrc.left;
  468. srcY = lpBlt->rSrc.top;
  469. srcWidth = lpBlt->rSrc.right - lpBlt->rSrc.left;
  470. srcHeight = lpBlt->rSrc.bottom - lpBlt->rSrc.top;
  471. // If a stretch or a funky pixel format blt are involved, we'll have to
  472. // defer to the overlay or pixel formatter routines:
  473. if ((srcWidth == dstWidth) &&
  474. (srcHeight == dstHeight) &&
  475. !(srcSurf->dwReserved1 & DD_RESERVED_DIFFERENTPIXELFORMAT) &&
  476. !(dstSurf->dwReserved1 & DD_RESERVED_DIFFERENTPIXELFORMAT))
  477. {
  478. // Assume we can do the blt top-to-bottom, left-to-right:
  479. ulBltCmd = BITBLT | DRAW | DIR_TYPE_XY | WRITE | DRAWING_DIR_TBLRXM;
  480. if ((dstSurf == srcSurf) && (srcX + dstWidth > dstX) &&
  481. (srcY + dstHeight > dstY) && (dstX + dstWidth > srcX) &&
  482. (dstY + dstHeight > srcY) &&
  483. (((srcY == dstY) && (dstX > srcX) )
  484. || ((srcY != dstY) && (dstY > srcY))))
  485. {
  486. // Okay, we have to do the blt bottom-to-top, right-to-left:
  487. ulBltCmd = BITBLT | DRAW | DIR_TYPE_XY | WRITE | DRAWING_DIR_BTRLXM;
  488. srcX = lpBlt->rSrc.right - 1;
  489. srcY = lpBlt->rSrc.bottom - 1;
  490. dstX = lpBlt->rDest.right - 1;
  491. dstY = lpBlt->rDest.bottom - 1;
  492. }
  493. // NT only ever gives us SRCCOPY rops, so don't even both checking
  494. // for anything else.
  495. convertToGlobalCord(srcX, srcY, srcSurf);
  496. convertToGlobalCord(dstX, dstY, dstSurf);
  497. if (dwFlags & DDBLT_KEYSRCOVERRIDE)
  498. {
  499. NW_FIFO_WAIT(ppdev, pjMmBase, 9);
  500. NW_MULT_MISC_READ_SEL(ppdev, pjMmBase, ppdev->ulMiscState
  501. | MULT_MISC_COLOR_COMPARE, 0);
  502. NW_COLOR_CMP(ppdev, pjMmBase,
  503. lpBlt->bltFX.ddckSrcColorkey.dwColorSpaceLowValue);
  504. NW_ALT_MIX(ppdev, pjMmBase, SRC_DISPLAY_MEMORY | OVERPAINT, 0);
  505. NW_PIX_CNTL(ppdev, pjMmBase, ALL_ONES);
  506. NW_ABS_CURXY_FAST(ppdev, pjMmBase, srcX, srcY);
  507. NW_ABS_DESTXY_FAST(ppdev, pjMmBase, dstX, dstY);
  508. NW_ALT_PCNT(ppdev, pjMmBase, dstWidth - 1, dstHeight - 1);
  509. NW_ALT_CMD(ppdev, pjMmBase, ulBltCmd);
  510. NW_MULT_MISC_READ_SEL(ppdev, pjMmBase, ppdev->ulMiscState, 0);
  511. }
  512. else
  513. {
  514. NW_FIFO_WAIT(ppdev, pjMmBase, 6);
  515. NW_ALT_MIX(ppdev, pjMmBase, SRC_DISPLAY_MEMORY | OVERPAINT, 0);
  516. NW_PIX_CNTL(ppdev, pjMmBase, ALL_ONES);
  517. NW_ABS_CURXY_FAST(ppdev, pjMmBase, srcX, srcY);
  518. NW_ABS_DESTXY_FAST(ppdev, pjMmBase, dstX, dstY);
  519. NW_ALT_PCNT(ppdev, pjMmBase, dstWidth - 1, dstHeight - 1);
  520. NW_ALT_CMD(ppdev, pjMmBase, ulBltCmd);
  521. }
  522. }
  523. //////////////////////////////////////////////////////////////////////
  524. // Pixel Formatter Blts
  525. //
  526. // We can do stretches or funky pixel format blts only if a pixel
  527. // formatter is present. Plus, we set our 'ddCaps' such that we
  528. // shouldn't have to handle any shrinks.
  529. //
  530. // (We check to make sure we weren't asked to do a shrink, because we
  531. // would probably hang if the application ignored what we told them
  532. // and asked for a shrink):
  533. else if ((ppdev->flCaps & CAPS_PIXEL_FORMATTER) &&
  534. (srcWidth <= dstWidth) &&
  535. (srcHeight <= dstHeight))
  536. {
  537. if ((dwFlags & DDBLT_KEYSRCOVERRIDE) ||
  538. (dstWidth >= 4 * srcWidth))
  539. {
  540. // Contrary to what we're indicating in our capabilities, we
  541. // can't colour key on stretches or pixel format conversions.
  542. // The S3 hardware also can't do stretches of four times or
  543. // more.
  544. return(DDHAL_DRIVER_NOTHANDLED);
  545. }
  546. dwVEctrl = ~dstWidth & 0x00000FFF; // Initial accumulator
  547. dwVEdda = 0x10000000 // Some reserved bit?
  548. | (STRETCH | SCREEN) // Scale from video memory
  549. | (srcWidth << 16) // K1
  550. | ((srcWidth - dstWidth) & 0x7FF); // K2
  551. // We'll be doing the vertical stretching in software, so calculate
  552. // the DDA terms here. We have the luxury of not worrying about
  553. // overflow because DirectDraw limits our coordinate space to 15
  554. // bits.
  555. //
  556. // Note that dwRGBBitCount is overloaded with dwYUVBitCount:
  557. dwSrcByteCount = srcSurf->ddpfSurface.dwRGBBitCount >> 3;
  558. if (srcSurf->ddpfSurface.dwFlags & DDPF_FOURCC)
  559. {
  560. dwVEctrl |= INPUT_YCrCb422 | CSCENABLE; // Not INPUT_YUV422!
  561. }
  562. else if (srcSurf->ddpfSurface.dwFlags & DDPF_RGB)
  563. {
  564. switch (dwSrcByteCount)
  565. {
  566. case 1:
  567. dwVEctrl |= INPUT_RGB8;
  568. break;
  569. case 2:
  570. if (IS_RGB15_R(srcSurf->ddpfSurface.dwRBitMask))
  571. dwVEctrl |= INPUT_RGB15;
  572. else
  573. dwVEctrl |= INPUT_RGB16;
  574. break;
  575. default:
  576. dwVEctrl |= INPUT_RGB32;
  577. break;
  578. }
  579. }
  580. dwDstByteCount = dstSurf->ddpfSurface.dwRGBBitCount >> 3;
  581. switch (dwDstByteCount)
  582. {
  583. case 1:
  584. dwVEctrl |= OUTPUT_RGB8;
  585. break;
  586. case 2:
  587. if (IS_RGB15_R(dstSurf->ddpfSurface.dwRBitMask))
  588. dwVEctrl |= OUTPUT_RGB15;
  589. else
  590. dwVEctrl |= OUTPUT_RGB16;
  591. break;
  592. default:
  593. dwVEctrl |=OUTPUT_RGB32;
  594. break;
  595. }
  596. if (dwDstByteCount > 1)
  597. {
  598. dwVEctrl |= FILTERENABLE;
  599. if (dstWidth > 2 * srcWidth)
  600. dwVEdda |= LINEAR12221; // linear, 1-2-2-2-1, >2X stretch
  601. else if (dstWidth > srcWidth)
  602. dwVEdda |= LINEAR02420; // linear, 0-2-4-2-0, 1-2X stretch
  603. else
  604. dwVEdda |= BILINEAR; // bi-linear, <1X stretch
  605. }
  606. dwVEsrcAddr = (DWORD)(srcSurf->fpVidMem + (srcY * srcSurf->lPitch)
  607. + (srcX * dwSrcByteCount));
  608. dwVEdstAddr = (DWORD)(dstSurf->fpVidMem + (dstY * dstSurf->lPitch)
  609. + (dstX * dwDstByteCount));
  610. srcPitch = srcSurf->lPitch;
  611. dstPitch = dstSurf->lPitch;
  612. // The S3's source alignment within the dword must be done using the
  613. // crop register:
  614. dwVEcrop = dstWidth;
  615. if (dwVEsrcAddr & 3)
  616. {
  617. dwSrcBytes = (srcWidth * dwSrcByteCount);
  618. // Transform the number of source pixels to the number of
  619. // corresponding destination pixels, and round the result:
  620. dwCropSkip = ((dwVEsrcAddr & 3) * dstWidth + (dwSrcBytes >> 1))
  621. / dwSrcBytes;
  622. dwVEcrop += (dwCropSkip << 16);
  623. dwVEsrcAddr &= ~3;
  624. }
  625. // We have to run the vertical DDA ourselves:
  626. dwError = srcHeight >> 1;
  627. i = dstHeight;
  628. // Watch out for a hardware bug the destination will be 32 pixels
  629. // or less:
  630. //
  631. // We'll use 40 as our minimum width to guarantee we shouldn't
  632. // crash.
  633. if (dstWidth >= 40)
  634. {
  635. // The S3 will sometimes hang when using the video engine with
  636. // certain end-byte alignments. We'll simply lengthen the blt in
  637. // this case and hope that no-one notices:
  638. if (((dwVEdstAddr + (dstWidth * dwDstByteCount)) & 7) == 4)
  639. {
  640. dwVEcrop++;
  641. }
  642. // We have to execute a graphics engine NOP before using the
  643. // pixel formatter video engine:
  644. NW_FIFO_WAIT(ppdev, pjMmBase, 1);
  645. NW_ALT_CMD(ppdev, pjMmBase, 0);
  646. NW_GP_WAIT(ppdev, pjMmBase);
  647. // Set up some non-variant registers:
  648. NW_FIFO_WAIT(ppdev, pjMmBase, 4);
  649. WRITE_FORMATTER_D(pjMmBase, PF_CONTROL, dwVEctrl);
  650. WRITE_FORMATTER_D(pjMmBase, PF_DDA, dwVEdda);
  651. WRITE_FORMATTER_D(pjMmBase, PF_STEP, ppdev->dwVEstep);
  652. WRITE_FORMATTER_D(pjMmBase, PF_CROP, dwVEcrop);
  653. do {
  654. NW_FIFO_WAIT(ppdev, pjMmBase, 3);
  655. WRITE_FORMATTER_D(pjMmBase, PF_SRCADDR, dwVEsrcAddr);
  656. WRITE_FORMATTER_D(pjMmBase, PF_DSTADDR, dwVEdstAddr);
  657. WRITE_FORMATTER_D(pjMmBase, PF_NOP, 0);
  658. NW_FORMATTER_WAIT(ppdev, pjMmBase);
  659. dwVEdstAddr += dstPitch;
  660. dwError += srcHeight;
  661. if (dwError >= dstHeight)
  662. {
  663. dwError -= dstHeight;
  664. dwVEsrcAddr += srcPitch;
  665. }
  666. } while (--i != 0);
  667. }
  668. else if (dwDstByteCount != (DWORD) ppdev->cjPelSize)
  669. {
  670. // Because for narrow video engine blts we have to copy the
  671. // result using the normal graphics accelerator on a pixel
  672. // basis, we can't handle funky destination colour depths.
  673. // I expect zero applications to ask for narrow blts that
  674. // hit this case, so we will simply fail the call should it
  675. // ever actually occur:
  676. return(DDHAL_DRIVER_NOTHANDLED);
  677. }
  678. else
  679. {
  680. // The S3 will hang if we blt less than 32 pixels via the
  681. // pixel formatter. Unfortunately, we can't simply return
  682. // DDHAL_DRIVER_NOTHANDLED for this case. We said we'd do
  683. // hardware stretches, so we have to handle all hardware
  684. // stretches.
  685. //
  686. // We work around the problem by doing a 32 pixel stretch to
  687. // a piece of off-screen memory, then blting the appropriate
  688. // subset to the correct position on the screen.
  689. //
  690. // 32 isn't big enough. We still hang. Lets make it 40.
  691. dwVEcrop = 32 + 8;
  692. convertToGlobalCord(dstX, dstY, dstSurf);
  693. srcX = ppdev->pdsurfVideoEngineScratch->x;
  694. srcY = ppdev->pdsurfVideoEngineScratch->y;
  695. dwVEdstAddr = (srcY * ppdev->lDelta) + (srcX * ppdev->cjPelSize);
  696. ASSERTDD(((dwVEdstAddr + (dwVEcrop * dwDstByteCount)) & 7) != 4,
  697. "Must account for S3 end-alignment bug");
  698. do {
  699. // Use the pixel formatter to blt to our scratch area:
  700. NW_FIFO_WAIT(ppdev, pjMmBase, 1);
  701. NW_ALT_CMD(ppdev, pjMmBase, 0);
  702. NW_GP_WAIT(ppdev, pjMmBase);
  703. NW_FIFO_WAIT(ppdev, pjMmBase, 7);
  704. WRITE_FORMATTER_D(pjMmBase, PF_CONTROL, dwVEctrl);
  705. WRITE_FORMATTER_D(pjMmBase, PF_DDA, dwVEdda);
  706. WRITE_FORMATTER_D(pjMmBase, PF_STEP, ppdev->dwVEstep);
  707. WRITE_FORMATTER_D(pjMmBase, PF_CROP, dwVEcrop);
  708. WRITE_FORMATTER_D(pjMmBase, PF_SRCADDR, dwVEsrcAddr);
  709. WRITE_FORMATTER_D(pjMmBase, PF_DSTADDR, dwVEdstAddr);
  710. WRITE_FORMATTER_D(pjMmBase, PF_NOP, 0);
  711. NW_FORMATTER_WAIT(ppdev, pjMmBase);
  712. dwError += srcHeight;
  713. if (dwError >= dstHeight)
  714. {
  715. dwError -= dstHeight;
  716. dwVEsrcAddr += srcPitch;
  717. }
  718. // Now copy from the scratch area to the final destination:
  719. NW_FIFO_WAIT(ppdev, pjMmBase, 6);
  720. NW_ALT_MIX(ppdev, pjMmBase, SRC_DISPLAY_MEMORY | OVERPAINT, 0);
  721. NW_PIX_CNTL(ppdev, pjMmBase, ALL_ONES);
  722. NW_ABS_CURXY_FAST(ppdev, pjMmBase, srcX, srcY);
  723. NW_ABS_DESTXY_FAST(ppdev, pjMmBase, dstX, dstY);
  724. NW_ALT_PCNT(ppdev, pjMmBase, dstWidth - 1, 0);
  725. NW_ALT_CMD(ppdev, pjMmBase, BITBLT | DRAW | DIR_TYPE_XY |
  726. WRITE | DRAWING_DIR_TBLRXM);
  727. dstY++;
  728. } while (--i != 0);
  729. }
  730. }
  731. else
  732. {
  733. //////////////////////////////////////////////////////////////////////
  734. // Overlay Blts
  735. //
  736. // Here we have to take care of cases where the destination is a
  737. // funky pixel format.
  738. // In order to make ActiveMovie and DirectVideo work, we have
  739. // to support blting between funky pixel format surfaces of the
  740. // same type. This is used to copy the current frame to the
  741. // next overlay surface in line.
  742. //
  743. // Unfortunately, it's not easy to switch the S3 graphics
  744. // processor out of its current pixel depth, so we'll only support
  745. // the minimal functionality required:
  746. if (!(dwFlags & DDBLT_ROP) ||
  747. (srcX != 0) ||
  748. (srcY != 0) ||
  749. (dstX != 0) ||
  750. (dstY != 0) ||
  751. (dstWidth != dstSurf->wWidth) ||
  752. (dstHeight != dstSurf->wHeight) ||
  753. (dstSurf->lPitch != srcSurf->lPitch) ||
  754. (dstSurf->ddpfSurface.dwRGBBitCount
  755. != srcSurf->ddpfSurface.dwRGBBitCount))
  756. {
  757. DISPDBG((0, "Sorry, we do only full-surface blts between same-type"));
  758. DISPDBG((0, "surfaces that have a funky pixel format."));
  759. return(DDHAL_DRIVER_NOTHANDLED);
  760. }
  761. else
  762. {
  763. // Convert the dimensions to the current pixel format. This
  764. // is pretty easy because we created the bitmap linearly, so
  765. // it takes the entire width of the screen:
  766. dstWidth = ppdev->cxMemory;
  767. dstHeight = dstSurf->dwBlockSizeY;
  768. convertToGlobalCord(dstX, dstY, dstSurf);
  769. convertToGlobalCord(srcX, srcY, srcSurf);
  770. NW_FIFO_WAIT(ppdev, pjMmBase, 6);
  771. NW_ALT_MIX(ppdev, pjMmBase, SRC_DISPLAY_MEMORY | OVERPAINT, 0);
  772. NW_PIX_CNTL(ppdev, pjMmBase, ALL_ONES);
  773. NW_ABS_CURXY_FAST(ppdev, pjMmBase, srcX, srcY);
  774. NW_ABS_DESTXY_FAST(ppdev, pjMmBase, dstX, dstY);
  775. NW_ALT_PCNT(ppdev, pjMmBase, dstWidth - 1, dstHeight - 1);
  776. NW_ALT_CMD(ppdev, pjMmBase, BITBLT | DRAW | DIR_TYPE_XY |
  777. WRITE | DRAWING_DIR_TBLRXM);
  778. }
  779. }
  780. lpBlt->ddRVal = DD_OK;
  781. return(DDHAL_DRIVER_HANDLED);
  782. }
  783. /******************************Public*Routine******************************\
  784. * DWORD DdFlip
  785. *
  786. * Note that lpSurfCurr may not necessarily be valid.
  787. *
  788. \**************************************************************************/
  789. DWORD DdFlip(
  790. PDD_FLIPDATA lpFlip)
  791. {
  792. PDEV* ppdev;
  793. BYTE* pjIoBase;
  794. BYTE* pjMmBase;
  795. HRESULT ddrval;
  796. ULONG ulMemoryOffset;
  797. ULONG ulLowOffset;
  798. ULONG ulMiddleOffset;
  799. ULONG ulHighOffset;
  800. ppdev = (PDEV*) lpFlip->lpDD->dhpdev;
  801. pjIoBase = ppdev->pjIoBase;
  802. pjMmBase = ppdev->pjMmBase;
  803. // Is the current flip still in progress?
  804. //
  805. // Don't want a flip to work until after the last flip is done,
  806. // so we ask for the general flip status and ignore the vmem.
  807. ddrval = ddrvalUpdateFlipStatus(ppdev, (FLATPTR) -1);
  808. if ((ddrval != DD_OK) || (NW_GP_BUSY(ppdev, pjMmBase)))
  809. {
  810. lpFlip->ddRVal = DDERR_WASSTILLDRAWING;
  811. return(DDHAL_DRIVER_HANDLED);
  812. }
  813. ulMemoryOffset = (ULONG)(lpFlip->lpSurfTarg->lpGbl->fpVidMem);
  814. // Make sure that the border/blanking period isn't active; wait if
  815. // it is. We could return DDERR_WASSTILLDRAWING in this case, but
  816. // that will increase the odds that we can't flip the next time:
  817. while (!(DISPLAY_IS_ACTIVE(pjIoBase)))
  818. ;
  819. if (ppdev->flStatus & STAT_STREAMS_ENABLED)
  820. {
  821. // When using the streams processor, we have to do the flip via the
  822. // streams registers:
  823. if (lpFlip->lpSurfCurr->ddsCaps.dwCaps & DDSCAPS_PRIMARYSURFACE)
  824. {
  825. WRITE_STREAM_D(pjMmBase, P_0, ulMemoryOffset);
  826. }
  827. else if (lpFlip->lpSurfCurr->ddsCaps.dwCaps & DDSCAPS_OVERLAY)
  828. {
  829. // Make sure that the overlay surface we're flipping from is
  830. // currently visible. If you don't do this check, you'll get
  831. // really weird results when someone starts up two ActiveMovie
  832. // or DirectVideo movies simultaneously!
  833. if (lpFlip->lpSurfCurr->lpGbl->fpVidMem == ppdev->fpVisibleOverlay)
  834. {
  835. ppdev->fpVisibleOverlay = ulMemoryOffset;
  836. WRITE_STREAM_D(pjMmBase, S_0, ulMemoryOffset +
  837. ppdev->dwOverlayFlipOffset);
  838. }
  839. }
  840. }
  841. else
  842. {
  843. // Do the old way, via the CRTC registers:
  844. ulMemoryOffset >>= 2;
  845. ulLowOffset = 0x0d | ((ulMemoryOffset & 0x0000ff) << 8);
  846. ulMiddleOffset = 0x0c | ((ulMemoryOffset & 0x00ff00));
  847. ulHighOffset = 0x69 | ((ulMemoryOffset & 0x1f0000) >> 8)
  848. | ppdev->ulExtendedSystemControl3Register_69;
  849. // Don't let the cursor thread touch the CRT registers while we're
  850. // using them:
  851. ACQUIRE_CRTC_CRITICAL_SECTION(ppdev);
  852. // Too bad that the S3's flip can't be done in a single atomic register
  853. // write; as it is, we stand a small chance of being context-switched
  854. // out and exactly hitting the vertical blank in the middle of doing
  855. // these outs, possibly causing the screen to momentarily jump.
  856. //
  857. // There are some hoops we could jump through to minimize the chances
  858. // of this happening; we could try to align the flip buffer such that
  859. // the minor registers are ensured to be identical for either flip
  860. // position, and so that only the high address need be written, an
  861. // obviously atomic operation.
  862. //
  863. // However, I'm simply not going to worry about it.
  864. OUTPW(pjIoBase, CRTC_INDEX, ulLowOffset);
  865. OUTPW(pjIoBase, CRTC_INDEX, ulMiddleOffset);
  866. OUTPW(pjIoBase, CRTC_INDEX, ulHighOffset);
  867. RELEASE_CRTC_CRITICAL_SECTION(ppdev);
  868. }
  869. // Remember where and when we were when we did the flip:
  870. EngQueryPerformanceCounter(&ppdev->flipRecord.liFlipTime);
  871. ppdev->flipRecord.bFlipFlag = TRUE;
  872. ppdev->flipRecord.bHaveEverCrossedVBlank = FALSE;
  873. ppdev->flipRecord.bWasEverInDisplay = FALSE;
  874. ppdev->flipRecord.fpFlipFrom = lpFlip->lpSurfCurr->lpGbl->fpVidMem;
  875. lpFlip->ddRVal = DD_OK;
  876. return(DDHAL_DRIVER_HANDLED);
  877. }
  878. /******************************Public*Routine******************************\
  879. * DWORD DdLock
  880. *
  881. \**************************************************************************/
  882. DWORD DdLock(
  883. PDD_LOCKDATA lpLock)
  884. {
  885. PDEV* ppdev;
  886. BYTE* pjMmBase;
  887. HRESULT ddrval;
  888. ppdev = (PDEV*) lpLock->lpDD->dhpdev;
  889. pjMmBase = ppdev->pjMmBase;
  890. // Check to see if any pending physical flip has occurred. Don't allow
  891. // a lock if a blt is in progress:
  892. ddrval = ddrvalUpdateFlipStatus(ppdev, lpLock->lpDDSurface->lpGbl->fpVidMem);
  893. if (ddrval != DD_OK)
  894. {
  895. lpLock->ddRVal = DDERR_WASSTILLDRAWING;
  896. return(DDHAL_DRIVER_HANDLED);
  897. }
  898. // Here's one of the places where the Windows 95 and Windows NT DirectDraw
  899. // implementations differ: on Windows NT, you should watch for
  900. // DDLOCK_WAIT and loop in the driver while the accelerator is busy.
  901. // On Windows 95, it doesn't really matter.
  902. //
  903. // (The reason is that Windows NT allows applications to draw directly
  904. // to the frame buffer even while the accelerator is running, and does
  905. // not synchronize everything on the Win16Lock. Note that on Windows NT,
  906. // it is even possible for multiple threads to be holding different
  907. // DirectDraw surface locks at the same time.)
  908. if (lpLock->dwFlags & DDLOCK_WAIT)
  909. {
  910. NW_GP_WAIT(ppdev, pjMmBase);
  911. }
  912. else if (NW_GP_BUSY(ppdev, pjMmBase))
  913. {
  914. lpLock->ddRVal = DDERR_WASSTILLDRAWING;
  915. return(DDHAL_DRIVER_HANDLED);
  916. }
  917. return(DDHAL_DRIVER_NOTHANDLED);
  918. }
  919. /******************************Public*Routine******************************\
  920. * DWORD DdGetBltStatus
  921. *
  922. * Doesn't currently really care what surface is specified, just checks
  923. * and goes.
  924. *
  925. \**************************************************************************/
  926. DWORD DdGetBltStatus(
  927. PDD_GETBLTSTATUSDATA lpGetBltStatus)
  928. {
  929. PDEV* ppdev;
  930. BYTE* pjMmBase;
  931. HRESULT ddRVal;
  932. ppdev = (PDEV*) lpGetBltStatus->lpDD->dhpdev;
  933. pjMmBase = ppdev->pjMmBase;
  934. ddRVal = DD_OK;
  935. if (lpGetBltStatus->dwFlags == DDGBS_CANBLT)
  936. {
  937. // DDGBS_CANBLT case: can we add a blt?
  938. ddRVal = ddrvalUpdateFlipStatus(ppdev,
  939. lpGetBltStatus->lpDDSurface->lpGbl->fpVidMem);
  940. if (ddRVal == DD_OK)
  941. {
  942. // There was no flip going on, so is there room in the FIFO
  943. // to add a blt?
  944. if (MM_FIFO_BUSY(ppdev, pjMmBase, DDBLT_FIFO_COUNT))
  945. {
  946. ddRVal = DDERR_WASSTILLDRAWING;
  947. }
  948. }
  949. }
  950. else
  951. {
  952. // DDGBS_ISBLTDONE case: is a blt in progress?
  953. if (NW_GP_BUSY(ppdev, pjMmBase))
  954. {
  955. ddRVal = DDERR_WASSTILLDRAWING;
  956. }
  957. }
  958. lpGetBltStatus->ddRVal = ddRVal;
  959. return(DDHAL_DRIVER_HANDLED);
  960. }
  961. /******************************Public*Routine******************************\
  962. * DWORD DdMapMemory
  963. *
  964. * This is a new DDI call specific to Windows NT that is used to map
  965. * or unmap all the application modifiable portions of the frame buffer
  966. * into the specified process's address space.
  967. *
  968. \**************************************************************************/
  969. DWORD DdMapMemory(
  970. PDD_MAPMEMORYDATA lpMapMemory)
  971. {
  972. PDEV* ppdev;
  973. VIDEO_SHARE_MEMORY ShareMemory;
  974. VIDEO_SHARE_MEMORY_INFORMATION ShareMemoryInformation;
  975. DWORD ReturnedDataLength;
  976. ppdev = (PDEV*) lpMapMemory->lpDD->dhpdev;
  977. if (lpMapMemory->bMap)
  978. {
  979. ShareMemory.ProcessHandle = lpMapMemory->hProcess;
  980. // 'RequestedVirtualAddress' isn't actually used for the SHARE IOCTL:
  981. ShareMemory.RequestedVirtualAddress = 0;
  982. // We map in starting at the top of the frame buffer:
  983. ShareMemory.ViewOffset = 0;
  984. // We map down to the end of the frame buffer.
  985. //
  986. // Note: There is a 64k granularity on the mapping (meaning that
  987. // we have to round up to 64k).
  988. //
  989. // Note: If there is any portion of the frame buffer that must
  990. // not be modified by an application, that portion of memory
  991. // MUST NOT be mapped in by this call. This would include
  992. // any data that, if modified by a malicious application,
  993. // would cause the driver to crash. This could include, for
  994. // example, any DSP code that is kept in off-screen memory.
  995. ShareMemory.ViewSize
  996. = ROUND_UP_TO_64K(ppdev->cyMemory * ppdev->lDelta);
  997. if (EngDeviceIoControl(ppdev->hDriver,
  998. IOCTL_VIDEO_SHARE_VIDEO_MEMORY,
  999. &ShareMemory,
  1000. sizeof(VIDEO_SHARE_MEMORY),
  1001. &ShareMemoryInformation,
  1002. sizeof(VIDEO_SHARE_MEMORY_INFORMATION),
  1003. &ReturnedDataLength))
  1004. {
  1005. DISPDBG((0, "Failed IOCTL_VIDEO_SHARE_MEMORY"));
  1006. lpMapMemory->ddRVal = DDERR_GENERIC;
  1007. return(DDHAL_DRIVER_HANDLED);
  1008. }
  1009. lpMapMemory->fpProcess = (FLATPTR)ShareMemoryInformation.VirtualAddress;
  1010. }
  1011. else
  1012. {
  1013. ShareMemory.ProcessHandle = lpMapMemory->hProcess;
  1014. ShareMemory.ViewOffset = 0;
  1015. ShareMemory.ViewSize = 0;
  1016. ShareMemory.RequestedVirtualAddress = (VOID*) lpMapMemory->fpProcess;
  1017. if (EngDeviceIoControl(ppdev->hDriver,
  1018. IOCTL_VIDEO_UNSHARE_VIDEO_MEMORY,
  1019. &ShareMemory,
  1020. sizeof(VIDEO_SHARE_MEMORY),
  1021. NULL,
  1022. 0,
  1023. &ReturnedDataLength))
  1024. {
  1025. RIP("Failed IOCTL_VIDEO_UNSHARE_MEMORY");
  1026. }
  1027. }
  1028. lpMapMemory->ddRVal = DD_OK;
  1029. return(DDHAL_DRIVER_HANDLED);
  1030. }
  1031. /******************************Public*Routine******************************\
  1032. * DWORD DdGetFlipStatus
  1033. *
  1034. * If the display has gone through one refresh cycle since the flip
  1035. * occurred, we return DD_OK. If it has not gone through one refresh
  1036. * cycle we return DDERR_WASSTILLDRAWING to indicate that this surface
  1037. * is still busy "drawing" the flipped page. We also return
  1038. * DDERR_WASSTILLDRAWING if the bltter is busy and the caller wanted
  1039. * to know if they could flip yet.
  1040. *
  1041. \**************************************************************************/
  1042. DWORD DdGetFlipStatus(
  1043. PDD_GETFLIPSTATUSDATA lpGetFlipStatus)
  1044. {
  1045. PDEV* ppdev;
  1046. BYTE* pjMmBase;
  1047. ppdev = (PDEV*) lpGetFlipStatus->lpDD->dhpdev;
  1048. pjMmBase = ppdev->pjMmBase;
  1049. // We don't want a flip to work until after the last flip is done,
  1050. // so we ask for the general flip status and ignore the vmem:
  1051. lpGetFlipStatus->ddRVal = ddrvalUpdateFlipStatus(ppdev, (FLATPTR) -1);
  1052. // Check if the bltter is busy if someone wants to know if they can
  1053. // flip:
  1054. if (lpGetFlipStatus->dwFlags == DDGFS_CANFLIP)
  1055. {
  1056. if ((lpGetFlipStatus->ddRVal == DD_OK) && (NW_GP_BUSY(ppdev, pjMmBase)))
  1057. {
  1058. lpGetFlipStatus->ddRVal = DDERR_WASSTILLDRAWING;
  1059. }
  1060. }
  1061. return(DDHAL_DRIVER_HANDLED);
  1062. }
  1063. /******************************Public*Routine******************************\
  1064. * DWORD DdWaitForVerticalBlank
  1065. *
  1066. \**************************************************************************/
  1067. DWORD DdWaitForVerticalBlank(
  1068. PDD_WAITFORVERTICALBLANKDATA lpWaitForVerticalBlank)
  1069. {
  1070. PDEV* ppdev;
  1071. BYTE* pjIoBase;
  1072. ppdev = (PDEV*) lpWaitForVerticalBlank->lpDD->dhpdev;
  1073. pjIoBase = ppdev->pjIoBase;
  1074. switch (lpWaitForVerticalBlank->dwFlags)
  1075. {
  1076. case DDWAITVB_I_TESTVB:
  1077. // If TESTVB, it's just a request for the current vertical blank
  1078. // status:
  1079. if (VBLANK_IS_ACTIVE(pjIoBase))
  1080. lpWaitForVerticalBlank->bIsInVB = TRUE;
  1081. else
  1082. lpWaitForVerticalBlank->bIsInVB = FALSE;
  1083. lpWaitForVerticalBlank->ddRVal = DD_OK;
  1084. return(DDHAL_DRIVER_HANDLED);
  1085. case DDWAITVB_BLOCKBEGIN:
  1086. // If BLOCKBEGIN is requested, we wait until the vertical blank
  1087. // is over, and then wait for the display period to end:
  1088. while (VBLANK_IS_ACTIVE(pjIoBase))
  1089. ;
  1090. while (!(VBLANK_IS_ACTIVE(pjIoBase)))
  1091. ;
  1092. lpWaitForVerticalBlank->ddRVal = DD_OK;
  1093. return(DDHAL_DRIVER_HANDLED);
  1094. case DDWAITVB_BLOCKEND:
  1095. // If BLOCKEND is requested, we wait for the vblank interval to end:
  1096. while (!(VBLANK_IS_ACTIVE(pjIoBase)))
  1097. ;
  1098. while (VBLANK_IS_ACTIVE(pjIoBase))
  1099. ;
  1100. lpWaitForVerticalBlank->ddRVal = DD_OK;
  1101. return(DDHAL_DRIVER_HANDLED);
  1102. }
  1103. return(DDHAL_DRIVER_NOTHANDLED);
  1104. }
  1105. /******************************Public*Routine******************************\
  1106. * DWORD DdCanCreateSurface
  1107. *
  1108. \**************************************************************************/
  1109. DWORD DdCanCreateSurface(
  1110. PDD_CANCREATESURFACEDATA lpCanCreateSurface)
  1111. {
  1112. PDEV* ppdev;
  1113. DWORD dwRet;
  1114. LPDDSURFACEDESC lpSurfaceDesc;
  1115. ppdev = (PDEV*) lpCanCreateSurface->lpDD->dhpdev;
  1116. lpSurfaceDesc = lpCanCreateSurface->lpDDSurfaceDesc;
  1117. dwRet = DDHAL_DRIVER_NOTHANDLED;
  1118. if (!lpCanCreateSurface->bIsDifferentPixelFormat)
  1119. {
  1120. // It's trivially easy to create plain surfaces that are the same
  1121. // type as the primary surface:
  1122. dwRet = DDHAL_DRIVER_HANDLED;
  1123. }
  1124. // If the streams processor is capable, we can handle overlays:
  1125. else if (ppdev->flCaps & CAPS_STREAMS_CAPABLE)
  1126. {
  1127. // When using the Streams processor, we handle only overlays of
  1128. // different pixel formats -- not any off-screen memory:
  1129. if (lpSurfaceDesc->ddsCaps.dwCaps & DDSCAPS_OVERLAY)
  1130. {
  1131. // We handle two types of YUV overlay surfaces:
  1132. if (lpSurfaceDesc->ddpfPixelFormat.dwFlags & DDPF_FOURCC)
  1133. {
  1134. // Check first for a supported YUV type:
  1135. if (lpSurfaceDesc->ddpfPixelFormat.dwFourCC == FOURCC_YUY2)
  1136. {
  1137. lpSurfaceDesc->ddpfPixelFormat.dwYUVBitCount = 16;
  1138. dwRet = DDHAL_DRIVER_HANDLED;
  1139. }
  1140. }
  1141. // We handle 16bpp and 32bpp RGB overlay surfaces:
  1142. else if ((lpSurfaceDesc->ddpfPixelFormat.dwFlags & DDPF_RGB) &&
  1143. !(lpSurfaceDesc->ddpfPixelFormat.dwFlags & DDPF_PALETTEINDEXED8))
  1144. {
  1145. if (lpSurfaceDesc->ddpfPixelFormat.dwRGBBitCount == 16)
  1146. {
  1147. if (IS_RGB15(&lpSurfaceDesc->ddpfPixelFormat) ||
  1148. IS_RGB16(&lpSurfaceDesc->ddpfPixelFormat))
  1149. {
  1150. dwRet = DDHAL_DRIVER_HANDLED;
  1151. }
  1152. }
  1153. // We don't handle 24bpp overlay surfaces because they are
  1154. // undocumented and don't seem to work on the Trio64V+.
  1155. //
  1156. // We don't handle 32bpp overlay surfaces because our streams
  1157. // minimum-stretch-ratio tables were obviously created for
  1158. // 16bpp overlay surfaces; 32bpp overlay surfaces create a lot
  1159. // of noise when close to the minimum stretch ratio.
  1160. }
  1161. }
  1162. }
  1163. // If the pixel formatter is enabled, we can handle funky format off-
  1164. // screen surfaces, but not at 8bpp because of palette issues:
  1165. else if ((ppdev->flCaps & CAPS_PIXEL_FORMATTER) &&
  1166. (ppdev->iBitmapFormat > BMF_8BPP))
  1167. {
  1168. if (lpSurfaceDesc->ddpfPixelFormat.dwFlags & DDPF_FOURCC)
  1169. {
  1170. if (lpSurfaceDesc->ddpfPixelFormat.dwFourCC == FOURCC_YUY2)
  1171. {
  1172. lpSurfaceDesc->ddpfPixelFormat.dwYUVBitCount = 16;
  1173. dwRet = DDHAL_DRIVER_HANDLED;
  1174. }
  1175. }
  1176. // We handle 16bpp and 32bpp RGB off-screen surfaces:
  1177. else if ((lpSurfaceDesc->ddpfPixelFormat.dwFlags & DDPF_RGB) &&
  1178. !(lpSurfaceDesc->ddpfPixelFormat.dwFlags & DDPF_PALETTEINDEXED8))
  1179. {
  1180. if (lpSurfaceDesc->ddpfPixelFormat.dwRGBBitCount == 16)
  1181. {
  1182. if (IS_RGB15(&lpSurfaceDesc->ddpfPixelFormat) ||
  1183. IS_RGB16(&lpSurfaceDesc->ddpfPixelFormat))
  1184. {
  1185. dwRet = DDHAL_DRIVER_HANDLED;
  1186. }
  1187. }
  1188. else if (lpSurfaceDesc->ddpfPixelFormat.dwRGBBitCount == 32)
  1189. {
  1190. if (IS_RGB32(&lpSurfaceDesc->ddpfPixelFormat))
  1191. {
  1192. dwRet = DDHAL_DRIVER_HANDLED;
  1193. }
  1194. }
  1195. }
  1196. }
  1197. // Print some spew if this was a surface we refused to create:
  1198. if (dwRet == DDHAL_DRIVER_NOTHANDLED)
  1199. {
  1200. if (lpSurfaceDesc->ddpfPixelFormat.dwFlags & DDPF_RGB)
  1201. {
  1202. DISPDBG((0, "Failed creation of %libpp RGB surface %lx %lx %lx",
  1203. lpSurfaceDesc->ddpfPixelFormat.dwRGBBitCount,
  1204. lpSurfaceDesc->ddpfPixelFormat.dwRBitMask,
  1205. lpSurfaceDesc->ddpfPixelFormat.dwGBitMask,
  1206. lpSurfaceDesc->ddpfPixelFormat.dwBBitMask));
  1207. }
  1208. else
  1209. {
  1210. DISPDBG((0, "Failed creation of type 0x%lx YUV 0x%lx surface",
  1211. lpSurfaceDesc->ddpfPixelFormat.dwFlags,
  1212. lpSurfaceDesc->ddpfPixelFormat.dwFourCC));
  1213. }
  1214. }
  1215. lpCanCreateSurface->ddRVal = DD_OK;
  1216. return(dwRet);
  1217. }
  1218. /******************************Public*Routine******************************\
  1219. * DWORD DdCreateSurface
  1220. *
  1221. \**************************************************************************/
  1222. DWORD DdCreateSurface(
  1223. PDD_CREATESURFACEDATA lpCreateSurface)
  1224. {
  1225. PDEV* ppdev;
  1226. DD_SURFACE_LOCAL* lpSurfaceLocal;
  1227. DD_SURFACE_GLOBAL* lpSurfaceGlobal;
  1228. LPDDSURFACEDESC lpSurfaceDesc;
  1229. DWORD dwByteCount;
  1230. LONG lLinearPitch;
  1231. DWORD dwHeight;
  1232. ppdev = (PDEV*) lpCreateSurface->lpDD->dhpdev;
  1233. // On Windows NT, dwSCnt will always be 1, so there will only ever
  1234. // be one entry in the 'lplpSList' array:
  1235. lpSurfaceLocal = lpCreateSurface->lplpSList[0];
  1236. lpSurfaceGlobal = lpSurfaceLocal->lpGbl;
  1237. lpSurfaceDesc = lpCreateSurface->lpDDSurfaceDesc;
  1238. // We repeat the same checks we did in 'DdCanCreateSurface' because
  1239. // it's possible that an application doesn't call 'DdCanCreateSurface'
  1240. // before calling 'DdCreateSurface'.
  1241. ASSERTDD(lpSurfaceGlobal->ddpfSurface.dwSize == sizeof(DDPIXELFORMAT),
  1242. "NT is supposed to guarantee that ddpfSurface.dwSize is valid");
  1243. // DdCanCreateSurface already validated whether the hardware supports
  1244. // the surface, so we don't need to do any validation here. We'll
  1245. // just go ahead and allocate it.
  1246. //
  1247. // Note that we don't do anything special for RGB surfaces that are
  1248. // the same pixel format as the display -- by returning DDHAL_DRIVER_
  1249. // NOTHANDLED, DirectDraw will automatically handle the allocation
  1250. // for us.
  1251. //
  1252. // Also, since we'll be making linear surfaces, make sure the width
  1253. // isn't unreasonably large.
  1254. //
  1255. // Note that on NT, an overlay can be created only if the driver
  1256. // okay's it here in this routine. Under Win95, the overlay will be
  1257. // created automatically if it's the same pixel format as the primary
  1258. // display.
  1259. if ((lpSurfaceLocal->ddsCaps.dwCaps & DDSCAPS_OVERLAY) ||
  1260. (lpSurfaceGlobal->ddpfSurface.dwFlags & DDPF_FOURCC) ||
  1261. (lpSurfaceGlobal->ddpfSurface.dwYUVBitCount
  1262. != (DWORD) 8 * ppdev->cjPelSize) ||
  1263. (lpSurfaceGlobal->ddpfSurface.dwRBitMask != ppdev->flRed))
  1264. {
  1265. if (lpSurfaceGlobal->wWidth <= (DWORD) ppdev->cxMemory)
  1266. {
  1267. // The S3 cannot easily draw to YUV surfaces or surfaces that are
  1268. // a different RGB format than the display. So we'll make them
  1269. // linear surfaces to save some space:
  1270. if (lpSurfaceGlobal->ddpfSurface.dwFlags & DDPF_FOURCC)
  1271. {
  1272. ASSERTDD((lpSurfaceGlobal->ddpfSurface.dwFourCC == FOURCC_YUY2),
  1273. "Expected our DdCanCreateSurface to allow only YUY2 or Y211");
  1274. dwByteCount = (lpSurfaceGlobal->ddpfSurface.dwFourCC == FOURCC_YUY2)
  1275. ? 2 : 1;
  1276. // We have to fill in the bit-count for FourCC surfaces:
  1277. lpSurfaceGlobal->ddpfSurface.dwYUVBitCount = 8 * dwByteCount;
  1278. DISPDBG((0, "Created YUV: %li x %li",
  1279. lpSurfaceGlobal->wWidth, lpSurfaceGlobal->wHeight));
  1280. }
  1281. else
  1282. {
  1283. dwByteCount = lpSurfaceGlobal->ddpfSurface.dwRGBBitCount >> 3;
  1284. DISPDBG((0, "Created RGB %libpp: %li x %li Red: %lx",
  1285. 8 * dwByteCount, lpSurfaceGlobal->wWidth, lpSurfaceGlobal->wHeight,
  1286. lpSurfaceGlobal->ddpfSurface.dwRBitMask));
  1287. // The S3 can't handle palettized or 32bpp overlays. Note that
  1288. // we sometimes don't get a chance to say no to these surfaces
  1289. // in CanCreateSurface, because DirectDraw won't call
  1290. // CanCreateSurface if the surface to be created is the same
  1291. // pixel format as the primary display:
  1292. if ((dwByteCount != 2) &&
  1293. (lpSurfaceLocal->ddsCaps.dwCaps & DDSCAPS_OVERLAY))
  1294. {
  1295. lpCreateSurface->ddRVal = DDERR_INVALIDPIXELFORMAT;
  1296. return(DDHAL_DRIVER_HANDLED);
  1297. }
  1298. }
  1299. // We want to allocate a linear surface to store the FourCC
  1300. // surface, but DirectDraw is using a 2-D heap-manager because
  1301. // the rest of our surfaces have to be 2-D. So here we have to
  1302. // convert the linear size to a 2-D size.
  1303. //
  1304. // The stride has to be a dword multiple:
  1305. lLinearPitch = (lpSurfaceGlobal->wWidth * dwByteCount + 3) & ~3;
  1306. dwHeight = (lpSurfaceGlobal->wHeight * lLinearPitch
  1307. + ppdev->lDelta - 1) / ppdev->lDelta;
  1308. // Now fill in enough stuff to have the DirectDraw heap-manager
  1309. // do the allocation for us:
  1310. lpSurfaceGlobal->fpVidMem = DDHAL_PLEASEALLOC_BLOCKSIZE;
  1311. lpSurfaceGlobal->dwBlockSizeX = ppdev->lDelta; // Specified in bytes
  1312. lpSurfaceGlobal->dwBlockSizeY = dwHeight;
  1313. lpSurfaceGlobal->lPitch = lLinearPitch;
  1314. lpSurfaceGlobal->dwReserved1 = DD_RESERVED_DIFFERENTPIXELFORMAT;
  1315. lpSurfaceDesc->lPitch = lLinearPitch;
  1316. lpSurfaceDesc->dwFlags |= DDSD_PITCH;
  1317. }
  1318. else
  1319. {
  1320. DISPDBG((0, "Refused to create surface with large width"));
  1321. }
  1322. }
  1323. return(DDHAL_DRIVER_NOTHANDLED);
  1324. }
  1325. /******************************Public*Routine******************************\
  1326. * DWORD DdFreeDriverMemory
  1327. *
  1328. * This function called by DirectDraw when it's running low on memory in
  1329. * our heap. You only need to implement this function if you use the
  1330. * DirectDraw 'HeapVidMemAllocAligned' function in your driver, and you
  1331. * can boot those allocations out of memory to make room for DirectDraw.
  1332. *
  1333. * We implement this function in the S3 driver because we have DirectDraw
  1334. * entirely manage our off-screen heap, and we use HeapVidMemAllocAligned
  1335. * to put GDI device-bitmaps in off-screen memory. DirectDraw applications
  1336. * have a higher priority for getting stuff into video memory, though, and
  1337. * so this function is used to boot those GDI surfaces out of memory in
  1338. * order to make room for DirectDraw.
  1339. *
  1340. \**************************************************************************/
  1341. DWORD DdFreeDriverMemory(
  1342. PDD_FREEDRIVERMEMORYDATA lpFreeDriverMemory)
  1343. {
  1344. PDEV* ppdev;
  1345. ppdev = (PDEV*) lpFreeDriverMemory->lpDD->dhpdev;
  1346. lpFreeDriverMemory->ddRVal = DDERR_OUTOFMEMORY;
  1347. // If we successfully freed up some memory, set the return value to
  1348. // 'DD_OK'. DirectDraw will try again to do its allocation, and
  1349. // will call us again if there's still not enough room. (It will
  1350. // call us until either there's enough room for its alocation to
  1351. // succeed, or until we return something other than DD_OK.)
  1352. if (bMoveOldestOffscreenDfbToDib(ppdev))
  1353. {
  1354. lpFreeDriverMemory->ddRVal = DD_OK;
  1355. }
  1356. return(DDHAL_DRIVER_HANDLED);
  1357. }
  1358. /******************************Public*Routine******************************\
  1359. * DWORD DdSetColorKey
  1360. *
  1361. \**************************************************************************/
  1362. DWORD DdSetColorKey(
  1363. PDD_SETCOLORKEYDATA lpSetColorKey)
  1364. {
  1365. PDEV* ppdev;
  1366. BYTE* pjIoBase;
  1367. BYTE* pjMmBase;
  1368. DD_SURFACE_GLOBAL* lpSurface;
  1369. DWORD dwKeyLow;
  1370. DWORD dwKeyHigh;
  1371. ppdev = (PDEV*) lpSetColorKey->lpDD->dhpdev;
  1372. ASSERTDD(ppdev->flCaps & CAPS_STREAMS_CAPABLE, "Shouldn't have hooked call");
  1373. pjIoBase = ppdev->pjIoBase;
  1374. pjMmBase = ppdev->pjMmBase;
  1375. lpSurface = lpSetColorKey->lpDDSurface->lpGbl;
  1376. // We don't have to do anything for normal blt source colour keys:
  1377. if (lpSetColorKey->dwFlags & DDCKEY_SRCBLT)
  1378. {
  1379. lpSetColorKey->ddRVal = DD_OK;
  1380. return(DDHAL_DRIVER_HANDLED);
  1381. }
  1382. else if (lpSetColorKey->dwFlags & DDCKEY_DESTOVERLAY)
  1383. {
  1384. dwKeyLow = lpSetColorKey->ckNew.dwColorSpaceLowValue;
  1385. if (lpSurface->ddpfSurface.dwFlags & DDPF_PALETTEINDEXED8)
  1386. {
  1387. dwKeyLow = dwGetPaletteEntry(ppdev, dwKeyLow);
  1388. }
  1389. else
  1390. {
  1391. ASSERTDD(lpSurface->ddpfSurface.dwFlags & DDPF_RGB,
  1392. "Expected only RGB cases here");
  1393. // We have to transform the colour key from its native format
  1394. // to 8-8-8:
  1395. if (lpSurface->ddpfSurface.dwRGBBitCount == 16)
  1396. {
  1397. if (IS_RGB15_R(lpSurface->ddpfSurface.dwRBitMask))
  1398. dwKeyLow = RGB15to32(dwKeyLow);
  1399. else
  1400. dwKeyLow = RGB16to32(dwKeyLow);
  1401. }
  1402. else
  1403. {
  1404. ASSERTDD((lpSurface->ddpfSurface.dwRGBBitCount == 32),
  1405. "Expected the primary surface to be either 8, 16, or 32bpp");
  1406. }
  1407. }
  1408. dwKeyHigh = dwKeyLow;
  1409. dwKeyLow |= CompareBits0t7 | KeyFromCompare;
  1410. // Check for stream processor enabled before setting registers
  1411. if(ppdev->flStatus & STAT_STREAMS_ENABLED)
  1412. {
  1413. WAIT_FOR_VBLANK(pjIoBase);
  1414. WRITE_STREAM_D(pjMmBase, CKEY_LOW, dwKeyLow);
  1415. WRITE_STREAM_D(pjMmBase, CKEY_HI, dwKeyHigh);
  1416. }
  1417. else
  1418. {
  1419. // Save away the color key to be set when streams
  1420. // processor is turned on.
  1421. ppdev->ulColorKey = dwKeyHigh;
  1422. }
  1423. lpSetColorKey->ddRVal = DD_OK;
  1424. return(DDHAL_DRIVER_HANDLED);
  1425. }
  1426. DISPDBG((0, "DdSetColorKey: Invalid command"));
  1427. return(DDHAL_DRIVER_NOTHANDLED);
  1428. }
  1429. /******************************Public*Routine******************************\
  1430. * DWORD DdUpdateOverlay
  1431. *
  1432. \**************************************************************************/
  1433. DWORD DdUpdateOverlay(
  1434. PDD_UPDATEOVERLAYDATA lpUpdateOverlay)
  1435. {
  1436. PDEV* ppdev;
  1437. BYTE* pjIoBase;
  1438. BYTE* pjMmBase;
  1439. DD_SURFACE_GLOBAL* lpSource;
  1440. DD_SURFACE_GLOBAL* lpDestination;
  1441. DWORD dwStride;
  1442. LONG srcWidth;
  1443. LONG srcHeight;
  1444. LONG dstWidth;
  1445. LONG dstHeight;
  1446. DWORD dwBitCount;
  1447. DWORD dwStart;
  1448. DWORD dwTmp;
  1449. BOOL bColorKey;
  1450. DWORD dwKeyLow;
  1451. DWORD dwKeyHigh;
  1452. DWORD dwBytesPerPixel;
  1453. DWORD dwSecCtrl;
  1454. DWORD dwBlendCtrl;
  1455. ppdev = (PDEV*) lpUpdateOverlay->lpDD->dhpdev;
  1456. ASSERTDD(ppdev->flCaps & CAPS_STREAMS_CAPABLE, "Shouldn't have hooked call");
  1457. pjIoBase = ppdev->pjIoBase;
  1458. pjMmBase = ppdev->pjMmBase;
  1459. // 'Source' is the overlay surface, 'destination' is the surface to
  1460. // be overlayed:
  1461. lpSource = lpUpdateOverlay->lpDDSrcSurface->lpGbl;
  1462. if (lpUpdateOverlay->dwFlags & DDOVER_HIDE)
  1463. {
  1464. if (lpSource->fpVidMem == ppdev->fpVisibleOverlay)
  1465. {
  1466. WAIT_FOR_VBLANK(pjIoBase);
  1467. WRITE_STREAM_D(pjMmBase, BLEND_CONTROL, POnS);
  1468. WRITE_STREAM_D(pjMmBase, S_WH, WH(10, 2)); // Set to 10x2 rectangle
  1469. WRITE_STREAM_D(pjMmBase, OPAQUE_CONTROL, 0);// Disable opaque control
  1470. ppdev->fpVisibleOverlay = 0;
  1471. ASSERTDD(ppdev->flStatus & STAT_STREAMS_ENABLED,
  1472. "Expected streams to be enabled");
  1473. ppdev->flStatus &= ~STAT_STREAMS_ENABLED;
  1474. vTurnOffStreamsProcessorMode(ppdev);
  1475. }
  1476. lpUpdateOverlay->ddRVal = DD_OK;
  1477. return(DDHAL_DRIVER_HANDLED);
  1478. }
  1479. // Dereference 'lpDDDestSurface' only after checking for the DDOVER_HIDE
  1480. // case:
  1481. lpDestination = lpUpdateOverlay->lpDDDestSurface->lpGbl;
  1482. if (lpSource->fpVidMem != ppdev->fpVisibleOverlay)
  1483. {
  1484. if (lpUpdateOverlay->dwFlags & DDOVER_SHOW)
  1485. {
  1486. if (ppdev->fpVisibleOverlay != 0)
  1487. {
  1488. // Some other overlay is already visible:
  1489. DISPDBG((0, "DdUpdateOverlay: An overlay is already visible"));
  1490. lpUpdateOverlay->ddRVal = DDERR_OUTOFCAPS;
  1491. return(DDHAL_DRIVER_HANDLED);
  1492. }
  1493. else
  1494. {
  1495. // We're going to make the overlay visible, so mark it as
  1496. // such:
  1497. ppdev->fpVisibleOverlay = lpSource->fpVidMem;
  1498. }
  1499. }
  1500. else
  1501. {
  1502. // The overlay isn't visible, and we haven't been asked to make
  1503. // it visible, so this call is trivially easy:
  1504. lpUpdateOverlay->ddRVal = DD_OK;
  1505. return(DDHAL_DRIVER_HANDLED);
  1506. }
  1507. }
  1508. if (!(ppdev->flStatus & STAT_STREAMS_ENABLED))
  1509. {
  1510. ppdev->flStatus |= STAT_STREAMS_ENABLED;
  1511. vTurnOnStreamsProcessorMode(ppdev);
  1512. }
  1513. dwStride = lpSource->lPitch;
  1514. srcWidth = lpUpdateOverlay->rSrc.right - lpUpdateOverlay->rSrc.left;
  1515. srcHeight = lpUpdateOverlay->rSrc.bottom - lpUpdateOverlay->rSrc.top;
  1516. dstWidth = lpUpdateOverlay->rDest.right - lpUpdateOverlay->rDest.left;
  1517. dstHeight = lpUpdateOverlay->rDest.bottom - lpUpdateOverlay->rDest.top;
  1518. // Calculate DDA horizonal accumulator initial value:
  1519. dwSecCtrl = HDDA(srcWidth, dstWidth);
  1520. // Overlay input data format:
  1521. if (lpSource->ddpfSurface.dwFlags & DDPF_FOURCC)
  1522. {
  1523. dwBitCount = lpSource->ddpfSurface.dwYUVBitCount;
  1524. switch (lpSource->ddpfSurface.dwFourCC)
  1525. {
  1526. case FOURCC_YUY2:
  1527. dwSecCtrl |= S_YCrCb422; // Not S_YUV422! Dunno why...
  1528. break;
  1529. default:
  1530. RIP("Unexpected FourCC");
  1531. }
  1532. }
  1533. else
  1534. {
  1535. ASSERTDD(lpSource->ddpfSurface.dwFlags & DDPF_RGB,
  1536. "Expected us to have created only RGB or YUV overlays");
  1537. // The overlay surface is in RGB format:
  1538. dwBitCount = lpSource->ddpfSurface.dwRGBBitCount;
  1539. ASSERTDD(dwBitCount == 16,
  1540. "Expected us to have created 16bpp RGB surfaces only");
  1541. if (IS_RGB15_R(lpSource->ddpfSurface.dwRBitMask))
  1542. dwSecCtrl |= S_RGB15;
  1543. else
  1544. dwSecCtrl |= S_RGB16;
  1545. }
  1546. // Calculate start of video memory in QWORD boundary
  1547. dwBytesPerPixel = dwBitCount >> 3;
  1548. dwStart = (lpUpdateOverlay->rSrc.top * dwStride)
  1549. + (lpUpdateOverlay->rSrc.left * dwBytesPerPixel);
  1550. // Note that since we're shifting the source's edge to the left, we
  1551. // should really increase the source width to compensate. However,
  1552. // doing so when running at 1 to 1 would cause us to request a
  1553. // shrinking overlay -- something the S3 can't do.
  1554. dwStart = dwStart - (dwStart & 0x7);
  1555. ppdev->dwOverlayFlipOffset = dwStart; // Save for flip
  1556. dwStart += (DWORD)lpSource->fpVidMem;
  1557. // Set overlay filter characteristics:
  1558. if ((dstWidth != srcWidth) || (dstHeight != srcHeight))
  1559. {
  1560. if (dstWidth >= (srcWidth << 2))
  1561. {
  1562. dwSecCtrl |= S_Beyond4x; // Linear, 1-2-2-2-1, for >4X stretch
  1563. }
  1564. else if (dstWidth >= (srcWidth << 1))
  1565. {
  1566. dwSecCtrl |= S_2xTo4x; // Bi-linear, for 2X to 4X stretch
  1567. }
  1568. else
  1569. {
  1570. dwSecCtrl |= S_Upto2x; // Linear, 0-2-4-2-0, for X stretch
  1571. }
  1572. }
  1573. // Extract colour key:
  1574. bColorKey = FALSE;
  1575. dwBlendCtrl = 0;
  1576. if (lpUpdateOverlay->dwFlags & DDOVER_KEYDEST)
  1577. {
  1578. bColorKey = TRUE;
  1579. dwKeyLow = lpUpdateOverlay->lpDDDestSurface->ddckCKDestOverlay.dwColorSpaceLowValue;
  1580. dwBlendCtrl |= KeyOnP;
  1581. }
  1582. else if (lpUpdateOverlay->dwFlags & DDOVER_KEYDESTOVERRIDE)
  1583. {
  1584. bColorKey = TRUE;
  1585. dwKeyLow = lpUpdateOverlay->overlayFX.dckDestColorkey.dwColorSpaceLowValue;
  1586. dwBlendCtrl |= KeyOnP;
  1587. }
  1588. if (bColorKey)
  1589. {
  1590. // We support only destination colour keys:
  1591. if (lpDestination->ddpfSurface.dwFlags & DDPF_PALETTEINDEXED8)
  1592. {
  1593. dwKeyLow = dwGetPaletteEntry(ppdev, dwKeyLow);
  1594. }
  1595. else if (lpDestination->ddpfSurface.dwFlags & DDPF_RGB)
  1596. {
  1597. ASSERTDD(lpDestination->ddpfSurface.dwFlags & DDPF_RGB,
  1598. "Expected only RGB cases here");
  1599. // We have to transform the colour key from its native format
  1600. // to 8-8-8:
  1601. if (lpDestination->ddpfSurface.dwRGBBitCount == 16)
  1602. {
  1603. if (IS_RGB15_R(lpDestination->ddpfSurface.dwRBitMask))
  1604. dwKeyLow = RGB15to32(dwKeyLow);
  1605. else
  1606. dwKeyLow = RGB16to32(dwKeyLow);
  1607. }
  1608. else
  1609. {
  1610. ASSERTDD((lpDestination->ddpfSurface.dwRGBBitCount == 32),
  1611. "Expected the primary surface to be either 8, 16, or 32bpp");
  1612. }
  1613. }
  1614. dwKeyHigh = dwKeyLow;
  1615. dwKeyLow |= CompareBits0t7 | KeyFromCompare;
  1616. }
  1617. // Update and show:
  1618. NW_GP_WAIT(ppdev, pjMmBase);
  1619. WAIT_FOR_VBLANK(pjIoBase);
  1620. WRITE_STREAM_D(pjMmBase, S_0, dwStart);
  1621. WRITE_STREAM_D(pjMmBase, S_XY, XY(lpUpdateOverlay->rDest.left,
  1622. lpUpdateOverlay->rDest.top));
  1623. WRITE_STREAM_D(pjMmBase, S_WH, WH(dstWidth, dstHeight));
  1624. WRITE_STREAM_D(pjMmBase, S_STRIDE, dwStride);
  1625. WRITE_STREAM_D(pjMmBase, S_CONTROL, dwSecCtrl);
  1626. WRITE_STREAM_D(pjMmBase, S_HK1K2, HK1K2(srcWidth, dstWidth));
  1627. WRITE_STREAM_D(pjMmBase, S_VK1, VK1(srcHeight));
  1628. WRITE_STREAM_D(pjMmBase, S_VK2, VK2(srcHeight, dstHeight));
  1629. WRITE_STREAM_D(pjMmBase, S_VDDA, VDDA(dstHeight));
  1630. if (bColorKey)
  1631. {
  1632. WRITE_STREAM_D(pjMmBase, CKEY_LOW, dwKeyLow);
  1633. WRITE_STREAM_D(pjMmBase, CKEY_HI, dwKeyHigh);
  1634. }
  1635. WRITE_STREAM_D(pjMmBase, BLEND_CONTROL, dwBlendCtrl);
  1636. WRITE_STREAM_D(pjMmBase, FIFO_CONTROL, ppdev->ulFifoValue);
  1637. lpUpdateOverlay->ddRVal = DD_OK;
  1638. return(DDHAL_DRIVER_HANDLED);
  1639. }
  1640. /******************************Public*Routine******************************\
  1641. * DWORD DdSetOverlayPosition
  1642. *
  1643. \**************************************************************************/
  1644. DWORD DdSetOverlayPosition(
  1645. PDD_SETOVERLAYPOSITIONDATA lpSetOverlayPosition)
  1646. {
  1647. PDEV* ppdev;
  1648. BYTE* pjIoBase;
  1649. BYTE* pjMmBase;
  1650. ppdev = (PDEV*) lpSetOverlayPosition->lpDD->dhpdev;
  1651. pjIoBase = ppdev->pjIoBase;
  1652. pjMmBase = ppdev->pjMmBase;
  1653. ASSERTDD(ppdev->flCaps & CAPS_STREAMS_CAPABLE, "Shouldn't have hooked call");
  1654. // Check that streams processor is enabled before settting registers
  1655. if(ppdev->flStatus & STAT_STREAMS_ENABLED)
  1656. {
  1657. WAIT_FOR_VBLANK(pjIoBase);
  1658. WRITE_STREAM_D(pjMmBase, S_XY, XY(lpSetOverlayPosition->lXPos,
  1659. lpSetOverlayPosition->lYPos));
  1660. }
  1661. lpSetOverlayPosition->ddRVal = DD_OK;
  1662. return(DDHAL_DRIVER_HANDLED);
  1663. }
  1664. /******************************Public*Routine******************************\
  1665. * DWORD DdGetDriverInfo
  1666. *
  1667. * This function is an extensible method for returning DirectDraw
  1668. * capabilities and methods.
  1669. *
  1670. \**************************************************************************/
  1671. DWORD DdGetDriverInfo(
  1672. PDD_GETDRIVERINFODATA lpGetDriverInfo)
  1673. {
  1674. DWORD dwSize;
  1675. lpGetDriverInfo->ddRVal = DDERR_CURRENTLYNOTAVAIL;
  1676. if (IsEqualIID(&lpGetDriverInfo->guidInfo, &GUID_NTCallbacks))
  1677. {
  1678. DD_NTCALLBACKS NtCallbacks;
  1679. memset(&NtCallbacks, 0, sizeof(NtCallbacks));
  1680. dwSize = min(lpGetDriverInfo->dwExpectedSize, sizeof(DD_NTCALLBACKS));
  1681. NtCallbacks.dwSize = dwSize;
  1682. NtCallbacks.dwFlags = DDHAL_NTCB32_FREEDRIVERMEMORY;
  1683. NtCallbacks.FreeDriverMemory = DdFreeDriverMemory;
  1684. memcpy(lpGetDriverInfo->lpvData, &NtCallbacks, dwSize);
  1685. lpGetDriverInfo->ddRVal = DD_OK;
  1686. }
  1687. return(DDHAL_DRIVER_HANDLED);
  1688. }
  1689. /******************************Public*Routine******************************\
  1690. * VOID vAssertModeDirectDraw
  1691. *
  1692. * This function is called by enable.c when entering or leaving the
  1693. * DOS full-screen character mode.
  1694. *
  1695. \**************************************************************************/
  1696. VOID vAssertModeDirectDraw(
  1697. PDEV* ppdev,
  1698. BOOL bEnable)
  1699. {
  1700. }
  1701. /******************************Public*Routine******************************\
  1702. * BOOL bEnableDirectDraw
  1703. *
  1704. * This function is called by enable.c when the mode is first initialized,
  1705. * right after the miniport does the mode-set.
  1706. *
  1707. \**************************************************************************/
  1708. BOOL bEnableDirectDraw(
  1709. PDEV* ppdev)
  1710. {
  1711. BYTE* pjIoBase;
  1712. VIDEO_QUERY_STREAMS_MODE VideoQueryStreamsMode;
  1713. VIDEO_QUERY_STREAMS_PARAMETERS VideoQueryStreamsParameters;
  1714. DWORD ReturnedDataLength;
  1715. BOOL bDDrawEnabled=TRUE;
  1716. // We're not going to bother to support accelerated DirectDraw on
  1717. // those S3s that can't support memory-mapped I/O, simply because
  1718. // they're old cards and it's not worth the effort. We also
  1719. // require DIRECT_ACCESS to the frame buffer.
  1720. //
  1721. // We also don't support 864/964 cards because writing to the frame
  1722. // buffer can hang the entire system if an accelerated operation is
  1723. // going on at the same time.
  1724. //
  1725. // The 765 (Trio64V+) has a bug such that writing to the frame
  1726. // buffer during an accelerator operation may cause a hang if
  1727. // you do the write soon enough after starting the blt. (There is
  1728. // a small window of opportunity.) On UP machines, the context
  1729. // switch time seems to be enough to avoid the problem. However,
  1730. // on MP machines, we'll have to disable direct draw.
  1731. //
  1732. // NOTE: We can identify the 765 since it is the only chip with
  1733. // the CAPS_STREAMS_CAPABLE flag.
  1734. if (ppdev->flCaps & CAPS_STREAMS_CAPABLE)
  1735. {
  1736. DWORD numProcessors;
  1737. if (EngQuerySystemAttribute(EngNumberOfProcessors, &numProcessors))
  1738. {
  1739. if (numProcessors != 1)
  1740. {
  1741. DISPDBG((1, "Disabling DDraw for MP 765 box.\n"));
  1742. bDDrawEnabled = FALSE;
  1743. }
  1744. }
  1745. else
  1746. {
  1747. DISPDBG((1, "Can't determine number of processors, so play it "
  1748. "safe and disable DDraw for 765.\n"));
  1749. bDDrawEnabled = FALSE;
  1750. }
  1751. }
  1752. // The stretch and YUV bltter capabilities of the S3 868 and 968 were
  1753. // disabled to account for bug 135541.
  1754. ppdev->flCaps &= ~CAPS_PIXEL_FORMATTER;
  1755. if ((ppdev->flCaps & CAPS_NEW_MMIO) &&
  1756. !(ppdev->flCaps & CAPS_NO_DIRECT_ACCESS) &&
  1757. (bDDrawEnabled))
  1758. {
  1759. pjIoBase = ppdev->pjIoBase;
  1760. // We have to preserve the contents of register 0x69 on the S3's page
  1761. // flip:
  1762. ACQUIRE_CRTC_CRITICAL_SECTION(ppdev);
  1763. OUTP(pjIoBase, CRTC_INDEX, 0x69);
  1764. ppdev->ulExtendedSystemControl3Register_69
  1765. = (INP(pjIoBase, CRTC_DATA) & 0xe0) << 8;
  1766. RELEASE_CRTC_CRITICAL_SECTION(ppdev);
  1767. // Accurately measure the refresh rate for later:
  1768. vGetDisplayDuration(ppdev);
  1769. if (ppdev->flCaps & CAPS_STREAMS_CAPABLE)
  1770. {
  1771. // Query the miniport to get the correct streams parameters
  1772. // for this mode:
  1773. VideoQueryStreamsMode.ScreenWidth = ppdev->cxScreen;
  1774. VideoQueryStreamsMode.BitsPerPel = ppdev->cBitsPerPel;
  1775. VideoQueryStreamsMode.RefreshRate = ppdev->ulRefreshRate;
  1776. if (EngDeviceIoControl(ppdev->hDriver,
  1777. IOCTL_VIDEO_S3_QUERY_STREAMS_PARAMETERS,
  1778. &VideoQueryStreamsMode,
  1779. sizeof(VideoQueryStreamsMode),
  1780. &VideoQueryStreamsParameters,
  1781. sizeof(VideoQueryStreamsParameters),
  1782. &ReturnedDataLength))
  1783. {
  1784. DISPDBG((0, "Miniport reported no streams parameters"));
  1785. ppdev->flCaps &= ~CAPS_STREAMS_CAPABLE;
  1786. }
  1787. else
  1788. {
  1789. ppdev->ulMinOverlayStretch
  1790. = VideoQueryStreamsParameters.MinOverlayStretch;
  1791. ppdev->ulFifoValue
  1792. = VideoQueryStreamsParameters.FifoValue;
  1793. DISPDBG((0, "Refresh rate: %li Minimum overlay stretch: %li.%03li Fifo value: %lx",
  1794. ppdev->ulRefreshRate,
  1795. ppdev->ulMinOverlayStretch / 1000,
  1796. ppdev->ulMinOverlayStretch % 1000,
  1797. ppdev->ulFifoValue));
  1798. }
  1799. }
  1800. else if (ppdev->flCaps & CAPS_PIXEL_FORMATTER)
  1801. {
  1802. // The pixel formatter doesn't work at 24bpp:
  1803. if (ppdev->iBitmapFormat != BMF_24BPP)
  1804. {
  1805. // We'll need a pixel-high scratch area to work around a
  1806. // hardware bug for thin stretches:
  1807. ppdev->pdsurfVideoEngineScratch = pVidMemAllocate(ppdev,
  1808. ppdev->cxMemory,
  1809. 1);
  1810. if (ppdev->pdsurfVideoEngineScratch)
  1811. {
  1812. if (ppdev->cyMemory * ppdev->lDelta <= 0x100000)
  1813. ppdev->dwVEstep = 0x00040004; // If 1MB, 4 bytes/write
  1814. else
  1815. ppdev->dwVEstep = 0x00080008; // If 2MB, 8 bytes/write
  1816. ppdev->flCaps |= CAPS_PIXEL_FORMATTER;
  1817. }
  1818. }
  1819. }
  1820. }
  1821. return(TRUE);
  1822. }
  1823. /******************************Public*Routine******************************\
  1824. * VOID vDisableDirectDraw
  1825. *
  1826. * This function is called by enable.c when the driver is shutting down.
  1827. *
  1828. \**************************************************************************/
  1829. VOID vDisableDirectDraw(
  1830. PDEV* ppdev)
  1831. {
  1832. if (ppdev->pdsurfVideoEngineScratch)
  1833. {
  1834. vVidMemFree(ppdev->pdsurfVideoEngineScratch);
  1835. }
  1836. }