/******************************Module*Header**********************************\
*
*                           **************************
*                           * DirectDraw SAMPLE CODE *
*                           **************************
*
* Module Name: dddownld.c
*
* Content: DirectDraw Blt implementation for sysmem-vidmem blts and clears
*
* Copyright (c) 1994-1999 3Dlabs Inc. Ltd. All rights reserved.
* Copyright (c) 1995-2003 Microsoft Corporation.  All rights reserved.
\*****************************************************************************/

#include "glint.h"
#include "dma.h"
#include "tag.h"

// Number of DWORD copies performed by one expansion of UNROLLED().
#define UNROLL_COUNT 8
// DWORDs sent per tagged block when TLCHIP_GAMMA is false
// (eight UNROLLED() expansions per block).
#define P3_BLOCK_SIZE (UNROLL_COUNT * 8)
// DWORDs sent per tagged block when TLCHIP_GAMMA is true
// (two UNROLLED() expansions per block).
#define GAMMA_BLOCK_SIZE (UNROLL_COUNT * 2)
// Block size in effect, selected by TLCHIP_GAMMA each time it is evaluated.
#define BLOCK_SIZE (DWORD)((TLCHIP_GAMMA)?GAMMA_BLOCK_SIZE:P3_BLOCK_SIZE)
// (BLOCK_SIZE - 1) placed in the upper 16 bits; the download loops OR this
// with the download tag to form the header DWORD of a full block.
#define TAGGED_SIZE ((BLOCK_SIZE - 1) << 16)

// UNROLLED
//
// Copies UNROLL_COUNT consecutive DWORDs from pCurrentLine to dmaPtr, with a
// MEMORY_BARRIER() around every store, decrementing ulTotalImageDWORDs once
// per DWORD copied. Afterwards both pointers are advanced by UNROLL_COUNT and
// CHECK_FIFO(UNROLL_COUNT) is invoked.
//
// The expansion refers to dmaPtr, pCurrentLine and ulTotalImageDWORDs by
// name, so all three must be in scope at the point of use. It expands to a
// plain sequence of statements (no enclosing block), so it must only be used
// where a statement sequence is valid, e.g. inside a braced loop body.
#define UNROLLED() \
    MEMORY_BARRIER(); \
    dmaPtr[0] = pCurrentLine[0]; \
    ulTotalImageDWORDs--; \
    MEMORY_BARRIER(); \
    dmaPtr[1] = pCurrentLine[1]; \
    ulTotalImageDWORDs--; \
    MEMORY_BARRIER(); \
    dmaPtr[2] = pCurrentLine[2]; \
    ulTotalImageDWORDs--; \
    MEMORY_BARRIER(); \
    dmaPtr[3] = pCurrentLine[3]; \
    ulTotalImageDWORDs--; \
    MEMORY_BARRIER(); \
    dmaPtr[4] = pCurrentLine[4]; \
    ulTotalImageDWORDs--; \
    MEMORY_BARRIER(); \
    dmaPtr[5] = pCurrentLine[5]; \
    ulTotalImageDWORDs--; \
    MEMORY_BARRIER(); \
    dmaPtr[6] = pCurrentLine[6]; \
    ulTotalImageDWORDs--; \
    MEMORY_BARRIER(); \
    dmaPtr[7] = pCurrentLine[7]; \
    ulTotalImageDWORDs--; \
    MEMORY_BARRIER(); \
    dmaPtr += UNROLL_COUNT; \
    CHECK_FIFO(UNROLL_COUNT); \
    pCurrentLine += UNROLL_COUNT;

//-----------------------------------------------------------------------------
//
// _DD_P3Download
//
//
// Function to do an image download to the rectangular region.
// Uses the packed bit on Permedia to do the packing for us.
// //----------------------------------------------------------------------------- void _DD_P3Download( P3_THUNKEDDATA* pThisDisplay, FLATPTR pSrcfpVidMem, FLATPTR pDestfpVidMem, DWORD dwSrcChipPatchMode, DWORD dwDestChipPatchMode, DWORD dwSrcPitch, DWORD dwDestPitch, DWORD dwDestPixelPitch, DWORD dwDestPixelSize, RECTL* rSrc, RECTL* rDest) { // Work out pixel offset into the framestore of the rendered surface ULONG ulSCRoundedUpDWords; ULONG ulSCWholeDWords, ulSCDWordsCnt, ulSCExtraBytes; ULONG ulTotalImageDWORDs; DWORD SrcOffset; DWORD DstOffset; ULONG ulImageLines; ULONG count; ULONG renderData; DWORD dwDownloadTag; int rDestleft, rDesttop, rSrcleft, rSrctop; RECTL rNewDest; P3_DMA_DEFS(); // Because of a bug in RL we sometimes have to fiddle with these values rSrctop = rSrc->top; rSrcleft = rSrc->left; rDesttop = rDest->top; rDestleft = rDest->left; // Fix coords origin if(!_DD_BLT_FixRectlOrigin("_DD_P3Download", rSrc, rDest)) { // Nothing to be blitted return; } SrcOffset = (DWORD)(rSrc->left << dwDestPixelSize) + (rSrc->top * dwSrcPitch); DstOffset = (DWORD)(rDest->left << dwDestPixelSize) + (rDest->top * dwDestPitch); ulSCRoundedUpDWords = rDest->right - rDest->left; ulImageLines = rDest->bottom - rDest->top; P3_DMA_GET_BUFFER(); P3_ENSURE_DX_SPACE(16); WAIT_FIFO(16); SEND_P3_DATA(FBWriteBufferAddr0, (DWORD)(pDestfpVidMem - pThisDisplay->dwScreenFlatAddr) ); SEND_P3_DATA(FBWriteBufferWidth0, dwDestPixelPitch); SEND_P3_DATA(FBWriteBufferOffset0, (rDest->top << 16) | (rDest->left & 0xFFFF)); SEND_P3_DATA(LogicalOpMode, 7); SEND_P3_DATA(PixelSize, (2 - dwDestPixelSize)); SEND_P3_DATA(FBWriteMode, P3RX_FBWRITEMODE_WRITEENABLE(__PERMEDIA_ENABLE) | P3RX_FBWRITEMODE_LAYOUT0(P3RX_LAYOUT_LINEAR)); SEND_P3_DATA(FBDestReadMode, P3RX_FBDESTREAD_READENABLE(__PERMEDIA_DISABLE) | P3RX_FBDESTREAD_LAYOUT0(P3RX_LAYOUT_LINEAR)); dwDownloadTag = Color_Tag; rNewDest = *rDest; DISPDBG((DBGLVL, "Image download %dx%d", ulSCRoundedUpDWords, ulImageLines)); // ulSCWholeDWords is the 
number of whole DWORDs along each scanline // ulSCExtraBytes is the number of extra BYTEs at the end of each scanline // ulSCRoundedUpDWords is the size of each scanline rounded up to DWORDs if (dwDestPixelSize != __GLINT_32BITPIXEL) { if (dwDestPixelSize == __GLINT_8BITPIXEL) { ulSCExtraBytes = ulSCRoundedUpDWords & 3; ulSCWholeDWords = ulSCRoundedUpDWords >> 2; ulSCRoundedUpDWords = (ulSCRoundedUpDWords + 3) >> 2; if (dwDownloadTag != Color_Tag) { rNewDest.right = rNewDest.left + (ulSCRoundedUpDWords << 2); } } else { ulSCExtraBytes = (ulSCRoundedUpDWords & 1) << 1; ulSCWholeDWords = ulSCRoundedUpDWords >> 1; ulSCRoundedUpDWords = (ulSCRoundedUpDWords + 1) >> 1; if (dwDownloadTag != Color_Tag) { rNewDest.right = rNewDest.left + (ulSCRoundedUpDWords << 1); } } } else { ulSCExtraBytes = 0; ulSCWholeDWords = ulSCRoundedUpDWords; } // Calc the total number of image DWORDs to send to GPU ulTotalImageDWORDs = ulImageLines * ulSCWholeDWords; P3_ENSURE_DX_SPACE(20); WAIT_FIFO(20); SEND_P3_DATA(FBSourceReadMode, P3RX_FBSOURCEREAD_READENABLE(__PERMEDIA_DISABLE) | P3RX_FBSOURCEREAD_LAYOUT(dwSrcChipPatchMode)); SEND_P3_DATA(RectanglePosition, 0); if (dwDownloadTag == Color_Tag) { renderData = P3RX_RENDER2D_WIDTH((rNewDest.right - rNewDest.left) & 0xfff ) | P3RX_RENDER2D_HEIGHT((rNewDest.bottom - rNewDest.top ) & 0xfff ) | P3RX_RENDER2D_OPERATION( P3RX_RENDER2D_OPERATION_SYNC_ON_HOST_DATA ) | P3RX_RENDER2D_SPANOPERATION( P3RX_RENDER2D_SPAN_VARIABLE ) | P3RX_RENDER2D_INCREASINGX( __PERMEDIA_ENABLE ) | P3RX_RENDER2D_INCREASINGY( __PERMEDIA_ENABLE ); SEND_P3_DATA(Render2D, renderData); } else { // Don't use spans for the unpacking scheme, but use the 2D Setup // unit to do the work of setting up the destination SEND_P3_DATA(ScissorMinXY, 0); SEND_P3_DATA(ScissorMaxXY, P3RX_SCISSOR_X_Y(rDest->right, rDest->bottom)); SEND_P3_DATA(ScissorMode, P3RX_SCISSORMODE_USER(__PERMEDIA_ENABLE)); renderData = P3RX_RENDER2D_WIDTH( (rNewDest.right - rNewDest.left) & 0xfff ) | 
P3RX_RENDER2D_HEIGHT( 0 ) | P3RX_RENDER2D_OPERATION( P3RX_RENDER2D_OPERATION_SYNC_ON_HOST_DATA ) | P3RX_RENDER2D_INCREASINGX( __PERMEDIA_ENABLE ) | P3RX_RENDER2D_INCREASINGY( __PERMEDIA_ENABLE ); SEND_P3_DATA(Render2D, renderData); SEND_P3_DATA(Count, rDest->bottom - rDest->top ); SEND_P3_DATA(Render, __RENDER_TRAPEZOID_PRIMITIVE | __RENDER_SYNC_ON_HOST_DATA); } P3_ENSURE_DX_SPACE(32); WAIT_FIFO(32); __try { BYTE *pSurfaceData = (BYTE *)pSrcfpVidMem + SrcOffset; UNALIGNED DWORD *pCurrentLine = (DWORD *)pSurfaceData; while (ulImageLines-- > 0) { DISPDBG((DBGLVL, "Image download lines %d", ulImageLines)); // Initialize the number of DWORDs counter ulSCDWordsCnt = ulSCWholeDWords; // Send the texels in DWORDS while (ulSCDWordsCnt >= BLOCK_SIZE) { P3_ENSURE_DX_SPACE(BLOCK_SIZE + 1); WAIT_FIFO(BLOCK_SIZE + 1); ADD_FUNNY_DWORD(TAGGED_SIZE | dwDownloadTag); for (count = BLOCK_SIZE / UNROLL_COUNT; count; count--) { DISPDBG((DBGLVL, "Image download count %d", count)); UNROLLED(); } ulSCDWordsCnt -= BLOCK_SIZE; } // Finish off the rest of the whole DWORDs on the scanline if (ulSCDWordsCnt) { P3_ENSURE_DX_SPACE(ulSCDWordsCnt + 1); WAIT_FIFO(ulSCDWordsCnt + 1); ADD_FUNNY_DWORD(((ulSCDWordsCnt - 1) << 16) | dwDownloadTag); for (count = 0; count < ulSCDWordsCnt; count++, pCurrentLine++) { ADD_FUNNY_DWORD(*pCurrentLine); ulTotalImageDWORDs--; } } // Finish off the extra bytes at the end of the scanline if (ulSCExtraBytes) { DWORD dwTemp; P3_ENSURE_DX_SPACE(1 + 1); // 1 tag + 1 data DWORD WAIT_FIFO(1 + 1); ADD_FUNNY_DWORD(dwDownloadTag); memcpy(&dwTemp, pCurrentLine, ulSCExtraBytes); ADD_FUNNY_DWORD(dwTemp); ulTotalImageDWORDs--; } pSurfaceData += dwSrcPitch; pCurrentLine = (DWORD*)pSurfaceData; } } __except(EXCEPTION_EXECUTE_HANDLER) { DISPDBG((ERRLVL, "Perm3 caused exception at line %u of file %s", __LINE__,__FILE__)); // Send enough DWORDs to the GPU to avoid deadlock for (count = 0; count < ulTotalImageDWORDs; count++) { ADD_FUNNY_DWORD(dwDownloadTag); ADD_FUNNY_DWORD(0); // 
Dummy pixel data } } P3_ENSURE_DX_SPACE(4); WAIT_FIFO(4); SEND_P3_DATA(WaitForCompletion, 0); SEND_P3_DATA(ScissorMode, __PERMEDIA_DISABLE); // Put back the values if we changed them. rSrc->top = rSrctop; rSrc->left = rSrcleft; rDest->top = rDesttop; rDest->left = rDestleft; P3_DMA_COMMIT_BUFFER(); } // _DD_P3Download //----------------------------------------------------------------------------- // // _DD_P3DownloadDD // // // Function to do an image download to the rectangular region. // Uses the packed bit on Permedia to do the packing for us. // //----------------------------------------------------------------------------- void _DD_P3DownloadDD( P3_THUNKEDDATA* pThisDisplay, LPDDRAWI_DDRAWSURFACE_LCL pSource, LPDDRAWI_DDRAWSURFACE_LCL pDest, P3_SURF_FORMAT* pFormatSource, P3_SURF_FORMAT* pFormatDest, RECTL* rSrc, RECTL* rDest) { _DD_P3Download(pThisDisplay, pSource->lpGbl->fpVidMem, pDest->lpGbl->fpVidMem, P3RX_LAYOUT_LINEAR, // src P3RX_LAYOUT_LINEAR, // dst, pSource->lpGbl->lPitch, pDest->lpGbl->lPitch, DDSurf_GetPixelPitch(pDest), DDSurf_GetChipPixelSize(pDest), rSrc, rDest); } // _DD_P3DownloadDD //----------------------------------------------------------------------------- // // _DD_P3DownloadDstCh // // Function to do an image download to the rectangular region. // Uses the packed bit on Permedia to do the packing for us. 
// //----------------------------------------------------------------------------- void _DD_P3DownloadDstCh( P3_THUNKEDDATA* pThisDisplay, LPDDRAWI_DDRAWSURFACE_LCL pSource, LPDDRAWI_DDRAWSURFACE_LCL pDest, P3_SURF_FORMAT* pFormatSource, P3_SURF_FORMAT* pFormatDest, LPDDHAL_BLTDATA lpBlt, RECTL* rSrc, RECTL* rDest) { // Work out pixel offset into the framestore of the rendered surface ULONG ulSCRoundedUpDWords; ULONG ulSCWholeDWords, ulSCDWordsCnt, ulSCExtraBytes; ULONG ulTotalImageDWORDs; DWORD SrcOffset; ULONG ulImageLines; ULONG count; ULONG renderData; DWORD dwDownloadTag; int rDestleft, rDesttop, rSrcleft, rSrctop; RECTL rNewDest; BOOL bDstKey = FALSE; P3_DMA_DEFS(); // Because of a bug in RL we sometimes have to fiddle with these values rSrctop = rSrc->top; rSrcleft = rSrc->left; rDesttop = rDest->top; rDestleft = rDest->left; // Fix coords origin if(!_DD_BLT_FixRectlOrigin("_DD_P3DownloadDstCh", rSrc, rDest)) { // Nothing to be blitted return; } SrcOffset = (DWORD)(rSrc->left << DDSurf_GetChipPixelSize(pDest)) + (rSrc->top * pSource->lpGbl->lPitch); ulSCRoundedUpDWords = rDest->right - rDest->left; ulImageLines = rDest->bottom - rDest->top; P3_DMA_GET_BUFFER(); P3_ENSURE_DX_SPACE(32); WAIT_FIFO(32); SEND_P3_DATA(FBWriteBufferAddr0, DDSurf_SurfaceOffsetFromMemoryBase(pThisDisplay, pDest)); SEND_P3_DATA(FBWriteBufferWidth0, DDSurf_GetPixelPitch(pDest)); SEND_P3_DATA(FBWriteBufferOffset0, (rDest->top << 16) | (rDest->left & 0xFFFF)); SEND_P3_DATA(LogicalOpMode, 7); SEND_P3_DATA(PixelSize, (2 - DDSurf_GetChipPixelSize(pDest))); if (lpBlt->dwFlags & DDBLT_KEYDESTOVERRIDE) { bDstKey = TRUE; // Dest keying. // The conventional chroma test is set up to key off the dest - the framebuffer. 
SEND_P3_DATA(ChromaTestMode, P3RX_CHROMATESTMODE_ENABLE(__PERMEDIA_ENABLE) | P3RX_CHROMATESTMODE_SOURCE(P3RX_CHROMATESTMODE_SOURCE_FBDATA) | P3RX_CHROMATESTMODE_PASSACTION(P3RX_CHROMATESTMODE_ACTION_PASS) | P3RX_CHROMATESTMODE_FAILACTION(P3RX_CHROMATESTMODE_ACTION_REJECT) ); SEND_P3_DATA(ChromaLower, lpBlt->bltFX.ddckDestColorkey.dwColorSpaceLowValue); SEND_P3_DATA(ChromaUpper, lpBlt->bltFX.ddckDestColorkey.dwColorSpaceHighValue); // The source buffer is the source for the destination color key SEND_P3_DATA(FBSourceReadBufferAddr, DDSurf_SurfaceOffsetFromMemoryBase(pThisDisplay, pDest)); SEND_P3_DATA(FBSourceReadBufferWidth, DDSurf_GetPixelPitch(pDest)); SEND_P3_DATA(FBSourceReadBufferOffset, (rDest->top << 16) | (rDest->left & 0xFFFF)); // Enable source reads to get the colorkey color SEND_P3_DATA(FBSourceReadMode, P3RX_FBSOURCEREAD_READENABLE(__PERMEDIA_ENABLE) | P3RX_FBSOURCEREAD_LAYOUT(P3RX_LAYOUT_LINEAR)); } else { SEND_P3_DATA(FBSourceReadMode, P3RX_FBSOURCEREAD_READENABLE(__PERMEDIA_DISABLE)); } SEND_P3_DATA(FBWriteMode, P3RX_FBWRITEMODE_WRITEENABLE(__PERMEDIA_ENABLE) | P3RX_FBWRITEMODE_LAYOUT0(P3RX_LAYOUT_LINEAR)); SEND_P3_DATA(FBDestReadMode, P3RX_FBDESTREAD_READENABLE(__PERMEDIA_DISABLE) | P3RX_FBDESTREAD_LAYOUT0(P3RX_LAYOUT_LINEAR)); // This dest-colorkey download always needs to send unpacked color data // because it can't use spans. 
SEND_P3_DATA(DownloadTarget, Color_Tag); switch (DDSurf_GetChipPixelSize(pDest)) { case __GLINT_8BITPIXEL: dwDownloadTag = Packed8Pixels_Tag; break; case __GLINT_16BITPIXEL: dwDownloadTag = Packed16Pixels_Tag; break; default: dwDownloadTag = Color_Tag; break; } rNewDest = *rDest; DISPDBG((DBGLVL, "Image download %dx%d", ulSCRoundedUpDWords, ulImageLines)); if (DDSurf_GetChipPixelSize(pDest) != __GLINT_32BITPIXEL) { if (DDSurf_GetChipPixelSize(pDest) == __GLINT_8BITPIXEL) { ulSCExtraBytes = ulSCRoundedUpDWords & 3; ulSCWholeDWords = ulSCRoundedUpDWords >> 2; ulSCRoundedUpDWords = (ulSCRoundedUpDWords + 3) >> 2; if (dwDownloadTag != Color_Tag) { rNewDest.right = rNewDest.left + (ulSCRoundedUpDWords << 2); } } else { ulSCExtraBytes = (ulSCRoundedUpDWords & 1) << 1; ulSCWholeDWords = ulSCRoundedUpDWords >> 1; ulSCRoundedUpDWords = (ulSCRoundedUpDWords + 1) >> 1; if (dwDownloadTag != Color_Tag) { rNewDest.right = rNewDest.left + (ulSCRoundedUpDWords << 1); } } } // Calc the total number of image DWORDs to send to GPU ulTotalImageDWORDs = ulImageLines * ulSCWholeDWords; P3_ENSURE_DX_SPACE(32); WAIT_FIFO(32); SEND_P3_DATA(RectanglePosition, 0); // Don't use spans for the unpacking scheme, but use the 2D Setup // unit to do the work of setting up the destination SEND_P3_DATA(ScissorMinXY, 0 ) SEND_P3_DATA(ScissorMaxXY,P3RX_SCISSOR_X_Y(rDest->right - rDest->left , rDest->bottom - rDest->top )); SEND_P3_DATA(ScissorMode, P3RX_SCISSORMODE_USER(__PERMEDIA_ENABLE)); renderData = P3RX_RENDER2D_WIDTH( (rNewDest.right - rNewDest.left) & 0xfff ) | P3RX_RENDER2D_HEIGHT( 0 ) | P3RX_RENDER2D_OPERATION( P3RX_RENDER2D_OPERATION_SYNC_ON_HOST_DATA ) | P3RX_RENDER2D_INCREASINGX( __PERMEDIA_ENABLE ) | P3RX_RENDER2D_INCREASINGY( __PERMEDIA_ENABLE ) | P3RX_RENDER2D_FBREADSOURCEENABLE((bDstKey ? 
__PERMEDIA_ENABLE : __PERMEDIA_DISABLE)); SEND_P3_DATA(Render2D, renderData); SEND_P3_DATA(Count, rNewDest.bottom - rNewDest.top ); SEND_P3_DATA(Render, P3RX_RENDER_PRIMITIVETYPE(P3RX_RENDER_PRIMITIVETYPE_TRAPEZOID) | P3RX_RENDER_SYNCONHOSTDATA(__PERMEDIA_ENABLE) | P3RX_RENDER_FBSOURCEREADENABLE((bDstKey ? __PERMEDIA_ENABLE : __PERMEDIA_DISABLE))); _try { BYTE *pSurfaceData = (BYTE *)pSource->lpGbl->fpVidMem + SrcOffset; DWORD *pCurrentLine = (DWORD *)pSurfaceData; while (ulImageLines-- > 0) { DISPDBG((DBGLVL, "Image download lines %d", ulImageLines)); // Initialize the number of DWORDs counter ulSCDWordsCnt = ulSCWholeDWords; // Send the texels in DWORDS while (ulSCDWordsCnt >= BLOCK_SIZE) { P3_ENSURE_DX_SPACE(BLOCK_SIZE + 1); WAIT_FIFO(BLOCK_SIZE + 1); ADD_FUNNY_DWORD(TAGGED_SIZE | dwDownloadTag); for (count = BLOCK_SIZE / UNROLL_COUNT; count; count--) { DISPDBG((DBGLVL, "Image download count %d", count)); UNROLLED(); } ulSCDWordsCnt -= BLOCK_SIZE; } // Finish off the rest of the whole DWORDs on the scanline if (ulSCDWordsCnt) { P3_ENSURE_DX_SPACE(ulSCDWordsCnt + 1); WAIT_FIFO(ulSCDWordsCnt + 1); ADD_FUNNY_DWORD(((ulSCDWordsCnt - 1) << 16) | dwDownloadTag); for (count = 0; count < ulSCDWordsCnt; count++, pCurrentLine++) { ADD_FUNNY_DWORD(*pCurrentLine); ulTotalImageDWORDs--; } } // Finish off the extra bytes at the end of the scanline if (ulSCExtraBytes) { DWORD dwTemp; P3_ENSURE_DX_SPACE(1 + 1); // 1 tag + 1 data DWORD WAIT_FIFO(1 + 1); ADD_FUNNY_DWORD(dwDownloadTag); memcpy(&dwTemp, pCurrentLine, ulSCExtraBytes); ADD_FUNNY_DWORD(dwTemp); ulTotalImageDWORDs--; } pSurfaceData += pSource->lpGbl->lPitch; pCurrentLine = (DWORD*)pSurfaceData; } } __except(EXCEPTION_EXECUTE_HANDLER) { DISPDBG((ERRLVL, "Perm3 caused exception at line %u of file %s", __LINE__,__FILE__)); // Send enough DWORDs to the GPU to avoid deadlock for (count = 0; count < ulTotalImageDWORDs; count++) { ADD_FUNNY_DWORD(dwDownloadTag); ADD_FUNNY_DWORD(0); // Dummy pixel data } } 
P3_ENSURE_DX_SPACE(6); WAIT_FIFO(6); SEND_P3_DATA(WaitForCompletion, 0); SEND_P3_DATA(ScissorMode, __PERMEDIA_DISABLE); SEND_P3_DATA(FBSourceReadMode, __PERMEDIA_DISABLE); // Put back the values if we changed them. rSrc->top = rSrctop; rSrc->left = rSrcleft; rDest->top = rDesttop; rDest->left = rDestleft; P3_DMA_COMMIT_BUFFER(); } //_DD_P3DownloadDstCh