/******************************Module*Header*******************************\
* Module Name: px_fast.c                                                   *
*                                                                          *
* Fast special case code for the pixel routines                            *
*                                                                          *
* Created: 10-Oct-1995                                                     *
* Author: Drew Bliss [drewb]                                               *
*                                                                          *
* Copyright (c) 1995 Microsoft Corporation                                 *
\**************************************************************************/

#include "precomp.h"
#pragma hdrstop

#include <gencx.h>
#include <devlock.h>

#include "px_fast.h"

#ifdef NT

// Color rescaling table for [0,255] -> [0,7]
// Entry i is (i*14+255)/510 in integer arithmetic, which matches the
// OpenGL conversion of i*7.0/255.0+0.5 truncated to an integer.
// DrawRgbPixels uses it to reduce 8-bit red and green components
// when building 8bpp pixels.
static GLubyte ab255to7[256] =
{
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
};

// Color rescaling table for [0,255] -> [0,3]
// Built the same way as the [0,7] table; the entries match
// (i*6+255)/510 in integer arithmetic, i.e. i*3.0/255.0+0.5 truncated.
// DrawRgbPixels uses it to reduce the 8-bit blue component when
// building 8bpp pixels.
static GLubyte ab255to3[256] =
{
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
};

// Expansion table for [0,7] -> [0,255]
// Entry i is i*255/7 using truncating integer division.
// ReadRgbPixels indexes it with the red and green fields of an 8bpp pixel.
static GLubyte ab7to255[8] =
{
    0,      // 0
    36,     // 1
    72,     // 2
    109,    // 3
    145,    // 4
    182,    // 5
    218,    // 6
    255     // 7
};

// Expansion table for [0,3] -> [0,255]
// Entry i is i*255/3.
// ReadRgbPixels indexes it with the blue field of an 8bpp pixel.
static GLubyte ab3to255[4] =
{
    0,      // 0
    85,     // 1
    170,    // 2
    255     // 3
};

/******************************Public*Routine******************************\
*
* DrawRgbPixels
*
* Special case of glDrawPixels for GL_RGB with straight data copy
*
* Handles GL_RGB, GL_BGR_EXT and GL_BGRA_EXT source data on surfaces
* with at least 8 color bits.  DWORD-aligned BGR/BGRA data on surfaces
* deeper than 8bpp is handed straight to SetDIBitsToDevice; everything
* else is repacked into a temporary DIB section which is then blitted
* to the draw buffer's DC.  8bpp destinations are color-reduced here
* using the ab255to7/ab255to3 tables.
*
* Parameters:
*  gc        - context; also accessed as a __GLGENcontext
*  spanInfo  - source image description (format, alignment, line length,
*              skip values) and destination start position and size
*
* Returns:
*  GL_TRUE if the pixels were drawn.  GL_FALSE if the surface has fewer
*  than 8 color bits, no lock is held, or an allocation or GDI call
*  failed.
*
* History:
*  Tue Oct 10 18:43:04 1995	-by-	Drew Bliss [drewb]
*   Created
*
\**************************************************************************/

GLboolean DrawRgbPixels(__GLcontext *gc, __GLpixelSpanInfo *spanInfo)
{
    HDC hdc = NULL;
    HBITMAP hbm = NULL;
    __GLcolorBuffer *cfb;
    // Header plus room for the three BI_BITFIELDS masks
    BYTE abBitmapInfo[sizeof(BITMAPINFO)+2*sizeof(RGBQUAD)];
    BITMAPINFO *pbmi = NULL;
    BITMAPINFOHEADER *pbmih;
    BYTE *pbBits, *pbSrc, *pbDst;
    int x, y, xDst, yDst;
    GLboolean bFail = GL_TRUE;
    int cbSrcLine, cbSrcExtra, cbDstExtra, cbSrcElement, cbDstElement, cbWidth;
    int cBits;
    __GLGENcontext *gengc;
    HPALETTE hpal;

#if 0
    DbgPrint("DrawRgbPixels\n");
#endif

    gengc = (__GLGENcontext *)gc;
    cBits = gengc->CurrentFormat.cColorBits;
    
    // Don't bother with 4bpp because of problems with color reduction
    // The same problems occur in 8bpp but there is special case code
    // to handle it.
    if (cBits < 8)
    {
        return GL_FALSE;
    }

    // If there is no lock, we must have failed to reacquire the lock
    // from some previous call.  This is an error condition
    // and we should not continue.

    if (gengc->ulLockType == NO_LOCK)
    {
        WARNING("DrawRgbPixels: No lock\n");
        return GL_FALSE;
    }

    // We need to drop any DCI access we have because we're making
    // GDI calls.  From here on every exit must go through EH_Fail
    // so that the access is regrabbed.
    glsrvReleaseDci(gengc, (GLGENwindow *)gengc->pwo);

    cfb = gc->drawBuffer;

    // Determine buffer coordinates
    xDst = __GL_UNBIAS_X(gc, spanInfo->startCol);
    yDst = __GL_UNBIAS_Y(gc, spanInfo->startRow)-spanInfo->height+1;

    if (cBits == 8)
    {
        // 8bpp needs a full 256-entry color table, which is too large
        // for the stack buffer
        pbmi = (BITMAPINFO *)__wglTempAlloc(gc, sizeof(BITMAPINFO)+
                                            255*sizeof(RGBQUAD));
        if (pbmi == NULL)
        {
            goto EH_Fail;
        }
    }
    else
    {
        pbmi = (BITMAPINFO *)abBitmapInfo;
    }
        
    pbmih = &pbmi->bmiHeader;
    pbmih->biSize = sizeof(BITMAPINFOHEADER);
    // Start out setting the width to the line length to describe
    // the actual data coming in
    pbmih->biWidth = spanInfo->srcLineLength;
    pbmih->biHeight = spanInfo->height;
    pbmih->biPlanes = 1;
    
    if (cBits == 8)
    {
        int i;
        RGBQUAD rqTmp;
        
        // If the destination is 8bpp then we do the color
        // reduction ourselves.  In this case we want to create
        // an 8bpp DIB whose color table matches the destination
        pbmih->biBitCount = 8;
        pbmih->biCompression = BI_RGB;

        hpal = GetCurrentObject(CURRENT_DC, OBJ_PAL);
        if (hpal == NULL)
        {
            goto EH_Fail;
        }

        if (GetPaletteEntries(hpal, 0, 256,
                              (LPPALETTEENTRY)pbmi->bmiColors) != 256)
        {
            goto EH_Fail;
        }

        // The entries were written in PALETTEENTRY layout; swap the
        // red and blue bytes to get RGBQUAD layout and clear the
        // flags byte
        for (i = 0; i < 256; i++)
        {
            rqTmp = pbmi->bmiColors[i];
            pbmi->bmiColors[i].rgbRed = rqTmp.rgbBlue;
            pbmi->bmiColors[i].rgbBlue = rqTmp.rgbRed;
            pbmi->bmiColors[i].rgbReserved = 0;
        }

        cbDstElement = 1;
    }
    else
    {
        if (spanInfo->srcFormat == GL_BGRA_EXT)
        {
            pbmih->biBitCount = 32;
            pbmih->biCompression = BI_BITFIELDS;
            *((DWORD *)pbmi->bmiColors+0) = 0xff0000;
            *((DWORD *)pbmi->bmiColors+1) = 0xff00;
            *((DWORD *)pbmi->bmiColors+2) = 0xff;
            cbDstElement = 4;
        }
        else
        {
            pbmih->biBitCount = 24;
            pbmih->biCompression = BI_RGB;
            cbDstElement = 3;
        }
    }
    
    pbmih->biSizeImage = 0;
    pbmih->biXPelsPerMeter = 0;
    pbmih->biYPelsPerMeter = 0;
    pbmih->biClrUsed = 0;
    pbmih->biClrImportant = 0;
    
    // For GL_BGR_EXT and GL_BGRA_EXT we can use the data directly if
    // it is laid out in memory like a DIB.  The key thing to check
    // is that scanlines are DWORD aligned.
    // If we can't use the data directly, fall back on the DIB section
    // method which works for anything
    if (cBits > 8 &&
        (spanInfo->srcFormat == GL_BGR_EXT ||
         spanInfo->srcFormat == GL_BGRA_EXT) &&
        spanInfo->srcAlignment == 4)
    {
        if (SetDIBitsToDevice(CURRENT_DC, xDst, yDst,
                              spanInfo->width, spanInfo->height,
                              spanInfo->srcSkipPixels, spanInfo->srcSkipLines,
                              0, spanInfo->height, spanInfo->srcImage,
                              pbmi, DIB_RGB_COLORS) == 0)
        {
            goto EH_Fail;
        }
        
        // Success; EH_Fail is also the common cleanup path
        bFail = GL_FALSE;
        goto EH_Fail;
    }

    // Create a DIB section of the appropriate height and width
    // We originally set the BITMAPINFO width to the width of the
    // incoming data, but we only need to create a DIB section
    // as large as the data we're going to copy so reset the
    // width
    pbmih->biWidth = spanInfo->width;

    hdc = CreateCompatibleDC(CURRENT_DC);
    if (hdc == NULL)
    {
        goto EH_Fail;
    }

    hbm = CreateDIBSection(hdc, pbmi, DIB_RGB_COLORS,
                           &pbBits, NULL, 0);
    if (hbm == NULL)
    {
        goto EH_Fail;
    }

    if (SelectObject(hdc, hbm) == NULL)
    {
        goto EH_Fail;
    }

    // Copy the input data to the DIB's contents, possibly swapping R and B,
    // plus skipping any appropriate data, fixing up alignment and
    // obeying the line length

    if (spanInfo->srcFormat == GL_BGRA_EXT)
    {
        cbSrcElement = 4;
    }
    else
    {
        cbSrcElement = 3;
    }
    
    // Full source line size, rounded up to the source alignment
    cbSrcLine = spanInfo->srcLineLength*cbSrcElement;
    cbSrcExtra = cbSrcLine % spanInfo->srcAlignment;
    if (cbSrcExtra != 0)
    {
        cbSrcExtra = spanInfo->srcAlignment-cbSrcExtra;
        cbSrcLine += cbSrcExtra;
    }
    // Bytes to skip after the copied pixels to reach the next source line
    cbSrcExtra = cbSrcLine - spanInfo->width*cbSrcElement;

    // DIB scanlines are padded to a DWORD boundary
    cbDstExtra = spanInfo->width*cbDstElement & 3;
    if (cbDstExtra != 0)
    {
        cbDstExtra = 4-cbDstExtra;
    }
    
    pbSrc = (BYTE *)spanInfo->srcImage+
        spanInfo->srcSkipPixels*cbSrcElement+
        spanInfo->srcSkipLines*cbSrcLine;
    pbDst = pbBits;

    if (cBits == 8)
    {
        // For 8bpp destinations we need to perform the color reduction
        // ourselves because GDI's reduction doesn't match OpenGL's.
        // GDI does a closest-match-in-palette for each pixel, while
        // OpenGL does a rescaling of the color range plus rounding
        
        switch(spanInfo->srcFormat)
        {
        case GL_RGB:
            for (y = 0; y < spanInfo->height; y++)
            {
                for (x = 0; x < spanInfo->width; x++)
                {
                    *pbDst++ =
                        (ab255to3[*(pbSrc+2)] << cfb->blueShift) |
                        (ab255to7[*(pbSrc+1)] << cfb->greenShift) |
                        (ab255to7[*(pbSrc+0)] << cfb->redShift);
                    pbSrc += 3;
                }
                
                pbSrc += cbSrcExtra;
                pbDst += cbDstExtra;
            }
            break;
            
        case GL_BGR_EXT:
        case GL_BGRA_EXT:
            for (y = 0; y < spanInfo->height; y++)
            {
                for (x = 0; x < spanInfo->width; x++)
                {
                    *pbDst++ =
                        (ab255to3[*(pbSrc+0)] << cfb->blueShift) |
                        (ab255to7[*(pbSrc+1)] << cfb->greenShift) |
                        (ab255to7[*(pbSrc+2)] << cfb->redShift);
                    pbSrc += cbSrcElement;
                }
                
                pbSrc += cbSrcExtra;
                pbDst += cbDstExtra;
            }
            break;
        }
    }
    else
    {
        // Source and DIB elements are the same size in the BGR/BGRA
        // cases, so this is the byte count of one row of pixels in both
        cbWidth = spanInfo->width*cbSrcElement;

        switch(spanInfo->srcFormat)
        {
        case GL_RGB:
            // Swap R and B to produce the DIB's BGR byte order
            for (y = 0; y < spanInfo->height; y++)
            {
                for (x = 0; x < spanInfo->width; x++)
                {
                    *pbDst++ = *(pbSrc+2);
                    *pbDst++ = *(pbSrc+1);
                    *pbDst++ = *pbSrc;
                    pbSrc += 3;
                }
                
                pbSrc += cbSrcExtra;
                pbDst += cbDstExtra;
            }
            break;
            
        case GL_BGR_EXT:
        case GL_BGRA_EXT:
            if (cbSrcExtra == 0 && cbDstExtra == 0)
            {
                // Both sides are tightly packed so one copy does it all
                CopyMemory(pbDst, pbSrc, cbWidth*spanInfo->height);
            }
            else
            {
                for (y = 0; y < spanInfo->height; y++)
                {
                    CopyMemory(pbDst, pbSrc, cbWidth);
                    // CopyMemory doesn't advance the pointers, so step
                    // over the copied pixels as well as the padding
                    pbSrc += cbWidth+cbSrcExtra;
                    pbDst += cbWidth+cbDstExtra;
                }
            }
            break;
        }
    }

    // Copy the DIB to the buffer
    bFail = !BitBlt(CURRENT_DC, xDst, yDst, spanInfo->width, spanInfo->height,
                    hdc, 0, 0, SRCCOPY);

 EH_Fail:
    // Common cleanup for both success and failure
    if (hdc != NULL)
    {
        DeleteDC(hdc);
    }
    if (hbm != NULL)
    {
        DeleteObject(hbm);
    }
    if (pbmi != NULL && pbmi != (BITMAPINFO *)abBitmapInfo)
    {
        __wglTempFree(gc, pbmi);
    }

    // Must regrab lock.  There's not much we can do if this fails
    glsrvGrabDci(gengc, (GLGENwindow *)gengc->pwo);
    
    return !bFail;
}

/******************************Public*Routine******************************\
*
* StoreZPixels
*
* Special case of glDrawPixels for GL_DEPTH_COMPONENTs going directly
* into the Z buffer with no color buffer modification.
*
* Source elements are GL_UNSIGNED_SHORT or GL_UNSIGNED_INT.  Each value
* is shifted to fit the depth buffer's element size (sizeof(__GLzValue)
* or sizeof(__GLz16Value)) and stored with no other processing.
*
* Parameters:
*  gc        - context owning the depth buffer
*  spanInfo  - source image description (type, alignment, line length,
*              skip values) and destination start position and size
*
* Returns:
*  GL_TRUE once the values have been stored.  GL_FALSE if srcType is
*  not one of the two supported types.
*
* History:
*  Tue Oct 10 18:43:36 1995	-by-	Drew Bliss [drewb]
*   Created
*
\**************************************************************************/

GLboolean StoreZPixels(__GLcontext *gc, __GLpixelSpanInfo *spanInfo)
{
    __GLdepthBuffer *fb;
    BYTE *pbSrc, *pbDst;
    int x, y;
    int cbElement, cbSrcLine, cbSrcExtra, cbDstExtra;

#if 0
    DbgPrint("StoreZPixels\n");
#endif
    
    fb = &gc->depthBuffer;
    
    // Copy the input data to the depth buffer,
    // skipping any appropriate data, fixing up alignment and
    // obeying the line length

    switch(spanInfo->srcType)
    {
    case GL_UNSIGNED_SHORT:
        cbElement = 2;
        break;
    case GL_UNSIGNED_INT:
        cbElement = 4;
        break;
        
    default:
        ASSERTOPENGL(0, "StoreZPixels: Unknown srcType\n");
        // Don't carry on with an undefined element size
        return GL_FALSE;
    }
    
    // Full source line size, rounded up to the source alignment
    cbSrcLine = spanInfo->srcLineLength*cbElement;
    cbSrcExtra = cbSrcLine % spanInfo->srcAlignment;
    if (cbSrcExtra != 0)
    {
        cbSrcExtra = spanInfo->srcAlignment-cbSrcExtra;
        cbSrcLine += cbSrcExtra;
    }
    // Bytes to skip after the copied elements to reach the next source
    // line.  This covers both the alignment padding and any elements
    // beyond the copied width when the line is longer than the width.
    cbSrcExtra = cbSrcLine - spanInfo->width*cbElement;

    pbSrc = (BYTE *)spanInfo->srcImage+
        spanInfo->srcSkipPixels*cbElement+
        spanInfo->srcSkipLines*cbSrcLine;
    
    // Determine buffer coordinates
    x = spanInfo->startCol;
    y = spanInfo->startRow;

    // The destination step is negative: after a row has been written
    // the pointer moves back over that row and one further
    // fb->buf.outerWidth, so successive source rows land at decreasing
    // buffer addresses.
    // NOTE(review): assumes outerWidth is in elements, not bytes - confirm
    if (fb->buf.elementSize == sizeof(__GLzValue))
    {
        pbDst = (BYTE *)__GL_DEPTH_ADDR(fb, (__GLzValue*), x, y);
        cbDstExtra = -(fb->buf.outerWidth+spanInfo->width)*sizeof(__GLzValue);
    }
    else
    {
        pbDst = (BYTE *)__GL_DEPTH_ADDR(fb, (__GLz16Value*), x, y);
        cbDstExtra = -(fb->buf.outerWidth+spanInfo->width)*
            sizeof(__GLz16Value);
    }

    switch(spanInfo->srcType)
    {
    case GL_UNSIGNED_SHORT:
        if (fb->buf.elementSize == sizeof(__GLzValue))
        {
            ASSERTOPENGL(fb->scale == 0x7fffffff,
                         "Depth buffer scale invalid\n");
            
            // Widen first, then shift up into the buffer's range
            for (y = 0; y < spanInfo->height; y++)
            {
                for (x = 0; x < spanInfo->width; x++)
                {
                    *(__GLzValue *)pbDst =
                        (__GLzValue)(*(GLushort *)pbSrc) << (Z16_SHIFT-1);
                    pbDst += sizeof(__GLzValue);
                    pbSrc += cbElement;
                }

                pbSrc += cbSrcExtra;
                pbDst += cbDstExtra;
            }
        }
        else
        {
            ASSERTOPENGL(fb->scale == 0x7fff,
                         "Depth buffer scale invalid\n");
            
            for (y = 0; y < spanInfo->height; y++)
            {
                for (x = 0; x < spanInfo->width; x++)
                {
                    *(__GLz16Value *)pbDst =
                        (*(GLushort *)pbSrc) >> 1;
                    pbDst += sizeof(__GLz16Value);
                    pbSrc += cbElement;
                }

                pbSrc += cbSrcExtra;
                pbDst += cbDstExtra;
            }
        }
        break;

    case GL_UNSIGNED_INT:
        if (fb->buf.elementSize == sizeof(__GLzValue))
        {
            ASSERTOPENGL(fb->scale == 0x7fffffff,
                         "Depth buffer scale invalid\n");
            
            for (y = 0; y < spanInfo->height; y++)
            {
                for (x = 0; x < spanInfo->width; x++)
                {
                    *(__GLzValue *)pbDst =
                        (*(GLuint *)pbSrc) >> 1;
                    pbDst += sizeof(__GLzValue);
                    pbSrc += cbElement;
                }

                pbSrc += cbSrcExtra;
                pbDst += cbDstExtra;
            }
        }
        else
        {
            ASSERTOPENGL(fb->scale == 0x7fff,
                         "Depth buffer scale invalid\n");
            
            for (y = 0; y < spanInfo->height; y++)
            {
                for (x = 0; x < spanInfo->width; x++)
                {
                    *(__GLz16Value *)pbDst =
                        (__GLz16Value)((*(GLuint *)pbSrc) >> (Z16_SHIFT+1));
                    pbDst += sizeof(__GLz16Value);
                    pbSrc += cbElement;
                }

                pbSrc += cbSrcExtra;
                pbDst += cbDstExtra;
            }
        }
        break;
    }

    return GL_TRUE;
}

/******************************Public*Routine******************************\
*
* ReadRgbPixels
*
* Special case of glReadPixels for GL_RGB with straight data copy
*
* Blits the requested rectangle of the read buffer's DC into a
* temporary DIB section, then repacks the DIB contents into the
* caller's image as GL_RGB, GL_BGR_EXT or GL_BGRA_EXT.  8bpp surfaces
* are expanded here using the ab7to255/ab3to255 tables.
*
* Parameters:
*  gc        - context; also accessed as a __GLGENcontext
*  spanInfo  - read position and size, plus the destination image
*              description (format, alignment, line length, skip values)
*
* Returns:
*  GL_TRUE if the pixels were read.  GL_FALSE if the surface has fewer
*  than 8 color bits, no lock is held, or an allocation or GDI call
*  failed.
*
* History:
*  Tue Oct 10 18:43:04 1995	-by-	Drew Bliss [drewb]
*   Created
*
\**************************************************************************/

GLboolean ReadRgbPixels(__GLcontext *gc, __GLpixelSpanInfo *spanInfo)
{
    HDC hdc = NULL;
    HBITMAP hbm = NULL;
    __GLcolorBuffer *cfb;
    // Header plus room for the three BI_BITFIELDS masks
    BYTE abBitmapInfo[sizeof(BITMAPINFO)+2*sizeof(RGBQUAD)];
    BITMAPINFO *pbmi = NULL;
    BITMAPINFOHEADER *pbmih;
    BYTE *pbBits, *pbDst, *pbSrc;
    DWORD *pdwDst;
    int x, y;
    GLboolean bFail = GL_TRUE;
    int cbDstLine, cbDstExtra, cbSrcExtra, cbSrcElement, cbDstElement, cbWidth;
    int cBits;
    HPALETTE hpal;
    __GLGENcontext *gengc;

#if 0
    DbgPrint("ReadRgbPixels\n");
#endif

    gengc = (__GLGENcontext *)gc;
    cBits = gengc->CurrentFormat.cColorBits;
    
    // Don't bother with 4bpp surfaces
    if (cBits < 8)
    {
        return GL_FALSE;
    }

    // If there is no lock, we must have failed to reacquire the lock
    // from some previous call.  This is an error condition
    // and we should not continue.

    if (gengc->ulLockType == NO_LOCK)
    {
        WARNING("ReadRgbPixels: No lock\n");
        return GL_FALSE;
    }

    // We need to drop any DCI access we have because we're making
    // GDI calls.  From here on every exit must go through EH_Fail
    // so that the access is regrabbed.
    glsrvReleaseDci(gengc, (GLGENwindow *)gengc->pwo);

    cfb = gc->readBuffer;

    if (cBits == 8)
    {
        // 8bpp needs a full 256-entry color table, which is too large
        // for the stack buffer
        pbmi = (BITMAPINFO *)__wglTempAlloc(gc, sizeof(BITMAPINFO)+
                                            255*sizeof(RGBQUAD));
        if (pbmi == NULL)
        {
            goto EH_Fail;
        }
    }
    else
    {
        pbmi = (BITMAPINFO *)abBitmapInfo;
    }
    
    pbmih = &pbmi->bmiHeader;
    pbmih->biSize = sizeof(BITMAPINFOHEADER);
    // The DIB section only needs to be as large as the rectangle
    // being read
    pbmih->biWidth = spanInfo->width;
    pbmih->biHeight = spanInfo->height;
    pbmih->biPlanes = 1;
    
    if (cBits == 8)
    {
        int i;
        RGBQUAD rqTmp;
        
        // If the source is 8bpp then we do the color
        // expansion ourselves.  In this case we want to create
        // an 8bpp DIB whose color table matches the source
        pbmih->biBitCount = 8;
        pbmih->biCompression = BI_RGB;

        hpal = GetCurrentObject(CURRENT_DC, OBJ_PAL);
        if (hpal == NULL)
        {
            goto EH_Fail;
        }

        if (GetPaletteEntries(hpal, 0, 256,
                              (LPPALETTEENTRY)pbmi->bmiColors) != 256)
        {
            goto EH_Fail;
        }

        // The entries were written in PALETTEENTRY layout; swap the
        // red and blue bytes to get RGBQUAD layout and clear the
        // flags byte
        for (i = 0; i < 256; i++)
        {
            rqTmp = pbmi->bmiColors[i];
            pbmi->bmiColors[i].rgbRed = rqTmp.rgbBlue;
            pbmi->bmiColors[i].rgbBlue = rqTmp.rgbRed;
            pbmi->bmiColors[i].rgbReserved = 0;
        }

        cbSrcElement = 1;
    }
    else
    {
        if (spanInfo->dstFormat == GL_BGRA_EXT)
        {
            pbmih->biBitCount = 32;
            pbmih->biCompression = BI_BITFIELDS;
            *((DWORD *)pbmi->bmiColors+0) = 0xff0000;
            *((DWORD *)pbmi->bmiColors+1) = 0xff00;
            *((DWORD *)pbmi->bmiColors+2) = 0xff;
            cbSrcElement = 4;
        }
        else
        {
            pbmih->biBitCount = 24;
            pbmih->biCompression = BI_RGB;
            cbSrcElement = 3;
        }
    }
    
    pbmih->biSizeImage = 0;
    pbmih->biXPelsPerMeter = 0;
    pbmih->biYPelsPerMeter = 0;
    pbmih->biClrUsed = 0;
    pbmih->biClrImportant = 0;

    // Create a DIB section of the appropriate height and width
    hdc = CreateCompatibleDC(CURRENT_DC);
    if (hdc == NULL)
    {
        goto EH_Fail;
    }

    hbm = CreateDIBSection(hdc, pbmi, DIB_RGB_COLORS,
                           &pbBits, NULL, 0);
    if (hbm == NULL)
    {
        goto EH_Fail;
    }

    if (SelectObject(hdc, hbm) == NULL)
    {
        goto EH_Fail;
    }

    if (cBits <= 8)
    {
        // Give the memory DC the same palette as the source DC
        hpal = GetCurrentObject(CURRENT_DC, OBJ_PAL);
        if (hpal != NULL)
        {
            if (SelectPalette(hdc, hpal, FALSE) == NULL)
            {
                goto EH_Fail;
            }

            if (RealizePalette(hdc) == GDI_ERROR)
            {
                goto EH_Fail;
            }
        }
    }
    
    // Determine buffer coordinates
    x = __GL_UNBIAS_X(gc, (GLint)spanInfo->readX);
    y = __GL_UNBIAS_Y(gc, (GLint)spanInfo->readY)-spanInfo->height+1;

    // Copy the buffer's contents to the DIB
    if (!BitBlt(hdc, 0, 0, spanInfo->width, spanInfo->height,
                CURRENT_DC, x, y, SRCCOPY))
    {
        goto EH_Fail;
    }
    
    // Copy the DIB's contents to the output buffer, swapping R and B,
    // plus skipping any appropriate data, fixing up alignment and
    // obeying the line length

    if (spanInfo->dstFormat == GL_BGRA_EXT)
    {
        cbDstElement = 4;
    }
    else
    {
        cbDstElement = 3;
    }
    
    // Full destination line size, rounded up to the destination alignment
    cbDstLine = spanInfo->dstLineLength*cbDstElement;
    cbDstExtra = cbDstLine % spanInfo->dstAlignment;
    if (cbDstExtra != 0)
    {
        cbDstExtra = spanInfo->dstAlignment-cbDstExtra;
        cbDstLine += cbDstExtra;
    }
    // Bytes to skip after the written pixels to reach the next
    // destination line
    cbDstExtra = cbDstLine - spanInfo->width*cbDstElement;

    // DIB scanlines are padded to a DWORD boundary
    cbSrcExtra = spanInfo->width*cbSrcElement & 3;
    if (cbSrcExtra != 0)
    {
        cbSrcExtra = 4-cbSrcExtra;
    }

    pbSrc = pbBits;
    pbDst = (BYTE *)spanInfo->dstImage+
        spanInfo->dstSkipPixels*cbDstElement+
        spanInfo->dstSkipLines*cbDstLine;

    if (cBits == 8)
    {
        BYTE b;
        
        // For 8bpp sources we need to do the color expansion ourselves
        // because the 8bpp palette is only an approximation of a 3-3-2
        // palette since the system colors are forced into it.  Also,
        // GL does a rescaling of the color range.
        
        switch(spanInfo->dstFormat)
        {
        case GL_RGB:
            for (y = 0; y < spanInfo->height; y++)
            {
                for (x = 0; x < spanInfo->width; x++)
                {
                    b = *pbSrc++;

                    *pbDst++ =
                        ab7to255[(b & gc->modes.redMask) >> cfb->redShift];
                    *pbDst++ =
                        ab7to255[(b & gc->modes.greenMask) >> cfb->greenShift];
                    *pbDst++ =
                        ab3to255[(b & gc->modes.blueMask) >> cfb->blueShift];
                }
                
                pbSrc += cbSrcExtra;
                pbDst += cbDstExtra;
            }
            break;
            
        case GL_BGR_EXT:
            for (y = 0; y < spanInfo->height; y++)
            {
                for (x = 0; x < spanInfo->width; x++)
                {
                    b = *pbSrc++;

                    *pbDst++ =
                        ab3to255[(b & gc->modes.blueMask) >> cfb->blueShift];
                    *pbDst++ =
                        ab7to255[(b & gc->modes.greenMask) >> cfb->greenShift];
                    *pbDst++ =
                        ab7to255[(b & gc->modes.redMask) >> cfb->redShift];
                }
                
                pbSrc += cbSrcExtra;
                pbDst += cbDstExtra;
            }
            break;
            
        case GL_BGRA_EXT:
            // Build each pixel as a DWORD with alpha forced to 0xff
            pdwDst = (DWORD *)pbDst;
            for (y = 0; y < spanInfo->height; y++)
            {
                for (x = 0; x < spanInfo->width; x++)
                {
                    b = *pbSrc++;

                    *pdwDst++ =
                        0xff000000 |
                        ((DWORD)ab7to255[(b & gc->modes.redMask) >>
                                         cfb->redShift] << 16) |
                        ((DWORD)ab7to255[(b & gc->modes.greenMask) >>
                                         cfb->greenShift] << 8) |
                        ((DWORD)ab3to255[(b & gc->modes.blueMask) >>
                                         cfb->blueShift]);
                }
                
                pbSrc += cbSrcExtra;
                pdwDst = (DWORD *)(((BYTE *)pdwDst) + cbDstExtra);
            }
            break;
        }
    }
    else
    {
        // DIB and destination elements are the same size in the BGR
        // case, so this is the byte count of one row of pixels in both
        cbWidth = spanInfo->width*cbDstElement;
    
        switch(spanInfo->dstFormat)
        {
        case GL_RGB:
            // Swap R and B; the DIB holds pixels in BGR byte order
            for (y = 0; y < spanInfo->height; y++)
            {
                for (x = 0; x < spanInfo->width; x++)
                {
                    *pbDst++ = *(pbSrc+2);
                    *pbDst++ = *(pbSrc+1);
                    *pbDst++ = *pbSrc;
                    pbSrc += 3;
                }
                
                pbSrc += cbSrcExtra;
                pbDst += cbDstExtra;
            }
            break;
            
        case GL_BGR_EXT:
            if (cbSrcExtra == 0 && cbDstExtra == 0)
            {
                // Both sides are tightly packed so one copy does it all
                CopyMemory(pbDst, pbSrc, cbWidth*spanInfo->height);
            }
            else
            {
                for (y = 0; y < spanInfo->height; y++)
                {
                    CopyMemory(pbDst, pbSrc, cbWidth);
                    // CopyMemory doesn't advance the pointers, so step
                    // over the copied pixels as well as the padding
                    pbSrc += cbWidth+cbSrcExtra;
                    pbDst += cbWidth+cbDstExtra;
                }
            }
            break;

        case GL_BGRA_EXT:
            {
                DWORD *pdwSrc = (DWORD *)pbSrc;

                pdwDst = (DWORD *)pbDst;

                // Copy whole pixels, forcing alpha to 0xff
                for (y = 0; y < spanInfo->height; y++)
                {
                    for (x = 0; x < spanInfo->width; x++)
                    {
                        *pdwDst++ = 0xff000000 | (*pdwSrc++);
                    }

                    pdwSrc = (DWORD *)(((BYTE *)pdwSrc) + cbSrcExtra);
                    pdwDst = (DWORD *)(((BYTE *)pdwDst) + cbDstExtra);
                }
            }
            break;
        }
    }
    
    bFail = GL_FALSE;

 EH_Fail:
    // Common cleanup for both success and failure
    if (hdc != NULL)
    {
        DeleteDC(hdc);
    }
    if (hbm != NULL)
    {
        DeleteObject(hbm);
    }
    if (pbmi != NULL && pbmi != (BITMAPINFO *)abBitmapInfo)
    {
        __wglTempFree(gc, pbmi);
    }

    // Must regrab lock.  There's not much we can do if this fails
    glsrvGrabDci(gengc, (GLGENwindow *)gengc->pwo);
    
    return !bFail;
}

/******************************Public*Routine******************************\
*
* ReadZPixels
*
* Special case of glReadPixels for GL_DEPTH_COMPONENTs with
* unsigned types that require minimal transformation
*
* Destination elements are GL_UNSIGNED_SHORT or GL_UNSIGNED_INT.  Each
* depth buffer value (sizeof(__GLzValue) or sizeof(__GLz16Value) wide)
* is shifted to fit the destination type and stored with no other
* processing.
*
* Parameters:
*  gc        - context owning the depth buffer
*  spanInfo  - read position and size, plus the destination image
*              description (type, alignment, line length, skip values)
*
* Returns:
*  GL_TRUE once the values have been copied.  GL_FALSE if dstType is
*  not one of the two supported types.
*
* History:
*  Tue Oct 10 18:43:36 1995	-by-	Drew Bliss [drewb]
*   Created
*
\**************************************************************************/

GLboolean ReadZPixels(__GLcontext *gc, __GLpixelSpanInfo *spanInfo)
{
    __GLdepthBuffer *fb;
    BYTE *pbSrc, *pbDst;
    int x, y;
    int cbElement, cbDstLine, cbSrcExtra, cbDstExtra;

#if 0
    DbgPrint("ReadZPixels\n");
#endif
    
    fb = &gc->depthBuffer;
    
    // Copy the depth buffer data to the output
    // skipping any appropriate data, fixing up alignment and
    // obeying the line length

    switch(spanInfo->dstType)
    {
    case GL_UNSIGNED_SHORT:
        cbElement = 2;
        break;
    case GL_UNSIGNED_INT:
        cbElement = 4;
        break;
        
    default:
        ASSERTOPENGL(0, "ReadZPixels: Unknown dstType\n");
        // Don't carry on with an undefined element size
        return GL_FALSE;
    }
    
    // Full destination line size, rounded up to the destination alignment
    cbDstLine = spanInfo->dstLineLength*cbElement;
    cbDstExtra = cbDstLine % spanInfo->dstAlignment;
    if (cbDstExtra != 0)
    {
        cbDstExtra = spanInfo->dstAlignment-cbDstExtra;
        cbDstLine += cbDstExtra;
    }
    // Bytes to skip after the written elements to reach the next
    // destination line.  This covers both the alignment padding and any
    // elements beyond the copied width when the line is longer than
    // the width.
    cbDstExtra = cbDstLine - spanInfo->width*cbElement;

    pbDst = (BYTE *)spanInfo->dstImage+
        spanInfo->dstSkipPixels*cbElement+
        spanInfo->dstSkipLines*cbDstLine;
    
    // Determine buffer coordinates
    x = (GLint)spanInfo->readX;
    y = (GLint)spanInfo->readY;

    // The source step is negative: after a row has been read the
    // pointer moves back over that row and one further
    // fb->buf.outerWidth, so successive destination rows come from
    // decreasing buffer addresses.
    // NOTE(review): assumes outerWidth is in elements, not bytes - confirm
    if (fb->buf.elementSize == sizeof(__GLzValue))
    {
        pbSrc = (BYTE *)__GL_DEPTH_ADDR(fb, (__GLzValue*), x, y);
        cbSrcExtra = -(fb->buf.outerWidth+spanInfo->width)*sizeof(__GLzValue);
    }
    else
    {
        pbSrc = (BYTE *)__GL_DEPTH_ADDR(fb, (__GLz16Value*), x, y);
        cbSrcExtra = -(fb->buf.outerWidth+spanInfo->width)*
            sizeof(__GLz16Value);
    }

    switch(spanInfo->dstType)
    {
    case GL_UNSIGNED_SHORT:
        if (fb->buf.elementSize == sizeof(__GLzValue))
        {
            ASSERTOPENGL(fb->scale == 0x7fffffff,
                         "Depth buffer scale invalid\n");
            
            for (y = 0; y < spanInfo->height; y++)
            {
                for (x = 0; x < spanInfo->width; x++)
                {
                    // Shift first, then narrow.  Narrowing first would
                    // discard the high bits that carry the value.
                    *(GLushort *)pbDst =
                        (GLushort)((*(__GLzValue *)pbSrc) >> (Z16_SHIFT-1));
                    pbSrc += sizeof(__GLzValue);
                    pbDst += cbElement;
                }

                pbSrc += cbSrcExtra;
                pbDst += cbDstExtra;
            }
        }
        else
        {
            ASSERTOPENGL(fb->scale == 0x7fff,
                         "Depth buffer scale invalid\n");
            
            for (y = 0; y < spanInfo->height; y++)
            {
                for (x = 0; x < spanInfo->width; x++)
                {
                    *(GLushort *)pbDst =
                        (*(__GLz16Value *)pbSrc) << 1;
                    pbSrc += sizeof(__GLz16Value);
                    pbDst += cbElement;
                }

                pbSrc += cbSrcExtra;
                pbDst += cbDstExtra;
            }
        }
        break;

    case GL_UNSIGNED_INT:
        if (fb->buf.elementSize == sizeof(__GLzValue))
        {
            ASSERTOPENGL(fb->scale == 0x7fffffff,
                         "Depth buffer scale invalid\n");
            
            for (y = 0; y < spanInfo->height; y++)
            {
                for (x = 0; x < spanInfo->width; x++)
                {
                    *(GLuint *)pbDst =
                        (*(__GLzValue *)pbSrc) << 1;
                    pbSrc += sizeof(__GLzValue);
                    pbDst += cbElement;
                }

                pbSrc += cbSrcExtra;
                pbDst += cbDstExtra;
            }
        }
        else
        {
            ASSERTOPENGL(fb->scale == 0x7fff,
                         "Depth buffer scale invalid\n");
            
            for (y = 0; y < spanInfo->height; y++)
            {
                for (x = 0; x < spanInfo->width; x++)
                {
                    // Widen to unsigned before shifting so the shift
                    // can't overflow a signed int
                    *(GLuint *)pbDst =
                        (GLuint)(*(__GLz16Value *)pbSrc) << (Z16_SHIFT+1);
                    pbSrc += sizeof(__GLz16Value);
                    pbDst += cbElement;
                }

                pbSrc += cbSrcExtra;
                pbDst += cbDstExtra;
            }
        }
        break;
    }

    return GL_TRUE;
}

/******************************Public*Routine******************************\
*
* CopyRgbPixels
*
* Special case of glCopyPixels for straight data copy
*
* Currently we only have to deal with normal color buffers
* If we start supporting aux buffers it may no longer be possible
* to accelerate this function in all cases
*
* The copy is a single BitBlt from the read buffer's DC to the draw
* buffer's DC.
*
* Parameters:
*  gc        - context; also accessed as a __GLGENcontext
*  spanInfo  - source position (readX, readY), destination position
*              (x, y) and size of the rectangle to copy
*
* Returns:
*  GL_TRUE if the blit succeeded.  GL_FALSE if no lock is held or the
*  blit failed.
*
* History:
*  Tue Oct 10 18:43:04 1995	-by-	Drew Bliss [drewb]
*   Created
*
\**************************************************************************/

GLboolean CopyRgbPixels(__GLcontext *gc, __GLpixelSpanInfo *spanInfo)
{
    __GLcolorBuffer *cfbSrc, *cfbDst;
    int xSrc, ySrc, xDst, yDst;
    GLboolean bOk;
    __GLGENcontext *gengc;

#if 0
    DbgPrint("CopyRgbPixels\n");
#endif

    gengc = (__GLGENcontext *)gc;
    
    // If there is no lock, we must have failed to reacquire the lock
    // from some previous call.  This is an error condition
    // and we should not continue.

    if (gengc->ulLockType == NO_LOCK)
    {
        WARNING("CopyRgbPixels: No lock\n");
        return GL_FALSE;
    }

    // We need to drop any DCI access we have because we're making
    // GDI calls
    glsrvReleaseDci(gengc, (GLGENwindow *)gengc->pwo);

    cfbSrc = gc->readBuffer;
    cfbDst = gc->drawBuffer;
    
    // Determine buffer coordinates
    xSrc = __GL_UNBIAS_X(gc, (GLint)spanInfo->readX);
    ySrc = __GL_UNBIAS_Y(gc, (GLint)spanInfo->readY)-spanInfo->height+1;
    xDst = __GL_UNBIAS_X(gc, (GLint)spanInfo->x);
    yDst = __GL_UNBIAS_Y(gc, (GLint)spanInfo->y)-spanInfo->height+1;

    // Copy the data between the buffers.  BitBlt reports success as
    // any non-zero BOOL, so normalize it instead of narrowing it
    // with a cast, which could discard the set bits.
    bOk = BitBlt(CURRENT_DC_CFB(cfbDst), xDst, yDst,
                 spanInfo->width, spanInfo->height,
                 CURRENT_DC_CFB(cfbSrc), xSrc, ySrc, SRCCOPY)
        ? GL_TRUE : GL_FALSE;
    
    // Must regrab lock.  There's not much we can do if this fails
    glsrvGrabDci(gengc, (GLGENwindow *)gengc->pwo);
    
    return bOk;
}

/******************************Public*Routine******************************\
*
* CopyZPixels
*
* Special case of glCopyPixels for GL_DEPTH where there is no
* destination color buffer and the Z function is GL_ALWAYS
*
* History:
*  Tue Oct 10 18:43:36 1995	-by-	Drew Bliss [drewb]
*   Created
*
\**************************************************************************/

GLboolean CopyZPixels(__GLcontext *gc, __GLpixelSpanInfo *spanInfo)
{
    __GLdepthBuffer *fb;
    BYTE *pbSrc, *pbDst;
    int y, xSrc, ySrc, xDst, yDst;
    int cbElement, cbLine, cbWidth;
    int cbToLastRow;

#if 0
    DbgPrint("CopyZPixels\n");
#endif

    // An empty rectangle has nothing to copy
    if (spanInfo->width <= 0 || spanInfo->height <= 0)
    {
        return GL_TRUE;
    }
    
    fb = &gc->depthBuffer;
    
    // Determine buffer coordinates
    xSrc = (GLint)spanInfo->readX;
    ySrc = (GLint)spanInfo->readY;
    xDst = (GLint)spanInfo->x;
    yDst = (GLint)spanInfo->y;

    // Locate the first source and destination elements and pick the
    // element size that matches the depth buffer format
    if (fb->buf.elementSize == sizeof(__GLzValue))
    {
        pbSrc = (BYTE *)__GL_DEPTH_ADDR(fb, (__GLzValue*), xSrc, ySrc);
        pbDst = (BYTE *)__GL_DEPTH_ADDR(fb, (__GLzValue*), xDst, yDst);
        cbElement = (int)sizeof(__GLzValue);
    }
    else
    {
        pbSrc = (BYTE *)__GL_DEPTH_ADDR(fb, (__GLz16Value*), xSrc, ySrc);
        pbDst = (BYTE *)__GL_DEPTH_ADDR(fb, (__GLz16Value*), xDst, yDst);
        cbElement = (int)sizeof(__GLz16Value);
    }

    // cbLine is the byte step from one copied row to the next.  It is
    // the negated buffer stride, so successive rows are walked towards
    // lower addresses.  Computed in int to avoid mixing signed values
    // with size_t.
    cbLine = -(int)fb->buf.outerWidth*cbElement;
    cbWidth = (int)spanInfo->width*cbElement;

    if (-cbLine == cbWidth)
    {
        // Each row spans the full stride, so the whole rectangle is one
        // contiguous run of memory.  Because rows are walked downwards,
        // the run begins at the last row, not at pbSrc/pbDst.
        // MoveMemory copes with any overlap between the two runs.
        cbToLastRow = cbLine*(spanInfo->height-1);
        MoveMemory(pbDst+cbToLastRow, pbSrc+cbToLastRow,
                   cbWidth*spanInfo->height);
    }
    else
    {
        // Adjust copy direction to handle overlap cases
        if (ySrc > yDst)
        {
            // Start one step past the last row and work back to the
            // first so source rows are read before they are overwritten
            pbSrc += cbLine*spanInfo->height;
            pbDst += cbLine*spanInfo->height;
            for (y = 0; y < spanInfo->height; y++)
            {
                pbSrc -= cbLine;
                pbDst -= cbLine;
                CopyMemory(pbDst, pbSrc, cbWidth);
            }
        }
        else if (ySrc < yDst)
        {
            // Walk from the first row to the last
            for (y = 0; y < spanInfo->height; y++)
            {
                CopyMemory(pbDst, pbSrc, cbWidth);
                pbSrc += cbLine;
                pbDst += cbLine;
            }
        }
        else
        {
            // Same rows: source and destination may overlap within a
            // row, so use the overlap-safe move
            for (y = 0; y < spanInfo->height; y++)
            {
                MoveMemory(pbDst, pbSrc, cbWidth);
                pbSrc += cbLine;
                pbDst += cbLine;
            }
        }
    }

    return GL_TRUE;
}

/******************************Public*Routine******************************\
*
* CopyAlignedImage
*
* Copies data between memory images where straight data copy is applicable
*
* BUGBUG - This routine doesn't handle overlapping source and
* destination images.  The old code doesn't seem to either, so perhaps
* this isn't a problem.
*
* History:
*  Tue Nov 07 14:27:06 1995	-by-	Drew Bliss [drewb]
*   Created
*
\**************************************************************************/

GLboolean CopyAlignedImage(__GLcontext *gc, __GLpixelSpanInfo *spanInfo)
{
    GLubyte *pbFrom, *pbTo;
    int cbRow;
    int cRowsLeft;

    ASSERTOPENGL(spanInfo->srcGroupIncrement == spanInfo->dstGroupIncrement,
                 "CopyAlignedImage: Group size mismatch\n");

    // Bytes of pixel data transferred for each row
    cbRow = spanInfo->width*spanInfo->dstGroupIncrement;

    if (spanInfo->srcRowIncrement != spanInfo->dstRowIncrement ||
        cbRow != spanInfo->srcRowIncrement)
    {
        // The row strides differ, or a row holds more than we copy,
        // so each row has to be transferred separately
        pbFrom = spanInfo->srcCurrent;
        pbTo = spanInfo->dstCurrent;
        cRowsLeft = spanInfo->height;
        while (cRowsLeft > 0)
        {
            CopyMemory(pbTo, pbFrom, cbRow);
            pbFrom += spanInfo->srcRowIncrement;
            pbTo += spanInfo->dstRowIncrement;
            cRowsLeft--;
        }

        return GL_TRUE;
    }

    // Both images use the same stride and every byte of each row is
    // copied, so the whole image goes across in one copy
    CopyMemory(spanInfo->dstCurrent, spanInfo->srcCurrent,
               cbRow*spanInfo->height);

    return GL_TRUE;
}

/******************************Public*Routine******************************\
*
* CopyRgbToBgraImage
*
* Special case for 24-bit RGB to 32-bit BGRA
*
* History:
*  Tue Nov 07 15:09:47 1995	-by-	Drew Bliss [drewb]
*   Created
*
\**************************************************************************/

GLboolean CopyRgbToBgraImage(__GLcontext *gc, __GLpixelSpanInfo *spanInfo)
{
    GLubyte *pbRgb;
    GLuint *pulBgra;
    int cbSrcSkip, cDstSkip;
    int cRows, cCols;

    pbRgb = spanInfo->srcCurrent;
    pulBgra = spanInfo->dstCurrent;

    // Distance from the end of one row's pixels to the start of the next
    cbSrcSkip = spanInfo->srcRowIncrement-
        spanInfo->width*spanInfo->srcGroupIncrement;
    cDstSkip = spanInfo->dstRowIncrement-
        spanInfo->width*spanInfo->dstGroupIncrement;

    // The destination is walked a dword at a time, so convert its
    // skip from bytes to dwords
    ASSERTOPENGL((cDstSkip & 3) == 0, "Non-dword step\n");
    cDstSkip >>= 2;

    for (cRows = spanInfo->height; cRows > 0; cRows--)
    {
        cCols = spanInfo->width;
        while (cCols > 0)
        {
            // Source bytes arrive in R, G, B order
            GLuint r = pbRgb[0];
            GLuint g = pbRgb[1];
            GLuint b = pbRgb[2];

            // Pack with the top byte forced to 0xff
            *pulBgra = 0xff000000 | (r << 16) | (g << 8) | b;

            pulBgra++;
            pbRgb += 3;
            cCols--;
        }

        pbRgb += cbSrcSkip;
        pulBgra += cDstSkip;
    }

    return GL_TRUE;
}

/******************************Public*Routine******************************\
*
* CopyRgbaToBgraImage
*
* Special case for 32-bit RGBA to 32-bit BGRA
*
* History:
*  Tue Nov 07 15:09:47 1995	-by-	Drew Bliss [drewb]
*   Created
*
\**************************************************************************/

GLboolean CopyRgbaToBgraImage(__GLcontext *gc, __GLpixelSpanInfo *spanInfo)
{
    GLubyte *pbRgba;
    GLuint *pulBgra;
    int cbSrcSkip, cDstSkip;
    int cRows, cCols;

    pbRgba = spanInfo->srcCurrent;
    pulBgra = spanInfo->dstCurrent;

    // Distance from the end of one row's pixels to the start of the next
    cbSrcSkip = spanInfo->srcRowIncrement-
        spanInfo->width*spanInfo->srcGroupIncrement;
    cDstSkip = spanInfo->dstRowIncrement-
        spanInfo->width*spanInfo->dstGroupIncrement;

    // The destination is walked a dword at a time, so convert its
    // skip from bytes to dwords
    ASSERTOPENGL((cDstSkip & 3) == 0, "Non-dword step\n");
    cDstSkip >>= 2;

    for (cRows = spanInfo->height; cRows > 0; cRows--)
    {
        cCols = spanInfo->width;
        while (cCols > 0)
        {
            // Source bytes arrive in R, G, B, A order
            GLuint r = pbRgba[0];
            GLuint g = pbRgba[1];
            GLuint b = pbRgba[2];
            GLuint a = pbRgba[3];

            // Pack with alpha in the top byte
            *pulBgra = (a << 24) | (r << 16) | (g << 8) | b;

            pulBgra++;
            pbRgba += 4;
            cCols--;
        }

        pbRgba += cbSrcSkip;
        pulBgra += cDstSkip;
    }

    return GL_TRUE;
}

/******************************Public*Routine******************************\
*
* CopyBgrToBgraImage
*
* Special case for 24-bit BGR to 32-bit BGRA
*
* History:
*  Tue Nov 07 15:09:47 1995	-by-	Drew Bliss [drewb]
*   Created
*
\**************************************************************************/

GLboolean CopyBgrToBgraImage(__GLcontext *gc, __GLpixelSpanInfo *spanInfo)
{
    GLubyte *pbBgr;
    GLuint *pulBgra;
    int cbSrcSkip, cDstSkip;
    int cRows, cCols;

    pbBgr = spanInfo->srcCurrent;
    pulBgra = spanInfo->dstCurrent;

    // Distance from the end of one row's pixels to the start of the next
    cbSrcSkip = spanInfo->srcRowIncrement-
        spanInfo->width*spanInfo->srcGroupIncrement;
    cDstSkip = spanInfo->dstRowIncrement-
        spanInfo->width*spanInfo->dstGroupIncrement;

    // The destination is walked a dword at a time, so convert its
    // skip from bytes to dwords
    ASSERTOPENGL((cDstSkip & 3) == 0, "Non-dword step\n");
    cDstSkip >>= 2;

    for (cRows = spanInfo->height; cRows > 0; cRows--)
    {
        cCols = spanInfo->width;
        while (cCols > 0)
        {
            // Source bytes arrive in B, G, R order
            GLuint b = pbBgr[0];
            GLuint g = pbBgr[1];
            GLuint r = pbBgr[2];

            // Pack with the top byte forced to 0xff
            *pulBgra = 0xff000000 | (r << 16) | (g << 8) | b;

            pulBgra++;
            pbBgr += 3;
            cCols--;
        }

        pbBgr += cbSrcSkip;
        pulBgra += cDstSkip;
    }

    return GL_TRUE;
}
#endif