Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

4507 lines
166 KiB

/*
** Copyright 1991, 1992, 1993, Silicon Graphics, Inc.
** All Rights Reserved.
**
** This is UNPUBLISHED PROPRIETARY SOURCE CODE of Silicon Graphics, Inc.;
** the contents of this file may not be disclosed to third parties, copied or
** duplicated in any form, in whole or in part, without the prior written
** permission of Silicon Graphics, Inc.
**
** RESTRICTED RIGHTS LEGEND:
** Use, duplication or disclosure by the Government is subject to restrictions
** as set forth in subdivision (c)(1)(ii) of the Rights in Technical Data
** and Computer Software clause at DFARS 252.227-7013, and/or in similar or
** successor clauses in the FAR, DOD or NASA FAR Supplement. Unpublished -
** rights reserved under the Copyright Laws of the United States.
*/
#include "precomp.h"
#pragma hdrstop
/*
** Shorthand member paths used together with OFFSET() by the x86 inline
** assembly in this file to address fields of the context structures.
*/
#ifdef _X86_
#define SHADER __GLcontext.polygon.shader
#define GENGCACCEL __GLGENcontext.genAccel
#define SPANDELTA __GLGENcontext.genAccel.spanDelta
#define SPANVALUE __GLGENcontext.genAccel.spanValue
#endif
/*
** The inline-assembly code paths are compiled only when both _X86_ and
** ENABLE_ASM are non-zero.
*/
#define ENABLE_ASM 1
#if DBG
/*
** When defined, the assembly paths are followed by C recomputations of the
** same values, and any mismatch is reported through DbgPrint.
*/
//#define FORCE_NPX_DEBUG 1
#endif
/**************************************************************************\
\**************************************************************************/
/*
** Fill the spans of a sub-triangle for scanlines iyBottom .. iyTop-1.
**
** The left and right edges are walked with the fixed-point state held in
** gc->polygon.shader (ixLeft/ixRight plus fractions and big/little steps).
** For every span of positive width that lies inside [clipY0, clipY1):
**   - the span is clipped horizontally when the viewport is not
**     "reasonable", advancing color, texture and depth start values;
**   - the stipple/depth span function (or the plain depth span function)
**     is run and may reject the whole span;
**   - the color span function is called, either directly on the surface
**     or bracketed by pfnCopyPixels calls when the surface is not a fully
**     visible DIB.
**
** The routine renders into gc->polygon.shader.cfb as it is on entry; it
** does not select gc->drawBuffer itself.  When the draw buffer is
** GL_FRONT_AND_BACK, cfb is switched to the front and then the back
** buffer for each span and is left pointing at the back buffer.
**
** On exit the edge state, z and the r/g/b/s/t span start values are
** written back to the context.  On NT, if the temporary stipple buffer
** cannot be allocated, the routine returns without drawing or updating
** any state.
*/
void FASTCALL __fastGenFillSubTriangle(__GLcontext *gc, GLint iyBottom, GLint iyTop)
{
    GLint ixLeft, ixRight;
    GLint ixLeftFrac, ixRightFrac;
    GLint spanWidth, clipY0, clipY1;
    ULONG ulSpanVisibility;
    GLint cWalls;
    GLint *Walls;
#ifdef NT
    __GLstippleWord stackWords[__GL_MAX_STACK_STIPPLE_WORDS];
    __GLstippleWord *words;
    GLuint maxWidth;
#else
    __GLstippleWord words[__GL_MAX_STIPPLE_WORDS];
#endif
    BOOL bSurfaceDIB;
    BOOL bClipped;
    GLint xScr, yScr;
    GLint zFails;
    /*
    ** z and zbuf are only loaded when depth testing is enabled but are
    ** copied into the shader for every span, so give them defined values.
    */
    __GLzValue *zbuf = NULL, z = 0;
    GLint r, g, b, s, t;
    __GLGENcontext *gengc = (__GLGENcontext *)gc;
    __genSpanFunc cSpanFunc = GENACCEL(gc).__fastSpanFuncPtr;
    __GLspanFunc zSpanFunc = GENACCEL(gc).__fastZSpanFuncPtr;
    int scansize;

#ifdef NT
    /* Use the stack buffer unless the clip width needs more stipple bits. */
    maxWidth = (gc->transform.clipX1 - gc->transform.clipX0) + 31;
    if (maxWidth > __GL_MAX_STACK_STIPPLE_BITS)
    {
        words = gcTempAlloc(gc, (maxWidth+__GL_STIPPLE_BITS-1)/8);
        if (words == NULL)
        {
            return;
        }
    }
    else
    {
        words = stackWords;
    }
#endif

    gc->polygon.shader.stipplePat = words;
    scansize = gc->polygon.shader.cfb->buf.outerWidth;
    bSurfaceDIB = (gc->polygon.shader.cfb->buf.flags & DIB_FORMAT) != 0;
    bClipped = (!(gc->drawBuffer->buf.flags & NO_CLIP)) &&
               bSurfaceDIB;

    if (bSurfaceDIB)
        GENACCEL(gc).flags |= SURFACE_TYPE_DIB;
    else
        GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB);

    /* Pull the edge and interpolant state into locals. */
    ixLeft = gc->polygon.shader.ixLeft;
    ixLeftFrac = gc->polygon.shader.ixLeftFrac;
    ixRight = gc->polygon.shader.ixRight;
    ixRightFrac = gc->polygon.shader.ixRightFrac;
    clipY0 = gc->transform.clipY0;
    clipY1 = gc->transform.clipY1;
    r = GENACCEL(gc).spanValue.r;
    g = GENACCEL(gc).spanValue.g;
    b = GENACCEL(gc).spanValue.b;
    s = GENACCEL(gc).spanValue.s;
    t = GENACCEL(gc).spanValue.t;

    if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) {
        z = gc->polygon.shader.frag.z;
        if( gc->modes.depthBits == 32 )
            zbuf = __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*),
                                   ixLeft, iyBottom);
        else
            zbuf = (__GLzValue *)__GL_DEPTH_ADDR(&gc->depthBuffer,
                                                 (__GLz16Value*),
                                                 ixLeft, iyBottom);
    } else if ((gc->polygon.shader.modeFlags & __GL_SHADE_STIPPLE) == 0) {
        /*
        ** Neither depth test nor stipple: set every bit of the stipple
        ** buffer once and mark the span as unstippled.
        */
        GLuint w;

        w = ((gc->transform.clipX1 - gc->transform.clipX0) + 31) >> 3;
        if (w != 0)
            RtlFillMemoryUlong(words, w, ~((ULONG)0));
        GENACCEL(gc).flags &= ~(HAVE_STIPPLE);
    }

    //
    // render the spans
    //
    while (iyBottom < iyTop) {
        spanWidth = ixRight - ixLeft;
        /*
        ** Only render spans that have non-zero width and which are
        ** not scissored out vertically.
        */
        if ((spanWidth > 0) && (iyBottom >= clipY0) && (iyBottom < clipY1)) {
            gc->polygon.shader.frag.x = ixLeft;
            gc->polygon.shader.frag.y = iyBottom;
            gc->polygon.shader.zbuf = zbuf;
            gc->polygon.shader.frag.z = z;
            GENACCEL(gc).spanValue.r = r;
            GENACCEL(gc).spanValue.g = g;
            GENACCEL(gc).spanValue.b = b;
            GENACCEL(gc).spanValue.s = s;
            GENACCEL(gc).spanValue.t = t;

            // take care of horizontal scissoring
            if (!gc->transform.reasonableViewport) {
                GLint clipX0 = gc->transform.clipX0;
                GLint clipX1 = gc->transform.clipX1;

                // see if we skip entire span
                if ((ixRight <= clipX0) || (ixLeft >= clipX1))
                    goto advance;

                // now clip right and left
                if (ixRight > clipX1)
                    spanWidth = (clipX1 - ixLeft);

                if (ixLeft < clipX0) {
                    GLuint delta;

                    delta = clipX0 - ixLeft;
                    spanWidth -= delta;

                    // move the span start values forward by the clipped pixels
                    if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
                        GENACCEL(gc).spanValue.r += delta * GENACCEL(gc).spanDelta.r;
                        if (gc->polygon.shader.modeFlags & __GL_SHADE_RGB) {
                            GENACCEL(gc).spanValue.g += delta * GENACCEL(gc).spanDelta.g;
                            GENACCEL(gc).spanValue.b += delta * GENACCEL(gc).spanDelta.b;
                        }
                    }
                    if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) {
                        GENACCEL(gc).spanValue.s += delta * GENACCEL(gc).spanDelta.s;
                        GENACCEL(gc).spanValue.t += delta * GENACCEL(gc).spanDelta.t;
                    }

                    gc->polygon.shader.frag.x = clipX0;

                    if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_ITER) {
                        if( gc->modes.depthBits == 32 )
                            gc->polygon.shader.zbuf += delta;
                        else
                            // step in 16-bit elements; a cast is not an
                            // lvalue in C, so assign the result explicitly
                            gc->polygon.shader.zbuf = (__GLzValue *)
                                ((__GLz16Value *)gc->polygon.shader.zbuf + delta);

                        gc->polygon.shader.frag.z +=
                            (gc->polygon.shader.dzdx * delta);
                    }
                }
            }

            // now have span length
            gc->polygon.shader.length = spanWidth;

            // If a stipple is active, process it first
            if (gc->polygon.shader.modeFlags & __GL_SHADE_STIPPLE)
            {
                // If no pixels are left after stippling and depth
                // testing then we can skip the span
                // Note that this function handles the no-depth-
                // testing case also
                gc->polygon.shader.done = GL_FALSE;
                if (!(*GENACCEL(gc).__fastStippleDepthTestSpan)(gc) ||
                    gc->polygon.shader.done)
                {
                    goto advance;
                }
                GENACCEL(gc).flags |= HAVE_STIPPLE;
            }
            // Do z-buffering if needed, and short-circuit rest of span
            // operations if nothing will be drawn.
            else if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_ITER) {
                // initially assume no stippling
                GENACCEL(gc).flags &= ~(HAVE_STIPPLE);
                // a result of 1 skips the span; any other non-zero result
                // means the stipple buffer must be honored
                if ((zFails = (*zSpanFunc)(gc)) == 1)
                    goto advance;
                else if (zFails)
                    GENACCEL(gc).flags |= HAVE_STIPPLE;
            }

            if (gc->state.raster.drawBuffer == GL_FRONT_AND_BACK) {
                gc->polygon.shader.cfb = &gc->frontBuffer;
                xScr = __GL_UNBIAS_X(gc, gc->polygon.shader.frag.x) +
                       gc->frontBuffer.buf.xOrigin;
                yScr = __GL_UNBIAS_Y(gc, iyBottom) +
                       gc->frontBuffer.buf.yOrigin;

                // If the front buffer is a DIB, we're drawing straight to
                // the screen, so we must check clipping.
                if ((gc->frontBuffer.buf.flags &
                     (DIB_FORMAT | NO_CLIP)) == DIB_FORMAT) {
                    ulSpanVisibility = wglSpanVisible(xScr, yScr, spanWidth,
                                                      &cWalls, &Walls);

                    // If the span is completely visible, we can treat the
                    // screen as a DIB.
                    if (ulSpanVisibility == WGL_SPAN_ALL) {
                        GENACCEL(gc).flags |= SURFACE_TYPE_DIB;
                        (*cSpanFunc)(gengc);
                    } else if (ulSpanVisibility == WGL_SPAN_PARTIAL) {
                        // NOTE(review): the last pfnCopyPixels argument is
                        // presumably FALSE = read from the surface and
                        // TRUE = write back -- confirm against its definition.
                        GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB);
                        if (GENACCEL(gc).flags & HAVE_STIPPLE)
                            (*gengc->pfnCopyPixels)(gengc,
                                                    gc->polygon.shader.cfb,
                                                    xScr, yScr, spanWidth,
                                                    FALSE);
                        GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB);
                        (*cSpanFunc)(gengc);
                        (*gengc->pfnCopyPixels)(gengc,
                                                gc->polygon.shader.cfb,
                                                xScr, yScr, spanWidth,
                                                TRUE);
                    }
                } else {
                    GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB);
                    if (GENACCEL(gc).flags & HAVE_STIPPLE)
                        (*gengc->pfnCopyPixels)(gengc,
                                                gc->polygon.shader.cfb,
                                                xScr, yScr, spanWidth,
                                                FALSE);
                    GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB);
                    (*cSpanFunc)(gengc);
                    (*gengc->pfnCopyPixels)(gengc,
                                            gc->polygon.shader.cfb,
                                            xScr, yScr, spanWidth,
                                            TRUE);
                }

                // The back buffer is always DIB-compatible
                gc->polygon.shader.cfb = &gc->backBuffer;
                GENACCEL(gc).flags |= SURFACE_TYPE_DIB;
                (*cSpanFunc)(gengc);
            } else {
                if (bClipped) {
                    xScr = __GL_UNBIAS_X(gc, gc->polygon.shader.frag.x) +
                           gc->drawBuffer->buf.xOrigin;
                    yScr = __GL_UNBIAS_Y(gc, iyBottom) +
                           gc->drawBuffer->buf.yOrigin;

                    ulSpanVisibility = wglSpanVisible(xScr, yScr, spanWidth,
                                                      &cWalls, &Walls);

                    if (ulSpanVisibility == WGL_SPAN_ALL) {
                        GENACCEL(gc).flags |= SURFACE_TYPE_DIB;
                        (*cSpanFunc)(gengc);
                    } else if (ulSpanVisibility == WGL_SPAN_PARTIAL) {
                        GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB);
                        if (GENACCEL(gc).flags & HAVE_STIPPLE)
                            (*gengc->pfnCopyPixels)(gengc,
                                                    gc->polygon.shader.cfb,
                                                    xScr, yScr, spanWidth,
                                                    FALSE);
                        GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB);
                        (*cSpanFunc)(gengc);
                        (*gengc->pfnCopyPixels)(gengc,
                                                gc->polygon.shader.cfb,
                                                xScr, yScr, spanWidth,
                                                TRUE);
                    }
                } else if (bSurfaceDIB) {
                    (*cSpanFunc)(gengc);
                } else {
                    // Non-DIB surface: always go through pfnCopyPixels.
                    xScr = __GL_UNBIAS_X(gc, gc->polygon.shader.frag.x) +
                           gc->drawBuffer->buf.xOrigin;
                    yScr = __GL_UNBIAS_Y(gc, iyBottom) +
                           gc->drawBuffer->buf.yOrigin;

                    GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB);
                    if (GENACCEL(gc).flags & HAVE_STIPPLE)
                        (*gengc->pfnCopyPixels)(gengc,
                                                gc->polygon.shader.cfb,
                                                xScr, yScr, spanWidth,
                                                FALSE);
                    (*cSpanFunc)(gengc);
                    // bSurfaceDIB is known to be false in this branch
                    (*gengc->pfnCopyPixels)(gengc,
                                            gc->polygon.shader.cfb,
                                            xScr, yScr, spanWidth,
                                            TRUE);
                }
            }
        }

advance:
        GENACCEL(gc).pPix += scansize;

        /* Advance right edge fixed point, adjusting for carry */
        ixRightFrac += gc->polygon.shader.dxRightFrac;
        if (ixRightFrac < 0) {
            /* Carry/Borrow'd. Use large step */
            ixRight += gc->polygon.shader.dxRightBig;
            ixRightFrac &= ~0x80000000;
        } else {
            ixRight += gc->polygon.shader.dxRightLittle;
        }

        iyBottom++;

        /* Advance left edge, and with it the per-scanline interpolants */
        ixLeftFrac += gc->polygon.shader.dxLeftFrac;
        if (ixLeftFrac < 0) {
            /* Carry/Borrow'd. Use large step */
            ixLeft += gc->polygon.shader.dxLeftBig;
            ixLeftFrac &= ~0x80000000;

            if (gc->polygon.shader.modeFlags & __GL_SHADE_RGB) {
                if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
                    r += *((GLint *)&gc->polygon.shader.rBig);
                    g += *((GLint *)&gc->polygon.shader.gBig);
                    b += *((GLint *)&gc->polygon.shader.bBig);
                }
                if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) {
                    s += *((GLint *)&gc->polygon.shader.sBig);
                    t += *((GLint *)&gc->polygon.shader.tBig);
                }
            } else {
                if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
                    r += *((GLint *)&gc->polygon.shader.rBig);
                }
            }

            if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) {
                z += gc->polygon.shader.zBig;
                /* The implicit multiply is taken out of the loop */
                zbuf = (__GLzValue*)((GLubyte*)zbuf +
                                     gc->polygon.shader.zbufBig);
            }
        } else {
            /* Use small step */
            ixLeft += gc->polygon.shader.dxLeftLittle;

            if (gc->polygon.shader.modeFlags & __GL_SHADE_RGB) {
                if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
                    r += *((GLint *)&gc->polygon.shader.rLittle);
                    g += *((GLint *)&gc->polygon.shader.gLittle);
                    b += *((GLint *)&gc->polygon.shader.bLittle);
                }
                if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) {
                    s += *((GLint *)&gc->polygon.shader.sLittle);
                    t += *((GLint *)&gc->polygon.shader.tLittle);
                }
            } else {
                if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
                    r += *((GLint *)&gc->polygon.shader.rLittle);
                }
            }

            if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) {
                z += gc->polygon.shader.zLittle;
                /* The implicit multiply is taken out of the loop */
                zbuf = (__GLzValue*)((GLubyte*)zbuf +
                                     gc->polygon.shader.zbufLittle);
            }
        }
    }

    /* Save the walked state so the next sub-triangle continues from here. */
    gc->polygon.shader.ixLeft = ixLeft;
    gc->polygon.shader.ixLeftFrac = ixLeftFrac;
    gc->polygon.shader.ixRight = ixRight;
    gc->polygon.shader.ixRightFrac = ixRightFrac;
    gc->polygon.shader.frag.z = z;
    GENACCEL(gc).spanValue.r = r;
    GENACCEL(gc).spanValue.g = g;
    GENACCEL(gc).spanValue.b = b;
    GENACCEL(gc).spanValue.s = s;
    GENACCEL(gc).spanValue.t = t;

#ifdef NT
    if (maxWidth > __GL_MAX_STACK_STIPPLE_BITS)
    {
        gcTempFree(gc, words);
    }
#endif
}
/*
** Fill the spans of a sub-triangle for scanlines iyBottom .. iyTop-1,
** carrying r/g/b/a, s/t and qw as per-span start values.
**
** The edges are walked exactly as in the other sub-triangle fillers in
** this file.  No stipple buffer or separate depth span function is used
** here; the color span function is called for every visible span.  When
** the surface is not a fully visible DIB, the span is bracketed by
** pfnCopyPixels calls; the leading call is made only when blending or
** depth testing is enabled (bReadPixels).
**
** With _MCD_ defined and a driver depth span buffer in use (bMcdZ), z
** values are read from and written back to the driver around each span,
** and zbuf stays fixed at the driver's span buffer instead of being
** stepped along the edge.
**
** On exit the edge state, z, zbuf and the interpolant start values are
** written back to the context.
*/
void FASTCALL __fastGenFillSubTriangleTexRGBA(__GLcontext *gc, GLint iyBottom, GLint iyTop)
{
    GLint ixLeft, ixRight;
    GLint ixLeftFrac, ixRightFrac;
    GLint spanWidth, clipY0, clipY1;
    ULONG ulSpanVisibility;
    GLint cWalls;
    GLint *Walls;
    BOOL bSurfaceDIB;
    BOOL bClipped;
    GLint xScr, yScr;
    /*
    ** z and zbuf are only loaded when depth testing is enabled but are
    ** stored into the shader unconditionally, so give them defined values.
    */
    __GLzValue *zbuf = NULL, z = 0;
    GLint r, g, b, a, s, t;
    __GLfloat qw;
    __GLGENcontext *gengc = (__GLGENcontext *)gc;
    __genSpanFunc cSpanFunc = GENACCEL(gc).__fastSpanFuncPtr;
    int scansize;
    BOOL bReadPixels = (gc->state.enables.general & __GL_BLEND_ENABLE) ||
                       (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST);
#ifdef _MCD_
    GLboolean bMcdZ = ((gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) &&
                       (gengc->pMcdState != NULL) &&
                       (gengc->pMcdState->pDepthSpan != NULL) &&
                       (gengc->pMcdState->pMcdSurf != NULL) &&
                       !(gengc->pMcdState->McdBuffers.mcdDepthBuf.bufFlags & MCDBUF_ENABLED));
#endif

    scansize = gc->polygon.shader.cfb->buf.outerWidth;
    bSurfaceDIB = (gc->polygon.shader.cfb->buf.flags & DIB_FORMAT) != 0;
    bClipped = (!(gc->drawBuffer->buf.flags & NO_CLIP)) &&
               bSurfaceDIB;

    if (bSurfaceDIB)
        GENACCEL(gc).flags |= SURFACE_TYPE_DIB;
    else
        GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB);

    /* Pull the edge and interpolant state into locals. */
    ixLeft = gc->polygon.shader.ixLeft;
    ixLeftFrac = gc->polygon.shader.ixLeftFrac;
    ixRight = gc->polygon.shader.ixRight;
    ixRightFrac = gc->polygon.shader.ixRightFrac;
    clipY0 = gc->transform.clipY0;
    clipY1 = gc->transform.clipY1;
    r = GENACCEL(gc).spanValue.r;
    g = GENACCEL(gc).spanValue.g;
    b = GENACCEL(gc).spanValue.b;
    a = GENACCEL(gc).spanValue.a;
    s = GENACCEL(gc).spanValue.s;
    t = GENACCEL(gc).spanValue.t;
    qw = gc->polygon.shader.frag.qw;

    if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) {
        z = gc->polygon.shader.frag.z;
#ifdef _MCD_
        if (bMcdZ)
        {
            zbuf = (__GLzValue *)gengc->pMcdState->pMcdSurf->McdDepthBuf.pv;
        }
        else
#endif
        {
            if( gc->modes.depthBits == 32 )
                zbuf = __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*),
                                       ixLeft, iyBottom);
            else
                zbuf = (__GLzValue *)__GL_DEPTH_ADDR(&gc->depthBuffer,
                                                     (__GLz16Value*),
                                                     ixLeft, iyBottom);
        }
    }

    //
    // render the spans
    //
    while (iyBottom < iyTop) {
        spanWidth = ixRight - ixLeft;
        /*
        ** Only render spans that have non-zero width and which are
        ** not scissored out vertically.
        */
        if ((spanWidth > 0) && (iyBottom >= clipY0) && (iyBottom < clipY1)) {
            gc->polygon.shader.frag.x = ixLeft;
            gc->polygon.shader.frag.y = iyBottom;
            gc->polygon.shader.zbuf = zbuf;
            gc->polygon.shader.frag.z = z;
            GENACCEL(gc).spanValue.r = r;
            GENACCEL(gc).spanValue.g = g;
            GENACCEL(gc).spanValue.b = b;
            GENACCEL(gc).spanValue.a = a;
            GENACCEL(gc).spanValue.s = s;
            GENACCEL(gc).spanValue.t = t;
            gc->polygon.shader.frag.qw = qw;

            // take care of horizontal scissoring
            if (!gc->transform.reasonableViewport) {
                GLint clipX0 = gc->transform.clipX0;
                GLint clipX1 = gc->transform.clipX1;

                // see if we skip entire span
                if ((ixRight <= clipX0) || (ixLeft >= clipX1))
                    goto advance;

                // now clip right and left
                if (ixRight > clipX1)
                    spanWidth = (clipX1 - ixLeft);

                if (ixLeft < clipX0) {
                    GLuint delta;

                    delta = clipX0 - ixLeft;
                    spanWidth -= delta;

                    // move the span start values forward by the clipped pixels
                    if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
                        GENACCEL(gc).spanValue.r += delta * GENACCEL(gc).spanDelta.r;
                        GENACCEL(gc).spanValue.g += delta * GENACCEL(gc).spanDelta.g;
                        GENACCEL(gc).spanValue.b += delta * GENACCEL(gc).spanDelta.b;
                        GENACCEL(gc).spanValue.a += delta * GENACCEL(gc).spanDelta.a;
                    }
                    if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) {
                        GENACCEL(gc).spanValue.s += delta * GENACCEL(gc).spanDelta.s;
                        GENACCEL(gc).spanValue.t += delta * GENACCEL(gc).spanDelta.t;
                        gc->polygon.shader.frag.qw += delta * gc->polygon.shader.dqwdx;
                    }

                    gc->polygon.shader.frag.x = clipX0;

                    if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_ITER) {
                        if( gc->modes.depthBits == 32 )
                            gc->polygon.shader.zbuf += delta;
                        else
                            // step in 16-bit elements; a cast is not an
                            // lvalue in C, so assign the result explicitly
                            gc->polygon.shader.zbuf = (__GLzValue *)
                                ((__GLz16Value *)gc->polygon.shader.zbuf + delta);

                        gc->polygon.shader.frag.z +=
                            (gc->polygon.shader.dzdx * delta);
                    }
                }
            }

            // now have span length
            gc->polygon.shader.length = spanWidth;

#ifdef _MCD_
            // read from driver z buffer into z span buffer
            if (bMcdZ) {
                GenMcdReadZRawSpan(&gc->depthBuffer, gc->polygon.shader.frag.x,
                                   iyBottom, spanWidth);
            }
#endif

            if (bClipped) {
                xScr = __GL_UNBIAS_X(gc, gc->polygon.shader.frag.x) +
                       gc->drawBuffer->buf.xOrigin;
                yScr = __GL_UNBIAS_Y(gc, iyBottom) +
                       gc->drawBuffer->buf.yOrigin;

                ulSpanVisibility = wglSpanVisible(xScr, yScr, spanWidth,
                                                  &cWalls, &Walls);

                if (ulSpanVisibility == WGL_SPAN_ALL) {
                    GENACCEL(gc).flags |= SURFACE_TYPE_DIB;
                    (*cSpanFunc)(gengc);
                } else if (ulSpanVisibility == WGL_SPAN_PARTIAL) {
                    // NOTE(review): the last pfnCopyPixels argument is
                    // presumably FALSE = read from the surface and
                    // TRUE = write back -- confirm against its definition.
                    GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB);
                    if (bReadPixels)
                        (*gengc->pfnCopyPixels)(gengc,
                                                gc->polygon.shader.cfb,
                                                xScr, yScr, spanWidth,
                                                FALSE);
                    (*cSpanFunc)(gengc);
                    (*gengc->pfnCopyPixels)(gengc,
                                            gc->polygon.shader.cfb,
                                            xScr, yScr, spanWidth,
                                            TRUE);
                }
            } else if (bSurfaceDIB) {
                (*cSpanFunc)(gengc);
            } else {
                // Non-DIB surface: always go through pfnCopyPixels.
                xScr = __GL_UNBIAS_X(gc, gc->polygon.shader.frag.x) +
                       gc->drawBuffer->buf.xOrigin;
                yScr = __GL_UNBIAS_Y(gc, iyBottom) +
                       gc->drawBuffer->buf.yOrigin;

                GENACCEL(gc).flags &= ~(SURFACE_TYPE_DIB);
                if (bReadPixels)
                    (*gengc->pfnCopyPixels)(gengc,
                                            gc->polygon.shader.cfb,
                                            xScr, yScr, spanWidth,
                                            FALSE);
                (*cSpanFunc)(gengc);
                // bSurfaceDIB is known to be false in this branch
                (*gengc->pfnCopyPixels)(gengc,
                                        gc->polygon.shader.cfb,
                                        xScr, yScr, spanWidth,
                                        TRUE);
            }

#ifdef _MCD_
            // write z span buffer back to driver z buffer
            if (bMcdZ) {
                GenMcdWriteZRawSpan(&gc->depthBuffer,
                                    gc->polygon.shader.frag.x,
                                    iyBottom, spanWidth);
            }
#endif
        }

advance:
        GENACCEL(gc).pPix += scansize;

        /* Advance right edge fixed point, adjusting for carry */
        ixRightFrac += gc->polygon.shader.dxRightFrac;
        if (ixRightFrac < 0) {
            /* Carry/Borrow'd. Use large step */
            ixRight += gc->polygon.shader.dxRightBig;
            ixRightFrac &= ~0x80000000;
        } else {
            ixRight += gc->polygon.shader.dxRightLittle;
        }

        iyBottom++;

        /* Advance left edge, and with it the per-scanline interpolants */
        ixLeftFrac += gc->polygon.shader.dxLeftFrac;
        if (ixLeftFrac < 0) {
            /* Carry/Borrow'd. Use large step */
            ixLeft += gc->polygon.shader.dxLeftBig;
            ixLeftFrac &= ~0x80000000;

            if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
                r += *((GLint *)&gc->polygon.shader.rBig);
                g += *((GLint *)&gc->polygon.shader.gBig);
                b += *((GLint *)&gc->polygon.shader.bBig);
                a += *((GLint *)&gc->polygon.shader.aBig);
            }
            if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) {
                s += *((GLint *)&gc->polygon.shader.sBig);
                t += *((GLint *)&gc->polygon.shader.tBig);
                qw += gc->polygon.shader.qwBig;
            }

            if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) {
                z += gc->polygon.shader.zBig;
                /* The implicit multiply is taken out of the loop */
#ifdef _MCD_
                if (!bMcdZ)
#endif
                {
                    zbuf = (__GLzValue*)((GLubyte*)zbuf +
                                         gc->polygon.shader.zbufBig);
                }
            }
        } else {
            /* Use small step */
            ixLeft += gc->polygon.shader.dxLeftLittle;

            if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
                r += *((GLint *)&gc->polygon.shader.rLittle);
                g += *((GLint *)&gc->polygon.shader.gLittle);
                b += *((GLint *)&gc->polygon.shader.bLittle);
                a += *((GLint *)&gc->polygon.shader.aLittle);
            }
            if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) {
                s += *((GLint *)&gc->polygon.shader.sLittle);
                t += *((GLint *)&gc->polygon.shader.tLittle);
                qw += gc->polygon.shader.qwLittle;
            }

            if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) {
                z += gc->polygon.shader.zLittle;
                /* The implicit multiply is taken out of the loop */
#ifdef _MCD_
                if (!bMcdZ)
#endif
                {
                    zbuf = (__GLzValue*)((GLubyte*)zbuf +
                                         gc->polygon.shader.zbufLittle);
                }
            }
        }
    }

    /* Save the walked state so the next sub-triangle continues from here. */
    gc->polygon.shader.ixLeft = ixLeft;
    gc->polygon.shader.ixLeftFrac = ixLeftFrac;
    gc->polygon.shader.ixRight = ixRight;
    gc->polygon.shader.ixRightFrac = ixRightFrac;
    gc->polygon.shader.frag.z = z;
    gc->polygon.shader.zbuf = zbuf;
    GENACCEL(gc).spanValue.r = r;
    GENACCEL(gc).spanValue.g = g;
    GENACCEL(gc).spanValue.b = b;
    GENACCEL(gc).spanValue.a = a;
    GENACCEL(gc).spanValue.s = s;
    GENACCEL(gc).spanValue.t = t;
    gc->polygon.shader.frag.qw = qw;
}
/**************************************************************************\
\**************************************************************************/
/*
** Fill the spans of a sub-triangle for scanlines iyBottom .. iyTop-1,
** drawing into gc->drawBuffer (stored in gc->polygon.shader.cfb).
**
** The left and right edges are walked with the fixed-point state held in
** gc->polygon.shader.  For every span of positive width that lies inside
** [clipY0, clipY1) the span is clipped horizontally when the viewport is
** not "reasonable", the depth span function is run when depth iteration
** is enabled (and may reject the whole span), and the color span
** function is then called directly; no pfnCopyPixels or span-visibility
** handling is done here.
**
** On exit the edge state, z and the r/g/b/a/s/t span start values are
** written back to the context.  On NT, if the temporary stipple buffer
** cannot be allocated, the routine returns without drawing or updating
** any state.
*/
void FASTCALL GenDrvFillSubTriangle(__GLcontext *gc, GLint iyBottom, GLint iyTop)
{
    GLint ixLeft, ixRight;
    GLint ixLeftFrac, ixRightFrac;
    GLint spanWidth, clipY0, clipY1;
#ifdef NT
    __GLstippleWord stackWords[__GL_MAX_STACK_STIPPLE_WORDS];
    __GLstippleWord *words;
    GLuint maxWidth;
#else
    __GLstippleWord words[__GL_MAX_STIPPLE_WORDS];
#endif
    GLint zFails;
    /*
    ** z is only loaded when depth testing is enabled but is copied into
    ** the shader for every span, so give it a defined value like zbuf.
    */
    __GLzValue *zbuf = NULL, z = 0;
    GLint r, g, b, a, s, t;
    __GLGENcontext *gengc = (__GLGENcontext *)gc;
    __genSpanFunc cSpanFunc = GENACCEL(gc).__fastSpanFuncPtr;
    __GLspanFunc zSpanFunc = GENACCEL(gc).__fastZSpanFuncPtr;

#ifdef NT
    /* Use the stack buffer unless the clip width needs more stipple bits. */
    maxWidth = (gc->transform.clipX1 - gc->transform.clipX0) + 31;
    if (maxWidth > __GL_MAX_STACK_STIPPLE_BITS)
    {
        words = gcTempAlloc(gc, (maxWidth+__GL_STIPPLE_BITS-1)/8);
        if (words == NULL)
        {
            return;
        }
    }
    else
    {
        words = stackWords;
    }
#endif

    gc->polygon.shader.stipplePat = words;
    gc->polygon.shader.cfb = gc->drawBuffer;

    /* Pull the edge and interpolant state into locals. */
    ixLeft = gc->polygon.shader.ixLeft;
    ixLeftFrac = gc->polygon.shader.ixLeftFrac;
    ixRight = gc->polygon.shader.ixRight;
    ixRightFrac = gc->polygon.shader.ixRightFrac;
    clipY0 = gc->transform.clipY0;
    clipY1 = gc->transform.clipY1;
    r = GENACCEL(gc).spanValue.r;
    g = GENACCEL(gc).spanValue.g;
    b = GENACCEL(gc).spanValue.b;
    a = GENACCEL(gc).spanValue.a;
    s = GENACCEL(gc).spanValue.s;
    t = GENACCEL(gc).spanValue.t;

    if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) {
        z = gc->polygon.shader.frag.z;
        if( gc->modes.depthBits == 32 )
            zbuf = __GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*),
                                   ixLeft, iyBottom);
        else
            zbuf = (__GLzValue *)__GL_DEPTH_ADDR(&gc->depthBuffer,
                                                 (__GLz16Value*),
                                                 ixLeft, iyBottom);
    } else {
        /*
        ** No depth test: set every bit of the stipple buffer once and
        ** mark the span as unstippled.
        */
        GLuint w;

        w = ((gc->transform.clipX1 - gc->transform.clipX0) + 31) >> 3;
        if (w != 0)
            RtlFillMemoryUlong(words, w, ~((ULONG)0));
        GENACCEL(gc).flags &= ~(HAVE_STIPPLE);
    }

    while (iyBottom < iyTop) {
        spanWidth = ixRight - ixLeft;
        /*
        ** Only render spans that have non-zero width and which are
        ** not scissored out vertically.
        */
        if ((spanWidth > 0) && (iyBottom >= clipY0) && (iyBottom < clipY1)) {
            gc->polygon.shader.frag.x = ixLeft;
            gc->polygon.shader.frag.y = iyBottom;
            gc->polygon.shader.zbuf = zbuf;
            gc->polygon.shader.frag.z = z;
            GENACCEL(gc).spanValue.r = r;
            GENACCEL(gc).spanValue.g = g;
            GENACCEL(gc).spanValue.b = b;
            GENACCEL(gc).spanValue.a = a;
            GENACCEL(gc).spanValue.s = s;
            GENACCEL(gc).spanValue.t = t;

            // take care of horizontal scissoring
            if (!gc->transform.reasonableViewport) {
                GLint clipX0 = gc->transform.clipX0;
                GLint clipX1 = gc->transform.clipX1;

                // see if we skip entire span
                if ((ixRight <= clipX0) || (ixLeft >= clipX1))
                    goto advance;

                // now clip right and left
                if (ixRight > clipX1)
                    spanWidth = (clipX1 - ixLeft);

                if (ixLeft < clipX0) {
                    GLuint delta;

                    delta = clipX0 - ixLeft;
                    spanWidth -= delta;

                    // move the span start values forward by the clipped pixels
                    if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
                        GENACCEL(gc).spanValue.r += delta * GENACCEL(gc).spanDelta.r;
                        if (gc->polygon.shader.modeFlags & __GL_SHADE_RGB) {
                            GENACCEL(gc).spanValue.g += delta * GENACCEL(gc).spanDelta.g;
                            GENACCEL(gc).spanValue.b += delta * GENACCEL(gc).spanDelta.b;
                            // alpha is stepped along the edge under the same
                            // RGB + SMOOTH condition, so it must also be
                            // advanced past the clipped pixels
                            GENACCEL(gc).spanValue.a += delta * GENACCEL(gc).spanDelta.a;
                        }
                    }
                    if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) {
                        GENACCEL(gc).spanValue.s += delta * GENACCEL(gc).spanDelta.s;
                        GENACCEL(gc).spanValue.t += delta * GENACCEL(gc).spanDelta.t;
                    }

                    gc->polygon.shader.frag.x = clipX0;

                    if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_ITER) {
                        if( gc->modes.depthBits == 32 )
                            gc->polygon.shader.zbuf += delta;
                        else
                            // step in 16-bit elements; a cast is not an
                            // lvalue in C, so assign the result explicitly
                            gc->polygon.shader.zbuf = (__GLzValue *)
                                ((__GLz16Value *)gc->polygon.shader.zbuf + delta);

                        gc->polygon.shader.frag.z +=
                            (gc->polygon.shader.dzdx * delta);
                    }
                }
            }

            // now have span length
            gc->polygon.shader.length = spanWidth;

            // Do z-buffering if needed, and short-circuit rest of span
            // operations if nothing will be drawn.
            if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_ITER) {
                // initially assume no stippling
                GENACCEL(gc).flags &= ~(HAVE_STIPPLE);
                // a result of 1 skips the span; any other non-zero result
                // means the stipple buffer must be honored
                if ((zFails = (*zSpanFunc)(gc)) == 1)
                    goto advance;
                else if (zFails)
                    GENACCEL(gc).flags |= HAVE_STIPPLE;
            }

            (*cSpanFunc)(gengc);
        }

advance:
        /* Advance right edge fixed point, adjusting for carry */
        ixRightFrac += gc->polygon.shader.dxRightFrac;
        if (ixRightFrac < 0) {
            /* Carry/Borrow'd. Use large step */
            ixRight += gc->polygon.shader.dxRightBig;
            ixRightFrac &= ~0x80000000;
        } else {
            ixRight += gc->polygon.shader.dxRightLittle;
        }

        iyBottom++;

        /* Advance left edge, and with it the per-scanline interpolants */
        ixLeftFrac += gc->polygon.shader.dxLeftFrac;
        if (ixLeftFrac < 0) {
            /* Carry/Borrow'd. Use large step */
            ixLeft += gc->polygon.shader.dxLeftBig;
            ixLeftFrac &= ~0x80000000;

            if (gc->polygon.shader.modeFlags & __GL_SHADE_RGB) {
                if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
                    r += *((GLint *)&gc->polygon.shader.rBig);
                    g += *((GLint *)&gc->polygon.shader.gBig);
                    b += *((GLint *)&gc->polygon.shader.bBig);
                    a += *((GLint *)&gc->polygon.shader.aBig);
                }
                if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) {
                    s += *((GLint *)&gc->polygon.shader.sBig);
                    t += *((GLint *)&gc->polygon.shader.tBig);
                }
            } else {
                if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
                    r += *((GLint *)&gc->polygon.shader.rBig);
                }
            }

            if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) {
                z += gc->polygon.shader.zBig;
                /* The implicit multiply is taken out of the loop */
                zbuf = (__GLzValue*)((GLubyte*)zbuf +
                                     gc->polygon.shader.zbufBig);
            }
        } else {
            /* Use small step */
            ixLeft += gc->polygon.shader.dxLeftLittle;

            if (gc->polygon.shader.modeFlags & __GL_SHADE_RGB) {
                if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
                    r += *((GLint *)&gc->polygon.shader.rLittle);
                    g += *((GLint *)&gc->polygon.shader.gLittle);
                    b += *((GLint *)&gc->polygon.shader.bLittle);
                    a += *((GLint *)&gc->polygon.shader.aLittle);
                }
                if (gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) {
                    s += *((GLint *)&gc->polygon.shader.sLittle);
                    t += *((GLint *)&gc->polygon.shader.tLittle);
                }
            } else {
                if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
                    r += *((GLint *)&gc->polygon.shader.rLittle);
                }
            }

            if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) {
                z += gc->polygon.shader.zLittle;
                /* The implicit multiply is taken out of the loop */
                zbuf = (__GLzValue*)((GLubyte*)zbuf +
                                     gc->polygon.shader.zbufLittle);
            }
        }
    }

    /* Save the walked state so the next sub-triangle continues from here. */
    gc->polygon.shader.ixLeft = ixLeft;
    gc->polygon.shader.ixLeftFrac = ixLeftFrac;
    gc->polygon.shader.ixRight = ixRight;
    gc->polygon.shader.ixRightFrac = ixRightFrac;
    gc->polygon.shader.frag.z = z;
    GENACCEL(gc).spanValue.r = r;
    GENACCEL(gc).spanValue.g = g;
    GENACCEL(gc).spanValue.b = b;
    GENACCEL(gc).spanValue.a = a;
    GENACCEL(gc).spanValue.s = s;
    GENACCEL(gc).spanValue.t = t;

#ifdef NT
    if (maxWidth > __GL_MAX_STACK_STIPPLE_BITS)
    {
        gcTempFree(gc, words);
    }
#endif
}
/**************************************************************************\
\**************************************************************************/
/*
** Set up the left-edge walking state in gc->polygon.shader from the
** starting x position and the edge slope: the snapped integer x and its
** fraction, the little/big integer steps with the fractional step, and,
** when depth testing is enabled, the matching byte steps through the
** depth buffer.
*/
void GenSnapXLeft(__GLcontext *gc, __GLfloat xLeft, __GLfloat dxdyLeft)
{
    GLint xWhole, xFrac;
    GLint zShift;

    xWhole = __GL_VERTEX_FLOAT_TO_INT(xLeft);
    xFrac = __GL_VERTEX_PROMOTED_FRACTION(xLeft) + 0x40000000;

    /* Bit 31 of the biased fraction is the carry into the integer part. */
    gc->polygon.shader.ixLeftFrac = xFrac & ~0x80000000;
    gc->polygon.shader.ixLeft = xWhole + (((GLuint) xFrac) >> 31);

    /* Compute big and little steps */
    gc->polygon.shader.dxLeftLittle = FTOL(dxdyLeft);
    gc->polygon.shader.dxLeftFrac =
        FLT_FRACTION(dxdyLeft - gc->polygon.shader.dxLeftLittle);
    gc->polygon.shader.dxLeftBig =
        (gc->polygon.shader.dxLeftFrac < 0)
            ? gc->polygon.shader.dxLeftLittle - 1
            : gc->polygon.shader.dxLeftLittle + 1;

    if (!(gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)) {
        return;
    }

    /*
    ** Compute the big and little depth buffer steps.  The depth buffer
    ** pointer is walked along the triangle edge together with the edge
    ** itself, so the buffer address never has to be recomputed.  Each
    ** step is one row plus the x step, scaled to bytes: shift by 1 for
    ** 2-byte elements, by 2 otherwise.
    */
    zShift = (gc->depthBuffer.buf.elementSize == 2) ? 1 : 2;

    gc->polygon.shader.zbufLittle =
        (gc->depthBuffer.buf.outerWidth +
         gc->polygon.shader.dxLeftLittle) << zShift;
    gc->polygon.shader.zbufBig =
        (gc->depthBuffer.buf.outerWidth +
         gc->polygon.shader.dxLeftBig) << zShift;
}
/**************************************************************************\
\**************************************************************************/
/*
** Set up the right-edge walking state in gc->polygon.shader from the
** starting x position and the edge slope: the snapped integer x and its
** fraction, plus the little/big integer steps and the fractional step.
*/
void GenSnapXRight(__GLcontext *gc, __GLfloat xRight, __GLfloat dxdyRight)
{
    GLint xWhole, xFrac;

    xWhole = __GL_VERTEX_FLOAT_TO_INT(xRight);
    xFrac = __GL_VERTEX_PROMOTED_FRACTION(xRight) + 0x40000000;

    /* Bit 31 of the biased fraction is the carry into the integer part. */
    gc->polygon.shader.ixRightFrac = xFrac & ~0x80000000;
    gc->polygon.shader.ixRight = xWhole + (((GLuint) xFrac) >> 31);

    /* Compute big and little steps */
    gc->polygon.shader.dxRightLittle = FTOL(dxdyRight);
    gc->polygon.shader.dxRightFrac =
        FLT_FRACTION(dxdyRight - gc->polygon.shader.dxRightLittle);
    gc->polygon.shader.dxRightBig =
        (gc->polygon.shader.dxRightFrac < 0)
            ? gc->polygon.shader.dxRightLittle - 1
            : gc->polygon.shader.dxRightLittle + 1;
}
/**************************************************************************\
\**************************************************************************/
void __fastGenSetInitialParameters(
__GLcontext *gc,
const __GLvertex *a,
__GLfloat fdx,
__GLfloat fdy)
{
#define sh gc->polygon.shader
#define bPolygonOffset \
(gc->state.enables.general & __GL_POLYGON_OFFSET_FILL_ENABLE)
__GLfloat zOffset;
__GLfloat dxLeftLittle;
#if _X86_ && ENABLE_ASM
LARGE_INTEGER temp;
_asm{
mov edx, gc
fild DWORD PTR [OFFSET(SHADER.dxLeftLittle)][edx]
mov edi, [OFFSET(SHADER.modeFlags)][edx]
test edi, __GL_SHADE_DEPTH_ITER
fstp dxLeftLittle
je noZ
}
_asm{
mov ebx, [OFFSET(__GLcontext.state.enables.general)][edx]
mov ecx, __glZero
test ebx, __GL_POLYGON_OFFSET_FILL_ENABLE
mov zOffset, ecx
je noPolyOffset
}
zOffset = __glPolygonOffsetZ(gc);
_asm{
mov edx, gc
mov edi, [OFFSET(SHADER.modeFlags)][edx]
}
noPolyOffset:
_asm{
mov eax, a
fld fdx
fmul DWORD PTR [OFFSET(SHADER.dzdxf)][edx]
fld fdy
fmul DWORD PTR [OFFSET(SHADER.dzdyf)][edx]
// zy zx
fxch ST(1)
// zx zy
fadd DWORD PTR [OFFSET(__GLvertex.window.z)][eax]
fld dxLeftLittle
fmul DWORD PTR [OFFSET(SHADER.dzdxf)][edx]
// zl zy zx
fxch ST(1) // zy zl zx
fadd zOffset
fxch ST(1) // zl zy zx
fadd DWORD PTR [OFFSET(SHADER.dzdyf)][edx]
// zl zy zx (+1)
fxch ST(1) // zy zl zx
faddp ST(2), ST // zl z
fmul DWORD PTR [OFFSET(GENGCACCEL.zScale)][edx]
// ZL z (+1)
fxch ST(1) // z ZL
fmul DWORD PTR [OFFSET(GENGCACCEL.zScale)][edx]
// Z ZL
fxch ST(1) // ZL Z
fistp temp
mov eax, DWORD PTR temp
mov DWORD PTR [OFFSET(SHADER.zLittle)][edx], eax
fistp temp
mov eax, DWORD PTR temp
mov DWORD PTR [OFFSET(SHADER.frag.z)][edx], eax
}
#if FORCE_NPX_DEBUG
{
ULONG fragZ = FTOL((a->window.z + fdx*sh.dzdxf +
(fdy*sh.dzdyf + zOffset)) * GENACCEL(gc).zScale);
__GLfloat zLittle = ((sh.dzdyf + sh.dxLeftLittle * sh.dzdxf)) * GENACCEL(gc).zScale;
LONG shZLittle = FTOL(zLittle);
if (sh.frag.z != fragZ)
DbgPrint("fragZ %x %x\n", fragZ, sh.frag.z);
if (sh.zLittle != shZLittle)
DbgPrint("sh.zLittle %x %x\n", shZLittle, sh.zLittle);
}
_asm {
mov edx, gc
mov edi, [OFFSET(SHADER.modeFlags)][edx]
}
#endif // FORCE_NPX_DEBUG
noZ:
_asm{
test edi, __GL_SHADE_SMOOTH
je done
test edi, __GL_SHADE_RGB
jne rgbShade
}
// ciShade:
{
CASTFIX(sh.rLittle) =
FLT_TO_FIX(gc->polygon.shader.drdy +
dxLeftLittle * gc->polygon.shader.drdx);
GENACCEL(gc).spanValue.r =
FLT_TO_FIX(a->color->r + fdx * sh.drdx + fdy * sh.drdy);
}
_asm{
mov edx, gc
mov edi, [OFFSET(SHADER.modeFlags)][edx]
jmp done
}
rgbShade:
_asm
{
mov eax, a
fld dxLeftLittle
fmul DWORD PTR [OFFSET(SHADER.drdx)][edx]
fld dxLeftLittle
fmul DWORD PTR [OFFSET(SHADER.dgdx)][edx] // g r
fxch ST(1) // r g
fadd DWORD PTR [OFFSET(SHADER.drdy)][edx] // R g
fld dxLeftLittle
fmul DWORD PTR [OFFSET(SHADER.dbdx)][edx] // b R g
fxch ST(2) // g R b
fadd DWORD PTR [OFFSET(SHADER.dgdy)][edx] // G R b
fxch ST(2) // b R G
fadd DWORD PTR [OFFSET(SHADER.dbdy)][edx] // B R G
fxch ST(1) // R B G
fmul __glVal65536 // sR B G
fxch ST(2) // G B sR
fmul __glVal65536 // sG B sR
fxch ST(1) // B sG sR
fmul __glVal65536 // sB sG sR
fxch ST(2) // sR sG sB
fistp DWORD PTR [OFFSET(SHADER.rLittle)][edx]
fistp DWORD PTR [OFFSET(SHADER.gLittle)][edx]
fistp DWORD PTR [OFFSET(SHADER.bLittle)][edx]
fld DWORD PTR [OFFSET(SHADER.drdx)][edx]
mov eax, [OFFSET(__GLvertex.color)][eax]
fmul fdx
fld DWORD PTR [OFFSET(SHADER.drdy)][edx]
fmul fdy // r r
fxch ST(1) // r r
fadd DWORD PTR [OFFSET(__GLcolor.r)][eax]
fld DWORD PTR [OFFSET(SHADER.dgdx)][edx]
fmul fdx
fld DWORD PTR [OFFSET(SHADER.dgdy)][edx]
fmul fdy // g g r r
fxch ST(1) // g g r r
fadd DWORD PTR [OFFSET(__GLcolor.g)][eax]
fld DWORD PTR [OFFSET(SHADER.dbdx)][edx]
fmul fdx
fld DWORD PTR [OFFSET(SHADER.dbdy)][edx]
fmul fdy // b b g g r r
fxch ST(1) // b b g g r r
fadd DWORD PTR [OFFSET(__GLcolor.b)][eax]
fxch ST(4) // r b g g b r
faddp ST(5), ST // b g g b r
fxch ST(2) // g g b b r
faddp ST(1), ST // g b b r
fxch ST(2) // b b g r
faddp ST(1), ST // b g r
fxch ST(2) // r g b
fmul __glVal65536 // R g b
fxch ST(1) // g R b
fmul __glVal65536 // G R b
fxch ST(2) // b R G
fmul __glVal65536 // B R G
fxch ST(1) // R B G
fadd __glVal128 // R B G
fxch ST(2) // G B R
fadd __glVal128 // G B R
fxch ST(1) // B G R
fadd __glVal128 // B G R
fxch ST(2) // R G B
fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.r)][edx]
fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.g)][edx]
fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.b)][edx]
}
#if FORCE_NPX_DEBUG
{
LONG rLittle = FLT_TO_FIX(gc->polygon.shader.drdy +
(__GLfloat)sh.dxLeftLittle * gc->polygon.shader.drdx);
LONG gLittle = FLT_TO_FIX(gc->polygon.shader.dgdy +
(__GLfloat)sh.dxLeftLittle * gc->polygon.shader.dgdx);
LONG bLittle = FLT_TO_FIX(gc->polygon.shader.dbdy +
(__GLfloat)sh.dxLeftLittle * gc->polygon.shader.dbdx);
LONG spanR = FLT_TO_FIX(a->color->r + fdx * sh.drdx + fdy * sh.drdy)+0x0080;
LONG spanG = FLT_TO_FIX(a->color->g + fdx * sh.dgdx + fdy * sh.dgdy)+0x0080;
LONG spanB = FLT_TO_FIX(a->color->b + fdx * sh.dbdx + fdy * sh.dbdy)+0x0080;
if (CASTFIX(sh.rLittle) != rLittle)
DbgPrint("rLittle: %x %x\n", rLittle, sh.rLittle);
if (CASTFIX(sh.gLittle) != gLittle)
DbgPrint("gLittle: %x %x\n", gLittle, sh.gLittle);
if (CASTFIX(sh.bLittle) != bLittle)
DbgPrint("bLittle: %x %x\n", bLittle, sh.bLittle);
if (spanR != GENACCEL(gc).spanValue.r)
DbgPrint("spanR: %x %x\n", spanR, GENACCEL(gc).spanValue.r);
if (spanG != GENACCEL(gc).spanValue.g)
DbgPrint("spanG: %x %x\n", spanG, GENACCEL(gc).spanValue.g);
if (spanB != GENACCEL(gc).spanValue.b)
DbgPrint("spanB: %x %x\n", spanB, GENACCEL(gc).spanValue.b);
}
_asm {
mov edx, gc
mov edi, [OFFSET(SHADER.modeFlags)][edx]
}
#endif // FORCE_NPX_DEBUG
done:
_asm {
mov eax, [OFFSET(SHADER.dxLeftBig)][edx]
mov ecx, [OFFSET(SHADER.dxLeftLittle)][edx]
cmp eax, ecx
jle littleGreater
test edi, __GL_SHADE_SMOOTH
je bigNoSmooth
mov eax, [OFFSET(SHADER.rLittle)][edx]
mov ecx, [OFFSET(SPANDELTA.r)][edx]
mov esi, [OFFSET(SHADER.gLittle)][edx]
mov ebx, [OFFSET(SPANDELTA.g)][edx]
add eax, ecx
add esi, ebx
mov [OFFSET(SHADER.rBig)][edx], eax
mov [OFFSET(SHADER.gBig)][edx], esi
mov eax, [OFFSET(SHADER.bLittle)][edx]
mov ecx, [OFFSET(SPANDELTA.b)][edx]
mov esi, [OFFSET(SHADER.zLittle)][edx]
mov ebx, [OFFSET(SHADER.dzdx)][edx]
add eax, ecx
add esi, ebx
mov [OFFSET(SHADER.bBig)][edx], eax
mov [OFFSET(SHADER.zBig)][edx], esi
bigNoSmooth:
test edi, __GL_SHADE_DEPTH_ITER
je done2
mov eax, [OFFSET(SHADER.zLittle)][edx]
mov ecx, [OFFSET(SHADER.dzdx)][edx]
add eax, ecx
mov [OFFSET(SHADER.zBig)][edx], eax
jmp done2
littleGreater:
test edi, __GL_SHADE_SMOOTH
je smallNoSmooth
mov eax, [OFFSET(SHADER.rLittle)][edx]
mov ecx, [OFFSET(SPANDELTA.r)][edx]
mov esi, [OFFSET(SHADER.gLittle)][edx]
mov ebx, [OFFSET(SPANDELTA.g)][edx]
sub eax, ecx
sub esi, ebx
mov [OFFSET(SHADER.rBig)][edx], eax
mov [OFFSET(SHADER.gBig)][edx], esi
mov eax, [OFFSET(SHADER.bLittle)][edx]
mov ecx, [OFFSET(SPANDELTA.b)][edx]
mov esi, [OFFSET(SHADER.zLittle)][edx]
mov ebx, [OFFSET(SHADER.dzdx)][edx]
sub eax, ecx
sub esi, ebx
mov [OFFSET(SHADER.bBig)][edx], eax
mov [OFFSET(SHADER.zBig)][edx], esi
smallNoSmooth:
test edi, __GL_SHADE_DEPTH_ITER
je done2
mov eax, [OFFSET(SHADER.zLittle)][edx]
mov ecx, [OFFSET(SHADER.dzdx)][edx]
sub eax, ecx
mov [OFFSET(SHADER.zBig)][edx], eax
done2:
}
#else _X86_
__GLfloat zLittle;
dxLeftLittle = (__GLfloat)sh.dxLeftLittle;
if (sh.modeFlags & __GL_SHADE_SMOOTH) {
if (sh.modeFlags & __GL_SHADE_RGB) {
CASTFIX(sh.rLittle) =
FLT_TO_FIX(gc->polygon.shader.drdy +
dxLeftLittle * gc->polygon.shader.drdx);
CASTFIX(sh.gLittle) =
FLT_TO_FIX(gc->polygon.shader.dgdy +
dxLeftLittle * gc->polygon.shader.dgdx);
CASTFIX(sh.bLittle) =
FLT_TO_FIX(gc->polygon.shader.dbdy +
dxLeftLittle * gc->polygon.shader.dbdx);
GENACCEL(gc).spanValue.r =
FLT_TO_FIX(a->color->r + fdx * sh.drdx + fdy * sh.drdy);
GENACCEL(gc).spanValue.g =
FLT_TO_FIX(a->color->g + fdx * sh.dgdx + fdy * sh.dgdy);
GENACCEL(gc).spanValue.b =
FLT_TO_FIX(a->color->b + fdx * sh.dbdx + fdy * sh.dbdy);
} else {
CASTFIX(sh.rLittle) =
FLT_TO_FIX(gc->polygon.shader.drdy +
dxLeftLittle * gc->polygon.shader.drdx);
GENACCEL(gc).spanValue.r =
FLT_TO_FIX(a->color->r + fdx * sh.drdx + fdy * sh.drdy);
}
}
if (sh.modeFlags & __GL_SHADE_DEPTH_ITER) {
zOffset = bPolygonOffset ? __glPolygonOffsetZ(gc) : 0.0f;
sh.frag.z = FTOL((a->window.z + fdx*sh.dzdxf +
(fdy*sh.dzdyf + zOffset)) * GENACCEL(gc).zScale);
zLittle = ((sh.dzdyf + sh.dxLeftLittle * sh.dzdxf)) * GENACCEL(gc).zScale;
sh.zLittle = FTOL(zLittle);
}
if (sh.dxLeftBig > sh.dxLeftLittle) {
if (sh.modeFlags & __GL_SHADE_SMOOTH) {
CASTFIX(sh.rBig) = CASTFIX(sh.rLittle) + GENACCEL(gc).spanDelta.r;
CASTFIX(sh.gBig) = CASTFIX(sh.gLittle) + GENACCEL(gc).spanDelta.g;
CASTFIX(sh.bBig) = CASTFIX(sh.bLittle) + GENACCEL(gc).spanDelta.b;
}
if (sh.modeFlags & __GL_SHADE_DEPTH_ITER) {
sh.zBig = sh.zLittle + sh.dzdx;
}
} else {
if (sh.modeFlags & __GL_SHADE_SMOOTH) {
CASTFIX(sh.rBig) = CASTFIX(sh.rLittle) - GENACCEL(gc).spanDelta.r;
CASTFIX(sh.gBig) = CASTFIX(sh.gLittle) - GENACCEL(gc).spanDelta.g;
CASTFIX(sh.bBig) = CASTFIX(sh.bLittle) - GENACCEL(gc).spanDelta.b;
}
if (sh.modeFlags & __GL_SHADE_DEPTH_ITER) {
sh.zBig = sh.zLittle - sh.dzdx;
}
}
#endif
}
/*
** __fastGenSetInitialParametersTexRGBA
**
** Seeds the edge-walking state for one triangle edge: the per-scanline
** "Little" steps in gc->polygon.shader, the start-of-span values in
** GENACCEL(gc).spanValue (and shader.frag for z / qw), and finally the
** "Big" steps.  Which attributes are touched is selected by
** shader.modeFlags:
**
**   Color    - only when __GL_SHADE_SMOOTH is set, and skipped completely
**              when __GL_SHADE_TEXTURE is set with a texture environment
**              mode of GL_REPLACE or GL_DECAL.  Alpha is set up only when
**              __GL_BLEND_ENABLE is set in state.enables.general.
**   Texture  - when __GL_SHADE_TEXTURE is set.  If the perspective
**              correction hint is GL_NICEST, s and t are pre-multiplied by
**              a->window.w and frag.qw / qwLittle are produced as well.
**   Depth    - when __GL_SHADE_DEPTH_ITER is set; includes the polygon
**              offset term.
**
** Every start value has the form
**      attr(a) + fdx * dAttr/dx + fdy * dAttr/dy
** and every Little step has the form
**      dAttr/dy + dxLeftLittle * dAttr/dx
** both scaled to the integer format the span code expects (__glVal65536
** for r/g/b, aAccelScale for alpha, texXScale/texYScale for s/t, zScale
** for z).  A Big step is the Little step plus one spanDelta when
** dxLeftBig > dxLeftLittle, otherwise minus one spanDelta.
**
** Parameters:
**   gc   - rendering context; shader deltas are read from it and all
**          results are written back into it.
**   a    - vertex supplying the base color, texture coordinate and window
**          z / w values.
**   fdx  - x offset multiplied into each d/dx term.
**   fdy  - y offset multiplied into each d/dy term.
**
** Two implementations are provided.  The x86 path is hand-scheduled x87
** code; the #else path is the portable C equivalent.  Visible differences
** between them: the asm path derives aBig whenever __GL_SHADE_SMOOTH is
** set and always derives qwBig when texturing, while the C path derives
** aBig only when blending is enabled.
**
** Register convention in the asm path: edx holds gc and edi holds
** shader.modeFlags across the separate _asm blocks.  Both are reloaded
** after every stretch of compiled C code (the __glPolygonOffsetZ call and
** the FORCE_NPX_DEBUG checks).
*/
void __fastGenSetInitialParametersTexRGBA(
__GLcontext *gc,
const __GLvertex *a,
__GLfloat fdx,
__GLfloat fdy)
{
#define sh gc->polygon.shader
    __GLfloat zOffset;
    __GLfloat dxLeftLittle;
#if _X86_ && ENABLE_ASM
    // Scratch for the depth integer stores; only its low DWORD is consumed.
    LARGE_INTEGER temp;
    _asm{
        mov edx, gc
        mov edi, [OFFSET(SHADER.modeFlags)][edx]
        // Start converting the integer dxLeftLittle to float; it is stored
        // to the local on whichever path is taken below.
        fild DWORD PTR [OFFSET(SHADER.dxLeftLittle)][edx]
        test edi, __GL_SHADE_TEXTURE
        mov eax, [OFFSET(__GLcontext.state.texture.env)][edx]
        je notTexture
        mov ebx, [OFFSET(__GLtextureEnvState.mode)][eax]
        cmp ebx, GL_REPLACE
        je fastReplace
        cmp ebx, GL_DECAL
        jne notTexture
    fastReplace:
        // Textured with REPLACE or DECAL: no color setup at all.
        fstp dxLeftLittle
        jmp colorDone
    notTexture:
        test edi, __GL_SHADE_SMOOTH
        // fstp does not modify EFLAGS, so the je below still acts on the
        // result of the test above.
        fstp dxLeftLittle
        je colorDone
    }
    // r/g/b Little steps: (dxLeftLittle * d?dx + d?dy) * 65536.
    _asm
    {
        mov eax, a
        fld dxLeftLittle
        fmul DWORD PTR [OFFSET(SHADER.drdx)][edx]
        fld dxLeftLittle
        fmul DWORD PTR [OFFSET(SHADER.dgdx)][edx] // g r
        fxch ST(1) // r g
        fadd DWORD PTR [OFFSET(SHADER.drdy)][edx] // R g
        fld dxLeftLittle
        fmul DWORD PTR [OFFSET(SHADER.dbdx)][edx] // b R g
        fxch ST(2) // g R b
        fadd DWORD PTR [OFFSET(SHADER.dgdy)][edx] // G R b
        fxch ST(2) // b R G
        fadd DWORD PTR [OFFSET(SHADER.dbdy)][edx] // B R G
        fxch ST(1) // R B G
        fmul __glVal65536 // sR B G
        fxch ST(2) // G B sR
        fmul __glVal65536 // sG B sR
        fxch ST(1) // B sG sR
        fmul __glVal65536 // sB sG sR
        fxch ST(2) // sR sG sB
        fistp DWORD PTR [OFFSET(SHADER.rLittle)][edx]
        // eax: vertex pointer -> pointer to the vertex's color
        mov eax, [OFFSET(__GLvertex.color)][eax]
        fistp DWORD PTR [OFFSET(SHADER.gLittle)][edx]
        fistp DWORD PTR [OFFSET(SHADER.bLittle)][edx]
        // r/g/b span start values:
        //   (color + fdx * d?dx + fdy * d?dy) * 65536 + 128
        // Notation: ?x = fdx * d?dx, ?y = fdy * d?dy, ?X = ?x + color.?
        fld DWORD PTR [OFFSET(SHADER.drdx)][edx]
        fmul fdx
        fld DWORD PTR [OFFSET(SHADER.drdy)][edx]
        fmul fdy // ry rx
        fxch ST(1) // rx ry
        fadd DWORD PTR [OFFSET(__GLcolor.r)][eax] // rX ry
        fld DWORD PTR [OFFSET(SHADER.dgdx)][edx]
        fmul fdx
        fld DWORD PTR [OFFSET(SHADER.dgdy)][edx]
        fmul fdy // gy gx rX ry
        fxch ST(1) // gx gy rX ry
        fadd DWORD PTR [OFFSET(__GLcolor.g)][eax] // gX gy rX ry
        fld DWORD PTR [OFFSET(SHADER.dbdx)][edx]
        fmul fdx
        fld DWORD PTR [OFFSET(SHADER.dbdy)][edx]
        fmul fdy // by bx gX gy rX ry
        fxch ST(1) // bx by gX gy rX ry
        fadd DWORD PTR [OFFSET(__GLcolor.b)][eax] // bX by gX gy rX ry
        fxch ST(4) // rX by gX gy bX ry
        faddp ST(5), ST // by gX gy bX r
        fxch ST(2) // gy gX by bX r
        faddp ST(1), ST // g by bX r
        fxch ST(2) // bX by g r
        faddp ST(1), ST // b g r
        fxch ST(2) // r g b
        fmul __glVal65536 // R g b
        fxch ST(1) // g R b
        fmul __glVal65536 // G R b
        fxch ST(2) // b R G
        fmul __glVal65536 // B R G
        fxch ST(1) // R B G
        fadd __glVal128 // R B G
        fxch ST(2) // G B R
        fadd __glVal128 // G B R
        fxch ST(1) // B G R
        fadd __glVal128 // B G R
        fxch ST(2) // R G B
        fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.r)][edx]
        fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.g)][edx]
        // Fetch the enable bits for the blend test in the next block.
        mov ebx, [OFFSET(__GLcontext.state.enables.general)][edx]
        fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.b)][edx]
    }
    _asm{
        test ebx, __GL_BLEND_ENABLE
        je noBlend
    }
    // Alpha: same two formulas as r/g/b, but scaled by aAccelScale instead
    // of 65536.  Only the span start value receives the +128.
    _asm{
        mov eax, a
        fld DWORD PTR [OFFSET(SHADER.dadx)][edx]
        mov eax, DWORD PTR [OFFSET(__GLvertex.color)][eax]
        fmul fdx
        fld DWORD PTR [OFFSET(SHADER.dady)][edx]
        fmul fdy // ay ax
        fxch ST(1) // ax ay
        fadd DWORD PTR [OFFSET(__GLcolor.a)][eax] // aX ay
        fld dxLeftLittle
        fmul DWORD PTR [OFFSET(SHADER.dadx)][edx] // al aX ay
        fxch ST(1) // aX al ay
        faddp ST(2), ST // al a
        fadd DWORD PTR [OFFSET(SHADER.dady)][edx] // al a (+1)
        fxch ST(1) // a al
        fmul DWORD PTR [OFFSET(GENGCACCEL.aAccelScale)][edx]
        // A al
        fxch ST(1) // al A
        fmul DWORD PTR [OFFSET(GENGCACCEL.aAccelScale)][edx]
        // AL A (+1)
        fxch ST(1) // A AL
        fadd __glVal128 // A AL (+1)
        fxch ST(1) // AL A
        fistp DWORD PTR [OFFSET(SHADER.aLittle)][edx]
        fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.a)][edx]
    }
#if FORCE_NPX_DEBUG
    // Cross-check the asm alpha results against the C formulas.
    {
        LONG aLittle = FTOL((gc->polygon.shader.dady +
            (__GLfloat)sh.dxLeftLittle * gc->polygon.shader.dadx) *
            GENACCEL(gc).aAccelScale);
        LONG spanA = FTOL((a->color->a + fdx * sh.dadx + fdy * sh.dady) *
            GENACCEL(gc).aAccelScale)+0x0080;
        if (aLittle != CASTFIX(sh.aLittle))
            DbgPrint("sh.aLittle %x %x\n", aLittle, CASTFIX(sh.aLittle));
        if (spanA != GENACCEL(gc).spanValue.a)
            DbgPrint("spanValue.a %x %x\n", spanA, GENACCEL(gc).spanValue.a);
    }
    // The C code above may have clobbered the cached registers.
    _asm {
        mov edx, gc
        mov edi, [OFFSET(SHADER.modeFlags)][edx]
    }
#endif // FORCE_NPX_DEBUG
noBlend:
#if FORCE_NPX_DEBUG
    // Cross-check the asm r/g/b results against the C formulas.
    {
        LONG rLittle = FLT_TO_FIX(gc->polygon.shader.drdy +
            (__GLfloat)sh.dxLeftLittle * gc->polygon.shader.drdx);
        LONG gLittle = FLT_TO_FIX(gc->polygon.shader.dgdy +
            (__GLfloat)sh.dxLeftLittle * gc->polygon.shader.dgdx);
        LONG bLittle = FLT_TO_FIX(gc->polygon.shader.dbdy +
            (__GLfloat)sh.dxLeftLittle * gc->polygon.shader.dbdx);
        LONG spanR = FLT_TO_FIX(a->color->r + fdx * sh.drdx + fdy * sh.drdy)+0x0080;
        LONG spanG = FLT_TO_FIX(a->color->g + fdx * sh.dgdx + fdy * sh.dgdy)+0x0080;
        LONG spanB = FLT_TO_FIX(a->color->b + fdx * sh.dbdx + fdy * sh.dbdy)+0x0080;
        // Print the same CASTFIX'd value that was compared, as the alpha
        // and s/t diagnostics in this function do.
        if (CASTFIX(sh.rLittle) != rLittle)
            DbgPrint("rLittle: %x %x\n", rLittle, CASTFIX(sh.rLittle));
        if (CASTFIX(sh.gLittle) != gLittle)
            DbgPrint("gLittle: %x %x\n", gLittle, CASTFIX(sh.gLittle));
        if (CASTFIX(sh.bLittle) != bLittle)
            DbgPrint("bLittle: %x %x\n", bLittle, CASTFIX(sh.bLittle));
        if (spanR != GENACCEL(gc).spanValue.r)
            DbgPrint("spanR: %x %x\n", spanR, GENACCEL(gc).spanValue.r);
        if (spanG != GENACCEL(gc).spanValue.g)
            DbgPrint("spanG: %x %x\n", spanG, GENACCEL(gc).spanValue.g);
        if (spanB != GENACCEL(gc).spanValue.b)
            DbgPrint("spanB: %x %x\n", spanB, GENACCEL(gc).spanValue.b);
    }
    _asm {
        mov edx, gc
        mov edi, [OFFSET(SHADER.modeFlags)][edx]
    }
#endif // FORCE_NPX_DEBUG
colorDone:
    _asm{
        test edi, __GL_SHADE_TEXTURE
        je doneTexture
        mov ebx, [OFFSET(__GLcontext.state.hints.perspectiveCorrection)][edx]
        cmp ebx, GL_NICEST
        je nicestTex
    }
    // Texture setup without perspective correction:
    //   sLittle = (dxLeftLittle * dsdx + dsdy) * texXScale
    //   tLittle = (dxLeftLittle * dtdx + dtdy) * texYScale
    //   spanValue.s = (a->texture.x + fdx * dsdx + fdy * dsdy) * texXScale
    //   spanValue.t = (a->texture.y + fdx * dtdx + fdy * dtdy) * texYScale
    _asm{
        mov eax, a
        fld dxLeftLittle
        fmul DWORD PTR [OFFSET(SHADER.dsdx)][edx]
        fld dxLeftLittle
        fmul DWORD PTR [OFFSET(SHADER.dtdx)][edx]
        // dt ds
        fld fdx
        fmul DWORD PTR [OFFSET(SHADER.dsdx)][edx]
        fld fdy
        fmul DWORD PTR [OFFSET(SHADER.dsdy)][edx]
        fxch ST(1) // s s dt ds
        fadd DWORD PTR [OFFSET(__GLvertex.texture.x)][eax]
        fxch ST(3) // ds s dt s
        fadd DWORD PTR [OFFSET(SHADER.dsdy)][edx]
        fld fdx
        fmul DWORD PTR [OFFSET(SHADER.dtdx)][edx]
        fld fdy
        fmul DWORD PTR [OFFSET(SHADER.dtdy)][edx]
        fxch ST(1) // t t ds s dt s
        fadd DWORD PTR [OFFSET(__GLvertex.texture.y)][eax]
        fxch ST(4) // dt t ds s t s
        fadd DWORD PTR [OFFSET(SHADER.dtdy)][edx]
        fxch ST(5) // s t ds s t dt
        faddp ST(3), ST // t ds s t dt
        faddp ST(3), ST // ds s t dt
        fmul DWORD PTR [OFFSET(GENGCACCEL.texXScale)][edx]
        // DS s t dt
        fxch ST(3) // dt s t DS
        fmul DWORD PTR [OFFSET(GENGCACCEL.texYScale)][edx]
        // DT s t DS
        fxch ST(1) // s DT t DS
        fmul DWORD PTR [OFFSET(GENGCACCEL.texXScale)][edx]
        // S DT t DS
        fxch ST(2) // t DT S DS
        fmul DWORD PTR [OFFSET(GENGCACCEL.texYScale)][edx]
        // T DT S DS
        fxch ST(3) // DS DT S T
        fistp DWORD PTR [OFFSET(SHADER.sLittle)][edx]
        fistp DWORD PTR [OFFSET(SHADER.tLittle)][edx]
        fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.s)][edx]
        fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.t)][edx]
#if !FORCE_NPX_DEBUG
        jmp doneTexture
#endif
    }
#if FORCE_NPX_DEBUG
    // Cross-check the non-perspective texture results, then skip the
    // GL_NICEST path.
    {
        LONG sLittle = FTOL((gc->polygon.shader.dsdy + (__GLfloat)sh.dxLeftLittle *
            gc->polygon.shader.dsdx) * GENACCEL(gc).texXScale);
        LONG tLittle = FTOL((gc->polygon.shader.dtdy + (__GLfloat)sh.dxLeftLittle *
            gc->polygon.shader.dtdx) * GENACCEL(gc).texYScale);
        LONG spanS = FTOL((a->texture.x +
            (fdx * sh.dsdx) + (fdy * sh.dsdy)) * GENACCEL(gc).texXScale);
        LONG spanT = FTOL((a->texture.y +
            (fdx * sh.dtdx) + (fdy * sh.dtdy)) * GENACCEL(gc).texYScale);
        if (sLittle != CASTFIX(sh.sLittle))
            DbgPrint("sLittle %x %x\n", sLittle, CASTFIX(sh.sLittle));
        if (tLittle != CASTFIX(sh.tLittle))
            DbgPrint("tLittle %x %x\n", tLittle, CASTFIX(sh.tLittle));
        if (GENACCEL(gc).spanValue.s != spanS)
            DbgPrint("spanValue.s %x %x\n", spanS, GENACCEL(gc).spanValue.s);
        if (GENACCEL(gc).spanValue.t != spanT)
            DbgPrint("spanValue.t %x %x\n", spanT, GENACCEL(gc).spanValue.t);
    }
    _asm {
        mov edx, gc
        mov edi, [OFFSET(SHADER.modeFlags)][edx]
        jmp doneTexture;
    }
#endif // FORCE_NPX_DEBUG
nicestTex:
    // Texture setup with the GL_NICEST hint.  sLittle / tLittle are as
    // above; in addition:
    //   frag.qw     = a->texture.w * a->window.w + fdx * dqwdx + fdy * dqwdy
    //   qwLittle    = dqwdy + dxLeftLittle * dqwdx
    //   spanValue.s = (a->texture.x * a->window.w + fdx*dsdx + fdy*dsdy) * texXScale
    //   spanValue.t = (a->texture.y * a->window.w + fdx*dtdx + fdy*dtdy) * texYScale
    _asm{
        mov eax, a
        fld dxLeftLittle
        fmul DWORD PTR [OFFSET(SHADER.dsdx)][edx]
        fld dxLeftLittle
        fmul DWORD PTR [OFFSET(SHADER.dtdx)][edx]
        // dt ds
        fld DWORD PTR fdx
        fmul DWORD PTR [OFFSET(SHADER.dqwdx)][edx]
        // qwx dt ds
        fxch ST(2)
        // ds dt qwx
        fadd DWORD PTR [OFFSET(SHADER.dsdy)][edx]
        fxch ST(1) // dt ds qwx
        fadd DWORD PTR [OFFSET(SHADER.dtdy)][edx]
        fxch ST(2) // qwx ds dt
        fld DWORD PTR fdy
        fmul DWORD PTR [OFFSET(SHADER.dqwdy)][edx]
        // qwy qwx ds dt
        fxch ST(2) // ds qwx qwy dt
        fmul DWORD PTR [OFFSET(GENGCACCEL.texXScale)][edx]
        fxch ST(3) // dt qwx qwy ds
        fmul DWORD PTR [OFFSET(GENGCACCEL.texYScale)][edx]
        fxch ST(2) // qwy qwx dt ds
        fld DWORD PTR [OFFSET(__GLvertex.texture.w)][eax]
        fmul DWORD PTR [OFFSET(__GLvertex.window.w)][eax]
        // qww qwy qwx dt ds
        fxch ST(4) // ds qwy qwx dt qww
        fistp DWORD PTR [OFFSET(SHADER.sLittle)][edx]
        // qwy qwx dt qww
        faddp ST(1), ST // qw dt qww
        fxch ST(1) // dt qw qww
        fistp DWORD PTR [OFFSET(SHADER.tLittle)][edx]
        // qw qww
        fld DWORD PTR [OFFSET(SHADER.dqwdx)][edx]
        fmul dxLeftLittle // lt qw qww
        fxch ST(1) // qw lt qww
        faddp ST(2), ST // lt qw
        fld DWORD PTR [OFFSET(__GLvertex.texture.x)][eax]
        fmul DWORD PTR [OFFSET(__GLvertex.window.w)][eax]
        // s lt qw
        fxch ST(1) // lt s qw
        fadd DWORD PTR [OFFSET(SHADER.dqwdy)][edx]
        fxch ST(1) // s lt qw
        fld fdx
        fmul DWORD PTR [OFFSET(SHADER.dsdx)][edx]
        fld fdy
        fmul DWORD PTR [OFFSET(SHADER.dsdy)][edx]
        fxch ST(1) // s s s lt qw
        faddp ST(2), ST // s s lt qw
        fld DWORD PTR [OFFSET(__GLvertex.texture.y)][eax]
        fmul DWORD PTR [OFFSET(__GLvertex.window.w)][eax]
        fld fdx
        fmul DWORD PTR [OFFSET(SHADER.dtdx)][edx]
        // t t s s lt qw
        fxch ST(2) // s t t s lt qw
        faddp ST(3), ST // t t s lt qw
        fld fdy
        fmul DWORD PTR [OFFSET(SHADER.dtdy)][edx]
        fxch ST(1) // t t t s lt qw
        faddp ST(2), ST // t t s lt qw
        fxch ST(2) // s t t lt qw
        fmul DWORD PTR [OFFSET(GENGCACCEL.texXScale)][edx]
        // S t t lt qw
        fxch ST(4) // qw t t lt S
        // frag.qw and qwLittle are stored as floats (fstp, not fistp).
        fstp DWORD PTR [OFFSET(SHADER.frag.qw)][edx]
        faddp ST(1), ST // t lt S
        fxch ST(1) // lt t S
        fstp DWORD PTR [OFFSET(SHADER.qwLittle)][edx]
        // t S
        fmul DWORD PTR [OFFSET(GENGCACCEL.texYScale)][edx] // (+1)
        // T S
        fxch ST(1) // S T
        fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.s)][edx]
        fistp DWORD PTR [OFFSET(GENGCACCEL.spanValue.t)][edx]
    }
#if FORCE_NPX_DEBUG
    // Cross-check the GL_NICEST texture results against the C formulas.
    {
        LONG sLittle = FTOL((gc->polygon.shader.dsdy + dxLeftLittle *
            gc->polygon.shader.dsdx) * GENACCEL(gc).texXScale);
        LONG tLittle = FTOL((gc->polygon.shader.dtdy + dxLeftLittle *
            gc->polygon.shader.dtdx) * GENACCEL(gc).texYScale);
        __GLfloat qw = (a->texture.w * a->window.w) + (fdx * sh.dqwdx) +
            (fdy * sh.dqwdy);
        __GLfloat qwLittle = sh.dqwdy + dxLeftLittle * sh.dqwdx;
        LONG spanS = FTOL(((a->texture.x * a->window.w) +
            (fdx * sh.dsdx) + (fdy * sh.dsdy)) * GENACCEL(gc).texXScale);
        LONG spanT = FTOL(((a->texture.y * a->window.w) +
            (fdx * sh.dtdx) + (fdy * sh.dtdy)) * GENACCEL(gc).texYScale);
        if (sLittle != CASTFIX(sh.sLittle))
            DbgPrint("sLittle %x %x\n", sLittle, CASTFIX(sh.sLittle));
        if (tLittle != CASTFIX(sh.tLittle))
            DbgPrint("tLittle %x %x\n", tLittle, CASTFIX(sh.tLittle));
        if (qw != sh.frag.qw)
            DbgPrint("qw %f %f\n", qw, sh.frag.qw);
        // Labelled distinctly from the frag.qw message above so the two
        // mismatches can be told apart in the debug output.
        if (qwLittle != sh.qwLittle)
            DbgPrint("qwLittle %f %f\n", qwLittle, sh.qwLittle);
        if (GENACCEL(gc).spanValue.s != spanS)
            DbgPrint("spanValue.s %x %x\n", spanS, GENACCEL(gc).spanValue.s);
        if (GENACCEL(gc).spanValue.t != spanT)
            DbgPrint("spanValue.t %x %x\n", spanT, GENACCEL(gc).spanValue.t);
    }
    _asm {
        mov edx, gc
        mov edi, [OFFSET(SHADER.modeFlags)][edx]
    }
#endif // FORCE_NPX_DEBUG
doneTexture:
    _asm{
        test edi, __GL_SHADE_DEPTH_ITER
        je noZ
    }
    _asm{
        mov ebx, [OFFSET(__GLcontext.state.enables.general)][edx]
        // Default zOffset to the bit pattern of __glZero; it is replaced
        // below only when polygon offset fill is enabled.
        mov ecx, __glZero
        test ebx, __GL_POLYGON_OFFSET_FILL_ENABLE
        mov zOffset, ecx
        je noPolyOffset
    }
    zOffset = __glPolygonOffsetZ(gc);
    // The call may have clobbered the cached registers.
    _asm{
        mov edx, gc
        mov edi, [OFFSET(SHADER.modeFlags)][edx]
    }
noPolyOffset:
    // Depth:
    //   frag.z  = (a->window.z + fdx * dzdxf + fdy * dzdyf + zOffset) * zScale
    //   zLittle = (dxLeftLittle * dzdxf + dzdyf) * zScale
    // Notation: zx = fdx * dzdxf, zy = fdy * dzdyf, zX = zx + window.z
    // (and, after the fadd zOffset, + zOffset as well).
    _asm{
        mov eax, a
        fld fdx
        fmul DWORD PTR [OFFSET(SHADER.dzdxf)][edx]
        fld fdy
        fmul DWORD PTR [OFFSET(SHADER.dzdyf)][edx]
        // zy zx
        fxch ST(1)
        // zx zy
        fadd DWORD PTR [OFFSET(__GLvertex.window.z)][eax]
        // zX zy
        fld dxLeftLittle
        fmul DWORD PTR [OFFSET(SHADER.dzdxf)][edx]
        // zl zX zy
        fxch ST(1) // zX zl zy
        fadd zOffset
        fxch ST(1) // zl zX zy
        fadd DWORD PTR [OFFSET(SHADER.dzdyf)][edx]
        // ZL zX zy (+1)
        fxch ST(1) // zX ZL zy
        faddp ST(2), ST // ZL z
        fmul DWORD PTR [OFFSET(GENGCACCEL.zScale)][edx]
        // ZL z (+1)
        fxch ST(1) // z ZL
        fmul DWORD PTR [OFFSET(GENGCACCEL.zScale)][edx]
        // Z ZL
        fxch ST(1) // ZL Z
        // NOTE(review): temp is a LARGE_INTEGER, so these integer stores
        // are presumably 64 bits wide (letting scaled z exceed the signed
        // 32-bit range); only the low DWORD is kept - confirm.
        fistp temp
        mov eax, DWORD PTR temp
        mov DWORD PTR [OFFSET(SHADER.zLittle)][edx], eax
        fistp temp
        mov eax, DWORD PTR temp
        mov DWORD PTR [OFFSET(SHADER.frag.z)][edx], eax
    }
#if FORCE_NPX_DEBUG
    // Cross-check the depth results against the C formulas.
    {
        ULONG fragZ = FTOL((a->window.z + fdx*sh.dzdxf +
            (fdy*sh.dzdyf + zOffset)) * GENACCEL(gc).zScale);
        __GLfloat zLittle = ((sh.dzdyf + sh.dxLeftLittle * sh.dzdxf)) * GENACCEL(gc).zScale;
        LONG shZLittle = FTOL(zLittle);
        if (sh.frag.z != fragZ)
            DbgPrint("fragZ %x %x\n", fragZ, sh.frag.z);
        if (sh.zLittle != shZLittle)
            DbgPrint("sh.zLittle %x %x\n", shZLittle, sh.zLittle);
    }
    _asm {
        mov edx, gc
        mov edi, [OFFSET(SHADER.modeFlags)][edx]
    }
#endif // FORCE_NPX_DEBUG
noZ:
    // Derive the Big steps from the Little steps: one spanDelta is added
    // when dxLeftBig > dxLeftLittle, subtracted otherwise.
    _asm {
        mov eax, [OFFSET(SHADER.dxLeftBig)][edx]
        mov ecx, [OFFSET(SHADER.dxLeftLittle)][edx]
        cmp eax, ecx
        jle littleGreater
        // dxLeftBig > dxLeftLittle: Big = Little + delta
        test edi, __GL_SHADE_SMOOTH
        je bigNoSmooth
        mov eax, [OFFSET(SHADER.rLittle)][edx]
        mov ecx, [OFFSET(SPANDELTA.r)][edx]
        mov esi, [OFFSET(SHADER.gLittle)][edx]
        mov ebx, [OFFSET(SPANDELTA.g)][edx]
        add eax, ecx
        add esi, ebx
        mov [OFFSET(SHADER.rBig)][edx], eax
        mov [OFFSET(SHADER.gBig)][edx], esi
        mov eax, [OFFSET(SHADER.bLittle)][edx]
        mov ecx, [OFFSET(SPANDELTA.b)][edx]
        mov esi, [OFFSET(SHADER.aLittle)][edx]
        mov ebx, [OFFSET(SPANDELTA.a)][edx]
        add eax, ecx
        add esi, ebx
        mov [OFFSET(SHADER.bBig)][edx], eax
        mov [OFFSET(SHADER.aBig)][edx], esi
    bigNoSmooth:
        test edi, __GL_SHADE_TEXTURE
        je bigNoTexture
        // qwBig is a float sum; its load and store are interleaved with
        // the integer s/t arithmetic.
        fld DWORD PTR [OFFSET(SHADER.qwLittle)][edx]
        mov eax, [OFFSET(SHADER.sLittle)][edx]
        fadd DWORD PTR [OFFSET(SHADER.dqwdx)][edx]
        mov ecx, [OFFSET(SPANDELTA.s)][edx]
        mov esi, [OFFSET(SHADER.tLittle)][edx]
        mov ebx, [OFFSET(SPANDELTA.t)][edx]
        add eax, ecx
        add esi, ebx
        mov [OFFSET(SHADER.sBig)][edx], eax
        mov [OFFSET(SHADER.tBig)][edx], esi
        fstp DWORD PTR [OFFSET(SHADER.qwBig)][edx]
    bigNoTexture:
        test edi, __GL_SHADE_DEPTH_ITER
        je done
        mov eax, [OFFSET(SHADER.zLittle)][edx]
        mov ecx, [OFFSET(SHADER.dzdx)][edx]
        add eax, ecx
        mov [OFFSET(SHADER.zBig)][edx], eax
        jmp done
    littleGreater:
        // dxLeftBig <= dxLeftLittle: Big = Little - delta
        test edi, __GL_SHADE_SMOOTH
        je smallNoSmooth
        mov eax, [OFFSET(SHADER.rLittle)][edx]
        mov ecx, [OFFSET(SPANDELTA.r)][edx]
        mov esi, [OFFSET(SHADER.gLittle)][edx]
        mov ebx, [OFFSET(SPANDELTA.g)][edx]
        sub eax, ecx
        sub esi, ebx
        mov [OFFSET(SHADER.rBig)][edx], eax
        mov [OFFSET(SHADER.gBig)][edx], esi
        mov eax, [OFFSET(SHADER.bLittle)][edx]
        mov ecx, [OFFSET(SPANDELTA.b)][edx]
        mov esi, [OFFSET(SHADER.aLittle)][edx]
        mov ebx, [OFFSET(SPANDELTA.a)][edx]
        sub eax, ecx
        sub esi, ebx
        mov [OFFSET(SHADER.bBig)][edx], eax
        mov [OFFSET(SHADER.aBig)][edx], esi
    smallNoSmooth:
        test edi, __GL_SHADE_TEXTURE
        je smallNoTexture
        fld DWORD PTR [OFFSET(SHADER.qwLittle)][edx]
        mov eax, [OFFSET(SHADER.sLittle)][edx]
        fsub DWORD PTR [OFFSET(SHADER.dqwdx)][edx]
        mov ecx, [OFFSET(SPANDELTA.s)][edx]
        mov esi, [OFFSET(SHADER.tLittle)][edx]
        mov ebx, [OFFSET(SPANDELTA.t)][edx]
        sub eax, ecx
        sub esi, ebx
        mov [OFFSET(SHADER.sBig)][edx], eax
        mov [OFFSET(SHADER.tBig)][edx], esi
        fstp DWORD PTR [OFFSET(SHADER.qwBig)][edx]
    smallNoTexture:
        test edi, __GL_SHADE_DEPTH_ITER
        je done
        mov eax, [OFFSET(SHADER.zLittle)][edx]
        mov ecx, [OFFSET(SHADER.dzdx)][edx]
        sub eax, ecx
        mov [OFFSET(SHADER.zBig)][edx], eax
    done:
    }
#else
    // Portable C implementation of the same setup.
    __GLfloat zLittle;
    __GLfloat tmp1, tmp2;
    dxLeftLittle = (float)sh.dxLeftLittle;
    // Don't bother with the color deltas if we're decaling or replacing
    // with textures.
    if ((gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) &&
        ((gc->state.texture.env[0].mode == GL_REPLACE) ||
         (gc->state.texture.env[0].mode == GL_DECAL))) {
        ;
    } else if (sh.modeFlags & __GL_SHADE_SMOOTH) {
        // Per-scanline color steps.
        CASTFIX(sh.rLittle) =
            FLT_TO_FIX(gc->polygon.shader.drdy +
                dxLeftLittle * gc->polygon.shader.drdx);
        CASTFIX(sh.gLittle) =
            FLT_TO_FIX(gc->polygon.shader.dgdy +
                dxLeftLittle * gc->polygon.shader.dgdx);
        CASTFIX(sh.bLittle) =
            FLT_TO_FIX(gc->polygon.shader.dbdy +
                dxLeftLittle * gc->polygon.shader.dbdx);
        // Span start colors; 0x0080 matches the __glVal128 added by the
        // asm path.
        GENACCEL(gc).spanValue.r =
            FLT_TO_FIX(a->color->r + fdx * sh.drdx + fdy * sh.drdy)+0x0080;
        GENACCEL(gc).spanValue.g =
            FLT_TO_FIX(a->color->g + fdx * sh.dgdx + fdy * sh.dgdy)+0x0080;
        GENACCEL(gc).spanValue.b =
            FLT_TO_FIX(a->color->b + fdx * sh.dbdx + fdy * sh.dbdy)+0x0080;
        if (gc->state.enables.general & __GL_BLEND_ENABLE) {
            CASTFIX(sh.aLittle) =
                FTOL((gc->polygon.shader.dady +
                    dxLeftLittle * gc->polygon.shader.dadx) *
                    GENACCEL(gc).aAccelScale);
            GENACCEL(gc).spanValue.a =
                FTOL((a->color->a + fdx * sh.dadx + fdy * sh.dady) *
                    GENACCEL(gc).aAccelScale)+0x0080;
        }
    }
    if (sh.modeFlags & __GL_SHADE_TEXTURE) {
        if (gc->state.hints.perspectiveCorrection != GL_NICEST) {
            // No perspective correction: s/t come straight from the vertex.
            tmp1 = (gc->polygon.shader.dsdy + dxLeftLittle *
                gc->polygon.shader.dsdx) * GENACCEL(gc).texXScale;
            tmp2 = (gc->polygon.shader.dtdy + dxLeftLittle *
                gc->polygon.shader.dtdx) * GENACCEL(gc).texYScale;
            CASTFIX(sh.sLittle) = FTOL(tmp1);
            CASTFIX(sh.tLittle) = FTOL(tmp2);
            tmp1 = (a->texture.x +
                (fdx * sh.dsdx) + (fdy * sh.dsdy)) * GENACCEL(gc).texXScale;
            tmp2 = (a->texture.y +
                (fdx * sh.dtdx) + (fdy * sh.dtdy)) * GENACCEL(gc).texYScale;
            GENACCEL(gc).spanValue.s = FTOL(tmp1);
            GENACCEL(gc).spanValue.t = FTOL(tmp2);
        } else {
            // GL_NICEST: s/t are multiplied by window.w and qw is tracked.
            tmp1 = (gc->polygon.shader.dsdy + dxLeftLittle *
                gc->polygon.shader.dsdx) * GENACCEL(gc).texXScale;
            tmp2 = (gc->polygon.shader.dtdy + dxLeftLittle *
                gc->polygon.shader.dtdx) * GENACCEL(gc).texYScale;
            CASTFIX(sh.sLittle) = FTOL(tmp1);
            CASTFIX(sh.tLittle) = FTOL(tmp2);
            sh.frag.qw = (a->texture.w * a->window.w) + (fdx * sh.dqwdx) +
                (fdy * sh.dqwdy);
            sh.qwLittle = sh.dqwdy + dxLeftLittle * sh.dqwdx;
            tmp1 = ((a->texture.x * a->window.w) +
                (fdx * sh.dsdx) + (fdy * sh.dsdy)) * GENACCEL(gc).texXScale;
            tmp2 = ((a->texture.y * a->window.w) +
                (fdx * sh.dtdx) + (fdy * sh.dtdy)) * GENACCEL(gc).texYScale;
            GENACCEL(gc).spanValue.s = FTOL(tmp1);
            GENACCEL(gc).spanValue.t = FTOL(tmp2);
        }
    }
    if (sh.modeFlags & __GL_SHADE_DEPTH_ITER) {
        // NOTE(review): bPolygonOffset is not declared in this function;
        // presumably it is defined elsewhere in the file and mirrors the
        // __GL_POLYGON_OFFSET_FILL_ENABLE test of the asm path - confirm.
        zOffset = bPolygonOffset ? __glPolygonOffsetZ(gc) : 0.0f;
        sh.frag.z = FTOL((a->window.z + fdx*sh.dzdxf +
            (fdy*sh.dzdyf + zOffset)) * GENACCEL(gc).zScale);
        zLittle = ((sh.dzdyf + sh.dxLeftLittle * sh.dzdxf)) * GENACCEL(gc).zScale;
        sh.zLittle = FTOL(zLittle);
    }
    // Big steps: Little plus or minus one spanDelta.
    if (sh.dxLeftBig > sh.dxLeftLittle) {
        if (sh.modeFlags & __GL_SHADE_SMOOTH) {
            CASTFIX(sh.rBig) = CASTFIX(sh.rLittle) + GENACCEL(gc).spanDelta.r;
            CASTFIX(sh.gBig) = CASTFIX(sh.gLittle) + GENACCEL(gc).spanDelta.g;
            CASTFIX(sh.bBig) = CASTFIX(sh.bLittle) + GENACCEL(gc).spanDelta.b;
            if (gc->state.enables.general & __GL_BLEND_ENABLE)
                CASTFIX(sh.aBig) = CASTFIX(sh.aLittle) + GENACCEL(gc).spanDelta.a;
        }
        if (sh.modeFlags & __GL_SHADE_TEXTURE) {
            CASTFIX(sh.sBig) = CASTFIX(sh.sLittle) + GENACCEL(gc).spanDelta.s;
            CASTFIX(sh.tBig) = CASTFIX(sh.tLittle) + GENACCEL(gc).spanDelta.t;
            sh.qwBig = sh.qwLittle + sh.dqwdx;
        }
        if (sh.modeFlags & __GL_SHADE_DEPTH_ITER) {
            sh.zBig = sh.zLittle + sh.dzdx;
        }
    } else {
        if (sh.modeFlags & __GL_SHADE_SMOOTH) {
            CASTFIX(sh.rBig) = CASTFIX(sh.rLittle) - GENACCEL(gc).spanDelta.r;
            CASTFIX(sh.gBig) = CASTFIX(sh.gLittle) - GENACCEL(gc).spanDelta.g;
            CASTFIX(sh.bBig) = CASTFIX(sh.bLittle) - GENACCEL(gc).spanDelta.b;
            if (gc->state.enables.general & __GL_BLEND_ENABLE)
                CASTFIX(sh.aBig) = CASTFIX(sh.aLittle) - GENACCEL(gc).spanDelta.a;
        }
        if (sh.modeFlags & __GL_SHADE_TEXTURE) {
            CASTFIX(sh.sBig) = CASTFIX(sh.sLittle) - GENACCEL(gc).spanDelta.s;
            CASTFIX(sh.tBig) = CASTFIX(sh.tLittle) - GENACCEL(gc).spanDelta.t;
            sh.qwBig = sh.qwLittle - sh.dqwdx;
        }
        if (sh.modeFlags & __GL_SHADE_DEPTH_ITER) {
            sh.zBig = sh.zLittle - sh.dzdx;
        }
    }
#endif
}
/**************************************************************************\
\**************************************************************************/
void FASTCALL __fastGenCalcDeltas(
__GLcontext *gc,
__GLvertex *a,
__GLvertex *b,
__GLvertex *c)
{
__GLfloat oneOverArea, t1, t2, t3, t4;
#if _X86_ && ENABLE_ASM
LARGE_INTEGER temp;
_asm{
mov edx, gc
fld __glOne
fdiv DWORD PTR [OFFSET(SHADER.area)][edx]
mov edi, [OFFSET(SHADER.modeFlags)][edx]
test edi, __GL_SHADE_RGB
je notRGB
test edi, __GL_SHADE_SMOOTH
je notSmoothRGB
}
_asm{
mov eax, a
mov ebx, b
mov ecx, c
fstp oneOverArea // finish divide
fld DWORD PTR [OFFSET(SHADER.dyAC)][edx]
mov eax, [OFFSET(__GLvertex.color)][eax]
fmul oneOverArea
fld DWORD PTR [OFFSET(SHADER.dyBC)][edx]
mov ebx, [OFFSET(__GLvertex.color)][ebx]
fmul oneOverArea // dyBC dyAC
fld DWORD PTR [OFFSET(SHADER.dxAC)][edx]
mov ecx, [OFFSET(__GLvertex.color)][ecx]
fmul oneOverArea // dxAC dyBC dyAC
fxch ST(1) // dyBC dxAC dyAC
fld DWORD PTR [OFFSET(SHADER.dxBC)][edx]
fmul oneOverArea // dxBC dyBC dxAC dyAC
fxch ST(3) // dyAC dyBC dxAC dxBC
fstp t1
fstp t2
fstp t3
fstp t4
// Now, calculate deltas:
// Red
fld DWORD PTR [OFFSET(__GLcolor.r)][eax]
fsub DWORD PTR [OFFSET(__GLcolor.r)][ecx]
fld DWORD PTR [OFFSET(__GLcolor.r)][ebx]
fsub DWORD PTR [OFFSET(__GLcolor.r)][ecx]
// drBC drAC
fld ST(1) // drAC drBC drAC
fmul t2 // drACt2 drBC drAC
fld ST(1) // drBC drACt2 drBC drAC
fmul t1 // drBCt1 drACt2 drBC drAC
fxch ST(2) // drBC drACt2 drBCt1 drAC
fmul t3 // drBCt3 drACt2 drBCt1 drAC
fxch ST(3) // drAC drACt2 drBCt1 drBCt3
fmul t4 // drACt4 drACt2 drBCt1 drBCt3
fxch ST(2) // drBCt1 drACt2 drACt4 drBCt3
fsubp ST(1), ST // drACBC drACt4 drBCt3
fld DWORD PTR [OFFSET(__GLcolor.g)][ebx]
fsub DWORD PTR [OFFSET(__GLcolor.g)][ecx]
// dgBC drACBC drACt4 drBCt3
fxch ST(2) // drACt4 drACBC dgBC drBCt3
fsubp ST(3), ST // drACBC dgBC drBCAC
fst DWORD PTR [OFFSET(SHADER.drdx)][edx]
fmul __glVal65536
// DRACBC dgBC drBCAC
fxch ST(2) // drBCAC dgBC DRACBC
fstp DWORD PTR [OFFSET(SHADER.drdy)][edx]
// dgBC DRACBC
fld DWORD PTR [OFFSET(__GLcolor.g)][eax]
fsub DWORD PTR [OFFSET(__GLcolor.g)][ecx]
// dgAC dgBC DRACBC
fxch ST(2) // DRACBC dgBC dgAC
fistp DWORD PTR [OFFSET(SPANDELTA.r)][edx]
// Green
// dgBC dgAC
fld ST(1) // dgAC dgBC dgAC
fmul t2 // dgACt2 dgBC dgAC
fld ST(1) // dgBC dgACt2 dgBC dgAC
fmul t1 // dgBCt1 dgACt2 dgBC dgAC
fxch ST(2) // dgBC dgACt2 dgBCt1 dgAC
fmul t3 // dgBCt3 dgACt2 dgBCt1 dgAC
fxch ST(3) // dgAC dgACt2 dgBCt1 dgBCt3
fmul t4 // dgACt4 dgACt2 dgBCt1 dgBCt3
fxch ST(2) // dgBCt1 dgACt2 dgACt4 dgBCt3
fsubp ST(1), ST // dgACBC dgACt4 dgBCt3
fld DWORD PTR [OFFSET(__GLcolor.b)][ebx]
fsub DWORD PTR [OFFSET(__GLcolor.b)][ecx]
// dbBC dgACBC dgACt4 dgBCt3
fxch ST(2) // dgACt4 dgACBC dbBC dgBCt3
fsubp ST(3), ST // dgACBC dbBC dgBCAC
fst DWORD PTR [OFFSET(SHADER.dgdx)][edx]
fmul __glVal65536
// DGACBC dbBC dgBCAC
fxch ST(2) // dgBCAC dbBC DGACBC
fstp DWORD PTR [OFFSET(SHADER.dgdy)][edx]
// dbBC DGACBC
fld DWORD PTR [OFFSET(__GLcolor.b)][eax]
fsub DWORD PTR [OFFSET(__GLcolor.b)][ecx]
// dbAC dbBC DGACBC
fxch ST(2) // DGACBC dbBC dbAC
fistp DWORD PTR [OFFSET(SPANDELTA.g)][edx]
// Blue
// dbBC dbAC
fld ST(1) // dbAC dbBC dbAC
fmul t2 // dbACt2 dbBC dbAC
fld ST(1) // dbBC dbACt2 dbBC dbAC
fmul t1 // dbBCt1 dbACt2 dbBC dbAC
fxch ST(2) // dbBC dbACt2 dbBCt1 dbAC
fmul t3 // dbBCt3 dbACt2 dbBCt1 dbAC
fxch ST(3) // dbAC dbACt2 dbBCt1 dbBCt3
fmul t4 // dbACt4 dbACt2 dbBCt1 dbBCt3
fxch ST(2) // dbBCt1 dbACt2 dbACt4 dbBCt3
fsubp ST(1), ST // dbACBC dbACt4 dbBCt3
fxch ST(1) // dbACt4 dbACBC dbBCt3
fsubp ST(2), ST // dbACBC dbBCAC (+1)
fst DWORD PTR [OFFSET(SHADER.dbdx)][edx]
fmul __glVal65536
// DBACBC dbBCAC
fxch ST(1) // dbBCAC DBACBC
fstp DWORD PTR [OFFSET(SHADER.dbdy)][edx]
fistp DWORD PTR [OFFSET(SPANDELTA.b)][edx]
mov ebx, [OFFSET(GENGCACCEL.__fastSmoothSpanFuncPtr)][edx]
mov [OFFSET(GENGCACCEL.__fastSpanFuncPtr)][edx], ebx
mov eax, [OFFSET(SPANDELTA.r)][edx]
mov ebx, [OFFSET(SPANDELTA.g)][edx]
mov ecx, [OFFSET(SPANDELTA.b)][edx]
or eax, ebx
or eax, ecx
jne notZeroDelta
mov eax, [OFFSET(GENGCACCEL.flags)][edx]
test eax, GEN_FASTZBUFFER
jne notZeroDelta
mov ebx, [OFFSET(GENGCACCEL.__fastFlatSpanFuncPtr)][edx]
mov [OFFSET(GENGCACCEL.__fastSpanFuncPtr)][edx], ebx
notZeroDelta:
#if !FORCE_NPX_DEBUG
jmp colorDone
#endif
}
#if FORCE_NPX_DEBUG
{
__GLfloat drAC, dgAC, dbAC, daAC;
__GLfloat drBC, dgBC, dbBC, daBC;
__GLcolor *ac, *bc, *cc;
__GLfloat ft1 = gc->polygon.shader.dyAC * oneOverArea;
__GLfloat ft2 = gc->polygon.shader.dyBC * oneOverArea;
__GLfloat ft3 = gc->polygon.shader.dxAC * oneOverArea;
__GLfloat ft4 = gc->polygon.shader.dxBC * oneOverArea;
__GLfloat drdx;
__GLfloat drdy;
__GLfloat dgdx;
__GLfloat dgdy;
__GLfloat dbdx;
__GLfloat dbdy;
LONG spanR, spanG, spanB;
ac = a->color;
bc = b->color;
cc = c->color;
drAC = ac->r - cc->r;
drBC = bc->r - cc->r;
dgAC = ac->g - cc->g;
dgBC = bc->g - cc->g;
dbAC = ac->b - cc->b;
dbBC = bc->b - cc->b;
drdx = drAC * t2 - drBC * t1;
drdy = drBC * t3 - drAC * t4;
dgdx = dgAC * t2 - dgBC * t1;
dgdy = dgBC * t3 - dgAC * t4;
dbdx = dbAC * t2 - dbBC * t1;
dbdy = dbBC * t3 - dbAC * t4;
spanR = FLT_TO_FIX(drdx);
spanG = FLT_TO_FIX(dgdx);
spanB = FLT_TO_FIX(dbdx);
if (ft1 != t1)
DbgPrint("t1 %f %f\n", t1, ft1);
if (ft2 != t2)
DbgPrint("t2 %f %f\n", t2, ft2);
if (ft3 != t3)
DbgPrint("t3 %f %f\n", t3, ft3);
if (ft4 != t4)
DbgPrint("t4 %f %f\n", t4, ft4);
if (drdx != gc->polygon.shader.drdx)
DbgPrint("drdx %f %f\n", drdx, gc->polygon.shader.drdx);
if (drdy != gc->polygon.shader.drdy)
DbgPrint("drdy %f %f\n", drdy, gc->polygon.shader.drdy);
if (dgdx != gc->polygon.shader.dgdx)
DbgPrint("dgdx %f %f\n", dgdx, gc->polygon.shader.dgdx);
if (dgdy != gc->polygon.shader.dgdy)
DbgPrint("dgdy %f %f\n", dgdy, gc->polygon.shader.dgdy);
if (dbdx != gc->polygon.shader.dbdx)
DbgPrint("dbdx %f %f\n", dbdx, gc->polygon.shader.dbdx);
if (dbdy != gc->polygon.shader.dbdy)
DbgPrint("dbdy %f %f\n", dbdy, gc->polygon.shader.dbdy);
if (spanR != GENACCEL(gc).spanDelta.r)
DbgPrint("spanDelta.r %x %x\n", spanR, GENACCEL(gc).spanDelta.r);
if (spanG!= GENACCEL(gc).spanDelta.g)
DbgPrint("spanDelta.g %x %x\n", spanG, GENACCEL(gc).spanDelta.g);
if (spanB != GENACCEL(gc).spanDelta.b)
DbgPrint("spanDelta.b %x %x\n", spanB, GENACCEL(gc).spanDelta.b);
}
_asm{
mov edx, gc
mov edi, [OFFSET(SHADER.modeFlags)][edx]
jmp colorDone
}
#endif // FORCE_NPX_DEBUG
notSmoothRGB:
_asm{
mov eax, [OFFSET(__GLcontext.vertex.provoking)][edx]
fld __glVal65536
mov eax, [OFFSET(__GLvertex.color)][eax]
fmul DWORD PTR [OFFSET(__GLcolor.r)][eax]
fld __glVal65536
fmul DWORD PTR [OFFSET(__GLcolor.g)][eax]
fld __glVal65536
fmul DWORD PTR [OFFSET(__GLcolor.b)][eax]
// B G R
fxch ST(2) // R G B
fistp DWORD PTR [OFFSET(SPANVALUE.r)][edx] // G B
fistp DWORD PTR [OFFSET(SPANVALUE.g)][edx]
fistp DWORD PTR [OFFSET(SPANVALUE.b)][edx]
mov ebx, [OFFSET(GENGCACCEL.__fastFlatSpanFuncPtr)][edx]
mov [OFFSET(GENGCACCEL.__fastSpanFuncPtr)][edx], ebx
jmp colorDone
}
notRGB:
if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH)
{
__GLfloat drAC;
__GLfloat drBC;
__GLcolor *ac, *bc, *cc;
ac = a->color;
bc = b->color;
cc = c->color;
drAC = ac->r - cc->r;
drBC = bc->r - cc->r;
__GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea);
t1 = gc->polygon.shader.dyAC * oneOverArea;
t2 = gc->polygon.shader.dyBC * oneOverArea;
t3 = gc->polygon.shader.dxAC * oneOverArea;
t4 = gc->polygon.shader.dxBC * oneOverArea;
gc->polygon.shader.drdx = drAC * t2 - drBC * t1;
gc->polygon.shader.drdy = drBC * t3 - drAC * t4;
GENACCEL(gc).spanDelta.r =
FLT_TO_FIX(gc->polygon.shader.drdx);
if (GENACCEL(gc).spanDelta.r == 0)
{
GENACCEL(gc).__fastSpanFuncPtr =
GENACCEL(gc).__fastFlatSpanFuncPtr;
}
else
{
GENACCEL(gc).__fastSpanFuncPtr =
GENACCEL(gc).__fastSmoothSpanFuncPtr;
}
}
else
{
GENACCEL(gc).spanValue.r =
FLT_TO_FIX(gc->vertex.provoking->color->r);
GENACCEL(gc).__fastSpanFuncPtr =
GENACCEL(gc).__fastFlatSpanFuncPtr;
}
_asm{
mov edx, gc
mov edi, [OFFSET(SHADER.modeFlags)][edx]
}
colorDone:
_asm{
test edi, __GL_SHADE_DEPTH_ITER
je noZ
test edi, __GL_SHADE_SMOOTH
jne areaOK
}
_asm{
fstp oneOverArea // finish divide
fld DWORD PTR [OFFSET(SHADER.dyAC)][edx]
fmul oneOverArea
fld DWORD PTR [OFFSET(SHADER.dyBC)][edx]
fmul oneOverArea // dyBC dyAC
fld DWORD PTR [OFFSET(SHADER.dxAC)][edx]
fmul oneOverArea // dxAC dyBC dyAC
fxch ST(1) // dyBC dxAC dyAC
fld DWORD PTR [OFFSET(SHADER.dxBC)][edx]
fmul oneOverArea // dxBC dyBC dxAC dyAC
fxch ST(3) // dyAC dyBC dxAC dxBC
fstp t1
fstp t2
fstp t3
fstp t4
}
#if FORCE_NPX_DEBUG
{
__GLfloat ft1 = gc->polygon.shader.dyAC * oneOverArea;
__GLfloat ft2 = gc->polygon.shader.dyBC * oneOverArea;
__GLfloat ft3 = gc->polygon.shader.dxAC * oneOverArea;
__GLfloat ft4 = gc->polygon.shader.dxBC * oneOverArea;
if (ft1 != t1)
DbgPrint("zt1 %f %f\n", t1, ft1);
if (ft2 != t2)
DbgPrint("zt2 %f %f\n", t2, ft2);
if (ft3 != t3)
DbgPrint("zt3 %f %f\n", t3, ft3);
if (ft4 != t4)
DbgPrint("zt4 %f %f\n", t4, ft4);
}
_asm{
mov edx, gc
mov edi, [OFFSET(SHADER.modeFlags)][edx]
}
#endif // FORCE_NPX_DEBUG
areaOK:
_asm{
mov ecx, c
mov eax, a
mov ebx, b
fld DWORD PTR [OFFSET(__GLvertex.window.z)][eax]
fsub DWORD PTR [OFFSET(__GLvertex.window.z)][ecx]
fld DWORD PTR [OFFSET(__GLvertex.window.z)][ebx]
fsub DWORD PTR [OFFSET(__GLvertex.window.z)][ecx]
// dzBC dzAC
fld ST(1) // dzAC dzBC dzAC
fmul t2 // ACt2 dzBC dzAC
fld ST(1) // dzBC ACt2 dzBC dzAC
fmul t1 // BCt1 ACt2 dzBC dzAC
fxch ST(3) // dzAC ACt2 dzBC BCt1
fmul t4 // ACt4 ACt2 dzBC BCt1
fxch ST(2) // dzBC ACt2 ACt4 BCt1
fmul t3 // BCt3 ACt2 ACt4 BCt1
fsubrp ST(2),ST // ACt2 BCAC BCt1
fsubrp ST(2),ST // BCAC ACBC
fxch ST(1) // ACBC BCAC
// dzdx dzdy
fld ST(0) // dzdx dzdx dzdy
fmul DWORD PTR [OFFSET(GENGCACCEL.zScale)][edx]
// dzdxS dzdx dzdy
fxch ST(2) // dzdy dzdx dzdxS
fstp DWORD PTR [OFFSET(SHADER.dzdyf)][edx]
fstp DWORD PTR [OFFSET(SHADER.dzdxf)][edx]
fistp temp
mov ebx, DWORD PTR temp
mov DWORD PTR [OFFSET(SHADER.dzdx)][edx], ebx
mov DWORD PTR [OFFSET(SPANDELTA.z)][edx], ebx
#if !FORCE_NPX_DEBUG
jmp deltaDone
#endif
}
#if FORCE_NPX_DEBUG
{
__GLfloat dzdxf;
__GLfloat dzdyf;
__GLfloat dzAC, dzBC;
ULONG spanDeltaZ;
dzAC = a->window.z - c->window.z;
dzBC = b->window.z - c->window.z;
dzdxf = dzAC * t2 - dzBC * t1;
dzdyf = dzBC * t3 - dzAC * t4;
spanDeltaZ = FTOL(dzdxf * GENACCEL(gc).zScale);
if (dzdxf != gc->polygon.shader.dzdxf)
DbgPrint("dzdxf %f %f\n", dzdxf, gc->polygon.shader.dzdxf);
if (dzdyf != gc->polygon.shader.dzdyf)
DbgPrint("dzdyf %f %f\n", dzdyf, gc->polygon.shader.dzdyf);
if (spanDeltaZ != GENACCEL(gc).spanDelta.z)
DbgPrint("spanDeltaZ %x %x\n", spanDeltaZ, GENACCEL(gc).spanDelta.z);
goto deltaDone;
}
#endif // FORCE_NPX_DEBUG
noZ:
_asm{
test edi, __GL_SHADE_SMOOTH
jne deltaDone
fstp ST(0)
}
deltaDone:
return;
#else // _X86_
/* Pre-compute one over polygon area */
__GL_FLOAT_BEGIN_DIVIDE(__glOne, gc->polygon.shader.area, &oneOverArea);
/*
** t1-4 are delta values for unit changes in x or y for each
** parameter.
*/
if (gc->polygon.shader.modeFlags & __GL_SHADE_RGB)
{
if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH)
{
__GLfloat drAC, dgAC, dbAC, daAC;
__GLfloat drBC, dgBC, dbBC, daBC;
__GLcolor *ac, *bc, *cc;
ac = a->color;
bc = b->color;
cc = c->color;
drAC = ac->r - cc->r;
drBC = bc->r - cc->r;
dgAC = ac->g - cc->g;
dgBC = bc->g - cc->g;
dbAC = ac->b - cc->b;
dbBC = bc->b - cc->b;
__GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea);
t1 = gc->polygon.shader.dyAC * oneOverArea;
t2 = gc->polygon.shader.dyBC * oneOverArea;
t3 = gc->polygon.shader.dxAC * oneOverArea;
t4 = gc->polygon.shader.dxBC * oneOverArea;
gc->polygon.shader.drdx = drAC * t2 - drBC * t1;
gc->polygon.shader.drdy = drBC * t3 - drAC * t4;
gc->polygon.shader.dgdx = dgAC * t2 - dgBC * t1;
gc->polygon.shader.dgdy = dgBC * t3 - dgAC * t4;
gc->polygon.shader.dbdx = dbAC * t2 - dbBC * t1;
gc->polygon.shader.dbdy = dbBC * t3 - dbAC * t4;
GENACCEL(gc).spanDelta.r = FLT_TO_FIX(gc->polygon.shader.drdx);
GENACCEL(gc).spanDelta.g = FLT_TO_FIX(gc->polygon.shader.dgdx);
GENACCEL(gc).spanDelta.b = FLT_TO_FIX(gc->polygon.shader.dbdx);
if ( ((GENACCEL(gc).spanDelta.r | GENACCEL(gc).spanDelta.g |
GENACCEL(gc).spanDelta.b) == 0)
&& ((GENACCEL(gc).flags & GEN_FASTZBUFFER) == 0))
{
GENACCEL(gc).__fastSpanFuncPtr =
GENACCEL(gc).__fastFlatSpanFuncPtr;
}
else
{
GENACCEL(gc).__fastSpanFuncPtr =
GENACCEL(gc).__fastSmoothSpanFuncPtr;
}
}
else
{
__GLcolor *flatColor = gc->vertex.provoking->color;
GENACCEL(gc).spanValue.r = FLT_TO_FIX(flatColor->r);
GENACCEL(gc).spanValue.g = FLT_TO_FIX(flatColor->g);
GENACCEL(gc).spanValue.b = FLT_TO_FIX(flatColor->b);
GENACCEL(gc).__fastSpanFuncPtr =
GENACCEL(gc).__fastFlatSpanFuncPtr;
}
}
else
{
if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH)
{
__GLfloat drAC;
__GLfloat drBC;
__GLcolor *ac, *bc, *cc;
ac = a->color;
bc = b->color;
cc = c->color;
drAC = ac->r - cc->r;
drBC = bc->r - cc->r;
__GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea);
t1 = gc->polygon.shader.dyAC * oneOverArea;
t2 = gc->polygon.shader.dyBC * oneOverArea;
t3 = gc->polygon.shader.dxAC * oneOverArea;
t4 = gc->polygon.shader.dxBC * oneOverArea;
gc->polygon.shader.drdx = drAC * t2 - drBC * t1;
gc->polygon.shader.drdy = drBC * t3 - drAC * t4;
GENACCEL(gc).spanDelta.r =
FLT_TO_FIX(gc->polygon.shader.drdx);
if (GENACCEL(gc).spanDelta.r == 0)
{
GENACCEL(gc).__fastSpanFuncPtr =
GENACCEL(gc).__fastFlatSpanFuncPtr;
}
else
{
GENACCEL(gc).__fastSpanFuncPtr =
GENACCEL(gc).__fastSmoothSpanFuncPtr;
}
}
else
{
GENACCEL(gc).spanValue.r =
FLT_TO_FIX(gc->vertex.provoking->color->r);
GENACCEL(gc).__fastSpanFuncPtr =
GENACCEL(gc).__fastFlatSpanFuncPtr;
}
}
if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_ITER)
{
__GLfloat dzAC, dzBC;
if ((gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) == 0)
{
__GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea);
t1 = gc->polygon.shader.dyAC * oneOverArea;
t2 = gc->polygon.shader.dyBC * oneOverArea;
t3 = gc->polygon.shader.dxAC * oneOverArea;
t4 = gc->polygon.shader.dxBC * oneOverArea;
}
dzAC = a->window.z - c->window.z;
dzBC = b->window.z - c->window.z;
gc->polygon.shader.dzdxf = dzAC * t2 - dzBC * t1;
gc->polygon.shader.dzdyf = dzBC * t3 - dzAC * t4;
GENACCEL(gc).spanDelta.z = gc->polygon.shader.dzdx =
FTOL(gc->polygon.shader.dzdxf * GENACCEL(gc).zScale);
}
else if ((gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) == 0)
{
// In this case the divide hasn't been terminated yet so
// we need to complete it even though we don't use the result
__GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea);
}
#endif // _X86_
}
/*
** __fastGenCalcDeltasTexRGBA
**
** Per-triangle setup for the textured RGBA span path.  Given the three
** vertices a, b and c, and the edge deltas and area already stored in
** gc->polygon.shader (dxAC, dxBC, dyAC, dyBC, area), this routine fills in
** the values the span routine will step with:
**
**   - GENACCEL(gc).__fastSpanFuncPtr is set to __fastTexSpanFuncPtr.
**   - Color:
**       * texturing with env mode GL_REPLACE or GL_DECAL: spanValue.r/g/b/a
**         are copied from GENACCEL(gc).constantR/G/B/A;
**       * otherwise, smooth shading: shader.d{r,g,b}d{x,y} and
**         spanDelta.r/g/b are computed, plus shader.dadx/dady and
**         spanDelta.a when __GL_BLEND_ENABLE is set;
**       * otherwise (flat): spanValue.r/g/b (and .a when blending) are taken
**         from the provoking vertex color.
**   - Texture (only if __GL_SHADE_TEXTURE is set and GENACCEL(gc).texImage
**     is non-NULL): shader.dsdx/dsdy/dtdx/dtdy, spanDelta.s/t and
**     sStepX/tStepX.  When the perspective-correction hint is GL_NICEST the
**     texture coordinates are first multiplied by window.w, and
**     shader.dqwdx/dqwdy and qwStepX are computed as well.
**   - Depth (only if __GL_SHADE_DEPTH_ITER is set): shader.dzdxf/dzdyf,
**     shader.dzdx and spanDelta.z.
**
** Every gradient uses the same plane-equation form, where pAC = p(a) - p(c)
** and pBC = p(b) - p(c):
**
**      dpdx = pAC * t2 - pBC * t1
**      dpdy = pBC * t3 - pAC * t4
**
** with t1 = dyAC/area, t2 = dyBC/area, t3 = dxAC/area, t4 = dxBC/area.
**
** The 1/area divide is started at the top of the routine and only completed
** the first time t1..t4 are needed; oneOverAreaDone records whether that has
** happened.  If no path needed it, the divide is still completed at the end
** and its result is discarded.
**
** There are two implementations: hand-written x87 inline assembly
** (_X86_ && ENABLE_ASM) and a portable C version.  The FORCE_NPX_DEBUG
** sections recompute the assembly results in C and DbgPrint any mismatch.
*/
void FASTCALL __fastGenCalcDeltasTexRGBA(
__GLcontext *gc,
__GLvertex *a,
__GLvertex *b,
__GLvertex *c)
{
__GLfloat oneOverArea, t1, t2, t3, t4;
// Set once the 1/area divide has been completed and t1..t4 are valid.
GLboolean oneOverAreaDone;
#if _X86_ && ENABLE_ASM
// Scratch slot for the fistp of the scaled dzdx in the depth section.
LARGE_INTEGER temp;
// Conventions for the assembly below:
//   edx = gc and edi = gc->polygon.shader.modeFlags for the whole routine.
//         (The FORCE_NPX_DEBUG sections reload both after running C code.)
//   The fdiv result (1/area) is left on the x87 stack until some section
//   pops it with "fstp oneOverArea"; until then it sits beneath whatever
//   the other code pushes.
_asm{
mov edx, gc
xor eax, eax
mov oneOverAreaDone, al
mov edi, [OFFSET(SHADER.modeFlags)][edx]
// Start 1/area; the quotient stays on the FPU stack for now.
fld __glOne
fdiv DWORD PTR [OFFSET(SHADER.area)][edx]
mov ebx, [OFFSET(GENGCACCEL.__fastTexSpanFuncPtr)][edx]
test edi, __GL_SHADE_TEXTURE
mov [OFFSET(GENGCACCEL.__fastSpanFuncPtr)][edx], ebx
mov eax, [OFFSET(__GLcontext.state.texture.env)][edx]
// The je below consumes the flags from the "test" above; the
// intervening mov instructions do not modify flags.
je notReplace
mov ebx, [OFFSET(__GLtextureEnvState.mode)][eax]
cmp ebx, GL_REPLACE
je fastReplace
cmp ebx, GL_DECAL
jne notReplace
}
// Texturing with GL_REPLACE or GL_DECAL: no color interpolation is set up;
// the span values are copied from the precomputed constant color.
fastReplace:
_asm{
mov eax, [OFFSET(GENGCACCEL.constantR)][edx]
mov ebx, [OFFSET(GENGCACCEL.constantG)][edx]
mov [OFFSET(SPANVALUE.r)][edx], eax
mov [OFFSET(SPANVALUE.g)][edx], ebx
mov eax, [OFFSET(GENGCACCEL.constantB)][edx]
mov ebx, [OFFSET(GENGCACCEL.constantA)][edx]
mov [OFFSET(SPANVALUE.b)][edx], eax
mov [OFFSET(SPANVALUE.a)][edx], ebx
jmp colorDone
}
// Not a replace/decal texture: choose between smooth and flat color setup.
notReplace:
_asm{
test edi, __GL_SHADE_SMOOTH
je doFlat
// Smooth shading needs t1..t4, so the divide is completed just below.
mov al, 1
mov oneOverAreaDone, al
}
// smooth:
_asm{
// eax/ebx/ecx start as the vertex pointers a/b/c and are replaced by the
// vertices' color pointers while the FPU computes t1..t4.
mov eax, a
mov ebx, b
mov ecx, c
fstp oneOverArea // finish divide
fld DWORD PTR [OFFSET(SHADER.dyAC)][edx]
mov eax, [OFFSET(__GLvertex.color)][eax]
fmul oneOverArea
fld DWORD PTR [OFFSET(SHADER.dyBC)][edx]
mov ebx, [OFFSET(__GLvertex.color)][ebx]
fmul oneOverArea // dyBC dyAC
fld DWORD PTR [OFFSET(SHADER.dxAC)][edx]
mov ecx, [OFFSET(__GLvertex.color)][ecx]
fmul oneOverArea // dxAC dyBC dyAC
fxch ST(1) // dyBC dxAC dyAC
fld DWORD PTR [OFFSET(SHADER.dxBC)][edx]
fmul oneOverArea // dxBC dyBC dxAC dyAC
fxch ST(3) // dyAC dyBC dxAC dxBC
fstp t1
fstp t2
fstp t3
fstp t4
// Now, calculate deltas:
// Each channel stores its float d?dx/d?dy into the shader and a copy of
// d?dx scaled by __glVal65536, converted to integer, into SPANDELTA.
// The loads for the next channel are interleaved with the tail of the
// current one.
// Red
fld DWORD PTR [OFFSET(__GLcolor.r)][eax]
fsub DWORD PTR [OFFSET(__GLcolor.r)][ecx]
fld DWORD PTR [OFFSET(__GLcolor.r)][ebx]
fsub DWORD PTR [OFFSET(__GLcolor.r)][ecx]
// drBC drAC
fld ST(1) // drAC drBC drAC
fmul t2 // drACt2 drBC drAC
fld ST(1) // drBC drACt2 drBC drAC
fmul t1 // drBCt1 drACt2 drBC drAC
fxch ST(2) // drBC drACt2 drBCt1 drAC
fmul t3 // drBCt3 drACt2 drBCt1 drAC
fxch ST(3) // drAC drACt2 drBCt1 drBCt3
fmul t4 // drACt4 drACt2 drBCt1 drBCt3
fxch ST(2) // drBCt1 drACt2 drACt4 drBCt3
fsubp ST(1), ST // drACBC drACt4 drBCt3
fld DWORD PTR [OFFSET(__GLcolor.g)][ebx]
fsub DWORD PTR [OFFSET(__GLcolor.g)][ecx]
// dgBC drACBC drACt4 drBCt3
fxch ST(2) // drACt4 drACBC dgBC drBCt3
fsubp ST(3), ST // drACBC dgBC drBCAC
fst DWORD PTR [OFFSET(SHADER.drdx)][edx]
fmul __glVal65536
// DRACBC dgBC drBCAC
fxch ST(2) // drBCAC dgBC DRACBC
fstp DWORD PTR [OFFSET(SHADER.drdy)][edx]
// dgBC DRACBC
fld DWORD PTR [OFFSET(__GLcolor.g)][eax]
fsub DWORD PTR [OFFSET(__GLcolor.g)][ecx]
// dgAC dgBC DRACBC
fxch ST(2) // DRACBC dgBC dgAC
fistp DWORD PTR [OFFSET(SPANDELTA.r)][edx]
// Green
// dgBC dgAC
fld ST(1) // dgAC dgBC dgAC
fmul t2 // dgACt2 dgBC dgAC
fld ST(1) // dgBC dgACt2 dgBC dgAC
fmul t1 // dgBCt1 dgACt2 dgBC dgAC
fxch ST(2) // dgBC dgACt2 dgBCt1 dgAC
fmul t3 // dgBCt3 dgACt2 dgBCt1 dgAC
fxch ST(3) // dgAC dgACt2 dgBCt1 dgBCt3
fmul t4 // dgACt4 dgACt2 dgBCt1 dgBCt3
fxch ST(2) // dgBCt1 dgACt2 dgACt4 dgBCt3
fsubp ST(1), ST // dgACBC dgACt4 dgBCt3
fld DWORD PTR [OFFSET(__GLcolor.b)][ebx]
fsub DWORD PTR [OFFSET(__GLcolor.b)][ecx]
// dbBC dgACBC dgACt4 dgBCt3
fxch ST(2) // dgACt4 dgACBC dbBC dgBCt3
fsubp ST(3), ST // dgACBC dbBC dgBCAC
fst DWORD PTR [OFFSET(SHADER.dgdx)][edx]
fmul __glVal65536
// DGACBC dbBC dgBCAC
fxch ST(2) // dgBCAC dbBC DGACBC
fstp DWORD PTR [OFFSET(SHADER.dgdy)][edx]
// dbBC DGACBC
fld DWORD PTR [OFFSET(__GLcolor.b)][eax]
fsub DWORD PTR [OFFSET(__GLcolor.b)][ecx]
// dbAC dbBC DGACBC
fxch ST(2) // DGACBC dbBC dbAC
fistp DWORD PTR [OFFSET(SPANDELTA.g)][edx]
// Blue
// dbBC dbAC
fld ST(1) // dbAC dbBC dbAC
fmul t2 // dbACt2 dbBC dbAC
fld ST(1) // dbBC dbACt2 dbBC dbAC
fmul t1 // dbBCt1 dbACt2 dbBC dbAC
fxch ST(2) // dbBC dbACt2 dbBCt1 dbAC
fmul t3 // dbBCt3 dbACt2 dbBCt1 dbAC
fxch ST(3) // dbAC dbACt2 dbBCt1 dbBCt3
fmul t4 // dbACt4 dbACt2 dbBCt1 dbBCt3
fxch ST(2) // dbBCt1 dbACt2 dbACt4 dbBCt3
fsubp ST(1), ST // dbACBC dbACt4 dbBCt3
fxch ST(1) // dbACt4 dbACBC dbBCt3
fsubp ST(2), ST // dbACBC dbBCAC (+1)
fst DWORD PTR [OFFSET(SHADER.dbdx)][edx]
fmul __glVal65536
// DBACBC dbBCAC
fxch ST(1) // dbBCAC DBACBC
fstp DWORD PTR [OFFSET(SHADER.dbdy)][edx]
// The blend-enable test is issued before the final fistp; the x87 store
// does not alter EFLAGS, so the je below still sees the test result.
test [OFFSET(__GLcontext.state.enables.general)][edx], __GL_BLEND_ENABLE
fistp DWORD PTR [OFFSET(SPANDELTA.b)][edx]
je colorDone
// Alpha (only when blending is enabled); scaled by aAccelScale rather
// than __glVal65536.
fld DWORD PTR [OFFSET(__GLcolor.a)][eax]
fsub DWORD PTR [OFFSET(__GLcolor.a)][ecx]
// daAC
fld DWORD PTR [OFFSET(__GLcolor.a)][ebx]
fsub DWORD PTR [OFFSET(__GLcolor.a)][ecx]
// daBC daAC
fld ST(1) // daAC daBC daAC
fmul t2 // daACt2 daBC daAC
fld ST(1) // daBC daACt2 daBC daAC
fmul t1 // daBCt1 daACt2 daBC daAC
fxch ST(3) // daAC daACt2 daBC daBCt1
fmul t4 // daACt4 daACt2 daBC daBCt1
fxch ST(2) // daBC daACt2 daACt4 daBCt1
fmul t3 // daBCt3 daACt2 daACt4 daBCt1
fxch ST(3) // daBCt1 daACt2 daACt4 daBCt3
fsubp ST(1), ST // daACBC daACt4 daBCt3
fxch ST(1) // daACt4 daACBC daBCt3
fsubp ST(2), ST // daACBC daBCAC (+1)
fst DWORD PTR [OFFSET(SHADER.dadx)][edx]
fmul DWORD PTR [OFFSET(GENGCACCEL.aAccelScale)][edx]
fxch ST(1)
fstp DWORD PTR [OFFSET(SHADER.dady)][edx]
fistp DWORD PTR [OFFSET(SPANDELTA.a)][edx] // (+1)
#if !FORCE_NPX_DEBUG
jmp colorDone
#endif
}
#if FORCE_NPX_DEBUG
// Debug cross-check: recompute the smooth color deltas in C and report any
// value that differs from what the assembly stored.
{
__GLfloat drAC, dgAC, dbAC, daAC;
__GLfloat drBC, dgBC, dbBC, daBC;
__GLcolor *ac, *bc, *cc;
__GLfloat ft1 = gc->polygon.shader.dyAC * oneOverArea;
__GLfloat ft2 = gc->polygon.shader.dyBC * oneOverArea;
__GLfloat ft3 = gc->polygon.shader.dxAC * oneOverArea;
__GLfloat ft4 = gc->polygon.shader.dxBC * oneOverArea;
__GLfloat drdx;
__GLfloat drdy;
__GLfloat dgdx;
__GLfloat dgdy;
__GLfloat dbdx;
__GLfloat dbdy;
LONG spanR, spanG, spanB;
ac = a->color;
bc = b->color;
cc = c->color;
drAC = ac->r - cc->r;
drBC = bc->r - cc->r;
dgAC = ac->g - cc->g;
dgBC = bc->g - cc->g;
dbAC = ac->b - cc->b;
dbBC = bc->b - cc->b;
drdx = drAC * t2 - drBC * t1;
drdy = drBC * t3 - drAC * t4;
dgdx = dgAC * t2 - dgBC * t1;
dgdy = dgBC * t3 - dgAC * t4;
dbdx = dbAC * t2 - dbBC * t1;
dbdy = dbBC * t3 - dbAC * t4;
spanR = FLT_TO_FIX(drdx);
spanG = FLT_TO_FIX(dgdx);
spanB = FLT_TO_FIX(dbdx);
if (ft1 != t1)
DbgPrint("t1 %f %f\n", t1, ft1);
if (ft2 != t2)
DbgPrint("t2 %f %f\n", t2, ft2);
if (ft3 != t3)
DbgPrint("t3 %f %f\n", t3, ft3);
if (ft4 != t4)
DbgPrint("t4 %f %f\n", t4, ft4);
if (drdx != gc->polygon.shader.drdx)
DbgPrint("drdx %f %f\n", drdx, gc->polygon.shader.drdx);
if (drdy != gc->polygon.shader.drdy)
DbgPrint("drdy %f %f\n", drdy, gc->polygon.shader.drdy);
if (dgdx != gc->polygon.shader.dgdx)
DbgPrint("dgdx %f %f\n", dgdx, gc->polygon.shader.dgdx);
if (dgdy != gc->polygon.shader.dgdy)
DbgPrint("dgdy %f %f\n", dgdy, gc->polygon.shader.dgdy);
if (dbdx != gc->polygon.shader.dbdx)
DbgPrint("dbdx %f %f\n", dbdx, gc->polygon.shader.dbdx);
if (dbdy != gc->polygon.shader.dbdy)
DbgPrint("dbdy %f %f\n", dbdy, gc->polygon.shader.dbdy);
if (spanR != GENACCEL(gc).spanDelta.r)
DbgPrint("spanDelta.r %x %x\n", spanR, GENACCEL(gc).spanDelta.r);
if (spanG!= GENACCEL(gc).spanDelta.g)
DbgPrint("spanDelta.g %x %x\n", spanG, GENACCEL(gc).spanDelta.g);
if (spanB != GENACCEL(gc).spanDelta.b)
DbgPrint("spanDelta.b %x %x\n", spanB, GENACCEL(gc).spanDelta.b);
if (gc->state.enables.general & __GL_BLEND_ENABLE) {
__GLfloat dadx;
__GLfloat dady;
// Note: this local shadows the vertex parameter 'a' inside this block.
LONG a;
daAC = ac->a - cc->a;
daBC = bc->a - cc->a;
dadx = daAC * t2 - daBC * t1;
dady = daBC * t3 - daAC * t4;
a = FTOL(gc->polygon.shader.dadx * GENACCEL(gc).aAccelScale);
if (dadx != gc->polygon.shader.dadx)
DbgPrint("dadx %f %f\n", dadx, gc->polygon.shader.dadx);
if (dady != gc->polygon.shader.dady)
DbgPrint("dady %f %f\n", dady, gc->polygon.shader.dady);
if (a != GENACCEL(gc).spanDelta.a)
DbgPrint("spanDelta.a %x %x\n", a, GENACCEL(gc).spanDelta.a);
}
}
// Reload the registers the rest of the assembly expects (edx = gc,
// edi = modeFlags) after running the C code above.
_asm {
mov edx, gc
mov edi, [OFFSET(SHADER.modeFlags)][edx]
jmp colorDone
}
#endif // FORCE_NPX_DEBUG
// Flat shading: scale the provoking vertex color into SPANVALUE.  The
// pending 1/area quotient stays at the bottom of the FPU stack throughout.
doFlat:
_asm{
mov eax, [OFFSET(__GLcontext.vertex.provoking)][edx]
fld __glVal65536
mov eax, [OFFSET(__GLvertex.color)][eax]
fmul DWORD PTR [OFFSET(__GLcolor.r)][eax]
fld __glVal65536
fmul DWORD PTR [OFFSET(__GLcolor.g)][eax]
mov ebx, [OFFSET(__GLcontext.state.enables.general)][edx]
fld __glVal65536
test ebx, __GL_BLEND_ENABLE
fmul DWORD PTR [OFFSET(__GLcolor.b)][eax]
je noFlatBlend
fld DWORD PTR [OFFSET(GENGCACCEL.aAccelScale)][edx]
fmul DWORD PTR [OFFSET(__GLcolor.a)][eax]
// A B G R
fxch ST(3) // R B G A
fistp DWORD PTR [OFFSET(SPANVALUE.r)][edx]
fistp DWORD PTR [OFFSET(SPANVALUE.b)][edx]
fistp DWORD PTR [OFFSET(SPANVALUE.g)][edx]
fistp DWORD PTR [OFFSET(SPANVALUE.a)][edx]
jmp short flatDone
noFlatBlend:
// B G R
fxch ST(2) // R G B
fistp DWORD PTR [OFFSET(SPANVALUE.r)][edx] // G B
fistp DWORD PTR [OFFSET(SPANVALUE.g)][edx]
fistp DWORD PTR [OFFSET(SPANVALUE.b)][edx]
flatDone:
}
// Color setup finished.  Texture deltas are computed only when texturing is
// enabled in modeFlags and a texImage is present.
colorDone:
_asm{
test edi, __GL_SHADE_TEXTURE
mov eax, [OFFSET(GENGCACCEL.texImage)][edx]
je texDone
test eax, eax
je texDone
}
_asm{
// ebx is loaded with the perspective-correction hint here and compared
// against GL_NICEST at areaDoneAlready.
mov al, oneOverAreaDone
mov ebx, [OFFSET(__GLcontext.state.hints.perspectiveCorrection)][edx]
test al, al
jne areaDoneAlready
}
_asm{
fstp oneOverArea // finish divide
fld DWORD PTR [OFFSET(SHADER.dyAC)][edx]
fmul oneOverArea
fld DWORD PTR [OFFSET(SHADER.dyBC)][edx]
fmul oneOverArea // dyBC dyAC
fld DWORD PTR [OFFSET(SHADER.dxAC)][edx]
fmul oneOverArea // dxAC dyBC dyAC
fxch ST(1) // dyBC dxAC dyAC
fld DWORD PTR [OFFSET(SHADER.dxBC)][edx]
fmul oneOverArea // dxBC dyBC dxAC dyAC
fxch ST(3) // dyAC dyBC dxAC dxBC
fstp t1
// al was just tested to be zero, so this inc leaves al == 1, which is
// then stored as the new oneOverAreaDone.
inc eax
fstp t2
mov oneOverAreaDone, al
fstp t3
fstp t4
}
areaDoneAlready:
_asm{
cmp ebx, GL_NICEST
je doNicest
}
// Texture deltas without the GL_NICEST hint: s and t gradients are taken
// directly from the vertices' texture.x / texture.y.
_asm{
mov eax, a
mov ecx, c
mov ebx, b
fld DWORD PTR [OFFSET(__GLvertex.texture.x)][eax]
fsub DWORD PTR [OFFSET(__GLvertex.texture.x)][ecx]
// dsAC
fld DWORD PTR [OFFSET(__GLvertex.texture.x)][ebx]
fsub DWORD PTR [OFFSET(__GLvertex.texture.x)][ecx]
// dsBC dsAC
fld ST(1) // dsAC dsBC dsAC
fmul t2
fxch ST(2) // dsAC dsBC dsACt2
fmul t4 // dsACt4 dsBC dsACt2
fld ST(1) // dsBC dsACt4 dsBC dsACt2
fmul t1 // dsBCt1 dsACt4 dsBC dsACt2
fxch ST(2) // dsBC dsACt4 dsBCt1 dsACt2
fmul t3 // dsBCt3 dsACt4 dsBCt1 dsACt2
fxch ST(2) // dsBCt1 dsACt4 dsBCt3 dsACt2
fsubp ST(3), ST // dsACt4 dsBCt3 dsACBC
fld DWORD PTR [OFFSET(__GLvertex.texture.y)][ebx]
fsub DWORD PTR [OFFSET(__GLvertex.texture.y)][ecx]
// dtBC dsACt4 dsBCt3 dsACBC
fxch ST(1) // dsACt4 dtBC dsBCt3 dsACBC
fsubp ST(2), ST // dtBC dsBCAC dsACBC
fxch ST(2) // dsACBC dsBCAC dtBC
fst DWORD PTR [OFFSET(SHADER.dsdx)][edx]
// dsdx dsBCAC dtBC
fld DWORD PTR [OFFSET(__GLvertex.texture.y)][eax]
fsub DWORD PTR [OFFSET(__GLvertex.texture.y)][ecx]
// dtAC dsdx dsBCAC dtBC
fxch ST(2) // dsBCAC dsdx dtAC dtBC
fstp DWORD PTR [OFFSET(SHADER.dsdy)][edx]
// dsdx dtAC dtBC
fmul DWORD PTR [OFFSET(GENGCACCEL.texXScale)][edx]
// deltaS dtAC dtBC
fxch ST(2) // dtBC dtAC deltaS
fld ST(1) // dtAC dtBC dtAC deltaS
fmul t2 // dtACt2 dtBC dtAC deltaS
fxch ST(2) // dtAC dtBC dtACt2 deltaS
fmul t4 // dtACt4 dtBC dtACt2 deltaS
fld ST(1) // dtBC dtACt4 dtBC dtACt2 deltaS
fmul t1 // dtBCt1 dtACt4 dtBC dtACt2 deltaS
fxch ST(2) // dtBC dtACt4 dtBCt1 dtACt2 deltaS
fmul t3 // dtBCt3 dtACt4 dtBCt1 dtACt2 deltaS
fxch ST(2) // dtBCt1 dtACt4 dtBCt3 dtACt2 deltaS
fsubp ST(3), ST // dtACt4 dtBCt3 dtACBC deltaS
fxch ST(3) // deltaS dtBCt3 dtACBC dtACt4
fistp DWORD PTR [OFFSET(SPANDELTA.s)][edx]
// dtBCt3 dtACBC dtACt4
fsubrp ST(2), ST // dtACBC dtBCAC
fst DWORD PTR [OFFSET(SHADER.dtdx)][edx]
fmul DWORD PTR [OFFSET(GENGCACCEL.texYScale)][edx]
fxch ST(1) // dtBCAC deltaT
fstp DWORD PTR [OFFSET(SHADER.dtdy)][edx]
// sStepX/tStepX are the integer span deltas shifted left by
// TEX_SUBDIV_LOG2 (the C path multiplies by TEX_SUBDIV instead;
// presumably TEX_SUBDIV == 1 << TEX_SUBDIV_LOG2 -- confirm).
mov eax, [OFFSET(SPANDELTA.s)][edx]
fistp DWORD PTR [OFFSET(SPANDELTA.t)][edx]
shl eax, TEX_SUBDIV_LOG2
mov ebx, [OFFSET(SPANDELTA.t)][edx]
shl ebx, TEX_SUBDIV_LOG2
mov [OFFSET(GENGCACCEL.sStepX)][edx], eax
mov [OFFSET(GENGCACCEL.tStepX)][edx], ebx
#if !FORCE_NPX_DEBUG
jmp texDone
#endif
}
#if FORCE_NPX_DEBUG
// Debug cross-check of the non-GL_NICEST texture deltas.
{
__GLfloat awinv, bwinv, cwinv, scwinv, tcwinv, qwcwinv;
__GLfloat dsAC, dsBC, dtAC, dtBC, dqwAC, dqwBC;
__GLfloat dsdx, dsdy;
__GLfloat dtdx, dtdy;
LONG spanDeltaS, spanDeltaT;
dsAC = a->texture.x - c->texture.x;
dsBC = b->texture.x - c->texture.x;
dsdx = dsAC * t2 - dsBC * t1;
dsdy = dsBC * t3 - dsAC * t4;
dtAC = a->texture.y - c->texture.y;
dtBC = b->texture.y - c->texture.y;
dtdx = dtAC * t2 - dtBC * t1;
dtdy = dtBC * t3 - dtAC * t4;
spanDeltaS = FTOL(dsdx * GENACCEL(gc).texXScale);
spanDeltaT = FTOL(dtdx * GENACCEL(gc).texYScale);
if (gc->polygon.shader.dsdx != dsdx)
DbgPrint("dsdx %f %f\n", dsdx, gc->polygon.shader.dsdx);
if (gc->polygon.shader.dsdy != dsdy)
DbgPrint("dsdy %f %f\n", dsdy, gc->polygon.shader.dsdy);
if (gc->polygon.shader.dtdx != dtdx)
DbgPrint("dtdx %f %f\n", dtdx, gc->polygon.shader.dtdx);
if (gc->polygon.shader.dtdy != dtdy)
DbgPrint("dtdy %f %f\n", dtdy, gc->polygon.shader.dtdy);
if (spanDeltaS != GENACCEL(gc).spanDelta.s)
DbgPrint("spanDelta.s %x %x\n", spanDeltaS, GENACCEL(gc).spanDelta.s);
if (spanDeltaT != GENACCEL(gc).spanDelta.t)
DbgPrint("spanDelta.t %x %x\n", spanDeltaT, GENACCEL(gc).spanDelta.t);
}
_asm {
mov edx, gc
mov edi, [OFFSET(SHADER.modeFlags)][edx]
jmp texDone
}
#endif // FORCE_NPX_DEBUG
// Texture deltas with the GL_NICEST hint: texture.x, texture.y and texture.w
// are each multiplied by the vertex's window.w before differencing, and the
// q/w gradients (dqwdx, dqwdy) and qwStepX are produced as well.
doNicest:
// LATER - remove store/read of dsdx, dtdx
_asm{
mov ecx, c
mov ebx, b
mov eax, a
fld DWORD PTR [OFFSET(__GLvertex.texture.x)][ecx] // sc
fmul DWORD PTR [OFFSET(__GLvertex.window.w)][ecx]
fld DWORD PTR [OFFSET(__GLvertex.texture.x)][eax]
fmul DWORD PTR [OFFSET(__GLvertex.window.w)][eax] // dsA sc
fld DWORD PTR [OFFSET(__GLvertex.texture.x)][ebx]
fmul DWORD PTR [OFFSET(__GLvertex.window.w)][ebx]
// dsB dsA sc
fxch ST(2) // sc dsA dsB
fsub ST(1), ST // sc dsAC dsB
fld DWORD PTR [OFFSET(__GLvertex.texture.y)][ecx] // tcwinv
fmul DWORD PTR [OFFSET(__GLvertex.window.w)][ecx]
// tc sc dsAC dsB
fxch ST(1) // sc tc dsAC dsB
fsubp ST(3), ST // tc dsAC dsBC
fxch ST(2) // dsBC dsAC tc
fld ST(1) // dsAC dsBC dsAC tc
fmul t2
fxch ST(2) // dsAC dsBC dsACt2 tc
fmul t4 // dsACt4 dsBC dsACt2 tc
fld ST(1) // dsBC dsACt4 dsBC dsACt2 tc
fmul t1 // dsBCt1 dsACt4 dsBC dsACt2 tc
fxch ST(2) // dsBC dsACt4 dsBCt1 dsACt2 tc
fmul t3 // dsBCt3 dsACt4 dsBCt1 dsACt2 tc
fxch ST(2) // dsBCt1 dsACt4 dsBCt3 dsACt2 tc
fsubp ST(3), ST // dsACt4 dsBCt3 dsACBC tc
fld DWORD PTR [OFFSET(__GLvertex.texture.y)][eax]
fmul DWORD PTR [OFFSET(__GLvertex.window.w)][eax]
// dtA dsACt4 dsBCt3 dsACBC tc
fxch ST(1) // dsACt4 dtA dsBCt3 dsACBC tc
fsubp ST(2), ST // dtA dsBCAC dsACBC tc
fxch ST(2) // dsACBC dsBCAC dtA tc
fstp DWORD PTR [OFFSET(SHADER.dsdx)][edx]
// dsBCAC dtA tc
fld DWORD PTR [OFFSET(__GLvertex.texture.y)][ebx]
fmul DWORD PTR [OFFSET(__GLvertex.window.w)][ebx]
// dtB dsBCAC dtA tc
fxch ST(1) // dsBCAC dtB dtA tc
fstp DWORD PTR [OFFSET(SHADER.dsdy)][edx]
// dtB dtA tc
fxch ST(2) // tc dtA dtB
fsub ST(1), ST // tc dtAC dtB
fsubp ST(2), ST // dtAC dtBC
fld DWORD PTR [OFFSET(__GLvertex.texture.w)][ecx]
fmul DWORD PTR [OFFSET(__GLvertex.window.w)][ecx]
// qw dtAC dtBC
fxch ST(2) // dtBC dtAC qw
fld ST(1) // dtAC dtBC dtAC qw
fmul t2 // dtACt2 dtBC dtAC qw
fxch ST(2) // dtAC dtBC dtACt2 qw
fmul t4 // dtACt4 dtBC dtACt2 qw
fld ST(1) // dtBC dtACt4 dtBC dtACt2 qw
fmul t1 // dtBCt1 dtACt4 dtBC dtACt2 qw
fxch ST(2) // dtBC dtACt4 dtBCt1 dtACt2 qw
fmul t3 // dtBCt3 dtACt4 dtBCt1 dtACt2 qw
fxch ST(2) // dtBCt1 dtACt4 dtBCt3 dtACt2 qw
fsubp ST(3), ST // dtACt4 dtBCt3 dtACBC qw
fld DWORD PTR [OFFSET(__GLvertex.texture.w)][eax]
fmul DWORD PTR [OFFSET(__GLvertex.window.w)][eax]
// dqA dtACt4 dtBCt3 dtACBC qw
fxch ST(1) // dtACt4 dqA dtBCt3 dtACBC qw
fsubp ST(2), ST // dqA dtBCAC dtACBC qw
fxch ST(2) // dtACBC dtBCAC dqA qw
fstp DWORD PTR [OFFSET(SHADER.dtdx)][edx]
// dtBCAC dqA qw
fld DWORD PTR [OFFSET(__GLvertex.texture.w)][ebx]
fmul DWORD PTR [OFFSET(__GLvertex.window.w)][ebx]
// dqB dtBCAC dqA qw
fxch ST(3) // qw dtBCAC dqA dqB
fsub ST(2), ST // qw dtBCAC dqAC dqB
fxch ST(1) // dtBCAC qw dqAC dqB
fstp DWORD PTR [OFFSET(SHADER.dtdy)][edx]
// qw dqAC dqB
fsubp ST(2), ST // dqAC dqBC
fxch ST(1) // dqBC dqAC
fld ST(1) // dqAC dqBC dqAC
fmul t2 // dqACt2 dqBC dqAC
fxch ST(2) // dqAC dqBC dqACt2
fmul t4 // dqACt4 dqBC dqACt2
fld ST(1) // dqBC dqACt4 dqBC dqACt2
fmul t1 // dqBCt1 dqACt4 dqBC dqACt2
fxch ST(2) // dqBC dqACt4 dqBCt1 dqACt2
fmul t3 // dqBCt3 dqACt4 dqBCt1 dqACt2
fxch ST(2) // dqBCt1 dqACt4 dqBCt3 dqACt2
fsubp ST(3), ST // dqACt4 dqBCt3 dqACBC
fxch ST(2) // dqACBC dqBCt3 dqACt4
// dsdx and dtdx are read back from the shader here to form the scaled
// integer span deltas.
fld DWORD PTR [OFFSET(SHADER.dsdx)][edx]
fmul DWORD PTR [OFFSET(GENGCACCEL.texXScale)][edx]
// deltaS dqACBC dqBCt3 dqACt4
fxch ST(3) // dqACt4 dqACBC dqBCt3 deltaS
fsubp ST(2), ST // dqACBC dqBCAC deltaS
fld DWORD PTR [OFFSET(SHADER.dtdx)][edx]
fmul DWORD PTR [OFFSET(GENGCACCEL.texYScale)][edx]
// deltaT dqACBC dqBCAC deltaS
// qwStep = __glTexSubDiv * dqwdx (ST(2) is dqACBC at this point).
fld __glTexSubDiv
fmul ST, ST(2)
// qwStep deltaT dqACBC dqBCAC deltaS
fxch ST(4) // deltaS deltaT dqACBC dqBCAC qwStep
fistp DWORD PTR [OFFSET(SPANDELTA.s)][edx]
fistp DWORD PTR [OFFSET(SPANDELTA.t)][edx]
// dqACBC dqBCAC qwStep
fstp DWORD PTR [OFFSET(SHADER.dqwdx)][edx]
fstp DWORD PTR [OFFSET(SHADER.dqwdy)][edx]
mov eax, [OFFSET(SPANDELTA.s)][edx]
fstp DWORD PTR [OFFSET(GENGCACCEL.qwStepX)][edx]
shl eax, TEX_SUBDIV_LOG2
mov ebx, [OFFSET(SPANDELTA.t)][edx]
shl ebx, TEX_SUBDIV_LOG2
mov [OFFSET(GENGCACCEL.sStepX)][edx], eax
mov [OFFSET(GENGCACCEL.tStepX)][edx], ebx
}
#if FORCE_NPX_DEBUG
// Debug cross-check of the GL_NICEST texture deltas.
{
__GLfloat awinv, bwinv, cwinv, scwinv, tcwinv, qwcwinv;
__GLfloat dsAC, dsBC, dtAC, dtBC, dqwAC, dqwBC;
__GLfloat dsdx, dsdy;
__GLfloat dtdx, dtdy;
__GLfloat dqwdx, dqwdy;
__GLfloat qwStepX;
LONG spanDeltaS, spanDeltaT;
awinv = a->window.w;
bwinv = b->window.w;
cwinv = c->window.w;
scwinv = c->texture.x * cwinv;
tcwinv = c->texture.y * cwinv;
qwcwinv = c->texture.w * cwinv;
dsAC = a->texture.x * awinv - scwinv;
dsBC = b->texture.x * bwinv - scwinv;
dsdx = dsAC * t2 - dsBC * t1;
dsdy = dsBC * t3 - dsAC * t4;
dtAC = a->texture.y * awinv - tcwinv;
dtBC = b->texture.y * bwinv - tcwinv;
dtdx = dtAC * t2 - dtBC * t1;
dtdy = dtBC * t3 - dtAC * t4;
dqwAC = a->texture.w * awinv - qwcwinv;
dqwBC = b->texture.w * bwinv - qwcwinv;
dqwdx = dqwAC * t2 - dqwBC * t1;
dqwdy = dqwBC * t3 - dqwAC * t4;
spanDeltaS = FTOL(dsdx * GENACCEL(gc).texXScale);
spanDeltaT = FTOL(dtdx * GENACCEL(gc).texYScale);
qwStepX = (gc->polygon.shader.dqwdx * (__GLfloat)TEX_SUBDIV);
if (gc->polygon.shader.dsdx != dsdx)
DbgPrint("dsdx %f %f\n", dsdx, gc->polygon.shader.dsdx);
if (gc->polygon.shader.dsdy != dsdy)
DbgPrint("dsdy %f %f\n", dsdy, gc->polygon.shader.dsdy);
if (gc->polygon.shader.dtdx != dtdx)
DbgPrint("dtdx %f %f\n", dtdx, gc->polygon.shader.dtdx);
if (gc->polygon.shader.dtdy != dtdy)
DbgPrint("dtdy %f %f\n", dtdy, gc->polygon.shader.dtdy);
if (gc->polygon.shader.dqwdx != dqwdx)
DbgPrint("dqdx %f %f\n", dqwdx, gc->polygon.shader.dqwdx);
if (gc->polygon.shader.dqwdy != dqwdy)
DbgPrint("dqdy %f %f\n", dqwdy, gc->polygon.shader.dqwdy);
if (spanDeltaS != GENACCEL(gc).spanDelta.s)
DbgPrint("spanDelta.s %x %x\n", spanDeltaS, GENACCEL(gc).spanDelta.s);
if (spanDeltaT != GENACCEL(gc).spanDelta.t)
DbgPrint("spanDelta.t %x %x\n", spanDeltaT, GENACCEL(gc).spanDelta.t);
if (qwStepX != GENACCEL(gc).qwStepX)
DbgPrint("qwStepX %f %f\n", qwStepX, GENACCEL(gc).qwStepX);
}
_asm {
mov edx, gc
mov edi, [OFFSET(SHADER.modeFlags)][edx]
}
#endif // FORCE_NPX_DEBUG
// Texture setup finished (or skipped).  Depth deltas are computed only when
// __GL_SHADE_DEPTH_ITER is set.
texDone:
_asm{
test edi, __GL_SHADE_DEPTH_ITER
je noZ
mov al, oneOverAreaDone
test al, al
jne areaDoneAlready2
}
_asm{
fstp oneOverArea // finish divide
fld DWORD PTR [OFFSET(SHADER.dyAC)][edx]
fmul oneOverArea
fld DWORD PTR [OFFSET(SHADER.dyBC)][edx]
fmul oneOverArea // dyBC dyAC
fld DWORD PTR [OFFSET(SHADER.dxAC)][edx]
fmul oneOverArea // dxAC dyBC dyAC
fxch ST(1) // dyBC dxAC dyAC
fld DWORD PTR [OFFSET(SHADER.dxBC)][edx]
fmul oneOverArea // dxBC dyBC dxAC dyAC
fxch ST(3) // dyAC dyBC dxAC dxBC
fstp t1
// al is zero here (tested above), so inc yields al == 1.
inc eax
fstp t2
mov oneOverAreaDone, al
fstp t3
fstp t4
}
areaDoneAlready2:
_asm{
mov ecx, c
mov eax, a
mov ebx, b
fld DWORD PTR [OFFSET(__GLvertex.window.z)][eax]
fsub DWORD PTR [OFFSET(__GLvertex.window.z)][ecx]
fld DWORD PTR [OFFSET(__GLvertex.window.z)][ebx]
fsub DWORD PTR [OFFSET(__GLvertex.window.z)][ecx]
// dzBC dzAC
fld ST(1) // dzAC dzBC dzAC
fmul t2 // ACt2 dzBC dzAC
fld ST(1) // dzBC ACt2 dzBC dzAC
fmul t1 // BCt1 ACt2 dzBC dzAC
fxch ST(3) // dzAC ACt2 dzBC BCt1
fmul t4 // ACt4 ACt2 dzBC BCt1
fxch ST(2) // dzBC ACt2 ACt4 BCt1
fmul t3 // BCt3 ACt2 ACt4 BCt1
fsubrp ST(2),ST // ACt2 BCAC BCt1
fsubrp ST(2),ST // BCAC ACBC
fxch ST(1) // ACBC BCAC
// dzdx dzdy
fld ST(0) // dzdx dzdx dzdy
fmul DWORD PTR [OFFSET(GENGCACCEL.zScale)][edx]
// dzdxS dzdx dzdy
fxch ST(2) // dzdy dzdx dzdxS
fstp DWORD PTR [OFFSET(SHADER.dzdyf)][edx]
fstp DWORD PTR [OFFSET(SHADER.dzdxf)][edx]
// The scaled dzdx is converted to integer through 'temp' and then
// written to both shader.dzdx and spanDelta.z.
fistp temp
mov ebx, DWORD PTR temp
mov DWORD PTR [OFFSET(SHADER.dzdx)][edx], ebx
mov DWORD PTR [OFFSET(SPANDELTA.z)][edx], ebx
#if !FORCE_NPX_DEBUG
jmp deltaDone
#endif
}
#if FORCE_NPX_DEBUG
// Debug cross-check of the depth deltas; execution then falls through to
// noZ, where oneOverAreaDone is already set so nothing more is popped.
{
__GLfloat dzAC, dzBC;
__GLfloat dzdxf;
__GLfloat dzdyf;
ULONG spanDeltaZ;
dzAC = a->window.z - c->window.z;
dzBC = b->window.z - c->window.z;
dzdxf = dzAC * t2 - dzBC * t1;
dzdyf = dzBC * t3 - dzAC * t4;
spanDeltaZ = FTOL(dzdxf * GENACCEL(gc).zScale);
if (dzdxf != gc->polygon.shader.dzdxf)
DbgPrint("dzdxf %f %f\n", dzdxf, gc->polygon.shader.dzdxf);
if (dzdyf != gc->polygon.shader.dzdyf)
DbgPrint("dzdyf %f %f\n", dzdyf, gc->polygon.shader.dzdyf);
if (spanDeltaZ != GENACCEL(gc).spanDelta.z)
DbgPrint("spanDeltaZ %x %x\n", spanDeltaZ, GENACCEL(gc).spanDelta.z);
}
#endif // FORCE_NPX_DEBUG
// No depth iteration.  If nothing consumed the 1/area quotient, it is still
// on the FPU stack; pop and discard it before returning.
noZ:
_asm{
mov al, oneOverAreaDone
test al, al
jne deltaDone
fstp ST(0)
}
deltaDone:
return;
#else
// Portable C implementation of the same setup as the assembly above.
/* Pre-compute one over polygon area */
__GL_FLOAT_BEGIN_DIVIDE(__glOne, gc->polygon.shader.area, &oneOverArea);
oneOverAreaDone = GL_FALSE;
/*
** Compute delta values for unit changes in x or y for each
** parameter.
*/
GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastTexSpanFuncPtr;
// Color: replace/decal texturing uses the constant color; otherwise smooth
// shading computes gradients and flat shading uses the provoking vertex.
if ((gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) &&
((gc->state.texture.env[0].mode == GL_REPLACE) ||
(gc->state.texture.env[0].mode == GL_DECAL))) {
GENACCEL(gc).spanValue.r = GENACCEL(gc).constantR;
GENACCEL(gc).spanValue.g = GENACCEL(gc).constantG;
GENACCEL(gc).spanValue.b = GENACCEL(gc).constantB;
GENACCEL(gc).spanValue.a = GENACCEL(gc).constantA;
} else if (gc->polygon.shader.modeFlags & __GL_SHADE_SMOOTH) {
__GLfloat drAC, dgAC, dbAC, daAC;
__GLfloat drBC, dgBC, dbBC, daBC;
__GLcolor *ac, *bc, *cc;
oneOverAreaDone = GL_TRUE;
ac = a->color;
bc = b->color;
cc = c->color;
drAC = ac->r - cc->r;
drBC = bc->r - cc->r;
dgAC = ac->g - cc->g;
dgBC = bc->g - cc->g;
dbAC = ac->b - cc->b;
dbBC = bc->b - cc->b;
__GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea);
t1 = gc->polygon.shader.dyAC * oneOverArea;
t2 = gc->polygon.shader.dyBC * oneOverArea;
t3 = gc->polygon.shader.dxAC * oneOverArea;
t4 = gc->polygon.shader.dxBC * oneOverArea;
gc->polygon.shader.drdx = drAC * t2 - drBC * t1;
gc->polygon.shader.drdy = drBC * t3 - drAC * t4;
gc->polygon.shader.dgdx = dgAC * t2 - dgBC * t1;
gc->polygon.shader.dgdy = dgBC * t3 - dgAC * t4;
gc->polygon.shader.dbdx = dbAC * t2 - dbBC * t1;
gc->polygon.shader.dbdy = dbBC * t3 - dbAC * t4;
GENACCEL(gc).spanDelta.r = FLT_TO_FIX(gc->polygon.shader.drdx);
GENACCEL(gc).spanDelta.g = FLT_TO_FIX(gc->polygon.shader.dgdx);
GENACCEL(gc).spanDelta.b = FLT_TO_FIX(gc->polygon.shader.dbdx);
// Alpha is interpolated only when blending is enabled.
if (gc->state.enables.general & __GL_BLEND_ENABLE) {
daAC = ac->a - cc->a;
daBC = bc->a - cc->a;
gc->polygon.shader.dadx = daAC * t2 - daBC * t1;
gc->polygon.shader.dady = daBC * t3 - daAC * t4;
GENACCEL(gc).spanDelta.a =
FTOL(gc->polygon.shader.dadx * GENACCEL(gc).aAccelScale);
}
#ifdef GENERIC_CAN_BLEND
//!! Note: this is not enabled in the assembly code above
if ( ((GENACCEL(gc).spanDelta.r | GENACCEL(gc).spanDelta.g | GENACCEL(gc).spanDelta.b) == 0)
&& ((GENACCEL(gc).flags & GEN_FASTZBUFFER) == 0)
) {
GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastFlatSpanFuncPtr;
} else {
GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastSmoothSpanFuncPtr;
}
#endif
} else {
__GLcolor *flatColor = gc->vertex.provoking->color;
GENACCEL(gc).spanValue.r = FLT_TO_FIX(flatColor->r);
GENACCEL(gc).spanValue.g = FLT_TO_FIX(flatColor->g);
GENACCEL(gc).spanValue.b = FLT_TO_FIX(flatColor->b);
if (gc->state.enables.general & __GL_BLEND_ENABLE)
GENACCEL(gc).spanValue.a = FTOL(flatColor->a * GENACCEL(gc).aAccelScale);
#ifdef GENERIC_CAN_BLEND
//!! Note: this is not enabled in the assembly code above
GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastFlatSpanFuncPtr;
#endif
}
// Texture: only when texturing is enabled and a texImage is present.
if ((gc->polygon.shader.modeFlags & __GL_SHADE_TEXTURE) && (GENACCEL(gc).texImage)) {
__GLfloat awinv, bwinv, cwinv, scwinv, tcwinv, qwcwinv;
__GLfloat dsAC, dsBC, dtAC, dtBC, dqwAC, dqwBC;
#ifdef GENERIC_CAN_BLEND
GENACCEL(gc).__fastSpanFuncPtr = GENACCEL(gc).__fastTexSpanFuncPtr;
#endif
if (!oneOverAreaDone)
{
oneOverAreaDone = GL_TRUE;
__GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea);
t1 = gc->polygon.shader.dyAC * oneOverArea;
t2 = gc->polygon.shader.dyBC * oneOverArea;
t3 = gc->polygon.shader.dxAC * oneOverArea;
t4 = gc->polygon.shader.dxBC * oneOverArea;
}
if (gc->state.hints.perspectiveCorrection != GL_NICEST) {
// Gradients taken directly from texture.x / texture.y.
dsAC = a->texture.x - c->texture.x;
dsBC = b->texture.x - c->texture.x;
gc->polygon.shader.dsdx = dsAC * t2 - dsBC * t1;
gc->polygon.shader.dsdy = dsBC * t3 - dsAC * t4;
dtAC = a->texture.y - c->texture.y;
dtBC = b->texture.y - c->texture.y;
gc->polygon.shader.dtdx = dtAC * t2 - dtBC * t1;
gc->polygon.shader.dtdy = dtBC * t3 - dtAC * t4;
GENACCEL(gc).spanDelta.s =
FTOL(gc->polygon.shader.dsdx * GENACCEL(gc).texXScale);
GENACCEL(gc).spanDelta.t =
FTOL(gc->polygon.shader.dtdx * GENACCEL(gc).texYScale);
GENACCEL(gc).sStepX = (GENACCEL(gc).spanDelta.s * TEX_SUBDIV);
GENACCEL(gc).tStepX = (GENACCEL(gc).spanDelta.t * TEX_SUBDIV);
} else {
// GL_NICEST: texture.x/y/w are multiplied by window.w before
// differencing, and the q/w gradients are computed as well.
awinv = a->window.w;
bwinv = b->window.w;
cwinv = c->window.w;
scwinv = c->texture.x * cwinv;
tcwinv = c->texture.y * cwinv;
qwcwinv = c->texture.w * cwinv;
dsAC = a->texture.x * awinv - scwinv;
dsBC = b->texture.x * bwinv - scwinv;
gc->polygon.shader.dsdx = dsAC * t2 - dsBC * t1;
gc->polygon.shader.dsdy = dsBC * t3 - dsAC * t4;
dtAC = a->texture.y * awinv - tcwinv;
dtBC = b->texture.y * bwinv - tcwinv;
gc->polygon.shader.dtdx = dtAC * t2 - dtBC * t1;
gc->polygon.shader.dtdy = dtBC * t3 - dtAC * t4;
dqwAC = a->texture.w * awinv - qwcwinv;
dqwBC = b->texture.w * bwinv - qwcwinv;
gc->polygon.shader.dqwdx = dqwAC * t2 - dqwBC * t1;
gc->polygon.shader.dqwdy = dqwBC * t3 - dqwAC * t4;
GENACCEL(gc).spanDelta.s = FTOL(gc->polygon.shader.dsdx * GENACCEL(gc).texXScale);
GENACCEL(gc).spanDelta.t = FTOL(gc->polygon.shader.dtdx * GENACCEL(gc).texYScale);
GENACCEL(gc).qwStepX = (gc->polygon.shader.dqwdx * (__GLfloat)TEX_SUBDIV);
GENACCEL(gc).sStepX = (GENACCEL(gc).spanDelta.s * TEX_SUBDIV);
GENACCEL(gc).tStepX = (GENACCEL(gc).spanDelta.t * TEX_SUBDIV);
}
}
// Depth: only when depth iteration is enabled in modeFlags.
if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_ITER) {
__GLfloat dzAC, dzBC;
if (!oneOverAreaDone) {
oneOverAreaDone = GL_TRUE;
__GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea);
t1 = gc->polygon.shader.dyAC * oneOverArea;
t2 = gc->polygon.shader.dyBC * oneOverArea;
t3 = gc->polygon.shader.dxAC * oneOverArea;
t4 = gc->polygon.shader.dxBC * oneOverArea;
}
dzAC = a->window.z - c->window.z;
dzBC = b->window.z - c->window.z;
gc->polygon.shader.dzdxf = dzAC * t2 - dzBC * t1;
gc->polygon.shader.dzdyf = dzBC * t3 - dzAC * t4;
GENACCEL(gc).spanDelta.z = gc->polygon.shader.dzdx =
FTOL(gc->polygon.shader.dzdxf * GENACCEL(gc).zScale);
}
if (!oneOverAreaDone)
{
// In this case the divide hasn't been terminated yet so
// we need to complete it even though we don't use the result
__GL_FLOAT_SIMPLE_END_DIVIDE(oneOverArea);
}
#endif // _X86_
}
/**************************************************************************\
* __fastGenFillTriangle
*
* Rasterizes one triangle by splitting it at vertex b's scanline into at
* most two sub-triangles, each handed to
* GENACCEL(gc).__fastFillSubTrianglePtr: first rows aIY..bIY, then rows
* bIY..cIY.
*
* gc   - rendering context; polygon.shader and GENACCEL(gc) state is
*        written here (cfb, zbuf, pPix, modeFlags).
* a, b, c - triangle vertices.  The code fills from a's row up to c's row,
*        so it treats them as ordered by window y.  NOTE(review): the
*        ordering is presumably established by the caller -- confirm.
* ccw  - non-zero: edge AC is passed to GenSnapXLeft and edges AB/BC to
*        GenSnapXRight.  Zero: the roles are swapped, and the initial
*        parameters are set again at b for the upper sub-triangle.
*
* Two code paths:
*   - cIY - aIY <= __GL_MAX_INV_TABLE: reciprocals of the edge heights are
*     fetched from invTable, so no divide is issued.
*   - otherwise (label bigTriangle): slopes are computed with real
*     divides; the AC and BC divides go through __GL_FLOAT_BEGIN_DIVIDE /
*     __GL_FLOAT_SIMPLE_END_DIVIDE with other work placed in between.
*
* Returns without drawing when a and c snap to the same scanline.
\**************************************************************************/
void FASTCALL __fastGenFillTriangle(
__GLcontext *gc,
__GLvertex *a,
__GLvertex *b,
__GLvertex *c,
GLboolean ccw)
{
GLint aIY, bIY, cIY;
__GLfloat dxdyAC, dxdyBC, dxdyBA;
__GLfloat dx, dy;
__GLfloat invDyAB, invDyBC, invDyAC;
#if DBG && CHECK_FPU
// Debug-only: read the FPU control word, clear the low six bits of its
// high byte, and load it back.
{
USHORT cw;
__asm {
_asm fnstcw cw
_asm mov ax, cw
_asm and ah, (~0x3f)
_asm mov cw,ax
_asm fldcw cw
}
}
#endif
//
// Snap each y coordinate to its pixel center
//
aIY = __GL_VERTEX_FIXED_TO_INT(__GL_VERTEX_FLOAT_TO_FIXED(a->window.y)+
__GL_VERTEX_FRAC_HALF);
cIY = __GL_VERTEX_FIXED_TO_INT(__GL_VERTEX_FLOAT_TO_FIXED(c->window.y)+
__GL_VERTEX_FRAC_HALF);
// a and c land on the same scanline: no rows to fill.
if (aIY == cIY) {
return;
}
bIY = __GL_VERTEX_FIXED_TO_INT(__GL_VERTEX_FLOAT_TO_FIXED(b->window.y)+
__GL_VERTEX_FRAC_HALF);
// Triangles taller than the reciprocal table take the divide-based path.
if (cIY - aIY > __GL_MAX_INV_TABLE)
goto bigTriangle;
//
// Small-triangle path: table-driven reciprocals, no divides.
//
gc->polygon.shader.cfb = gc->drawBuffer;
// invTable is indexed by the difference of the CASTFIX'd window.y values.
// OR-ing 0x80000000 sets the top bit of the stored word.
// NOTE(review): presumably CASTFIX reinterprets the float's bits, so this
// sets the sign bit and yields 1/(a.y - b.y) etc., matching the sign
// convention of the divides in the bigTriangle path -- confirm.
CASTFIX(invDyAB) = CASTFIX(invTable[CASTFIX(b->window.y) - CASTFIX(a->window.y)]) | 0x80000000;
CASTFIX(invDyBC) = CASTFIX(invTable[CASTFIX(c->window.y) - CASTFIX(b->window.y)]) | 0x80000000;
CASTFIX(invDyAC) = CASTFIX(invTable[CASTFIX(c->window.y) - CASTFIX(a->window.y)]) | 0x80000000;
//
// Calculate delta values for unit changes in x or y
//
GENACCEL(gc).__fastCalcDeltaPtr(gc, a, b, c);
//
// calculate the destination address
//
// pPix = buffer base + byte offset of row aIY (viewport adjust removed,
// buffer origin added) + the x origin offset scaled by xMultiplier.
GENACCEL(gc).pPix =
(BYTE *)gc->polygon.shader.cfb->buf.base
+ ( gc->polygon.shader.cfb->buf.outerWidth
* (
aIY
- gc->constants.viewportYAdjust
+ gc->polygon.shader.cfb->buf.yOrigin
)
)
+ ( GENACCEL(gc).xMultiplier
* (
- gc->constants.viewportXAdjust
+ gc->polygon.shader.cfb->buf.xOrigin
)
);
// Calculate destination Z
// Depth address of column 0 on row aIY, using the element type that
// matches the depth buffer width (32-bit, otherwise __GLz16Value).
if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)
{
if ( gc->modes.depthBits == 32 )
{
gc->polygon.shader.zbuf =
__GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*),
0, aIY);
}
else
{
gc->polygon.shader.zbuf = (__GLzValue *)
__GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*),
0, aIY);
}
}
/*
** This algorithm always fills from bottom to top, left to right.
** Because of this, ccw triangles are inherently faster because
** the parameter values need not be recomputed.
*/
if (ccw)
{
// Left edge is AC for the whole triangle; parameters are seeded once at a.
// dy is the distance from a up to the center of its snapped scanline.
dy = (aIY + __glHalf) - a->window.y;
dxdyAC = gc->polygon.shader.dxAC * invDyAC;
GenSnapXLeft(gc, a->window.x + dy*dxdyAC, dxdyAC);
// dx is the distance from a to the center of the first left pixel.
dx = (gc->polygon.shader.ixLeft + __glHalf) - a->window.x;
GENACCEL(gc).__fastSetInitParamPtr(gc, a, dx, dy);
if (aIY != bIY)
{
// Lower sub-triangle: right edge is AB.
dxdyBA = (a->window.x - b->window.x) * invDyAB;
GenSnapXRight(gc, a->window.x + dy*dxdyBA, dxdyBA);
// No upper part follows, so flag this as the final sub-triangle.
if (bIY == cIY)
gc->polygon.shader.modeFlags |= __GL_SHADE_LAST_SUBTRI;
GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY);
if (bIY != cIY)
{
// Move zbuf back by ixLeft depth elements (4 bytes each for 32-bit
// depth, 2 bytes otherwise) before the upper sub-triangle.
// NOTE(review): presumably the sub-triangle filler leaves zbuf offset
// by ixLeft -- confirm against __fastFillSubTrianglePtr.
if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)
{
if ( gc->modes.depthBits == 32 )
{
gc->polygon.shader.zbuf = (__GLzValue *)
((GLubyte *)gc->polygon.shader.zbuf-
(gc->polygon.shader.ixLeft << 2));
}
else
{
gc->polygon.shader.zbuf = (__GLzValue *)
((GLubyte *)gc->polygon.shader.zbuf-
(gc->polygon.shader.ixLeft << 1));
}
}
}
}
if (bIY != cIY)
{
// Upper sub-triangle: right edge switches to BC, left edge continues.
dy = (bIY + __glHalf) - b->window.y;
dxdyBC = (b->window.x - c->window.x) * invDyBC;
GenSnapXRight(gc, b->window.x + dy*dxdyBC, dxdyBC);
gc->polygon.shader.modeFlags |= __GL_SHADE_LAST_SUBTRI;
GENACCEL(gc).__fastFillSubTrianglePtr(gc, bIY, cIY);
}
}
else
{
// Right edge is AC; the left edge changes from AB to BC at b, so the
// initial parameters are set at a and then again at b.
dy = (aIY + __glHalf) - a->window.y;
dxdyAC = gc->polygon.shader.dxAC * invDyAC;
GenSnapXRight(gc, a->window.x + dy*dxdyAC, dxdyAC);
if (aIY != bIY)
{
// Lower sub-triangle: left edge is AB.
dxdyBA = (a->window.x - b->window.x) * invDyAB;
GenSnapXLeft(gc, a->window.x + dy*dxdyBA, dxdyBA);
dx = (gc->polygon.shader.ixLeft + __glHalf) - a->window.x;
GENACCEL(gc).__fastSetInitParamPtr(gc, a, dx, dy);
// No upper part follows, so flag this as the final sub-triangle.
if (bIY == cIY)
gc->polygon.shader.modeFlags |= __GL_SHADE_LAST_SUBTRI;
GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY);
if (bIY != cIY)
{
// Same zbuf back-up by ixLeft elements as in the ccw branch.
if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)
{
if ( gc->modes.depthBits == 32 )
{
gc->polygon.shader.zbuf = (__GLzValue *)
((GLubyte *)gc->polygon.shader.zbuf-
(gc->polygon.shader.ixLeft << 2));
}
else
{
gc->polygon.shader.zbuf = (__GLzValue *)
((GLubyte *)gc->polygon.shader.zbuf-
(gc->polygon.shader.ixLeft << 1));
}
}
}
}
if (bIY != cIY)
{
// Upper sub-triangle: left edge is BC, parameters re-seeded at b.
dy = (bIY + __glHalf) - b->window.y;
dxdyBC = gc->polygon.shader.dxBC * invDyBC;
GenSnapXLeft(gc, b->window.x + dy*dxdyBC, dxdyBC);
dx = (gc->polygon.shader.ixLeft + __glHalf) - b->window.x;
GENACCEL(gc).__fastSetInitParamPtr(gc, b, dx, dy);
gc->polygon.shader.modeFlags |= __GL_SHADE_LAST_SUBTRI;
GENACCEL(gc).__fastFillSubTrianglePtr(gc, bIY, cIY);
}
}
// Clear the flag so it does not carry over to the next triangle.
gc->polygon.shader.modeFlags &= ~(__GL_SHADE_LAST_SUBTRI);
return;
bigTriangle:
//
// Big-triangle path: slopes come from real divides.  The AC divide is
// started here and collected by __GL_FLOAT_SIMPLE_END_DIVIDE(dxdyAC)
// further down, after the delta and address setup (including the call
// through __fastCalcDeltaPtr).  Statement order matters in this path.
// __GL_SHADE_LAST_SUBTRI is not set anywhere on this path.
//
__GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxAC,
gc->polygon.shader.dyAC,
&dxdyAC);
gc->polygon.shader.cfb = gc->drawBuffer;
//
// Calculate delta values for unit changes in x or y
//
GENACCEL(gc).__fastCalcDeltaPtr(gc, a, b, c);
//
// calculate the destination address
//
// Same pPix computation as in the small-triangle path.
GENACCEL(gc).pPix =
(BYTE *)gc->polygon.shader.cfb->buf.base
+ ( gc->polygon.shader.cfb->buf.outerWidth
* (
aIY
- gc->constants.viewportYAdjust
+ gc->polygon.shader.cfb->buf.yOrigin
)
)
+ ( GENACCEL(gc).xMultiplier
* (
- gc->constants.viewportXAdjust
+ gc->polygon.shader.cfb->buf.xOrigin
)
);
// Calculate destination Z
// Only done here when a lower sub-triangle exists; when aIY == bIY the
// address is computed for row bIY in the "else if" branches below.
if ((gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) &&
aIY != bIY)
{
if ( gc->modes.depthBits == 32 )
{
gc->polygon.shader.zbuf =
__GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*),
0, aIY);
}
else
{
gc->polygon.shader.zbuf = (__GLzValue *)
__GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*),
0, aIY);
}
}
/*
** This algorithm always fills from bottom to top, left to right.
** Because of this, ccw triangles are inherently faster because
** the parameter values need not be recomputed.
*/
if (ccw)
{
// Left edge is AC for the whole triangle; parameters are seeded once at a.
dy = (aIY + __glHalf) - a->window.y;
// Collect the AC divide started at the top of this path.
__GL_FLOAT_SIMPLE_END_DIVIDE(dxdyAC);
GenSnapXLeft(gc, a->window.x + dy*dxdyAC, dxdyAC);
dx = (gc->polygon.shader.ixLeft + __glHalf) - a->window.x;
GENACCEL(gc).__fastSetInitParamPtr(gc, a, dx, dy);
if (aIY != bIY)
{
// Lower sub-triangle: right edge is AB, slope via an ordinary divide.
dxdyBA = (a->window.x - b->window.x) /
(a->window.y - b->window.y);
GenSnapXRight(gc, a->window.x + dy*dxdyBA, dxdyBA);
if (bIY != cIY)
{
GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY);
// Start the BC divide; it is collected in the upper sub-triangle block.
__GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC,
gc->polygon.shader.dyBC,
&dxdyBC);
// Move zbuf back by ixLeft depth elements (4 or 2 bytes each).
// NOTE(review): presumably undoes an ixLeft offset left by the
// sub-triangle filler -- confirm.
if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)
{
if ( gc->modes.depthBits == 32 )
{
gc->polygon.shader.zbuf = (__GLzValue *)
((GLubyte *)gc->polygon.shader.zbuf-
(gc->polygon.shader.ixLeft << 2));
}
else
{
gc->polygon.shader.zbuf = (__GLzValue *)
((GLubyte *)gc->polygon.shader.zbuf-
(gc->polygon.shader.ixLeft << 1));
}
}
}
else
{
// Flat-topped triangle: the lower sub-triangle is the whole thing.
GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY);
}
}
else if (bIY != cIY)
{
// Flat-bottomed triangle: zbuf was not set above, so compute it for
// row bIY and start the BC divide.
if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)
{
if ( gc->modes.depthBits == 32 )
{
gc->polygon.shader.zbuf =
__GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*),
0, bIY);
}
else
{
gc->polygon.shader.zbuf = (__GLzValue *)
__GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*),
0, bIY);
}
}
__GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC,
gc->polygon.shader.dyBC,
&dxdyBC);
}
if (bIY != cIY)
{
// Upper sub-triangle: right edge switches to BC.  Every path reaching
// here with bIY != cIY has a BC divide pending.
dy = (bIY + __glHalf) - b->window.y;
__GL_FLOAT_SIMPLE_END_DIVIDE(dxdyBC);
GenSnapXRight(gc, b->window.x + dy*dxdyBC, dxdyBC);
GENACCEL(gc).__fastFillSubTrianglePtr(gc, bIY, cIY);
}
}
else
{
// Right edge is AC; the left edge changes from AB to BC at b.
dy = (aIY + __glHalf) - a->window.y;
// Collect the AC divide started at the top of this path.
__GL_FLOAT_SIMPLE_END_DIVIDE(dxdyAC);
GenSnapXRight(gc, a->window.x + dy*dxdyAC, dxdyAC);
if (aIY != bIY)
{
// Lower sub-triangle: left edge is AB, parameters seeded at a.
dxdyBA = (a->window.x - b->window.x) /
(a->window.y - b->window.y);
GenSnapXLeft(gc, a->window.x + dy*dxdyBA, dxdyBA);
dx = (gc->polygon.shader.ixLeft + __glHalf) - a->window.x;
GENACCEL(gc).__fastSetInitParamPtr(gc, a, dx, dy);
if (bIY != cIY)
{
// Here the BC divide is started BEFORE the fill call, so it stays
// pending across __fastFillSubTrianglePtr (unlike the ccw branch).
__GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC,
gc->polygon.shader.dyBC,
&dxdyBC);
GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY);
// Same zbuf back-up by ixLeft elements as in the ccw branch.
if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)
{
if ( gc->modes.depthBits == 32 )
{
gc->polygon.shader.zbuf = (__GLzValue *)
((GLubyte *)gc->polygon.shader.zbuf-
(gc->polygon.shader.ixLeft << 2));
}
else
{
gc->polygon.shader.zbuf = (__GLzValue *)
((GLubyte *)gc->polygon.shader.zbuf-
(gc->polygon.shader.ixLeft << 1));
}
}
}
else
{
// Flat-topped triangle: the lower sub-triangle is the whole thing.
GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY);
}
}
else if (bIY != cIY)
{
// Flat-bottomed triangle: compute zbuf for row bIY and start the BC
// divide.
if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)
{
if ( gc->modes.depthBits == 32 )
{
gc->polygon.shader.zbuf =
__GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*),
0, bIY);
}
else
{
gc->polygon.shader.zbuf = (__GLzValue *)
__GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*),
0, bIY);
}
}
__GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC,
gc->polygon.shader.dyBC,
&dxdyBC);
}
if (bIY != cIY)
{
// Upper sub-triangle: left edge is BC, parameters re-seeded at b.
dy = (bIY + __glHalf) - b->window.y;
__GL_FLOAT_SIMPLE_END_DIVIDE(dxdyBC);
GenSnapXLeft(gc, b->window.x + dy*dxdyBC, dxdyBC);
dx = (gc->polygon.shader.ixLeft + __glHalf) - b->window.x;
GENACCEL(gc).__fastSetInitParamPtr(gc, b, dx, dy);
GENACCEL(gc).__fastFillSubTrianglePtr(gc, bIY, cIY);
}
}
// NOTE(review): this function has no visible CHOP_ROUND_ON(), and the
// small-triangle path above returns without CHOP_ROUND_OFF(), whereas
// __fastGenMcdFillTriangle brackets its body with both macros.  Confirm
// against the macro definitions that this asymmetry is intended.
CHOP_ROUND_OFF();
}
/**************************************************************************\
* __fastGenMcdFillTriangle
*
* Just like __fastGenFillTriangle, except that the floating point macros
* __GL_FLOAT_BEGIN_DIVIDE and __GL_FLOAT_SIMPLE_END_DIVIDE are not allowed
* to straddle a function call to the driver (i.e., __fastFillSubTrianglePtr
* calls the display driver span functions if direct frame buffer access is
* not available).
*
* Other differences visible in the code: there is no inverse-table fast
* path and no early return when a and c snap to the same scanline, and
* the body is bracketed by CHOP_ROUND_ON() / CHOP_ROUND_OFF().
*
* gc   - rendering context; polygon.shader and GENACCEL(gc) state is
*        written here (cfb, zbuf, pPix).
* a, b, c - triangle vertices; rows aIY..bIY are filled first, then rows
*        bIY..cIY.  NOTE(review): the vertices are presumably sorted by
*        window y by the caller -- confirm.
* ccw  - non-zero: edge AC is passed to GenSnapXLeft and edges AB/BC to
*        GenSnapXRight.  Zero: the roles are swapped, and the initial
*        parameters are set again at b for the upper sub-triangle.
\**************************************************************************/
void FASTCALL __fastGenMcdFillTriangle(
__GLcontext *gc,
__GLvertex *a,
__GLvertex *b,
__GLvertex *c,
GLboolean ccw)
{
GLint aIY, bIY, cIY;
__GLfloat dxdyAC, dxdyBC, dxdyBA;
__GLfloat dx, dy;
CHOP_ROUND_ON();
//
// Calculate delta values for unit changes in x or y
//
GENACCEL(gc).__fastCalcDeltaPtr(gc, a, b, c);
// Start the AC divide only after the call above; it is collected by
// __GL_FLOAT_SIMPLE_END_DIVIDE(dxdyAC) at the top of either ccw branch.
__GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxAC,
gc->polygon.shader.dyAC,
&dxdyAC);
//
// can this be moved up even farther?
//
gc->polygon.shader.cfb = gc->drawBuffer;
//
// Snap each y coordinate to its pixel center
//
aIY = __GL_VERTEX_FIXED_TO_INT(__GL_VERTEX_FLOAT_TO_FIXED(a->window.y)+
__GL_VERTEX_FRAC_HALF);
bIY = __GL_VERTEX_FIXED_TO_INT(__GL_VERTEX_FLOAT_TO_FIXED(b->window.y)+
__GL_VERTEX_FRAC_HALF);
cIY = __GL_VERTEX_FIXED_TO_INT(__GL_VERTEX_FLOAT_TO_FIXED(c->window.y)+
__GL_VERTEX_FRAC_HALF);
//
// calculate the destination address
//
// pPix = buffer base + byte offset of row aIY (viewport adjust removed,
// buffer origin added) + the x origin offset scaled by xMultiplier.
GENACCEL(gc).pPix =
(BYTE *)gc->polygon.shader.cfb->buf.base
+ ( gc->polygon.shader.cfb->buf.outerWidth
* (
aIY
- gc->constants.viewportYAdjust
+ gc->polygon.shader.cfb->buf.yOrigin
)
)
+ ( GENACCEL(gc).xMultiplier
* (
- gc->constants.viewportXAdjust
+ gc->polygon.shader.cfb->buf.xOrigin
)
);
// Calculate destination Z
// Only done here when a lower sub-triangle exists; when aIY == bIY the
// address is computed for row bIY in the "else if" branches below.
if ((gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST) &&
aIY != bIY)
{
if ( gc->modes.depthBits == 32 )
{
gc->polygon.shader.zbuf =
__GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*),
0, aIY);
}
else
{
gc->polygon.shader.zbuf = (__GLzValue *)
__GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*),
0, aIY);
}
}
/*
** This algorithm always fills from bottom to top, left to right.
** Because of this, ccw triangles are inherently faster because
** the parameter values need not be recomputed.
*/
if (ccw)
{
// Left edge is AC for the whole triangle; parameters are seeded once at a.
// dy is the distance from a up to the center of its snapped scanline.
dy = (aIY + __glHalf) - a->window.y;
__GL_FLOAT_SIMPLE_END_DIVIDE(dxdyAC);
GenSnapXLeft(gc, a->window.x + dy*dxdyAC, dxdyAC);
// dx is the distance from a to the center of the first left pixel.
dx = (gc->polygon.shader.ixLeft + __glHalf) - a->window.x;
GENACCEL(gc).__fastSetInitParamPtr(gc, a, dx, dy);
if (aIY != bIY)
{
// Lower sub-triangle: right edge is AB, slope via an ordinary divide.
dxdyBA = (a->window.x - b->window.x) /
(a->window.y - b->window.y);
GenSnapXRight(gc, a->window.x + dy*dxdyBA, dxdyBA);
if (bIY != cIY)
{
// Fill first, then start the BC divide, so no divide is pending
// across the fill call.
GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY);
__GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC,
gc->polygon.shader.dyBC,
&dxdyBC);
// Move zbuf back by ixLeft depth elements (4 bytes each for 32-bit
// depth, 2 bytes otherwise) before the upper sub-triangle.
// NOTE(review): presumably the sub-triangle filler leaves zbuf offset
// by ixLeft -- confirm against __fastFillSubTrianglePtr.
if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)
{
if ( gc->modes.depthBits == 32 )
{
gc->polygon.shader.zbuf = (__GLzValue *)
((GLubyte *)gc->polygon.shader.zbuf-
(gc->polygon.shader.ixLeft << 2));
}
else
{
gc->polygon.shader.zbuf = (__GLzValue *)
((GLubyte *)gc->polygon.shader.zbuf-
(gc->polygon.shader.ixLeft << 1));
}
}
}
else
{
// Flat-topped triangle: the lower sub-triangle is the whole thing.
GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY);
}
}
else if (bIY != cIY)
{
// Flat-bottomed triangle: zbuf was not set above, so compute it for
// row bIY and start the BC divide.
if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)
{
if ( gc->modes.depthBits == 32 )
{
gc->polygon.shader.zbuf =
__GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*),
0, bIY);
}
else
{
gc->polygon.shader.zbuf = (__GLzValue *)
__GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*),
0, bIY);
}
}
__GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC,
gc->polygon.shader.dyBC,
&dxdyBC);
}
if (bIY != cIY)
{
// Upper sub-triangle: right edge switches to BC.  The BC divide is
// collected before the fill call.
dy = (bIY + __glHalf) - b->window.y;
__GL_FLOAT_SIMPLE_END_DIVIDE(dxdyBC);
GenSnapXRight(gc, b->window.x + dy*dxdyBC, dxdyBC);
GENACCEL(gc).__fastFillSubTrianglePtr(gc, bIY, cIY);
}
}
else
{
// Right edge is AC; the left edge changes from AB to BC at b, so the
// initial parameters are set at a and then again at b.
dy = (aIY + __glHalf) - a->window.y;
__GL_FLOAT_SIMPLE_END_DIVIDE(dxdyAC);
GenSnapXRight(gc, a->window.x + dy*dxdyAC, dxdyAC);
if (aIY != bIY)
{
// Lower sub-triangle: left edge is AB, parameters seeded at a.
dxdyBA = (a->window.x - b->window.x) /
(a->window.y - b->window.y);
GenSnapXLeft(gc, a->window.x + dy*dxdyBA, dxdyBA);
dx = (gc->polygon.shader.ixLeft + __glHalf) - a->window.x;
GENACCEL(gc).__fastSetInitParamPtr(gc, a, dx, dy);
if (bIY != cIY)
{
// Fill first, then start the BC divide (the reverse of the order used
// in __fastGenFillTriangle's corresponding branch).
GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY);
__GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC,
gc->polygon.shader.dyBC,
&dxdyBC);
// Same zbuf back-up by ixLeft elements as in the ccw branch.
if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)
{
if ( gc->modes.depthBits == 32 )
{
gc->polygon.shader.zbuf = (__GLzValue *)
((GLubyte *)gc->polygon.shader.zbuf-
(gc->polygon.shader.ixLeft << 2));
}
else
{
gc->polygon.shader.zbuf = (__GLzValue *)
((GLubyte *)gc->polygon.shader.zbuf-
(gc->polygon.shader.ixLeft << 1));
}
}
}
else
{
// Flat-topped triangle: the lower sub-triangle is the whole thing.
GENACCEL(gc).__fastFillSubTrianglePtr(gc, aIY, bIY);
}
}
else if (bIY != cIY)
{
// Flat-bottomed triangle: compute zbuf for row bIY and start the BC
// divide.
if (gc->polygon.shader.modeFlags & __GL_SHADE_DEPTH_TEST)
{
if ( gc->modes.depthBits == 32 )
{
gc->polygon.shader.zbuf =
__GL_DEPTH_ADDR(&gc->depthBuffer, (__GLzValue*),
0, bIY);
}
else
{
gc->polygon.shader.zbuf = (__GLzValue *)
__GL_DEPTH_ADDR(&gc->depthBuffer, (__GLz16Value*),
0, bIY);
}
}
__GL_FLOAT_BEGIN_DIVIDE(gc->polygon.shader.dxBC,
gc->polygon.shader.dyBC,
&dxdyBC);
}
if (bIY != cIY)
{
// Upper sub-triangle: left edge is BC, parameters re-seeded at b.
dy = (bIY + __glHalf) - b->window.y;
__GL_FLOAT_SIMPLE_END_DIVIDE(dxdyBC);
GenSnapXLeft(gc, b->window.x + dy*dxdyBC, dxdyBC);
dx = (gc->polygon.shader.ixLeft + __glHalf) - b->window.x;
GENACCEL(gc).__fastSetInitParamPtr(gc, b, dx, dy);
GENACCEL(gc).__fastFillSubTrianglePtr(gc, bIY, cIY);
}
}
// Pairs with the CHOP_ROUND_ON() at function entry.
CHOP_ROUND_OFF();
}