Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

603 lines
19 KiB

///////////////////////////////////////////////////////////////////////////////
// Copyright (C) Microsoft Corporation, 2000.
//
// rastprim.cpp
//
// Direct3D Reference Device - Rasterizer Primitive Routines
//
///////////////////////////////////////////////////////////////////////////////
#include "pch.cpp"
#pragma hdrstop
//-----------------------------------------------------------------------------
//
// ~RefRast - Destroys the rasterizer and frees the legacy pixel shader object
// referenced by m_pLegacyPixelShader (Init() resets that pointer to NULL).
//
//-----------------------------------------------------------------------------
RefRast::~RefRast()
{
// delete on a NULL pointer is a no-op, so no guard is required
delete m_pLegacyPixelShader;
}
//-----------------------------------------------------------------------------
//
// Init - Attaches the rasterizer to its reference device and puts all
// primitive, attribute, pixel, pixel shader and multi-sample state into its
// default configuration (single sample, legacy pixel shading enabled).
//
//-----------------------------------------------------------------------------
void RefRast::Init( RefDev* pRD )
{
    // owning device and primitive state
    m_pRD      = pRD;
    m_bIsLine  = FALSE;
    m_iFlatVtx = 0;

    // attribute evaluators - the trailing arguments follow the original column
    // headings "xD, Persp, Clamp" (xD presumably being the dimensionality)
    m_Attr[RDATTR_DEPTH   ].Init( this, 1, FALSE, TRUE  );
    m_Attr[RDATTR_FOG     ].Init( this, 1, TRUE,  TRUE  );
    m_Attr[RDATTR_COLOR   ].Init( this, 4, TRUE,  TRUE  );
    m_Attr[RDATTR_SPECULAR].Init( this, 4, TRUE,  TRUE  );
    m_Attr[RDATTR_TEXTURE0].Init( this, 4, TRUE,  FALSE );
    m_Attr[RDATTR_TEXTURE1].Init( this, 4, TRUE,  FALSE );
    m_Attr[RDATTR_TEXTURE2].Init( this, 4, TRUE,  FALSE );
    m_Attr[RDATTR_TEXTURE3].Init( this, 4, TRUE,  FALSE );
    m_Attr[RDATTR_TEXTURE4].Init( this, 4, TRUE,  FALSE );
    m_Attr[RDATTR_TEXTURE5].Init( this, 4, TRUE,  FALSE );
    m_Attr[RDATTR_TEXTURE6].Init( this, 4, TRUE,  FALSE );
    m_Attr[RDATTR_TEXTURE7].Init( this, 4, TRUE,  FALSE );

    // per-pixel bookkeeping
    m_iPix = 0;
    memset( m_bPixelIn, 0, sizeof(m_bPixelIn) );
    memset( m_bSampleCovered, 0, sizeof(m_bSampleCovered) );

    // pixel shader state - start out on the legacy path with nothing bound
    m_bLegacyPixelShade   = TRUE;
    m_pCurrentPixelShader = NULL;
    m_CurrentPSInst       = 0;

#if DBG
    {
        // debug builds: a non-zero "VerboseCreatePixelShader" registry value
        // turns on printing of translated pixel shader tokens
        DWORD dwVerbose = 0;
        m_bDebugPrintTranslatedPixelShaderTokens =
            ( GetD3DRefRegValue(REG_DWORD, "VerboseCreatePixelShader", &dwVerbose, sizeof(DWORD)) &&
              dwVerbose != 0 ) ? TRUE : FALSE;
    }
#endif

    // constant-valued default registers (4x4 entries each of 0, 1 and 2)
    for( UINT iRow = 0; iRow < 4; iRow++ )
    {
        for( UINT iCol = 0; iCol < 4; iCol++ )
        {
            m_ZeroReg[iRow][iCol] = 0.0f;
            m_OneReg [iRow][iCol] = 1.0f;
            m_TwoReg [iRow][iCol] = 2.0f;
        }
    }

    m_pLegacyPixelShader = NULL;
    memset( m_bPixelDiscard, 0, sizeof(m_bPixelDiscard) );

    // multi-sample defaults: one sample per pixel, every mask bit enabled
    m_CurrentSample = 0;
    m_SampleMask    = 0xffffffff;
    SetSampleMode( 1, TRUE );

    // sample 0 counts as covered for each of the four pixels
    for( UINT iCovPix = 0; iCovPix < 4; iCovPix++ )
    {
        m_bSampleCovered[0][iCovPix] = TRUE;
    }

    // texture coverage / filter scratch state
    memset( m_TexCvg, 0, sizeof(m_TexCvg) );
    memset( m_TexFlt, 0, sizeof(m_TexFlt) );
}
//-----------------------------------------------------------------------------
//
// SampleAndInvertRHW - Evaluates the linear 1/W function set up for the
// current primitive (A*x + B*y + C) at the given location and returns its
// reciprocal, i.e. W.  A 1/W of exactly zero yields W = 0 instead of a divide
// by zero.
//
//-----------------------------------------------------------------------------
FLOAT RefRast::SampleAndInvertRHW( FLOAT fX, FLOAT fY )
{
    // sample 1/W at (fX, fY)
    const FLOAT fRHW = fX*m_fRHWA + fY*m_fRHWB + m_fRHWC;

    // nothing to invert
    if ( 0. == fRHW )
    {
        return (FLOAT)( 0. );
    }

    // the reciprocal is formed in double precision, then narrowed
    return (FLOAT)( 1./fRHW );
}
//-----------------------------------------------------------------------------
//
// EvalPixelPosition - Tests pixel iPix against every edge of the current
// primitive.
//
// Single sample:  the pixel position (m_iX/m_iY shifted up to n.4) is tested.
// Multi-sample:   every sample enabled by the sample mask is tested at its own
//                 location and the result is stored in
//                 m_bSampleCovered[sample][iPix]; samples disabled by the
//                 mask are stored as not covered.
//
// Returns: non-zero if the pixel (single sample) or at least one enabled
//          sample (multi-sample) lies inside all edges
//
//-----------------------------------------------------------------------------
BOOL
RefRast::EvalPixelPosition( int iPix )
{
    BOOL bPixelIn;
    if (m_SampleCount > 1)
    {
        bPixelIn = FALSE;   // assume out, then set if any sample is in

        // generating multiple samples, so must evaluate all
        // sample positions for in/out
        do
        {
            // a sample disabled by the mask is never covered
            BOOL bPixelSampleIn = GetCurrentSampleMask();
            if (bPixelSampleIn)
            {
                // get sample location
                INT32 iX = GetCurrentSampleX(iPix);
                INT32 iY = GetCurrentSampleY(iPix);

                // test each edge, stopping at the first one that rejects
                for ( int iEdge=0; iEdge<m_iEdgeCount; iEdge++ )
                {
                    bPixelSampleIn &= m_Edge[iEdge].Test( iX, iY );
                    if (!bPixelSampleIn) break;
                }
            }

            // always record the per-sample result; previously a masked-off
            // sample skipped this store and kept whatever coverage flag an
            // earlier pixel/primitive had left behind
            m_bSampleCovered[m_CurrentSample][iPix] = bPixelSampleIn;

            // accumulate per-sample test into per-pixel test
            bPixelIn |= bPixelSampleIn;
        } while (NextSample());
    }
    else
    {
        bPixelIn = TRUE;    // assume pixel is inside all edges

        // single sample, so just test the pixel position (converted to n.4)
        for ( int iEdge=0; iEdge<m_iEdgeCount; iEdge++ )
        {
            bPixelIn &= m_Edge[iEdge].Test( m_iX[iPix]<<4, m_iY[iPix]<<4 );
            if (!bPixelIn) break;
        }
    }
    return bPixelIn;
}
///////////////////////////////////////////////////////////////////////////////
//
// Triangle (& Point) Setup
//
///////////////////////////////////////////////////////////////////////////////
//-----------------------------------------------------------------------------
//
// PerTriangleSetup - Per-triangle portion of triangle setup excluding any
// per-edge or per-attribute work.  Includes snapping of x,y coords to n.4
// grid to enable subsequent edge computations to be exact fixed point;
// computation of determinant; culling; computation and intersection tests
// of scan area; and setup of perspective correction function.
//
// pVtx0..2 - vertex data; elements [0] and [1] are read as x,y and element
//            [3] as 1/W
// CullMode - D3DCULL_NONE, D3DCULL_CW or D3DCULL_CCW
// pClip    - rectangle the scan bounding box is clamped to
//
// Returns: TRUE if triangle is discarded (no area, culled, or nothing left of
//          the scan area after clipping); FALSE if triangle is to be drawn
//
//-----------------------------------------------------------------------------
BOOL RefRast::PerTriangleSetup(
    FLOAT* pVtx0, FLOAT* pVtx1, FLOAT* pVtx2,
    DWORD CullMode,
    RECT* pClip)
{
    m_bIsLine = FALSE;

    FLOAT fX0 = *(pVtx0+0);
    FLOAT fY0 = *(pVtx0+1);
    FLOAT fX1 = *(pVtx1+0);
    FLOAT fY1 = *(pVtx1+1);
    FLOAT fX2 = *(pVtx2+0);
    FLOAT fY2 = *(pVtx2+1);

    // compute fixed point x,y coords snapped to n.4 with nearest-even round
    m_iX0 = FloatToNdot4(fX0);
    m_iY0 = FloatToNdot4(fY0);
    m_iX1 = FloatToNdot4(fX1);
    m_iY1 = FloatToNdot4(fY1);
    m_iX2 = FloatToNdot4(fX2);
    m_iY2 = FloatToNdot4(fY2);

    // compute integer deltas
    INT32 iDelX10 = m_iX1 - m_iX0;
    INT32 iDelX02 = m_iX0 - m_iX2;
    INT32 iDelY01 = m_iY0 - m_iY1;
    INT32 iDelY20 = m_iY2 - m_iY0;

    // compute determinant in n.8 fixed point (n.4 * n.4 = n.8); the products
    // are formed in 64 bits so they cannot overflow
    m_iDet =
        ( (INT64)iDelX10 * (INT64)iDelY20 ) -
        ( (INT64)iDelX02 * (INT64)iDelY01 );

    // check for degeneracy (no area)
    if ( 0 == m_iDet ) { return TRUE; }

    // do culling - the sign of the determinant gives the winding
    switch ( CullMode )
    {
    case D3DCULL_NONE:  break;
    case D3DCULL_CW:    if ( m_iDet > 0 )  { return TRUE; }  break;
    case D3DCULL_CCW:   if ( m_iDet < 0 )  { return TRUE; }  break;
    }

    // compute bounding box for scan area
    FLOAT fXMin = MIN( fX0, MIN( fX1, fX2 ) );
    FLOAT fXMax = MAX( fX0, MAX( fX1, fX2 ) );
    FLOAT fYMin = MIN( fY0, MIN( fY1, fY2 ) );
    FLOAT fYMax = MAX( fY0, MAX( fY1, fY2 ) );

    // convert to integer (add one half, then truncate)
    m_iXMin = (INT32)(fXMin+.5);
    m_iXMax = (INT32)(fXMax+.5);
    m_iYMin = (INT32)(fYMin+.5);
    m_iYMax = (INT32)(fYMax+.5);

    // clip bbox to rendering surface
    m_iXMin = MAX( m_iXMin, pClip->left   );
    m_iXMax = MIN( m_iXMax, pClip->right  );
    m_iYMin = MAX( m_iYMin, pClip->top    );
    m_iYMax = MIN( m_iYMax, pClip->bottom );

    // reject if no coverage - once clamped, the box can no longer lie outside
    // the clip rectangle; a triangle that misses the rectangle shows up as an
    // inverted (empty) box instead, so that is what has to be tested
    if ( ( m_iXMin > m_iXMax ) ||
         ( m_iYMin > m_iYMax ) )
    {
        return TRUE;
    }

    // compute float versions of snapped coord data
    m_fX0 = (FLOAT)m_iX0 * 1.0F/16.0F;
    m_fY0 = (FLOAT)m_iY0 * 1.0F/16.0F;
    m_fDelX10 = (FLOAT)iDelX10 * 1.0F/16.0F;
    m_fDelX02 = (FLOAT)iDelX02 * 1.0F/16.0F;
    m_fDelY01 = (FLOAT)iDelY01 * 1.0F/16.0F;
    m_fDelY20 = (FLOAT)iDelY20 * 1.0F/16.0F;

    // compute inverse determinant (n.8 -> float; non-zero, see test above)
    FLOAT fDet = (1./(FLOAT)(1<<8)) * (FLOAT)m_iDet;
    m_fTriOODet = 1.f/fDet;

    // compute linear function for 1/W (for perspective correction)
    m_fRHW0 = *(pVtx0+3);
    m_fRHW1 = *(pVtx1+3);
    m_fRHW2 = *(pVtx2+3);

    // compute linear deltas along two edges
    FLOAT fDelAttrib10 = m_fRHW1 - m_fRHW0;
    FLOAT fDelAttrib20 = m_fRHW2 - m_fRHW0;

    // compute A & B terms (dVdX and dVdY)
    m_fRHWA = m_fTriOODet * ( fDelAttrib10 * m_fDelY20 + fDelAttrib20 * m_fDelY01 );
    m_fRHWB = m_fTriOODet * ( fDelAttrib20 * m_fDelX10 + fDelAttrib10 * m_fDelX02 );

    // compute C term (Fv = A*Xv + B*Yv + C => C = Fv - A*Xv - B*Yv)
    m_fRHWC = m_fRHW0 - ( m_fRHWA * m_fX0 ) - ( m_fRHWB * m_fY0 );

    return FALSE;
}
///////////////////////////////////////////////////////////////////////////////
//
// Line Setup & Evaluate
//
///////////////////////////////////////////////////////////////////////////////
//-----------------------------------------------------------------------------
//
// PointDiamondCheck - Decides whether a line vertex falls within the diamond
// of its candidate pixel.  iXFrac/iYFrac are the n.4 offsets of the vertex
// from the candidate pixel position.  Because the offsets are pixel-relative,
// the inclusive boundary points are the +.5 ones (lower-right), which is the
// same as an upper-left rule for a vertex-relative position.
//
// Returns: TRUE if the vertex is in the diamond; FALSE otherwise
//
//-----------------------------------------------------------------------------
static BOOL
PointDiamondCheck(
    INT32 iXFrac, INT32 iYFrac,
    BOOL bSlopeIsOne, BOOL bSlopeIsPosOne )
{
    const INT32 iHalf = 0x8;    // one half in n.4 fixed point

    // Manhattan distance from the candidate position classifies the vertex
    const INT32 iDistance = labs( iXFrac ) + labs( iYFrac );

    // strictly inside the diamond
    if ( iDistance < iHalf ) return TRUE;

    // strictly outside the diamond
    if ( iDistance > iHalf ) return FALSE;

    // from here on the vertex sits exactly on the diamond boundary

    // left/right corner: only one of the two is included, and which one
    // depends on whether the slope is exactly +1
    if ( iYFrac == 0 )
    {
        return ( iXFrac == ( bSlopeIsPosOne ? -iHalf : iHalf ) );
    }

    // top/bottom corner: only the +.5 corner is included
    if ( iXFrac == 0 )
    {
        return ( iYFrac > 0 );
    }

    // on one of the four sides: included only for lines of slope +/-1, and
    // then only on the +Y side whose X sign matches the slope direction
    if ( !bSlopeIsOne || ( iYFrac <= 0 ) )
    {
        return FALSE;
    }
    return bSlopeIsPosOne ? ( iXFrac < 0 ) : ( iXFrac > 0 );
}
//-----------------------------------------------------------------------------
//
// PerLineSetup - Does per-line setup including scan conversion
//
// This implements the Grid Intersect Quantization (GIQ) convention (which is
// also used in Windows).
//
// pVtx0,pVtx1 - vertex data; elements [0] and [1] are read as x,y and element
//               [3] as 1/W
// bLastPixel  - when TRUE, the last pixel is kept if the end vertex lies in
//               that pixel's diamond
// pClip       - rectangle the major-axis extent is clamped to
//
// Returns: TRUE if line is discarded; FALSE if line to be drawn
//
//-----------------------------------------------------------------------------
BOOL
RefRast::PerLineSetup(
    FLOAT* pVtx0, FLOAT* pVtx1,
    BOOL bLastPixel,
    RECT* pClip)
{
    m_bIsLine = TRUE;

    FLOAT fX0 = *(pVtx0+0);
    FLOAT fY0 = *(pVtx0+1);
    FLOAT fX1 = *(pVtx1+0);
    FLOAT fY1 = *(pVtx1+1);

    // compute fixed point x,y coords snapped to n.4 with nearest-even round
    m_iX0 = FloatToNdot4( fX0 );
    m_iY0 = FloatToNdot4( fY0 );
    m_iX1 = FloatToNdot4( fX1 );
    m_iY1 = FloatToNdot4( fY1 );

    // compute x,y extents of the line (fixed point)
    INT32 iXSize = m_iX1 - m_iX0;
    INT32 iYSize = m_iY1 - m_iY0;

    // zero-length line - nothing to draw
    if ( ( iXSize == 0 ) && ( iYSize == 0 ) ) { return TRUE; }

    // determine major direction and compute line function
    // use GreaterEqual compare here so X major will be used when slope is
    // exactly one - this forces the per-pixel evaluation to be done on the
    // Y axis and thus adheres to the rule of inclusive right (instead of
    // inclusive left) for slope == 1 cases
    if ( labs( iXSize ) >= labs( iYSize ) )
    {
        // here for X major
        m_bLineXMajor = TRUE;
        m_fLineMajorLength = (FLOAT)iXSize * (1./16.);

        // line function: y = F(x) = ( [0]*x + [1] ) / [2]
        m_iLineEdgeFunc[0] = iYSize;
        m_iLineEdgeFunc[1] = (INT64)m_iY0*(INT64)m_iX1 - (INT64)m_iY1*(INT64)m_iX0;
        m_iLineEdgeFunc[2] = iXSize;
    }
    else
    {
        // here for Y major
        m_bLineXMajor = FALSE;
        m_fLineMajorLength = (FLOAT)iYSize * (1./16.);

        // line function: x = F(y) = ( [0]*y + [1] ) / [2]
        m_iLineEdgeFunc[0] = iXSize;
        m_iLineEdgeFunc[1] = (INT64)m_iX0*(INT64)m_iY1 - (INT64)m_iX1*(INT64)m_iY0;
        m_iLineEdgeFunc[2] = iYSize;
    }

    // [2] is the major extent, which is non-zero here, so the ratio is safe
    BOOL bSlopeIsOne = ( labs( iXSize ) == labs( iYSize ) );
    BOOL bSlopeIsPosOne =
        bSlopeIsOne &&
        ( ( (FLOAT)m_iLineEdgeFunc[0]/(FLOAT)m_iLineEdgeFunc[2] ) > 0. );

    // compute candidate pixel location for line endpoints
    //
    //       n                   n
    //   O-------*           *-------O
    //  n-.5    n+.5        n-.5    n+.5
    //
    //  Nearest Ceiling     Nearest Floor
    //
    // always nearest ceiling for Y; use nearest floor for X for exception (slope == +1)
    // case else use nearest ceiling
    //
    // nearest ceiling of Y is ceil( Y - .5), and is done by converting to floor via:
    //
    //   ceil( A/B ) = floor( (A+B-1)/B )
    //
    // where A is coordinate - .5, and B is 0x10 (thus A/B is an n.4 fixed point number)
    //
    //   A+B-1 = (Y - half) + B - 1 = (Y - 0x8) + 0x10 - 0x1 = Y + 0x7
    //   since B is 2**4, divide by B is right shift by 4
    //
    INT32 iPixX0 = ( m_iX0 + ( bSlopeIsPosOne ? 0x8 : 0x7 ) ) >> 4;
    INT32 iPixX1 = ( m_iX1 + ( bSlopeIsPosOne ? 0x8 : 0x7 ) ) >> 4;
    INT32 iPixY0 = ( m_iY0 + 0x7 ) >> 4;
    INT32 iPixY1 = ( m_iY1 + 0x7 ) >> 4;

    // check for vertices in/out of diamond
    BOOL bV0InDiamond = PointDiamondCheck( m_iX0 - (iPixX0<<4), m_iY0 - (iPixY0<<4), bSlopeIsOne, bSlopeIsPosOne );
    BOOL bV1InDiamond = PointDiamondCheck( m_iX1 - (iPixX1<<4), m_iY1 - (iPixY1<<4), bSlopeIsOne, bSlopeIsPosOne );

    // compute step value
    m_iLineStep = ( m_fLineMajorLength > 0 ) ? ( +1 ) : ( -1 );

    // compute fixed point (n.4) and integer (pixel) major start (V0) and
    // end (V1) positions
    INT32 iLineMajor0 = ( m_bLineXMajor ) ? ( m_iX0 ) : ( m_iY0 );
    INT32 iLineMajor1 = ( m_bLineXMajor ) ? ( m_iX1 ) : ( m_iY1 );
    m_iLineMin = ( m_bLineXMajor ) ? ( iPixX0 ) : ( iPixY0 );
    m_iLineMax = ( m_bLineXMajor ) ? ( iPixX1 ) : ( iPixY1 );

// need to do lots of compares which are flipped if major direction is negative
#define LINEDIR_CMP( _A, _B ) \
( ( m_fLineMajorLength > 0 ) ? ( (_A) < (_B) ) : ( (_A) > (_B) ) )

    // do first pixel handling - the first candidate pixel is kept only when
    // V0 is in its diamond or lies before the pixel position along the major
    // direction; otherwise the line starts one pixel further on
    if ( !( bV0InDiamond || LINEDIR_CMP( iLineMajor0, (m_iLineMin<<4) ) ) )
    {
        m_iLineMin += m_iLineStep;
    }

    // do last-pixel handling - keep last pixel if past diamond (in which case
    // the pixel is always filled) or if in diamond and rendering last pixel
    if ( !( ( !bV1InDiamond && LINEDIR_CMP( (m_iLineMax<<4), iLineMajor1 ) ) ||
            ( bV1InDiamond && bLastPixel ) ) )
    {
        m_iLineMax -= m_iLineStep;
    }

    // return if no (major) extent (both before and after clamping to render buffer)
    if ( LINEDIR_CMP( m_iLineMax, m_iLineMin ) ) return TRUE;

    // snap major extent to render buffer; the bounds are held in 32 bits so
    // the LONG members of RECT are not narrowed
    INT32 iRendBufMajorMin = m_bLineXMajor ? pClip->left  : pClip->top;
    INT32 iRendBufMajorMax = m_bLineXMajor ? pClip->right : pClip->bottom;

    // trivially reject a line that lies entirely on one side of the buffer
    if ( ( ( m_iLineMin < iRendBufMajorMin ) &&
           ( m_iLineMax < iRendBufMajorMin ) ) ||
         ( ( m_iLineMin > iRendBufMajorMax ) &&
           ( m_iLineMax > iRendBufMajorMax ) ) )  { return TRUE; }

    // clamp both ends into the buffer; the lower bound is the clip minimum
    // (matching the rejection test above) and is never allowed below zero
    INT32 iRendBufMajorLo = MAX( 0, iRendBufMajorMin );
    m_iLineMin = MAX( iRendBufMajorLo, MIN( iRendBufMajorMax, m_iLineMin ) );
    m_iLineMax = MAX( iRendBufMajorLo, MIN( iRendBufMajorMax, m_iLineMax ) );

    // return if no (major) extent
    if ( LINEDIR_CMP( m_iLineMax, m_iLineMin ) ) return TRUE;

    // number of steps to iterate
    m_cLineSteps = abs( m_iLineMax - m_iLineMin );

    // initial state for per-pixel line iterator
    m_iMajorCoord = m_iLineMin;

    // compute float versions of snapped coord data
    m_fX0 = (FLOAT)m_iX0 * 1.0F/16.0F;
    m_fY0 = (FLOAT)m_iY0 * 1.0F/16.0F;

    // compute linear function for 1/W (for perspective correction); 1/W
    // varies along the major axis only
    m_fRHW0 = *(pVtx0+3);
    m_fRHW1 = *(pVtx1+3);

    FLOAT fDelta = ( m_fRHW1 - m_fRHW0 ) / m_fLineMajorLength;
    m_fRHWA = ( m_bLineXMajor ) ? ( fDelta ) : ( 0. );
    m_fRHWB = ( m_bLineXMajor ) ? ( 0. ) : ( fDelta );
    m_fRHWC = m_fRHW0 - ( m_fRHWA * m_fX0 ) - ( m_fRHWB * m_fY0 );

    return FALSE;
}
//-----------------------------------------------------------------------------
//
// DivRoundDown(A,B) = ceiling(A/B - 1/2)
//
// That is, A/B rounded to the nearest integer with exact halves rounded
// towards minus infinity:
//
//   ceiling(A/B - 1/2) == floor(A/B + 1/2 - epsilon)
//                      == floor( (A + (B/2 - epsilon))/B )
//
// The division is carried out on magnitudes and the sign is put back
// afterwards, which does the correct thing for all sign combinations of
// A and B.  B must not be zero.
//
//-----------------------------------------------------------------------------
static INT64
DivRoundDown(INT64 iA, INT32 iB)
{
    // strip the signs, remembering whether the quotient ends up negative
    const bool bNegA = ( iA < 0 );
    const bool bNegB = ( iB < 0 );
    if ( bNegA ) { iA = -iA; }
    if ( bNegB ) { iB = -iB; }
    const bool bNegQuotient = ( bNegA != bNegB );

    // rounding bias: just under B/2 when the quotient is positive (halves
    // round down), exactly B/2 when it is negative (after negation the halves
    // again land on the lower integer)
    const INT32 iEps = bNegQuotient ? 0 : 1;
    const INT64 iMagnitude = ( iA + ( ( iB - iEps ) >> 1 ) ) / iB;

    return bNegQuotient ? -iMagnitude : iMagnitude;
}
//-----------------------------------------------------------------------------
//
// DoScanCnvLine - Walks the line major axis, computes the appropriate minor
// axis coordinate, and generates pixels.
//
//-----------------------------------------------------------------------------
void
RefRast::StepLine( void )
{
// evaluate line function to compute minor coord for this major
INT64 iMinorCoord =
( ( m_iLineEdgeFunc[0] * (INT64)(m_iMajorCoord<<4) ) + m_iLineEdgeFunc[1] );
iMinorCoord = DivRoundDown(iMinorCoord, m_iLineEdgeFunc[2]<<4);
// grab x,y
m_iX[0] = m_bLineXMajor ? m_iMajorCoord : iMinorCoord;
m_iY[0] = m_bLineXMajor ? iMinorCoord : m_iMajorCoord;
// step major for next evaluation
m_iMajorCoord += m_iLineStep;
}
///////////////////////////////////////////////////////////////////////////////
//
// Multi-Sample Controls
//
///////////////////////////////////////////////////////////////////////////////
// stores a sample's x,y offset from the pixel position, converting the
// fractional pixel offsets to n.4 fixed point (truncating)
#define _SetSampleDelta( _SampleNumber, _XOffset, _YOffset ) \
{ \
m_SampleDelta[_SampleNumber][0] = ((INT32)((_XOffset)*16.F)); \
m_SampleDelta[_SampleNumber][1] = ((INT32)((_YOffset)*16.F)); \
}
//-----------------------------------------------------------------------------
//
// SetSampleMode - Selects the number of samples per pixel and their offsets.
//
// MultiSamples - 4 or 9 select the 2x2 or 3x3 sample pattern; any other value
//                selects a single sample
// bAntialias   - when FALSE every sample is placed at offset (0,0) instead of
//                at its pattern position
//
// Also restarts sample iteration at sample 0 and flags sample 0 as covered
// for each of the four pixels.
//
//-----------------------------------------------------------------------------
void
RefRast::SetSampleMode( UINT MultiSamples, BOOL bAntialias )
{
    // sample patterns as { x, y } offsets in fractions of a pixel
    static const double s_Pattern1[1][2] =
    {
        { 0., 0. },
    };
    static const double s_Pattern4[4][2] =
    {
        { -.25, -.25 },
        { +.25, -.25 },
        { +.25, +.25 },
        { -.25, +.25 },
    };
    static const double s_Pattern9[9][2] =
    {
        { -.333, -.333 },
        { -.333,  0.0  },
        { -.333, +.333 },
        {  0.0,  -.333 },
        {  0.0,   0.0  },
        {  0.0,  +.333 },
        { +.333, -.333 },
        { +.333,  0.0  },
        { +.333, +.333 },
    };

    // pick the pattern; unsupported counts fall back to a single sample
    const double (*pPattern)[2];
    switch (MultiSamples)
    {
    case 4:
        m_SampleCount = 4;
        pPattern = s_Pattern4;
        break;
    case 9:
        m_SampleCount = 9;
        pPattern = s_Pattern9;
        break;
    default:
        m_SampleCount = 1;
        pPattern = s_Pattern1;
        break;
    }

    // load the offsets; if not FSAA then sample all at pixel center
    for (UINT Sample=0; Sample<m_SampleCount; Sample++)
    {
        if (bAntialias)
        {
            _SetSampleDelta( Sample, pPattern[Sample][0], pPattern[Sample][1] );
        }
        else
        {
            _SetSampleDelta( Sample, 0., 0. );
        }
    }

    // restart sample iteration and mark sample 0 covered for all four pixels
    m_CurrentSample = 0;
    for (UINT Pix=0; Pix<4; Pix++)
    {
        m_bSampleCovered[0][Pix] = TRUE;
    }
}
///////////////////////////////////////////////////////////////////////////////
// end