|
|
//----------------------------------------------------------------------------
//
// buffer.cpp
//
// PrimProcessor buffering methods.
//
// Copyright (C) Microsoft Corporation, 1997.
//
//----------------------------------------------------------------------------
#include "pch.cpp"
#pragma hdrstop
DBG_DECLARE_FILE();
// Build-time toggles for this file.  Both are disabled by default by the
// "#if 0" guards; change a guard to "#if 1" to enable that toggle.
//
// USE_CPP_HEAP: define to use new/delete instead of VirtualAlloc/VirtualFree
// for the primitive buffer (see PrimProcessor::Initialize and the destructor).
#if 0
#define USE_CPP_HEAP
#endif
// UNMASK_EXCEPTIONS: define to show FP exceptions.  In this file it is only
// tested in PrimProcessor::Begin, and only when _X86_ is also defined.
#if 0
#define UNMASK_EXCEPTIONS
#endif
//----------------------------------------------------------------------------
//
// PrimProcessor::PrimProcessor
//
// Initializes a triangle processor to an invalid state.
//
//----------------------------------------------------------------------------
PrimProcessor::PrimProcessor(void)
{
    // Clear the entire object up front so every pointer member starts
    // out NULL and no floating-point member is left holding garbage.
    memset(this, 0, sizeof(*this));

    // Let the setup context refer back to the processor that owns it.
    m_StpCtx.PrimProcessor = (PVOID)this;

    // Start with the FORCE_DWORD primitive and vertex types and with the
    // state-changed flag set so that a validation is forced.
    m_PrimType = D3DPT_FORCE_DWORD;
    m_VertType = RAST_FORCE_DWORD;

    // ATTENTION - RHW normalization is on by default.  This is a
    // performance hit and should be removed if possible.
    m_uPpFlags = PPF_NORMALIZE_RHW | PPF_STATE_CHANGED;
}
//----------------------------------------------------------------------------
//
// PrimProcessor::Initialize
//
// Initializes the triangle processor to an active state.
//
//----------------------------------------------------------------------------
// Cache line size, in bytes, that buffer contents are kept aligned to.
#define CACHE_LINE 32
// Size, in bytes, of the primitive/span buffer.
#define BUFFER_SIZE 4096
// Uncomment to force a flush every span for debug purposes
//#define BUFFER_SIZE ((8 * sizeof(D3DI_RASTSPAN)) + sizeof(D3DI_RASTPRIM))

//
// Allocates the BUFFER_SIZE-byte primitive/span buffer and sets
// m_pBuffer, m_pBufferEnd, m_pBufferStart and m_pCur to describe it.
//
// Returns S_OK on success.  If the allocation yields NULL, returns the
// result of RSHRCHK(E_OUTOFMEMORY).
//
HRESULT
PrimProcessor::Initialize(void)
{
    // NOTE(review): hr is not named directly below; it is presumably
    // written by RSHRCHK, as other methods in this file read hr right
    // after using that macro.
    HRESULT hr;

    // Assert that both RASTPRIM and RASTSPAN are multiples of the cache
    // line size so that everything in the buffer stays cache aligned.
    RSASSERT((sizeof(D3DI_RASTPRIM) & (CACHE_LINE - 1)) == 0 && (sizeof(D3DI_RASTSPAN) & (CACHE_LINE - 1)) == 0);

#ifdef USE_CPP_HEAP
    // NOTE(review): the NULL check below only catches a failure here if
    // operator new reports failure by returning NULL - confirm for the
    // toolchain in use.
    m_pBuffer = new UINT8[BUFFER_SIZE];
#else
    // Get a page-aligned buffer.
    m_pBuffer = (PUINT8) VirtualAlloc(NULL, BUFFER_SIZE, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
#endif
    if (m_pBuffer == NULL)
    {
        return RSHRCHK(E_OUTOFMEMORY);
    }

    m_pBufferEnd = m_pBuffer + BUFFER_SIZE;

#ifdef USE_CPP_HEAP
    // Compute cache-line aligned start in the buffer.  Only the low bits
    // of the address matter, so the pointer is converted to an integer
    // just to derive the padding and is never converted back.  UINT_PTR
    // is used (as in the assert of the other branch) so the conversion
    // does not truncate the pointer on 64-bit builds.
    m_pBufferStart = m_pBuffer + ((CACHE_LINE - ((UINT_PTR)m_pBuffer & (CACHE_LINE - 1))) & (CACHE_LINE - 1));
#else
    // Page aligned memory should be cache aligned.
    RSASSERT(((UINT_PTR)m_pBuffer & (CACHE_LINE - 1)) == 0);
    m_pBufferStart = m_pBuffer;
#endif

    // The buffer starts out empty.
    m_pCur = m_pBufferStart;

    return S_OK;
}
//----------------------------------------------------------------------------
//
// PrimProcessor::~PrimProcessor
//
//----------------------------------------------------------------------------
// Releases the buffer obtained by Initialize, using the deallocator that
// matches the allocator selected by USE_CPP_HEAP.
PrimProcessor::~PrimProcessor(void)
{
#ifdef USE_CPP_HEAP
    // The buffer is allocated with new UINT8[...], so it must be released
    // with the array form of delete.  Deleting NULL is harmless, so no
    // check is needed when Initialize was never called or failed.
    delete[] m_pBuffer;
#else
    if (m_pBuffer != NULL)
    {
        VirtualFree(m_pBuffer, 0, MEM_RELEASE);
    }
#endif
}
//----------------------------------------------------------------------------
//
// PrimProcessor::ResetBuffer
//
// Initialize buffer pointers to an empty state.
//
//----------------------------------------------------------------------------
inline void
PrimProcessor::ResetBuffer(void)
{
    // Forget any primitive that was being tracked...
    m_pOldPrim = NULL;
    m_StpCtx.pPrim = NULL;

    // ...and rewind the write cursor to the start of the buffer.
    m_pCur = m_pBufferStart;
}
//----------------------------------------------------------------------------
//
// DumpPrims
//
// Debugging function to dump primitives sent to the span renderer.
//
//----------------------------------------------------------------------------
#if DBG
// Helper for DumpPrims.  Depending on the debug user flags, reports a
// non-zero 16-bit pixel at pPix (RSU_CHECK_SPAN_EDGES) and/or overwrites
// it with 0xffff (RSU_MARK_SPAN_EDGES).
static void
DumpPrimsCheckMarkPixel(PUINT16 pPix)
{
    if (RSGETFLAGS(DBG_USER_FLAGS) & RSU_CHECK_SPAN_EDGES)
    {
        if (*pPix != 0)
        {
            RSDPF((" Overwrite at %p: %X\n", pPix, *pPix));
        }
    }
    if (RSGETFLAGS(DBG_USER_FLAGS) & RSU_MARK_SPAN_EDGES)
    {
        *pPix = 0xffff;
    }
}

// Walks the primitive list hanging off pStpCtx->pCtx->pPrim and prints
// each primitive and, when span output or span-edge debugging is
// enabled, each of its spans.  DBG_OUTPUT_ALL_MATCH is added to the
// output flags for the duration of the dump and the previous flags are
// restored before returning.
void
DumpPrims(PSETUPCTX pStpCtx)
{
    UINT uSavedFlags = RSGETFLAGS(DBG_OUTPUT_FLAGS);
    RSSETFLAGS(DBG_OUTPUT_FLAGS, uSavedFlags | DBG_OUTPUT_ALL_MATCH);

    PD3DI_RASTPRIM pCurPrim;
    for (pCurPrim = pStpCtx->pCtx->pPrim; pCurPrim != NULL; pCurPrim = pCurPrim->pNext)
    {
        RSDPFM((RSM_BUFPRIM, "Prim at %p, %d spans at %p\n", pCurPrim, pCurPrim->uSpans, pCurPrim+1));
        RSDPFM((RSM_BUFPRIM | RSM_OOW, " DOoWDX %X (%f)\n", pCurPrim->iDOoWDX, (FLOAT)pCurPrim->iDOoWDX / OOW_SCALE));

        // Spans are only visited when span output is requested or when
        // span edges are being marked/checked.
        if ((RSGETFLAGS(DBG_OUTPUT_MASK) & RSM_BUFSPAN) == 0 &&
            (RSGETFLAGS(DBG_USER_FLAGS) & (RSU_MARK_SPAN_EDGES | RSU_CHECK_SPAN_EDGES)) == 0)
        {
            continue;
        }

        // The spans of a primitive immediately follow its header.
        PD3DI_RASTSPAN pCurSpan = (PD3DI_RASTSPAN)(pCurPrim+1);
        for (UINT16 uIdx = 0; uIdx < pCurPrim->uSpans; uIdx++, pCurSpan++)
        {
            RSDPFM((RSM_BUFSPAN, " Span at (%d,%d), pix %c%d, S %p Z %p\n",
                    pCurSpan->uX, pCurSpan->uY,
                    (pCurPrim->uFlags & D3DI_RASTPRIM_X_DEC) ? '-' : '+',
                    pCurSpan->uPix, pCurSpan->pSurface, pCurSpan->pZ));

            if (RSGETFLAGS(DBG_USER_FLAGS) & (RSU_MARK_SPAN_EDGES | RSU_CHECK_SPAN_EDGES))
            {
                // First pixel of the span.  The surface pointer is
                // treated as pointing at 16-bit pixels here.
                DumpPrimsCheckMarkPixel((PUINT16)pCurSpan->pSurface);

                // Last pixel of the span, stepping backwards for
                // X-decreasing primitives.
                if (pCurSpan->uPix > 1)
                {
                    PUINT16 pLastPix;
                    if (pCurPrim->uFlags & D3DI_RASTPRIM_X_DEC)
                    {
                        pLastPix = (PUINT16)pCurSpan->pSurface - (pCurSpan->uPix - 1);
                    }
                    else
                    {
                        pLastPix = (PUINT16)pCurSpan->pSurface + (pCurSpan->uPix - 1);
                    }
                    DumpPrimsCheckMarkPixel(pLastPix);
                }
            }

            // Pick the Z scale that matches the Z buffer depth.
            FLOAT fZScale;
            if (pStpCtx->pCtx->iZBitCount == 16)
            {
                fZScale = Z16_SCALE;
            }
            else
            {
                fZScale = Z32_SCALE;
            }
            RSDPFM((RSM_BUFSPAN | RSM_Z, " Z %X (%f)\n", pCurSpan->uZ, (FLOAT)pCurSpan->uZ / fZScale));

            RSDPFM((RSM_BUFSPAN | RSM_DIFF, " D %X,%X,%X,%X (%f,%f,%f,%f)\n",
                    pCurSpan->uB, pCurSpan->uG, pCurSpan->uR, pCurSpan->uA,
                    (FLOAT)pCurSpan->uB / COLOR_SCALE, (FLOAT)pCurSpan->uG / COLOR_SCALE,
                    (FLOAT)pCurSpan->uR / COLOR_SCALE, (FLOAT)pCurSpan->uA / COLOR_SCALE));

            RSDPFM((RSM_BUFSPAN | RSM_SPEC, " S %X,%X,%X (%f,%f,%f)\n",
                    pCurSpan->uBS, pCurSpan->uGS, pCurSpan->uRS,
                    (FLOAT)pCurSpan->uBS / COLOR_SCALE, (FLOAT)pCurSpan->uGS / COLOR_SCALE,
                    (FLOAT)pCurSpan->uRS / COLOR_SCALE));

            RSDPFM((RSM_BUFSPAN | RSM_DIDX, " I %X,%X (%f,%f)\n",
                    pCurSpan->iIdx, pCurSpan->iIdxA,
                    (FLOAT)pCurSpan->iIdx / INDEX_COLOR_SCALE, (FLOAT)pCurSpan->iIdxA / INDEX_COLOR_SCALE));

            RSDPFM((RSM_BUFSPAN | RSM_OOW, " OoW %X (%f), W %X (%f)\n",
                    pCurSpan->iOoW, (FLOAT)pCurSpan->iOoW / OOW_SCALE,
                    pCurSpan->iW, (FLOAT)pCurSpan->iW / W_SCALE));

            RSDPFM((RSM_BUFSPAN | RSM_LOD, " LOD %X (%f), DLOD %X (%f)\n",
                    pCurSpan->iLOD, (FLOAT)pCurSpan->iLOD / LOD_SCALE,
                    pCurSpan->iDLOD, (FLOAT)pCurSpan->iDLOD / LOD_SCALE));

            // With perspective in use the texture values are also shown
            // divided through by OoW.
            if (pStpCtx->uFlags & PRIMSF_PERSP_USED)
            {
                RSDPFM((RSM_BUFSPAN | RSM_TEX1, " PTex1 %X,%X (%f,%f) (%f,%f)\n",
                        pCurSpan->iUoW1, pCurSpan->iVoW1,
                        (FLOAT)pCurSpan->iUoW1 / TEX_SCALE, (FLOAT)pCurSpan->iVoW1 / TEX_SCALE,
                        ((FLOAT)pCurSpan->iUoW1 * OOW_SCALE) / (TEX_SCALE * (FLOAT)pCurSpan->iOoW),
                        ((FLOAT)pCurSpan->iVoW1 * OOW_SCALE) / (TEX_SCALE * (FLOAT)pCurSpan->iOoW)));
            }
            else
            {
                RSDPFM((RSM_BUFSPAN | RSM_TEX1, " ATex1 %X,%X (%f,%f)\n",
                        pCurSpan->iUoW1, pCurSpan->iVoW1,
                        (FLOAT)pCurSpan->iUoW1 / TEX_SCALE, (FLOAT)pCurSpan->iVoW1 / TEX_SCALE));
            }

            RSDPFM((RSM_BUFSPAN | RSM_FOG, " Fog %X (%f), DFog %X (%f)\n",
                    pCurSpan->uFog, (FLOAT)pCurSpan->uFog / FOG_SCALE,
                    pCurSpan->iDFog, (FLOAT)pCurSpan->iDFog / FOG_SCALE));
        }
    }

    RSSETFLAGS(DBG_OUTPUT_FLAGS, uSavedFlags);
}
#endif // DBG
//----------------------------------------------------------------------------
//
// PrimProcessor::Flush
//
// Flushes any remaining data from the buffer.
//
//----------------------------------------------------------------------------
// Hands the buffered primitives to the context's span renderer and then
// empties the buffer.  Returns D3D_OK without doing anything when the
// buffer holds no more than sizeof(D3DI_RASTPRIM) bytes; otherwise
// returns the renderer's result.
HRESULT
PrimProcessor::Flush(void)
{
    // Nothing beyond a single primitive header is buffered: treat the
    // buffer as empty.
    if (m_pCur - m_pBufferStart <= sizeof(D3DI_RASTPRIM))
    {
        return D3D_OK;
    }

    HRESULT hr = D3D_OK;

    // Publish the buffer contents to the rasterization context.
    m_StpCtx.pCtx->pNext = NULL;
    m_StpCtx.pCtx->pPrim = (PD3DI_RASTPRIM)m_pBufferStart;

#if DBG
    // Optionally dump what is about to be rendered.
    if ((RSGETFLAGS(DBG_OUTPUT_MASK) & (RSM_BUFPRIM | RSM_BUFSPAN)) ||
        (RSGETFLAGS(DBG_USER_FLAGS) & (RSU_MARK_SPAN_EDGES | RSU_CHECK_SPAN_EDGES)))
    {
        DumpPrims(&m_StpCtx);
    }

    // Rendering can be suppressed entirely by a debug user flag, in
    // which case hr keeps its D3D_OK value.
    if ((RSGETFLAGS(DBG_USER_FLAGS) & RSU_NO_RENDER_SPANS) == 0)
    {
        if (RSGETFLAGS(DBG_USER_FLAGS) & RSU_BREAK_ON_RENDER_SPANS)
        {
            DebugBreak();
        }

        RSHRCHK(m_StpCtx.pCtx->pfnRenderSpans(m_StpCtx.pCtx));
    }
#else
    hr = m_StpCtx.pCtx->pfnRenderSpans(m_StpCtx.pCtx);
#endif

    // The buffer is emptied whether or not rendering succeeded.
    ResetBuffer();

    return hr;
}
//----------------------------------------------------------------------------
//
// PrimProcessor::FlushPartial
//
// Flushes the buffer in the middle of a primitive. Preserves last
// partial primitive and replaces it in the buffer after the flush.
//
//----------------------------------------------------------------------------
// Flushes while a primitive is still being extended.  The current
// primitive header is copied aside, the buffer is flushed, and the copy
// is then written back into a freshly obtained primitive slot so that
// more spans can be appended to it.
HRESULT
PrimProcessor::FlushPartial(void)
{
    HRESULT hr;

    RSDPFM((RSM_BUFFER, "FlushPartial, saving prim at %p, Y %d\n", m_StpCtx.pPrim, m_StpCtx.iY));

    // The flush empties the buffer, so keep a private copy of the
    // primitive that is in progress.
    D3DI_RASTPRIM PrimCopy = *m_StpCtx.pPrim;

    RSHRRET(Flush());

    // Re-establish the in-progress primitive in the buffer.
    GET_PRIM();
    *m_StpCtx.pPrim = PrimCopy;
    COMMIT_PRIM(FALSE);

    return D3D_OK;
}
//----------------------------------------------------------------------------
//
// PrimProcessor::AppendPrim
//
// Ensures that some primitive is active in the buffer for spans to
// be added to. If no valid primitive is available to append to,
// a zeroed primitive is committed into the buffer.
//
//----------------------------------------------------------------------------
// Makes sure there is a committed primitive in the buffer for spans to
// be attached to, committing a new one when there is none.
HRESULT
PrimProcessor::AppendPrim(void)
{
    // A primitive exists and has already been committed (it no longer
    // sits at the write cursor): nothing to do.
    if (m_StpCtx.pPrim != NULL && (PUINT8)m_StpCtx.pPrim != m_pCur)
    {
        return D3D_OK;
    }

    // Otherwise commit a clean primitive into the buffer.
    GET_PRIM();
    COMMIT_PRIM(TRUE);

    return D3D_OK;
}
//----------------------------------------------------------------------------
//
// PrimProcessor::Begin
//
// Resets the buffer to an empty state in preparation for incoming
// triangles.
//
//----------------------------------------------------------------------------
void PrimProcessor::Begin(void) { UINT16 uFpCtrl; FPU_GET_MODE(uFpCtrl); m_uFpCtrl = uFpCtrl; uFpCtrl = FPU_MODE_CHOP_ROUND( FPU_MODE_LOW_PRECISION( FPU_MODE_MASK_EXCEPTIONS(m_uFpCtrl))); #if defined(_X86_) && defined(UNMASK_EXCEPTIONS)
// Unmask some exceptions so that we can eliminate them.
// This requires a safe set to clear any exceptions that
// are currently asserted.
//
// Exceptions left masked:
// Precision, denormal.
// Exceptions unmasked:
// Underflow, overflow, divzero, invalid op.
uFpCtrl &= ~0x1d; FPU_SAFE_SET_MODE(uFpCtrl); #else
FPU_SET_MODE(uFpCtrl); #endif
m_uPpFlags |= PPF_IN_BEGIN; ResetBuffer(); }
//----------------------------------------------------------------------------
//
// PrimProcessor::End
//
// Flushes if necessary and cleans up.
//
//----------------------------------------------------------------------------
// Ends a batch: flushes any buffered data, restores the FPU mode saved
// by Begin and clears the in-Begin flag.  Returns the flush result, or
// D3D_OK when there was nothing to flush.
HRESULT
PrimProcessor::End(void)
{
    HRESULT hr = D3D_OK;

    // Only flush when more than a bare primitive header is buffered.
    if (m_pCur - m_pBufferStart > sizeof(D3DI_RASTPRIM))
    {
        RSHRCHK(Flush());
    }

    // Restore the FP control word that Begin stored in m_uFpCtrl.  This
    // happens even if the flush failed.
    UINT16 uSavedCtrlWord = m_uFpCtrl;
    FPU_SAFE_SET_MODE(uSavedCtrlWord);

    m_uPpFlags &= ~PPF_IN_BEGIN;

    return hr;
}
//----------------------------------------------------------------------------
//
// PrimProcessor::SetCtx
//
// Sets the rasterization context to operate in.
//
//----------------------------------------------------------------------------
// Stores pCtx as the rasterization context used by the setup context.
void
PrimProcessor::SetCtx(PD3DI_RASTCTX pCtx)
{
    // Changing the context is not allowed between Begin and End, so
    // that the span rendering function cannot change in the middle of
    // a batch.  This is checked with an assert only.
    RSASSERT((m_uPpFlags & PPF_IN_BEGIN) == 0);

    m_StpCtx.pCtx = pCtx;
}
//----------------------------------------------------------------------------
//
// PrimProcessor::AllocSpans
//
// Checks to see if there's room in the buffer for the requested number
// of spans. If so the buffer pointer is updated and a pointer is returned.
// If the requested number is not available but some reasonable number is,
// return that many. Otherwise the buffer is flushed and the process starts
// over. The "reasonable" number must therefore be no more than what
// can fit in the buffer at once.
//
//----------------------------------------------------------------------------
// Space for enough spans to avoid a flush.
#define AVOID_FLUSH_SPACE (8 * sizeof(D3DI_RASTSPAN))

//
// Reserves room for spans at the write cursor.
//
// pcSpans - in: number of spans wanted.  out: number of spans actually
//           reserved, which may be smaller than requested, or 0 if a
//           flush failed.
// ppSpan  - out: receives the address of the first reserved span.  It
//           is not written when a flush fails.
//
// Returns D3D_OK on success, or the failure code from FlushPartial.
//
HRESULT
PrimProcessor::AllocSpans(PUINT pcSpans, PD3DI_RASTSPAN *ppSpan)
{
    PD3DI_RASTSPAN pSpan;
    HRESULT hr;
    UINT uSpanSize;

    RSASSERT(AVOID_FLUSH_SPACE <= (BUFFER_SIZE - sizeof(D3DI_RASTPRIM)));
    // The multiplies and divides here will be really bad unless
    // RASTSPAN is a nice power-of-two in size.
    RSASSERT((sizeof(D3DI_RASTSPAN) & (sizeof(D3DI_RASTSPAN) - 1)) == 0);

    for (;;)
    {
        // Work with the number of bytes left rather than with
        // "m_pCur + size": the latter forms a pointer beyond the end of
        // the buffer whenever the request does not fit, and the byte
        // size of a very large request could wrap around.
        const UINT_PTR cbAvail =
            (m_pCur <= m_pBufferEnd) ? (UINT_PTR)(m_pBufferEnd - m_pCur) : 0;
        const UINT_PTR cAvailSpans = cbAvail / sizeof(D3DI_RASTSPAN);

        // First check for space for all requested spans.
        if (*pcSpans <= cAvailSpans)
        {
            break;
        }

        // Not enough space for everything, so see if we have
        // enough space to avoid a flush.
        if (cbAvail >= AVOID_FLUSH_SPACE)
        {
            // Not enough space for everything but enough space
            // to return some.  Set new span count.
            *pcSpans = (UINT)cAvailSpans;
            break;
        }

        // Not enough space, so flush.
        RSHRCHK(FlushPartial());
        if (hr != D3D_OK)
        {
            *pcSpans = 0;
            return hr;
        }

        // Loop around.  Flush is guaranteed to at least produce
        // AVOID_FLUSH_SPACE so the loop will always exit.
    }

    // *pcSpans is now known to fit in the buffer, so this cannot wrap.
    uSpanSize = *pcSpans * sizeof(D3DI_RASTSPAN);

    pSpan = (PD3DI_RASTSPAN)m_pCur;
    m_pCur += uSpanSize;
    *ppSpan = pSpan;

    RSDPFM((RSM_BUFFER, "Alloc %d spans at %p, cur %p\n", *pcSpans, pSpan, m_pCur));

    return D3D_OK;
}
//----------------------------------------------------------------------------
//
// PrimProcessor::FreeSpans
//
// Returns space given out by AllocSpans.
//
//----------------------------------------------------------------------------
// Gives back the last cSpans spans handed out by AllocSpans by moving
// the write cursor backwards.
void
PrimProcessor::FreeSpans(UINT cSpans)
{
    const UINT_PTR cbReturned = cSpans * sizeof(D3DI_RASTSPAN);

    m_pCur -= cbReturned;

    // The second pointer printed is the cursor position before the free.
    RSDPFM((RSM_BUFFER, "Free %d spans at %p, cur %p\n", cSpans, m_pCur + cbReturned, m_pCur));
}
|