//================ Copyright (c) Valve Corporation. All Rights Reserved. =========================== // // Per draw call Gcm state // Render states, vo/fp consts // //================================================================================================== #ifndef INCLUDED_GCMDRAWSTATE_H #define INCLUDED_GCMDRAWSTATE_H #ifndef SPU #include "tier0/platform.h" #include "tier0/dbg.h" #include "cell\gcm.h" #include "gcmconfig.h" #include "ps3gcmmemory.h" #include "dxabstract_def.h" #include "dxabstract.h" #include "shaderapi/commandbuffer.h" #include "shaderapi/shareddefs.h" #include "mathlib/vector4d.h" #include "mathlib/vmatrix.h" #include #include #include "SpuMgr_ppu.h" #else #include "spumgr_spu.h" #include "cell/gcm_spu.h" #include "cell/gcm/gcm_method_data.h" #include "dxabstract_def.h" #include "gcmtexture.h" #include "gcmlabels.h" #include "shaderapi/shareddefs.h" #include "shaderapi/commandbuffer.h" #include #include "gcmdraw_spu.h" #endif //-------------------------------------------------------------------------------------------------- // Defines //-------------------------------------------------------------------------------------------------- //#define GCM_DS_SAFE #define GCM_DRAWSTATE_MAX 9 // We have this many drawstate structures // we fill half of them and send to SPU // then we fill the other half // Need an odd number since one extra is required in SendToSPU #define GCM_NUMDRAWCALLS_SPU ((GCM_DRAWSTATE_MAX-1)/2) #define GCM_DS_FIFOPERDRAW 0x4000 // 16K is our max epr draw call FIFO. // in practice we see a highwater of 0x2800 // which combines a full RSx reset with a drawcall #define GCM_DS_MAXDATAPERDRAWCALL 0x2000 // Highwater mark is abt 3K #define GCM_DS_MAXFPCONST 96 #define GCM_DS_MAXVPCONST 256 #define GCM_DS_MAXDYNECB 0x40000 // 64K ring buffer. 
if <8K left wraps #define GCM_DS_INST_ENVMAP 1 #define GCM_DS_INST_LIGHTMAP 2 #define GCM_DS_INST_PAINTMAP 4 #define MAX_SAMPLERS 16 //-------------------------------------------------------------------------------------------------- // Global externs //-------------------------------------------------------------------------------------------------- extern uint8 g_d3drs_defvalue_indices[D3DRS_VALUE_LIMIT]; extern uint32 g_d3drs_defvalues[11]; extern uint32 dxtogl_stencilmode[10]; extern uint16 dxtogl_blendop[7]; extern uint8 dxtogl_addressMode[6]; extern uint8 dxtogl_anisoIndexHalf[32]; // indexed by [ dxsamp->maxAniso / 2 ] extern uint8 dxtogl_minFilter[4][4]; // indexed by _D3DTEXTUREFILTERTYPE on both axes: [row is min filter][col is mip filter]. extern uint8 dxtogl_magFilter[4]; // indexed by _D3DTEXTUREFILTERTYPE extern int g_bZcullAuto; extern int g_nZcullDefault; extern int g_nZcullMoveForward; extern int g_nZcullPushBack; extern vec_float4 g_aFPConst[GCM_DS_MAXFPCONST]; extern vec_float4 g_aVPConst[GCM_DS_MAXVPCONST]; extern D3DStreamDesc g_dxGcmVertexStreamSources[D3D_MAX_STREAMS]; extern uint32 g_UPHigh; extern uint32 g_UPFrame; extern volatile uint32_t * g_label_fppatch_ring_seg; extern uint8 g_aDynECB[GCM_DS_MAXDYNECB]; extern uint32 g_nDynECBIdx; extern uint8 gPackData[][GCM_DS_MAXDATAPERDRAWCALL]; //-------------------------------------------------------------------------------------------------- // Structs used as params //-------------------------------------------------------------------------------------------------- struct DrawScissor_t { uint16 x, y, w, h; }; struct UpdateSurface_t { // if the scissor is logically disabled, set scissor to this size //uint16 m_nRenderTargetWidth, m_nRenderTargetHeight; CPs3gcmTextureData_t m_texC, m_texZ; }; struct FpHeader_t { uint32 m_nUcodeSize; uint32 m_nPatchCount; uint32 m_nShaderControl0; uint32 m_nTexControls; // Always <= 16; 1 tex control corresponds to 2 words in the tex control table // data[] // 
Allocate memory layout as : // FpHeader_t // uCode // Patches // Texcontrols // total size = AlignValue( sizeof( FpHeader_t ) + m_nUcodeSize + (sizeof( uint32 ) * nPatchCount) // + (2 * sizeof( uint32 ) * nTexControls) , 16); }; //-------------------------------------------------------------------------------------------------- // Vertex streams //-------------------------------------------------------------------------------------------------- struct SetVertexDataArrayCache_t { union Data_t { vector signed int m_vi; struct Unpacked_t { uint32 m_uiLocalMemoryBuffer; // after adding the offset uint32 m_nSize; uint32 m_nStride; uint32 m_nType; //IDirect3DVertexBuffer9 *m_vtxBuffer; // for debug only //uint32 m_nBaseVertexOffset; // debug only } m_unpacked; } m_data; SetVertexDataArrayCache_t(){} SetVertexDataArrayCache_t( D3DStreamDesc &dsd, D3DVERTEXELEMENT9_GCM::GcmDecl_t const &gcmvad, uint nBaseVertexIndex ) { //m_vtxBuffer = dsd.m_vtxBuffer; uint nBaseVertexOffset = dsd.m_offset + ( nBaseVertexIndex * dsd.m_stride ) + gcmvad.m_offset; uint uiLocalMemoryBuffer = dsd.m_nLocalBufferOffset + nBaseVertexOffset; m_data.m_vi = ( vector signed int ) { uiLocalMemoryBuffer, gcmvad.m_datasize, gcmvad.m_datatype, dsd.m_stride }; // m_stride = dsd.m_stride; // m_size = gcmvad.m_datasize; // m_type = gcmvad.m_datatype; } uint GetLocalOffset()const { return m_data.m_unpacked.m_uiLocalMemoryBuffer; } bool IsNull()const { return vec_all_eq( m_data.m_vi, (vector signed int){0,0,0,0} ); } void SetNull(){ m_data.m_vi = ( vector signed int ){0,0,0,0}; } void Invalidate(){ m_data.m_vi = (vector signed int){-1,-1,-1,-1};} bool operator != ( const SetVertexDataArrayCache_t& that ) const { return !vec_all_eq( m_data.m_vi, that.m_data.m_vi ); } void operator = ( const SetVertexDataArrayCache_t& that ) { m_data.m_vi = that.m_data.m_vi ; } }; // This is global, since it is only written by the flush code extern SetVertexDataArrayCache_t g_cacheSetVertexDataArray[ D3D_MAX_STREAMS ]; // 
Vertex stream setup //-------------------------------------------------------------------------------------------------- // SPU draw commands //-------------------------------------------------------------------------------------------------- enum DrawCmd { CmdCommitStates = 1, CmdDrawPrim, CmdDrawPrimUP, CmdEndFrame }; //-------------------------------------------------------------------------------------------------- // GcmDrawState.. Holds data that is commited once a draw, clear etc... is made.. //-------------------------------------------------------------------------------------------------- #define DRAWSTATE_SIZEOFDMA (uintp(&(((CGcmDrawState*)(0))->m_pData)+1)-uintp(&(((CGcmDrawState*)(0))->m_cmd))) struct CGcmDrawState { // DrawData used by DrawPrimUP struct DrawData { uint8 m_type; uint8 m_idx; uint16 m_size; /*uint8 m_data[m_count];*/ }; //-------------------------------------------------------------------------------------------------- // Enums //-------------------------------------------------------------------------------------------------- // Data that gets packes and then unpacked as a cmd stream enum GcmDataType { kDataFpuConsts = 1, kDataVpuConsts, kDataStreamDesc, kDataZcullStats, kDataZcullLimit, kDataViewport, kDataSetRenderState, kDataSetZpassPixelCountEnable, kDataSetClearReport, kDataSetReport, kDataUpdateSurface, kDataClearSurface, kDataResetSurface, kDataTransferImage, kDataViewPort, kDataScissor, kDataTexture, kDataEcbTexture, kDataResetTexture, kDataUpdateVtxBufferOffset, kDataECB, kDataBeginScene, kDataSetWorldSpaceCameraPosition, kDataSetWriteBackEndLabel }; // RenderStates enum GcmDirtyStateFlags_t { kDirtyBlendFactor = ( 1 << 0 ), kDirtyAlphaFunc = ( 1 << 1 ), kDirtyStencilOp = ( 1 << 2 ), kDirtyStencilFunc = ( 1 << 3 ), kDirtyDepthBias = ( 1 << 4 ), kDirtyScissor = ( 1 << 5 ), kDirtyDepthMask = ( 1 << 6 ), kDirtyZEnable = ( 1 << 7 ), kDirtyZFunc = ( 1 << 8 ), kDirtyColorWriteEnable = ( 1 << 9 ), kDirtyCullMode = ( 1 << 10 ), 
kDirtyAlphablendEnable = ( 1 << 11 ), kDirtySrgbWriteEnable = ( 1 << 12 ), kDirtyAlphaTestEnable = ( 1 << 13 ), kDirtyStencilEnable = ( 1 << 14 ), kDirtyStencilWriteMask = ( 1 << 15 ), kDirtyFillMode = ( 1 << 16 ), kDirtyBlendOp = ( 1 << 17 ), kDirtyResetRsx = ( 1 << 18 ), kDirtyZeroAllPSConsts = ( 1 << 19 ), kDirtyZeroAllVSConsts = ( 1 << 20) }; // Dirty flags for caches and other misc settings enum GcmDirtyCacheFlags_t { kDirtyVxConstants = ( 1 << 0 ), kDirtyClipPlanes = ( 1 << 1 ), kDirtyVxShader = ( 1 << 2 ), kDirtyPxShader = ( 1 << 3 ), kDirtyPxConstants = ( 1 << 4 ), kDirtyVxCache = ( 1 << 5 ), kDirtyTxCache = ( 1 << 6 ) }; //-------------------------------------------------------------------------------------------------- // Data we are interested in per draw call //-------------------------------------------------------------------------------------------------- // Data that is DMA'd to the SPU directly and not packed uint32 m_cmd; uint32 m_param[8]; uint32 m_eaOutputFIFO; uint32 m_eaOutputUCode; uint32 m_nFreeLabel; // Nonzero values are set uint16 m_nBackBufferSize[2]; uint16 m_dirtySamplersMask; // Sampler dirty flags uint16 m_dirtyCachesMask; // Caches reset for Shaders flush uint32 m_dirtyStatesMask; // Render state dirty flags uint32 m_shaderVxConstants; // Booleans, go into a SetTransformbranchbits call PixelShader9Data_t* m_pPixelShaderData; VertexShader9Data_t* m_pVertexShaderData; uint32 m_nNumECB; uint8* m_aECB[3]; // No More than three per draw call (static, semi-static & dynamic) uint32 m_aSizeECB[3]; struct FixedData { uint32 m_nSampler; uint8 m_aSamplerIdx[D3D_MAX_SAMPLERS]; D3DSamplerDesc m_aSamplers[D3D_MAX_SAMPLERS]; uint32 m_nInstanced; CPs3BindTexture_t m_instanceEnvCubemap; CPs3BindTexture_t m_instanceLightmap; CPs3BindTexture_t m_instancePaintmap; }; // Unpack pointer and cursors FixedData* m_pFixed; // Fixed sized data uploaded per call uint8* m_pDataCursor; uint8* m_pData; // Fixed Data that is unpacked D3DSamplerDesc 
m_aSamplers[D3D_MAX_SAMPLERS]; // Data that is unpacked, or derived, or code generated somewhere (Init etc...) CPs3BindTexture_t m_aBindTexture[CBCMD_MAX_PS3TEX]; // Textures that are set from ECBs float m_vecWorldSpaceCameraPosition[4]; uint32 m_nSetTransformBranchBits; // here for now because they init in begin scene uint32 m_nDisabledSamplers; uint16 m_blends[2]; struct { uint32 func, ref; } m_alphaFunc; struct { uint32 fail, dfail, dpass; } m_stencilOp; struct { uint32 func, ref, mask; } m_stencilFunc; struct { uint32 factor, units; } m_depthBias; struct { uint16 x, y, w, h, enabled; } m_scissor; // kDirtyScissor uint16 m_nSetDepthMask; uint32 m_ZEnable; uint32 m_ZFunc; uint32 m_ColorWriteEnable; uint32 m_CullMode; uint32 m_AlphablendEnable; uint32 m_SrgbWriteEnable; uint32 m_AlphaTestEnable; uint32 m_StencilEnable; uint32 m_StencilWriteMask; uint32 m_FillMode; uint32 m_BlendOp; uint32 m_userClipPlanesState; CPs3gcmTextureData_t m_textures[D3D_MAX_TEXTURES]; float m_viewZ[2]; uint16 m_viewportSize[4]; //-------------------------------------------------------------------------------------------------- // Methods //-------------------------------------------------------------------------------------------------- public: // Init etc.. (ppu functions...) 
#ifndef SPU inline void Init(IDirect3DDevice9Params *params); #endif inline void Init(); void SendToSpu(); inline void Reset(); // Reset for re-use #ifndef SPU inline void BeginScene(); // Sets report for Zcull inline void EndScene(); // Gets report for Zcull inline void CmdBufferFlush(); // Flush RSX via SPU inline void CmdBufferFinish(); // Flush RSX and wait for it #endif inline void ResetRsxState(); // Lots of GCM_FUNC to default vals // Dynamic ECB mgmt inline uint8* OpenDynECB(); inline void CloseDynECB(uint32 size); // Viewport and scissor inline void UnpackSetViewport(CONST D3DVIEWPORT9* pViewport); inline HRESULT SetViewport(CONST D3DVIEWPORT9* pViewport); inline void UnpackSetScissorRect(DrawScissor_t * pScissor); inline void SetScissorRect( DrawScissor_t * pScissor ); // Reports, Zpass and labels (all packed) inline void SetZpassPixelCountEnable(uint32 enable); inline void SetClearReport(uint32 type); inline void SetReport(uint32 type, uint32 index); inline void SetWriteBackEndLabel(uint8 index, uint32 value); // RenderStates inline void UnpackSetRenderState( D3DRENDERSTATETYPE State, uint Value ); inline void SetRenderState( D3DRENDERSTATETYPE State, uint Value ); // Texture samplers, textures, texture cache inline void SetInvalidateTextureCache(); inline void SetSamplerState( uint Sampler,D3DSAMPLERSTATETYPE Type,DWORD Value ); inline void UnpackSetTexture( DWORD Stage, uint32 offset, uint32 eaLayout ); inline void UnpackResetTexture( DWORD Stage ); inline void SetTexture( DWORD Stage, CPs3gcmTexture *tex ); inline void ResetTexture( DWORD Stage ); // Vertex buffers, vertex cache, , vertex constants inline void SetInvalidateVertexCache(); inline void UnpackUpdateVtxBufferOffset( IDirect3DVertexBuffer9 * vtxBuffer, uint nLocalBufferOffset ); inline void UpdateVtxBufferOffset( IDirect3DVertexBuffer9 * vtxBuffer, uint nLocalBufferOffset ); inline void SetVertexStreamSource(uint nStreamIndex, IDirect3DVertexBuffer9* pStreamData,UINT OffsetInBytes,UINT 
Stride ); inline void _SetVertexShaderConstantB( UINT StartRegister, uint BoolCount, uint shaderVxConstants ); inline void SetVertexShaderConstantB( UINT StartRegister,CONST BOOL* pConstantData,UINT BoolCount) ; inline void SetVertexShaderConstantF( UINT StartRegister, void* pUnalignedConstantData, UINT Vector4fCount ); // inline void VertexConstantExtractor( float *pDestStorage, int kRegisterFirst, int kRegisterLength, // int StartRegister, const float *pConstantData, int Vector4fCount ); // Pixel shader consts inline void SetPixelShaderConstantF(uint32 StartRegister, float* pConstantData, uint32 Vector4fCount); inline void UnpackSetWorldSpaceCameraPosition(float* pWCP); inline void SetWorldSpaceCameraPosition(float* pWCP); // Surfaces and render targets inline void Ps3Helper_UpdateSurface( UpdateSurface_t * pSurface ); inline void UnpackUpdateSurface(CellGcmSurface* pSf); inline void ResetSurfaceToKnownDefaultState(); inline void UnpackResetSurfaceToKnownDefaultState(); inline void Helper_IntersectRectsXYWH( uint16 const *a, uint16 const *b, uint16 *result ); inline void ClearSurface( DWORD nFlags, D3DCOLOR nColor, float flZ, uint32 nStencil, uint32 nDepthStencilBitDepth ); inline void UnpackClearSurface( DWORD nFlags, D3DCOLOR nColor, float flZ, uint32 nStencil, uint32 nDepthStencilBitDepth ); // Blit (packed) inline void SetTransferImage(uint8 mode, uint32 dstOffset, uint32 dstPitch, uint32 dstX, uint32 dstY, uint32 srcOffset, uint32 srcPitch, uint32 srcX, uint32 srcY, uint32 width, uint32 height, uint32 bytesPerPixel ); // DrawPrim inline void DrawPrimitiveUP( IDirect3DVertexDeclaration9 * pDecl, D3DPRIMITIVETYPE nPrimitiveType,UINT nPrimitiveCount, CONST void *pVertexStreamZeroData, UINT nVertexStreamZeroStride ); inline void DrawIndexedPrimitive( uint32 offset, IDirect3DVertexDeclaration9 * pDecl, D3DPRIMITIVETYPE Type,INT BaseVertexIndex,UINT MinVertexIndex, UINT NumVertices,UINT startIndex,UINT nDrawPrimCount ); inline void ExecuteCommandBuffer( uint8 
*pCmdBuf ); inline void UnpackExecuteCommandBuffer( uint8 *pCmdBuf ); void TestCommandBuffer( uint8 *pCmdBuf ); inline void TextureReplace(uint32 id, CPs3BindTexture_t tex); // Commit, pack etc.. inline void PackData(uint8 type, uint8 idx, uint16 size, void* pSrc); inline void PackData(uint8 type, uint16 size, void* pSrc); inline void PackData(uint8 type, uint32 val1, uint32 val2, uint32 val3); inline void PackData(uint8 type, uint32 val1, uint32 val2); inline void PackData(uint8 type, uint32 val1); inline void PackData(uint8 type); inline void PackData(uint8 type, DWORD nFlags, D3DCOLOR nColor, float flZ, uint32 nStencil, uint32 nDepthStencilBitDepth ); // used to pack clear surface inline void UnpackData(); // Unpacks variable sized data and sets vertex consts inline void CommitStates(); // Currently unused on PPU inline void EndFrame(); // called by Flip() inline void CommitAll(IDirect3DVertexDeclaration9 * pDecl, uint32 baseVertexIndex); inline void CommitRenderStates(); inline void CommitVertexBindings(IDirect3DVertexDeclaration9 * pDecl, uint32 baseVertexIndex); inline void CommitSampler(uint32 nSampler); inline void CommitSamplers(); inline void CommitShaders(); inline void BindFragmentProgram(uint32 nVertexToFragmentProgramAttributeMask); inline void PatchUcode(fltx4 * pUCode16, uint32 * pPatchTable, uint nPatchCount); inline fltx4* CopyUcode(FpHeader_t* pFp); #ifndef SPU inline void AllocateUcode(FpHeader_t* pFp); // Reserves space in the patchbuffer for this #endif // ExecuteCommandBuffer Subs inline void SetVertexShaderConstantInternal( int var, float const* pVec, int numVecs = 1, bool bForce = false ); inline void SetPixelShaderConstantInternal( int var, float const* pValues, int nNumConsts = 1, bool bForce = false ); inline void BindTexture2( CPs3BindTexture_t bindTex ); // Misc inline int IsLayerRender() { return 1;} // 7LTODO : zprepass ! 
}; //-------------------------------------------------------------------------------------------------- // Externs //-------------------------------------------------------------------------------------------------- extern CGcmDrawState* gpGcmDrawState; extern CGcmDrawState gGcmDrawState[]; extern CGcmDrawState::FixedData gFixedData[]; //-------------------------------------------------------------------------------------------------- // inlines //-------------------------------------------------------------------------------------------------- //-------------------------------------------------------------------------------------------------- // Generic pack data //-------------------------------------------------------------------------------------------------- inline void CGcmDrawState::PackData(uint8 type, uint8 idx, uint16 size, void* pSrc) { // SNPROF("CGcmDrawState::PackData(uint8 type, uint8 idx, uint16 size, void* pSrc)"); uint32 spacereqd = size + sizeof(DrawData); #ifdef GCM_DS_SAFE uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData )); if(spacereqd > spaceleft) Error("Out of per draw call data\n"); #endif DrawData* pData = (DrawData*) m_pDataCursor; pData->m_type = type; pData->m_idx = idx; pData->m_size = size; V_memcpy(pData+1, pSrc, size); m_pDataCursor += spacereqd; } inline void CGcmDrawState::PackData(uint8 type, uint16 size, void* pSrc) { // SNPROF("CGcmDrawState::PackData(uint8 type, uint16 size, void* pSrc)"); uint32 spacereqd = size + sizeof(DrawData); #ifdef GCM_DS_SAFE uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData )); if(spacereqd > spaceleft) Error("Out of per draw call data\n"); #endif DrawData* pData = (DrawData*) m_pDataCursor; pData->m_type = type; pData->m_idx = 0; pData->m_size = size; V_memcpy(pData+1, pSrc, size); m_pDataCursor += spacereqd; } inline void CGcmDrawState::PackData(uint8 type, uint32 val1, uint32 val2, uint32 val3) { // SNPROF("CGcmDrawState::PackData(uint8 type, uint32 
val1, uint32 val2, uint32 val3)"); const uint32 size = 12; uint32 spacereqd = size + sizeof(DrawData); #ifdef GCM_DS_SAFE uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData )); if(spacereqd > spaceleft) Error("Out of per draw call data\n"); #endif DrawData* pData = (DrawData*) m_pDataCursor; pData->m_type = type; pData->m_idx = 0; pData->m_size = size; uint32* pDest = (uint32*)(pData + 1); pDest[0] = val1; pDest[1] = val2; pDest[2] = val3; m_pDataCursor += spacereqd; } inline void CGcmDrawState::PackData(uint8 type, uint32 val1, uint32 val2) { // SNPROF("CGcmDrawState::PackData(uint8 type, uint32 val1, uint32 val2)"); const uint32 size = 8; uint32 spacereqd = size + sizeof(DrawData); #ifdef GCM_DS_SAFE uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData )); if(spacereqd > spaceleft) Error("Out of per draw call data\n"); #endif DrawData* pData = (DrawData*) m_pDataCursor; pData->m_type = type; pData->m_idx = 0; pData->m_size = size; uint32* pDest = (uint32*)(pData + 1); pDest[0] = val1; pDest[1] = val2; m_pDataCursor += spacereqd; } inline void CGcmDrawState::PackData(uint8 type, uint32 val1) { // SNPROF("CGcmDrawState::PackData(uint8 type, uint32 val1)"); const uint32 size = 4; uint32 spacereqd = size + sizeof(DrawData); #ifdef GCM_DS_SAFE uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData )); if(spacereqd > spaceleft) Error("Out of per draw call data\n"); #endif DrawData* pData = (DrawData*) m_pDataCursor; pData->m_type = type; pData->m_idx = 0; pData->m_size = size; uint32* pDest = (uint32*)(pData + 1); pDest[0] = val1; m_pDataCursor += spacereqd; } inline void CGcmDrawState::PackData(uint8 type) { // SNPROF("CGcmDrawState::PackData(uint8 type)"); const uint32 size = 0; uint32 spacereqd = size + sizeof(DrawData); #ifdef GCM_DS_SAFE uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData )); if(spacereqd > spaceleft) Error("Out of per draw call data\n"); #endif DrawData* pData = 
(DrawData*) m_pDataCursor; pData->m_type = type; pData->m_idx = 0; pData->m_size = size; m_pDataCursor += spacereqd; } inline void CGcmDrawState::PackData(uint8 type, DWORD nFlags, D3DCOLOR nColor, float flZ, uint32 nStencil, uint32 nDepthStencilBitDepth ) // used to pack clear surface { // SNPROF("CGcmDrawState::PackData(uint8 type, DWORD nFlags, D3DCOLOR nColor, float flZ, uint32 nStencil, uint32 nDepthStencilBitDepth )"); const uint32 size = 20; uint32 spacereqd = size + sizeof(DrawData); #ifdef GCM_DS_SAFE uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData )); if(spacereqd > spaceleft) Error("Out of per draw call data\n"); #endif DrawData* pData = (DrawData*) m_pDataCursor; pData->m_type = type; pData->m_idx = 0; pData->m_size = size; uint32* pDest = (uint32*)(pData + 1); float* pDestf = (float*) pDest; pDest[0] = nFlags; pDest[1] = nColor; pDestf[2] = flZ; pDest[3] = nStencil; pDest[4] = nDepthStencilBitDepth; m_pDataCursor += spacereqd; } //-------------------------------------------------------------------------------------------------- // Init, Begin/EndScene. 
Flush and Finish, ResetRsxState //-------------------------------------------------------------------------------------------------- inline void CGcmDrawState::Init() { // Initialize GCM state to defaults memset(this, 0, sizeof(CGcmDrawState)); m_scissor.enabled = 1; m_viewZ[0] = 0.1; m_viewZ[1] = 1000.0f; m_blends[0] = CELL_GCM_ONE; m_blends[1] = CELL_GCM_ZERO; m_alphaFunc.func = CELL_GCM_ALWAYS; m_alphaFunc.ref = 0; m_stencilOp.fail = CELL_GCM_KEEP; m_stencilOp.dfail = CELL_GCM_KEEP; m_stencilOp.dpass = CELL_GCM_KEEP; m_stencilFunc.func = CELL_GCM_ALWAYS; m_stencilFunc.ref = 0; m_stencilFunc.mask = 0xFF; m_depthBias.factor = 0; m_depthBias.units = 0; m_userClipPlanesState = 0; m_shaderVxConstants = 0; // Init fixed sized data m_pFixed = &gFixedData[0]; memset(m_pFixed->m_aSamplerIdx, 0xff, sizeof(m_pFixed->m_aSamplerIdx)); m_pFixed->m_nSampler = 0; m_pFixed->m_nInstanced = 0; // Init variable sized data.... m_pData = gPackData[0]; m_pDataCursor = m_pData; } #ifndef SPU inline void CGcmDrawState::Init(IDirect3DDevice9Params *params) { for (int lp = 0; lp < GCM_DRAWSTATE_MAX; lp++) { CGcmDrawState *pGcmDrawState = &gGcmDrawState[lp]; pGcmDrawState->Init(); m_nBackBufferSize[0] = params->m_presentationParameters.BackBufferWidth; m_nBackBufferSize[1] = params->m_presentationParameters.BackBufferHeight; pGcmDrawState->m_pData = gPackData[lp]; pGcmDrawState->m_pFixed = &gFixedData[lp]; DrawScissor_t temp; temp.x = 0; temp.y = 0; temp.w = m_nBackBufferSize[0]; temp.h = m_nBackBufferSize[1]; SetScissorRect(&temp); } } #endif #ifndef SPU inline void CGcmDrawState::BeginScene() { // redundant: will lead to redundant disabling of all samplers at the beginning of the frame, even though they're disabled anyway after flip PackData(kDataBeginScene); SetRenderState(D3DRS_ZWRITEENABLE, 1); // CELL_GCM_TRUE if ( g_bZcullAuto ) { PackData(kDataZcullStats); } PackData(kDataZcullLimit, g_nZcullMoveForward, g_nZcullPushBack); g_UPFrame = 0; } inline void CGcmDrawState::EndScene() { 
int nZcullDefault = g_nZcullDefault; // Update zcull settings based on metrics if ( g_bZcullAuto ) { int nMaxSlope = cellGcmGetReport( CELL_GCM_ZCULL_STATS, GCM_REPORT_ZCULL_STATS_0 ); int nSumSlope = cellGcmGetReport( CELL_GCM_ZCULL_STATS1, GCM_REPORT_ZCULL_STATS_1 ); int nNumTiles, nAvgSlope; nNumTiles = nMaxSlope & 0xffff; nMaxSlope = ( nMaxSlope & 0xFFFF0000 ) >> 16; nAvgSlope = nNumTiles ? nSumSlope / nNumTiles : 0; g_nZcullMoveForward = ( nAvgSlope + nMaxSlope ) / 2; g_nZcullPushBack = g_nZcullMoveForward / 2; if ( g_nZcullMoveForward < 1 || g_nZcullPushBack < 1 ) { // pick reasonable defaults in the failure case g_nZcullMoveForward = nZcullDefault; g_nZcullPushBack = nZcullDefault; } } else { g_nZcullMoveForward = nZcullDefault; g_nZcullPushBack = nZcullDefault; } // Msg("DrawPrimUP Frame %d\n", g_UPFrame); } inline void CGcmDrawState::CmdBufferFlush() { CellGcmControl volatile *control = cellGcmGetControlRegister(); // Out-of-order write protection. // this needs to be sync, not eieio as command buffer is on main memory(which is cached) // but control registers are mapped as cache inhibited, eieio doesn't gurantee order // between cached and cache inhibited region #ifdef __SNC__ __builtin_sync(); #else __asm__ volatile("sync"); #endif // __SNC__ uint32_t offsetInBytes = (uint32)gpGcmContext->current - (uint32)g_ps3gcmGlobalState.m_pIoAddress; control->put = offsetInBytes; } inline void CGcmDrawState::CmdBufferFinish() { uint32 ref = g_ps3gcmGlobalState.m_finishIdx; GCM_FUNC(cellGcmSetReferenceCommand, ref); g_ps3gcmGlobalState.m_finishIdx ^=1; CmdBufferFlush(); CellGcmControl volatile *control = cellGcmGetControlRegister(); while( control->ref != ref ) { // Don't be a ppu hog ;) sys_timer_usleep(30); } } #endif //-------------------------------------------------------------------------------------------------- // Dynamic ECB management //-------------------------------------------------------------------------------------------------- inline uint8* 
CGcmDrawState::OpenDynECB() { return &g_aDynECB[g_nDynECBIdx]; } inline void CGcmDrawState::CloseDynECB(uint32 size) { g_nDynECBIdx += AlignValue(size,16); // If we don't have 8K left then wrap if (g_nDynECBIdx > (GCM_DS_MAXDYNECB - 0x2000)) g_nDynECBIdx = 0; } //-------------------------------------------------------------------------------------------------- // Resets RSX to default state //-------------------------------------------------------------------------------------------------- inline void UnpackResetRsxState() { GCM_FUNC( cellGcmSetAlphaFunc, CELL_GCM_ALWAYS, 0); GCM_FUNC( cellGcmSetAlphaTestEnable, CELL_GCM_FALSE); GCM_FUNC( cellGcmSetBackStencilFunc, CELL_GCM_ALWAYS, 0, 0xff); GCM_FUNC( cellGcmSetBackStencilMask, 0xff); GCM_FUNC( cellGcmSetBackStencilOp, CELL_GCM_KEEP, CELL_GCM_KEEP, CELL_GCM_KEEP); GCM_FUNC( cellGcmSetBlendColor, 0, 0); GCM_FUNC( cellGcmSetBlendEnable, CELL_GCM_FALSE); GCM_FUNC( cellGcmSetBlendEnableMrt, CELL_GCM_FALSE, CELL_GCM_FALSE, CELL_GCM_FALSE); GCM_FUNC( cellGcmSetBlendEquation, CELL_GCM_FUNC_ADD, CELL_GCM_FUNC_ADD); GCM_FUNC( cellGcmSetBlendFunc, CELL_GCM_ONE, CELL_GCM_ZERO, CELL_GCM_ONE, CELL_GCM_ZERO); // GCM_FUNC( cellGcmSetClearDepthStencil, 0xffffff00); // GCM_FUNC( cellGcmSetClearSurface, 0); GCM_FUNC( cellGcmSetColorMask, CELL_GCM_COLOR_MASK_A|CELL_GCM_COLOR_MASK_R|CELL_GCM_COLOR_MASK_G|CELL_GCM_COLOR_MASK_B); GCM_FUNC( cellGcmSetCullFaceEnable, CELL_GCM_FALSE); GCM_FUNC( cellGcmSetCullFace, CELL_GCM_BACK); GCM_FUNC( cellGcmSetDepthBounds, 0.0f, 1.0f); GCM_FUNC( cellGcmSetDepthBoundsTestEnable, CELL_GCM_FALSE); GCM_FUNC( cellGcmSetDepthFunc, CELL_GCM_LESS); GCM_FUNC( cellGcmSetDepthMask, CELL_GCM_TRUE); GCM_FUNC( cellGcmSetDepthTestEnable, CELL_GCM_FALSE); GCM_FUNC( cellGcmSetDitherEnable, CELL_GCM_TRUE); GCM_FUNC( cellGcmSetFragmentProgramGammaEnable, CELL_GCM_FALSE); GCM_FUNC( cellGcmSetFrequencyDividerOperation, 0); GCM_FUNC( cellGcmSetFrontFace, CELL_GCM_CCW); GCM_FUNC( cellGcmSetLineWidth, 8); // fixed point 
[0:6:3] GCM_FUNC( cellGcmSetLogicOpEnable, CELL_GCM_FALSE); GCM_FUNC( cellGcmSetLogicOp, CELL_GCM_COPY); // GCM_FUNC( cellGcmSetNotifyIndex, -=something invalid=- ); // initial value is an invalid system reserved area GCM_FUNC( cellGcmSetPointSize, 1.0f); GCM_FUNC( cellGcmSetPolygonOffsetFillEnable, CELL_GCM_FALSE); GCM_FUNC( cellGcmSetPolygonOffset, 0.0f, 0.0f); GCM_FUNC( cellGcmSetRestartIndexEnable, CELL_GCM_FALSE); GCM_FUNC( cellGcmSetRestartIndex, 0xffffffff); GCM_FUNC( cellGcmSetScissor, 0,0,4096,4096); GCM_FUNC( cellGcmSetShadeMode, CELL_GCM_SMOOTH); GCM_FUNC( cellGcmSetStencilFunc, CELL_GCM_ALWAYS, 0, 0xff); GCM_FUNC( cellGcmSetStencilMask, 0xff); GCM_FUNC( cellGcmSetStencilOp, CELL_GCM_KEEP, CELL_GCM_KEEP, CELL_GCM_KEEP); GCM_FUNC( cellGcmSetStencilTestEnable, CELL_GCM_FALSE); for( uint nTextureSampler = 0; nTextureSampler < 16; ++nTextureSampler ) { GCM_FUNC( cellGcmSetTextureAddress, nTextureSampler, CELL_GCM_TEXTURE_WRAP, CELL_GCM_TEXTURE_WRAP, CELL_GCM_TEXTURE_CLAMP_TO_EDGE, CELL_GCM_TEXTURE_UNSIGNED_REMAP_NORMAL, CELL_GCM_TEXTURE_ZFUNC_NEVER, 0); GCM_FUNC( cellGcmSetTextureBorderColor, nTextureSampler, 0); GCM_FUNC( cellGcmSetTextureControl, nTextureSampler, CELL_GCM_FALSE, 0, 12<<8, CELL_GCM_TEXTURE_MAX_ANISO_1); GCM_FUNC( cellGcmSetTextureFilter, nTextureSampler, 0, CELL_GCM_TEXTURE_NEAREST_LINEAR, CELL_GCM_TEXTURE_LINEAR, CELL_GCM_TEXTURE_CONVOLUTION_QUINCUNX); } for( uint nVertexAttribute = 0; nVertexAttribute < 16; ++nVertexAttribute ) { GCM_FUNC( cellGcmSetVertexDataArray, nVertexAttribute, 0, 0, 0, CELL_GCM_VERTEX_F, CELL_GCM_LOCATION_LOCAL, 0); } GCM_FUNC( cellGcmSetTwoSidedStencilTestEnable, CELL_GCM_FALSE); float scale[4] = {2048.0f, 2048.0f, 0.5f, 0.0f}; float offset[4] = {2048.0f, 2048.0f, 0.5f, 0.0f}; GCM_FUNC( cellGcmSetViewport, 0, 0, 4096, 4096, 0.0f, 1.0f, scale, offset); GCM_FUNC( cellGcmSetZcullStatsEnable, CELL_GCM_FALSE); GCM_FUNC( cellGcmSetAntiAliasingControl, CELL_GCM_FALSE, CELL_GCM_FALSE, CELL_GCM_FALSE, 0xffff); GCM_FUNC( 
cellGcmSetBackPolygonMode, CELL_GCM_POLYGON_MODE_FILL); GCM_FUNC( cellGcmSetClearColor, 0); GCM_FUNC( cellGcmSetColorMaskMrt, 0); GCM_FUNC( cellGcmSetFrontPolygonMode, CELL_GCM_POLYGON_MODE_FILL); GCM_FUNC( cellGcmSetLineSmoothEnable, CELL_GCM_FALSE); GCM_FUNC( cellGcmSetLineStippleEnable, CELL_GCM_FALSE); GCM_FUNC( cellGcmSetPointSpriteControl, CELL_GCM_FALSE, 0, 0); GCM_FUNC( cellGcmSetPolySmoothEnable, CELL_GCM_FALSE); GCM_FUNC( cellGcmSetPolygonStippleEnable, CELL_GCM_FALSE); GCM_FUNC( cellGcmSetRenderEnable, CELL_GCM_TRUE, 0); GCM_FUNC( cellGcmSetUserClipPlaneControl, CELL_GCM_FALSE,CELL_GCM_FALSE,CELL_GCM_FALSE,CELL_GCM_FALSE,CELL_GCM_FALSE,CELL_GCM_FALSE); GCM_FUNC( cellGcmSetVertexAttribInputMask, 0xffff); GCM_FUNC( cellGcmSetZpassPixelCountEnable, CELL_GCM_FALSE); for( uint i = 0; i < 4 ; ++i ) { GCM_FUNC( cellGcmSetVertexTextureAddress, i, CELL_GCM_TEXTURE_WRAP, CELL_GCM_TEXTURE_WRAP); GCM_FUNC( cellGcmSetVertexTextureBorderColor, i, 0); GCM_FUNC( cellGcmSetVertexTextureControl, i, CELL_GCM_FALSE, 0, 12<<8); GCM_FUNC( cellGcmSetVertexTextureFilter, i, 0); } GCM_FUNC( cellGcmSetTransformBranchBits, CELL_GCM_FALSE); GCM_FUNC( cellGcmSetTwoSideLightEnable, CELL_GCM_FALSE); GCM_FUNC( cellGcmSetZMinMaxControl, CELL_GCM_TRUE, CELL_GCM_FALSE, CELL_GCM_FALSE); // GCM_FUNC( cellGcmSetTextureOptimization, 1<<3); ---- who cares? this won't compile the way it's described in documentation. // GCM_FUNC( cellGcmSetCylindricalWrap, CELL_GCM_FALSE); ---- who cares? this won't compile the way it's described in documentation. 
GCM_FUNC( cellGcmSetTwoSideLightEnable, CELL_GCM_FALSE); GCM_FUNC( cellGcmSetTransformBranchBits, 0); GCM_FUNC( cellGcmSetVertexDataBase, 0,0); // ---- I don't wanna set the surface to the default surface that we never use, as it generates unneeded stall in RSX /* CellGcmSurface surface = { CELL_GCM_SURFACE_PITCH, // type CELL_GCM_SURFACE_CENTER_1, // antialias CELL_GCM_SURFACE_X1R5G5B5_Z1R5G5B5,// colorFormat CELL_GCM_SURFACE_TARGET_0, // colorTarget {0, 0, 0, 0}, // colorLocation {0, 0, 0, 0}, // colorOffset {64, 64, 64, 64}, // colorPitch CELL_GCM_SURFACE_Z16, // depthFormat CELL_GCM_LOCATION_LOCAL, // depthLocation {0,0}, // __padding 0, // depthOffset 64, // depthPitch 1,1, // width,height 0,0 // x,y }; GCM_FUNC( cellGcmSetSurface, &surface); */ // After ^this, the cached vertex array data is worthless.... for( uint i = 0; i < D3D_MAX_STREAMS; ++i ) g_cacheSetVertexDataArray[i].SetNull(); } inline void CGcmDrawState::ResetRsxState() { m_dirtyStatesMask |= kDirtyResetRsx; } //-------------------------------------------------------------------------------------------------- // Viewport and scissor //-------------------------------------------------------------------------------------------------- inline void CGcmDrawState::UnpackSetViewport(CONST D3DVIEWPORT9* pViewport) { m_viewZ[0] = pViewport->MinZ; m_viewZ[1] = pViewport->MaxZ; m_viewportSize[0] = pViewport->X; m_viewportSize[1] = pViewport->Y; m_viewportSize[2] = pViewport->Width; m_viewportSize[3] = pViewport->Height; float viewScale[4] = { m_viewportSize[2]/2, m_viewportSize[3]/2, ( m_viewZ[1] - m_viewZ[0] ) / 2.0f, 0.0f }; float viewOffset[4] = { m_viewportSize[0] + m_viewportSize[2]/2, m_viewportSize[1] + m_viewportSize[3]/2, ( m_viewZ[1] + m_viewZ[0] ) / 2.0f, 0.0f }; GCM_FUNC ( cellGcmSetViewport, m_viewportSize[0], m_viewportSize[1], m_viewportSize[2], m_viewportSize[3], m_viewZ[0], m_viewZ[1], viewScale, viewOffset ); } inline HRESULT CGcmDrawState::SetViewport(CONST D3DVIEWPORT9* pViewport) { 
PackData(kDataViewport, sizeof(D3DVIEWPORT9), (void*)pViewport);
	return S_OK;
}

// Copies the scissor rectangle into m_scissor and marks the scissor state dirty.
// No GCM command is issued here.
inline void CGcmDrawState::UnpackSetScissorRect( DrawScissor_t * pScissor )
{
	m_scissor.x = pScissor->x;
	m_scissor.y = pScissor->y;
	m_scissor.w = pScissor->w;
	m_scissor.h = pScissor->h;
	m_dirtyStatesMask |= kDirtyScissor;
}

// Queues a scissor rectangle as a kDataScissor record in the per-draw data stream.
inline void CGcmDrawState::SetScissorRect( DrawScissor_t * pScissor )
{
	PackData(kDataScissor, sizeof(DrawScissor_t), pScissor);
}

//--------------------------------------------------------------------------------------------------
// Reports, Zpass and labels
//--------------------------------------------------------------------------------------------------

// The Unpack* helpers below each issue exactly one GCM command with the given arguments.

inline void UnpackSetZpassPixelCountEnable(uint32 enable)
{
	GCM_FUNC(cellGcmSetZpassPixelCountEnable, enable);
}

inline void UnpackSetClearReport(uint32 type)
{
	GCM_FUNC(cellGcmSetClearReport, type);
}

inline void UnpackSetReport(uint32 type, uint32 index)
{
	GCM_FUNC(cellGcmSetReport, type, index);
}

inline void UnpackSetWriteBackEndLabel(uint8 index, uint32 value)
{
	GCM_FUNC(cellGcmSetWriteBackEndLabel, index, value);
}

// The Set* counterparts only queue the request in the per-draw data stream via PackData.

inline void CGcmDrawState::SetZpassPixelCountEnable(uint32 enable)
{
	PackData(kDataSetZpassPixelCountEnable, enable);
}

inline void CGcmDrawState::SetClearReport(uint32 type)
{
	PackData(kDataSetClearReport, type);
}

inline void CGcmDrawState::SetReport(uint32 type, uint32 index)
{
	PackData(kDataSetReport, type, index);
}

// Queues a back-end label write. GCM_LABEL_MEMORY_FREE is special-cased: its value is latched
// in m_nFreeLabel instead of being packed.
inline void CGcmDrawState::SetWriteBackEndLabel(uint8 index, uint32 value)
{
	if (index == GCM_LABEL_MEMORY_FREE)
	{
		m_nFreeLabel = value; // 0 is not valid...
}
	else
	{
		PackData(kDataSetWriteBackEndLabel, index, value);
	}
}

//--------------------------------------------------------------------------------------------------
// Renderstates
//--------------------------------------------------------------------------------------------------

// Applies one D3D render state to the cached draw state. Nothing is sent to the GPU here:
// each handled state stores its value in a member and raises a dirty bit.
//
// g_d3drs_defvalue_indices[State] encodes two things:
//   bits 7..6 - handling class (0: ignore, 1: assert-default only, 2: apply but assert default,
//               3: apply any value)
//   bits 5..0 - index into g_d3drs_defvalues (only decoded when DBGFLAG_ASSERT is defined)
//
//   State - render state to set; asserted to be below D3DRS_VALUE_LIMIT.
//   Value - raw DWORD value for the state.
inline void CGcmDrawState::UnpackSetRenderState( D3DRENDERSTATETYPE State, uint Value )
{
	// NOTE(review): 'ignored' is written in several cases but never read in this function.
	char ignored = 0;

	Assert( State < D3DRS_VALUE_LIMIT );
	uint nDefvalueIndex = g_d3drs_defvalue_indices[State];
	uint8 nClass = nDefvalueIndex >> 6;

#ifdef DBGFLAG_ASSERT
	nDefvalueIndex &= 0077; // octal: keep the low six bits
	Assert( nDefvalueIndex < ARRAYSIZE( g_d3drs_defvalues ) );
	uint32 nDefValue = g_d3drs_defvalues[nDefvalueIndex];
#endif

	switch( nClass )
	{
	case 0: // just ignore quietly. example: D3DRS_LIGHTING
		ignored = 1;
		break;

	case 1:
		{
			// no GL response - and no error as long as the write value matches the default
			Assert( Value == nDefValue );
		}
		break;

	case 2:
		// provide GL response, but only support known default value
		Assert( Value == nDefValue );
		// fall through to mode 3

	case 3:
		// full GL response, support any legal value
		// note we're handling the class-2's as well.
		switch( State )
		{
		default:
			Msg( "Cannot interpret State %d", (int)State );
			break;

		case D3DRS_ZENABLE: // kGLDepthTestEnable
			m_ZEnable = !!Value;
			m_dirtyStatesMask |= kDirtyZEnable;
			break;

		case D3DRS_ZWRITEENABLE: // kGLDepthMask
			{
				// Only flagged dirty when the normalized 0/1 value actually changes.
				uint32 newMask = Value ? 1 : 0;
				if(m_nSetDepthMask != newMask)
				{
					m_nSetDepthMask = newMask;
					m_dirtyStatesMask |= kDirtyDepthMask;
				}
			}
			break;

		case D3DRS_ZFUNC:
			{
				// kGLDepthFunc
				m_ZFunc = D3DCompareFuncToGL( Value );
				m_dirtyStatesMask |= kDirtyZFunc;
			}
			break;

		case D3DRS_COLORWRITEENABLE: // kGLColorMaskSingle
			if( IsLayerRender() )
			{
				// Translate the D3D per-channel bits to the GCM colour mask bits.
				m_ColorWriteEnable =
					( ((Value & D3DCOLORWRITEENABLE_RED) != 0) ? CELL_GCM_COLOR_MASK_R : 0x00 ) |
					( ((Value & D3DCOLORWRITEENABLE_GREEN) != 0) ? CELL_GCM_COLOR_MASK_G : 0x00 ) |
					( ((Value & D3DCOLORWRITEENABLE_BLUE) != 0) ? CELL_GCM_COLOR_MASK_B : 0x00 ) |
					( ((Value & D3DCOLORWRITEENABLE_ALPHA) != 0) ? CELL_GCM_COLOR_MASK_A : 0x00 );
				m_dirtyStatesMask |= kDirtyColorWriteEnable;
			}
			break;

		case D3DRS_COLORWRITEENABLE1: // kGLColorMaskMultiple
		case D3DRS_COLORWRITEENABLE2: // kGLColorMaskMultiple
		case D3DRS_COLORWRITEENABLE3: // kGLColorMaskMultiple
			ignored = 1;
			break;

		case D3DRS_CULLMODE: // kGLCullFaceEnable / kGLCullFrontFace
			{
				m_CullMode = Value;
				m_dirtyStatesMask |= kDirtyCullMode;
			}
			break;

		//-------------------------------------------------------------------------------------------- alphablend stuff
		case D3DRS_ALPHABLENDENABLE: // kGLBlendEnable
			// NOTE(review): the 'if' has no braces, so only the assignment is conditional and the
			// dirty bit is raised even when IsLayerRender() is false. The neighbouring cases
			// (COLORWRITEENABLE, BLENDOP, SRGBWRITEENABLE) guard both statements - confirm
			// whether the difference is intended.
			if( IsLayerRender() )
				m_AlphablendEnable = !!Value;
			m_dirtyStatesMask |= kDirtyAlphablendEnable;
			break;

		case D3DRS_BLENDOP: // kGLBlendEquation // D3D blend-op ==> GL blend equation
			if( IsLayerRender() )
			{
				m_BlendOp = Value;
				m_dirtyStatesMask |= kDirtyBlendOp;
			}
			break;

		case D3DRS_SRCBLEND: // kGLBlendFactor // D3D blend-factor ==> GL blend factor
		case D3DRS_DESTBLEND: // kGLBlendFactor
			{
				uint16 factor = D3DBlendFactorToGL( Value );
				// index 0 = source factor, index 1 = destination factor
				m_blends[!( State == D3DRS_SRCBLEND )] = factor;
				m_dirtyStatesMask |= kDirtyBlendFactor;
			}
			break;

		case D3DRS_SEPARATEALPHABLENDENABLE:
		case D3DRS_BLENDOPALPHA:
		case D3DRS_SRCBLENDALPHA:
		case D3DRS_DESTBLENDALPHA:
			ignored = 1;
			break;

		case D3DRS_SRGBWRITEENABLE: // kGLBlendEnableSRGB
			if( IsLayerRender() )
			{
				m_SrgbWriteEnable = Value;
				m_dirtyStatesMask |= kDirtySrgbWriteEnable;
			}
			break;

		//-------------------------------------------------------------------------------------------- alphatest stuff
		case D3DRS_ALPHATESTENABLE:
			m_AlphaTestEnable = Value;
			m_dirtyStatesMask |= kDirtyAlphaTestEnable;
			break;

		case D3DRS_ALPHAREF:
			m_alphaFunc.ref = Value;
			m_dirtyStatesMask |= kDirtyAlphaFunc;
			break;

		case D3DRS_ALPHAFUNC:
			{
				uint32 func = D3DCompareFuncToGL( Value );
				m_alphaFunc.func = func;
				m_dirtyStatesMask |= kDirtyAlphaFunc;
			}
			break;

		//-------------------------------------------------------------------------------------------- stencil stuff
		// NOTE(review): the three stencil-op cases index dxtogl_stencilmode (declared with 10
		// entries) directly with Value; nothing here checks the range.
		case D3DRS_STENCILENABLE: // GLStencilTestEnable_t
			m_StencilEnable = Value;
			m_dirtyStatesMask |= kDirtyStencilEnable;
			break;

		case D3DRS_STENCILFAIL: // GLStencilOp_t "what do you do if stencil test fails"
			{
				m_stencilOp.fail = dxtogl_stencilmode[Value];
				m_dirtyStatesMask |= kDirtyStencilOp;
			}
			break;

		case D3DRS_STENCILZFAIL: // GLStencilOp_t "what do you do if stencil test passes *but* depth test fails, if depth test happened"
			{
				m_stencilOp.dfail = dxtogl_stencilmode[Value];
				m_dirtyStatesMask |= kDirtyStencilOp;
			}
			break;

		case D3DRS_STENCILPASS: // GLStencilOp_t "what do you do if stencil test and depth test both pass"
			{
				m_stencilOp.dpass = dxtogl_stencilmode[Value];
				m_dirtyStatesMask |= kDirtyStencilOp;
			}
			break;

		case D3DRS_STENCILFUNC: // GLStencilFunc_t
			{
				uint32 stencilfunc = D3DCompareFuncToGL( Value );
				m_stencilFunc.func = stencilfunc;
				m_dirtyStatesMask |= kDirtyStencilFunc;
			}
			break;

		case D3DRS_STENCILREF: // GLStencilFunc_t
			m_stencilFunc.ref = (Value & 0xFF);
			m_dirtyStatesMask |= kDirtyStencilFunc;
			break;

		case D3DRS_STENCILMASK: // GLStencilFunc_t
			{
				m_stencilFunc.mask = (Value & 0xFF);
				m_dirtyStatesMask |= kDirtyStencilFunc;
			}
			break;

		case D3DRS_STENCILWRITEMASK: // GLStencilWriteMask_t
			{
				//if (Value==255)
				//{
				//	Value = 0xFFFFFFFF; // mask blast
				//}
				m_StencilWriteMask = Value;
				m_dirtyStatesMask |= kDirtyStencilWriteMask;
			}
			break;

		//-------------------------------------------------------------------------------------------- two-sided stencil stuff
		case D3DRS_TWOSIDEDSTENCILMODE: // -> GL_STENCIL_TEST_TWO_SIDE_EXT... not yet implemented ?
		case D3DRS_CCW_STENCILFAIL: // GLStencilOp_t
		case D3DRS_CCW_STENCILZFAIL: // GLStencilOp_t
		case D3DRS_CCW_STENCILPASS: // GLStencilOp_t
		case D3DRS_CCW_STENCILFUNC: // GLStencilFunc_t
			ignored = 1;
			break;

		case D3DRS_FOGENABLE: // none of these are implemented yet... erk
		case D3DRS_FOGCOLOR:
		case D3DRS_FOGTABLEMODE:
		case D3DRS_FOGSTART:
		case D3DRS_FOGEND:
		case D3DRS_FOGDENSITY:
		case D3DRS_RANGEFOGENABLE:
		case D3DRS_FOGVERTEXMODE:
			ignored = 1;
			break;

		case D3DRS_MULTISAMPLEANTIALIAS:
		case D3DRS_MULTISAMPLEMASK:
			ignored = 1;
			break;

		case D3DRS_SCISSORTESTENABLE: // kGLScissorEnable
			{
				m_scissor.enabled = !!Value;
				m_dirtyStatesMask |= kDirtyScissor;
			}
			break;

		case D3DRS_DEPTHBIAS: // kGLDepthBias
			{
				// the value in the dword is actually a float
				// (stored as-is here; CommitRenderStates reinterprets the member as a float)
				m_depthBias.units = Value;
				m_dirtyStatesMask |= kDirtyDepthBias;
			}
			break;

		// good ref on these: http://aras-p.info/blog/2008/06/12/depth-bias-and-the-power-of-deceiving-yourself/
		case D3DRS_SLOPESCALEDEPTHBIAS:
			{
				// the value in the dword is actually a float
				m_depthBias.factor = Value;
				m_dirtyStatesMask |= kDirtyDepthBias;
			}
			break;

		case D3DRS_CLIPPING: // ???? is clipping ever turned off ??
			ignored = 1;
			break;

		case D3DRS_CLIPPLANEENABLE: // kGLClipPlaneEnable
			{
				// Bit j of Value enables plane j. Each enabled plane contributes
				// CELL_GCM_USER_CLIP_PLANE_ENABLE_GE shifted left by 2*j. Note that this state
				// dirties m_dirtyCachesMask, not m_dirtyStatesMask.
				m_userClipPlanesState = 0;
				for ( uint32 j = 0, uiValueMask = 1, uiClipSetMask = CELL_GCM_USER_CLIP_PLANE_ENABLE_GE;
					j < 6; ++ j, uiValueMask <<= 1, uiClipSetMask <<= 2 )
				{
					m_userClipPlanesState |= ( ( Value & uiValueMask ) != 0 ) ? uiClipSetMask : 0;
				}
				m_dirtyCachesMask |= kDirtyClipPlanes;
			}
			break;

		//-------------------------------------------------------------------------------------------- polygon/fill mode
		case D3DRS_FILLMODE:
			m_FillMode = Value;
			m_dirtyStatesMask |= kDirtyFillMode;
			break;
		}
		break;
	}
}

// Queues a render-state change as a kDataSetRenderState record; UnpackData later feeds it to
// UnpackSetRenderState.
inline void CGcmDrawState::SetRenderState( D3DRENDERSTATETYPE State, uint Value )
{
	PackData(kDataSetRenderState, State, Value);
}

//--------------------------------------------------------------------------------------------------
// Texture samplers, textures, texture cache
//--------------------------------------------------------------------------------------------------

// Stores one sampler-state value for the given D3D sampler and marks that sampler dirty.
// The D3D sampler number is mapped through m_pFixed->m_aSamplerIdx to a slot in
// m_pFixed->m_aSamplers; an entry of 0xFF means "no slot yet" and takes the next free one.
//   Sampler - D3D sampler index; outside CERT builds an out-of-range value is reported via Error().
//   Type    - which sampler parameter to set.
//   Value   - raw DWORD value for the parameter.
inline void CGcmDrawState::SetSamplerState( uint Sampler,D3DSAMPLERSTATETYPE Type,DWORD Value )
{
#ifndef CERT
	if (Sampler>=D3D_MAX_SAMPLERS) Error("Invalid sampler %d, PS3 suppoerts %d\n", Sampler, D3D_MAX_SAMPLERS );
#endif

	// indirect sampler index
	uint32 SamplerIdx = m_pFixed->m_aSamplerIdx[Sampler];
	if (SamplerIdx == 0xFF)
	{
		SamplerIdx = m_pFixed->m_nSampler;
		m_pFixed->m_nSampler++;
		m_pFixed->m_aSamplerIdx[Sampler] = SamplerIdx;
	}

	// the D3D-to-GL translation has been moved to CommitSamplers since we want to do it at draw time
	// so this call just stuffs values in slots.
	D3DSamplerDesc *samp = m_pFixed->m_aSamplers + SamplerIdx;

	switch( Type )
	{
		// addressing modes can be
		// D3DTADDRESS_WRAP Tile the texture at every integer junction.
		// D3DTADDRESS_MIRROR Similar to D3DTADDRESS_WRAP, except that the texture is flipped at every integer junction.
		// D3DTADDRESS_CLAMP Texture coordinates outside the range [0.0, 1.0] are set to the texture color at 0.0 or 1.0, respectively.
		// D3DTADDRESS_BORDER Texture coordinates outside the range [0.0, 1.0] are set to the border color.
		// D3DTADDRESS_MIRRORONCE Similar to D3DTADDRESS_MIRROR and D3DTADDRESS_CLAMP.
		// Takes the absolute value of the texture coordinate (thus, mirroring around 0),
		// and then clamps to the maximum value.
// The most common usage is for volume textures,
		// where support for the full D3DTADDRESS_MIRRORONCE texture-addressing mode is not
		// necessary, but the data is symmetric around the one axis.
	case D3DSAMP_ADDRESSU: samp->m_addressModeU = Value; break;
	case D3DSAMP_ADDRESSV: samp->m_addressModeV = Value; break;
	case D3DSAMP_ADDRESSW: samp->m_addressModeW = Value; break;

	case D3DSAMP_BORDERCOLOR: // samp->m_borderColor = Value; // Border color always 0
		// NOTE(review): confirm this break is live code in the original layout; if it sat
		// behind the comment above, this case would fall through into D3DSAMP_MAGFILTER.
		break;

	case D3DSAMP_MAGFILTER: samp->m_magFilter = (D3DTEXTUREFILTERTYPE)Value; break;
	case D3DSAMP_MINFILTER: samp->m_minFilter = (D3DTEXTUREFILTERTYPE)Value; break;
	case D3DSAMP_MIPFILTER: samp->m_mipFilter = (D3DTEXTUREFILTERTYPE)Value; break;
	case D3DSAMP_MIPMAPLODBIAS: samp->m_mipmapBias = Value; break; // float in sheep's clothing - check this one out
	case D3DSAMP_MAXMIPLEVEL: samp->m_maxMipLevel = Value; break; //FIXME (unsure here)
	case D3DSAMP_MAXANISOTROPY: samp->m_maxAniso = Value; break;
	case D3DSAMP_SRGBTEXTURE: samp->m_srgb = Value; break;
	case D3DSAMP_SHADOWFILTER: samp->m_shadowFilter = Value; break;

	default:
		Msg( "Unknown sampler parameter" );
		DebuggerBreak();
		break;
	}

	// The dirty bit is keyed by the D3D sampler number, not by the remapped slot index.
	m_dirtySamplersMask |= ( 1 << Sampler );
}

// Records the texture bound to a stage (local offset plus layout address) and marks the
// stage's sampler dirty.
inline void CGcmDrawState::UnpackSetTexture( DWORD Stage, uint32 offset, uint32 eaLayout )
{
	// texture sets are finalized in CommitSamplers
	m_textures[Stage].m_nLocalOffset = offset;
	m_textures[Stage].m_eaLayout = eaLayout;
	m_dirtySamplersMask |= ( 1 << Stage );
}

// Clears the texture bound to a stage and marks the stage's sampler dirty.
inline void CGcmDrawState::UnpackResetTexture( DWORD Stage )
{
	// texture sets are finalized in CommitSamplers
	m_textures[Stage].Reset();
	m_dirtySamplersMask |= ( 1 << Stage );
}

// Binds a texture to a stage and queues the binding as a kDataTexture record.
//   tex - texture to bind; must not be NULL (dereferenced unconditionally).
inline void CGcmDrawState::SetTexture( DWORD Stage, CPs3gcmTexture *tex )
{
	m_textures[Stage].Assign(tex);

	// Bit 0 of the packed offset is set when the texture's memory block reports local memory.
	if (tex->m_lmBlock.IsLocalMemory() )
	{
		m_textures[Stage].m_nLocalOffset |= 1;
	}

	PackData(kDataTexture, Stage, m_textures[Stage].m_nLocalOffset, m_textures[Stage].m_eaLayout );
}

// Queues a "reset texture" request for a stage as a kDataResetTexture record.
inline void CGcmDrawState::ResetTexture( DWORD Stage )
{
PackData(kDataResetTexture, Stage); } inline void UnpackSetInvalidateTextureCache() { GCM_FUNC( cellGcmSetInvalidateTextureCache, CELL_GCM_INVALIDATE_TEXTURE ); } inline void CGcmDrawState::SetInvalidateTextureCache() { m_dirtyCachesMask |= kDirtyTxCache; } //-------------------------------------------------------------------------------------------------- // Vertex buffers, vertex cache, , vertex constants //-------------------------------------------------------------------------------------------------- #ifndef SPU inline void CGcmDrawState::SetVertexStreamSource( uint nStreamIndex, IDirect3DVertexBuffer9* pStreamData,UINT OffsetInBytes,UINT Stride ) { // SNPROF("CGcmDrawState::SetVertexStreamSource( uint nStreamIndex, IDirect3DVertexBuffer9* pStreamData,UINT OffsetInBytes,UINT Stride )"); // Write stream descriptor into variable data #ifdef GCM_DS_SAFE uint32 spacereqd = sizeof(D3DStreamDesc) + sizeof(DrawData); uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData )); if(spacereqd > spaceleft) Error("Out of per draw call data\n"); #endif DrawData* pData = (DrawData*) m_pDataCursor; pData->m_type = kDataStreamDesc; pData->m_size = sizeof(D3DStreamDesc); pData->m_idx = nStreamIndex; D3DStreamDesc* pDsd = (D3DStreamDesc*)(pData+1); if ( pStreamData && pStreamData->m_pBuffer ) { // we pass this pointer as a BufferBase later to compare, so we need to make sure they're binarily the same Assert( uintp( pStreamData ) == uintp( static_cast( pStreamData ) ) ); pDsd->m_offset = OffsetInBytes; pDsd->m_stride = Stride; pDsd->m_vtxBuffer = pStreamData; pDsd->m_nLocalBufferOffset = pStreamData->m_pBuffer->Offset(); } else { V_memset(pDsd, 0, sizeof( *pDsd ) ); } m_pDataCursor = (uint8*)pDsd + sizeof(D3DStreamDesc); } #endif inline void CGcmDrawState::_SetVertexShaderConstantB( UINT StartRegister, uint BoolCount, uint shaderVxConstants ) { uint nMask = ( 1 << ( StartRegister + BoolCount ) ) - ( 1 << StartRegister ) ; m_shaderVxConstants &= ~nMask; 
m_shaderVxConstants |= shaderVxConstants; m_dirtyCachesMask |= kDirtyVxConstants; } inline void CGcmDrawState::SetVertexShaderConstantB(UINT StartRegister,CONST BOOL* pConstantData,UINT BoolCount) { uint shaderVxConstants = 0; for ( uint32 k = MIN( StartRegister, 32 ), kEnd = MIN( StartRegister + BoolCount, 32 ), uiConstantBit = ( 1 << StartRegister ), uiDataIdx = 0; k < kEnd; ++ k, uiConstantBit <<= 1, ++ uiDataIdx ) { if( pConstantData[ uiDataIdx ] ) { shaderVxConstants |= uiConstantBit; } } _SetVertexShaderConstantB( StartRegister, BoolCount, shaderVxConstants ); } // inline void CGcmDrawState::VertexConstantExtractor( // float *pDestStorage, int kRegisterFirst, int kRegisterLength, // int StartRegister, const float *pConstantData, int Vector4fCount ) // { // int iMatrixRegister = Max( 0, StartRegister - kRegisterFirst ); // which part of matrix is updated // int iConstantDataMatrixStart = Max( StartRegister, kRegisterFirst ); // where in constant data the new values start // int numMatrixRegisters = StartRegister + Vector4fCount - iConstantDataMatrixStart; // how many new values can be used // numMatrixRegisters = Min( numMatrixRegisters, kRegisterLength - iMatrixRegister ); // we shouldn't use more values than there's room in the matrix // if ( numMatrixRegisters > 0 ) // { // iConstantDataMatrixStart -= StartRegister; // constant data values are relative to StartRegister // V_memcpy( &pDestStorage[ iMatrixRegister * 4 ], &pConstantData[ iConstantDataMatrixStart * 4 ], numMatrixRegisters * 4 * sizeof( float ) ); // } // } inline void CGcmDrawState::SetVertexShaderConstantF( UINT StartRegister, void* pUnalignedConstantData, UINT Vector4fCount ) { // SNPROF("CGcmDrawState::SetVertexShaderConstantF( UINT StartRegister, void* pUnalignedConstantData, UINT Vector4fCount )"); // // Intercept the vertex constants affecting model-view-projection [ registers C8,C9,C10,C11 ] // VertexConstantExtractor( m_matViewProjection, 8, 4, StartRegister, pConstantData, 
// Vector4fCount );
	// 	// Intercept the vertex constants affecting model matrix [ registers C58,C59,C60 ]
	// 	VertexConstantExtractor( m_matModel, 58, 3, StartRegister, pConstantData, Vector4fCount );

	// Record layout: DrawData header immediately followed by the raw constant data.
	uint32 spacereqd = (Vector4fCount*sizeof(vec_float4)) + sizeof(DrawData);

#ifdef GCM_DS_SAFE
	uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData ));
	if(spacereqd > spaceleft) Error("Out of per draw call data\n");
#endif

	DrawData* pDest = (DrawData*)m_pDataCursor;
	uint8* pVals = (uint8*)(pDest+1);

	pDest->m_type = kDataVpuConsts;
	pDest->m_size = Vector4fCount * sizeof(vec_float4);
	pDest->m_idx = StartRegister;

	V_memcpy(pVals, pUnalignedConstantData, Vector4fCount * sizeof(vec_float4));

	m_pDataCursor += spacereqd;
}

// Issues the GCM vertex-cache invalidate command.
inline void UnpackSetInvalidateVertexCache()
{
	GCM_FUNC( cellGcmSetInvalidateVertexCache );
}

// Requests a vertex-cache invalidate by raising kDirtyVxCache in the dirty-caches mask.
inline void CGcmDrawState::SetInvalidateVertexCache()
{
	m_dirtyCachesMask |= kDirtyVxCache;
}

// Updates the cached local-memory offset of every bound stream that refers to vtxBuffer.
inline void CGcmDrawState::UnpackUpdateVtxBufferOffset( IDirect3DVertexBuffer9 * vtxBuffer, uint nLocalBufferOffset )
{
	for( uint i = 0; i < D3D_MAX_STREAMS; ++i )
	{
		if( g_dxGcmVertexStreamSources[i].m_vtxBuffer == vtxBuffer )
		{
			g_dxGcmVertexStreamSources[i].m_nLocalBufferOffset = nLocalBufferOffset; // new local buffer offset
		}
	}
}

// Queues a vertex-buffer offset update as a kDataUpdateVtxBufferOffset record. The buffer
// pointer is packed as a 32-bit integer.
inline void CGcmDrawState::UpdateVtxBufferOffset( IDirect3DVertexBuffer9 * vtxBuffer, uint nLocalBufferOffset )
{
	PackData(kDataUpdateVtxBufferOffset, (uint32)vtxBuffer, nLocalBufferOffset);
}

//--------------------------------------------------------------------------------------------------
// Pixel Shader Consts
//--------------------------------------------------------------------------------------------------

// Appends a kDataFpuConsts record holding Vector4fCount float4 pixel-shader constants,
// starting at register StartRegister, to the per-draw data stream and marks the pixel
// constants dirty. Available space is only checked when GCM_DS_SAFE is defined.
inline void CGcmDrawState::SetPixelShaderConstantF(uint32 StartRegister, float* pConstantData, uint32 Vector4fCount)
{
	// SNPROF("CGcmDrawState::SetPixelShaderConstantF(uint32 StartRegister, float* pConstantData, uint32 Vector4fCount)");

	m_dirtyCachesMask |= CGcmDrawState::kDirtyPxConstants;

	uint32 spacereqd =
(Vector4fCount*sizeof(vec_float4)) + sizeof(DrawData);

#ifdef GCM_DS_SAFE
	uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData ));
	if(spacereqd > spaceleft) Error("Out of per draw call data\n");
#endif

	DrawData* pDest = (DrawData*)m_pDataCursor;
	uint8* pVals = (uint8*)(pDest+1);

	pDest->m_type = kDataFpuConsts;
	pDest->m_size = Vector4fCount * sizeof(vec_float4);
	pDest->m_idx = StartRegister;

	V_memcpy(pVals, pConstantData, Vector4fCount * sizeof(vec_float4));

	m_pDataCursor += spacereqd;
}

// Copies the camera position from pWCP into m_vecWorldSpaceCameraPosition
// (sizeof(m_vecWorldSpaceCameraPosition) bytes).
inline void CGcmDrawState::UnpackSetWorldSpaceCameraPosition(float* pWCP)
{
	memcpy(m_vecWorldSpaceCameraPosition, pWCP, sizeof(m_vecWorldSpaceCameraPosition));
}

// Queues the camera position as a kDataSetWorldSpaceCameraPosition record.
inline void CGcmDrawState::SetWorldSpaceCameraPosition(float* pWCP)
{
	PackData(kDataSetWorldSpaceCameraPosition, (uint16)sizeof(m_vecWorldSpaceCameraPosition), (void*)pWCP);
}

//--------------------------------------------------------------------------------------------------
// Surfaces and render targets
//--------------------------------------------------------------------------------------------------

// Builds a CellGcmSurface from the colour (m_texC) and depth (m_texZ) textures in pSurface
// and queues it as a kDataUpdateSurface record. Either texture may be unset:
//   - an unset colour texture selects CELL_GCM_SURFACE_TARGET_NONE,
//   - unset textures get offset 0 and pitch 64,
//   - with neither set, width/height fall back to g_ps3gcmGlobalState.m_nRenderSize.
inline void CGcmDrawState::Ps3Helper_UpdateSurface( UpdateSurface_t * pSurface )
{
	const CPs3gcmTextureData_t &texC = pSurface->m_texC, &texZ = pSurface->m_texZ;

	// pTexCZ / pTexCZ_layout pick the texture that supplies the surface dimensions.
	// Colour is tested last, so it wins over depth when both are set.
	const CPs3gcmTextureData_t *pTexCZ = &texC;
	CPs3gcmTextureLayout texC_layout, texZ_layout, *pTexCZ_layout = &texC_layout;

	if( texZ )
	{
		// Take a local copy of the layout stored at m_eaLayout.
		memcpy (&texZ_layout, (void*)texZ.m_eaLayout, sizeof( texZ_layout ));
		pTexCZ = &texZ;
		pTexCZ_layout = &texZ_layout;
	}

	if( texC )
	{
		memcpy( &texC_layout, (void*)texC.m_eaLayout, sizeof( texC_layout ));
		pTexCZ = &texC;
		pTexCZ_layout = &texC_layout;
	}

	CellGcmSurface sf;
	V_memset( &sf, 0, sizeof( sf ) );

	sf.colorFormat = CELL_GCM_SURFACE_A8R8G8B8;
	sf.colorTarget = texC.NotNull() ? CELL_GCM_SURFACE_TARGET_0 : CELL_GCM_SURFACE_TARGET_NONE;

	sf.colorLocation[0] = CELL_GCM_LOCATION_LOCAL;
	sf.colorOffset[0] = texC ? texC.Offset() : 0;
	sf.colorPitch[0] = texC ? texC_layout.DefaultPitch2( g_ps3texFormats ) : 64;

	// Colour targets 1..3 get fixed placeholder values.
	sf.colorLocation[1] = CELL_GCM_LOCATION_LOCAL;
	sf.colorLocation[2] = CELL_GCM_LOCATION_LOCAL;
	sf.colorLocation[3] = CELL_GCM_LOCATION_LOCAL;
	sf.colorOffset[1] = 0;
	sf.colorOffset[2] = 0;
	sf.colorOffset[3] = 0;
	sf.colorPitch[1] = 64;
	sf.colorPitch[2] = 64;
	sf.colorPitch[3] = 64;

	// Depth defaults to Z24S8 and switches to Z16 for the two 16-bit depth texture formats.
	sf.depthFormat = CELL_GCM_SURFACE_Z24S8;
	if ( texZ )
	{
		CPs3gcmTextureLayout::Format_t &zFmt = g_ps3texFormats[texZ_layout.m_nFormat];
		if ( ( zFmt.m_gcmFormat == CELL_GCM_TEXTURE_DEPTH16 ) || ( zFmt.m_gcmFormat == CELL_GCM_TEXTURE_DEPTH16_FLOAT ) )
		{
			sf.depthFormat = CELL_GCM_SURFACE_Z16;
		}
	}

	sf.depthLocation = CELL_GCM_LOCATION_LOCAL;
	sf.depthOffset = texZ ? texZ.Offset() : 0;
	sf.depthPitch = texZ ? texZ_layout.DefaultPitch2( g_ps3texFormats ) : 64;

	sf.type = ( texC && texC_layout.IsSwizzled() ) ? CELL_GCM_SURFACE_SWIZZLE : CELL_GCM_SURFACE_PITCH;
	sf.antialias = CELL_GCM_SURFACE_CENTER_1;

	sf.width = *pTexCZ ? pTexCZ_layout->m_key.m_size[0] : g_ps3gcmGlobalState.m_nRenderSize[0];
	sf.height = *pTexCZ ? pTexCZ_layout->m_key.m_size[1] : g_ps3gcmGlobalState.m_nRenderSize[1];
	sf.x = 0;
	sf.y = 0;

	PackData(kDataUpdateSurface, 0, (uint16)sizeof(sf), (void*)&sf);
}

// Makes pSf the current render surface, enables Zcull and marks the scissor state dirty.
inline void CGcmDrawState::UnpackUpdateSurface(CellGcmSurface* pSf)
{
	GCM_FUNC( cellGcmSetSurface, pSf );

	// cellGcmSetZcullControl invalidates Zcull, and these are the default settings anyways (LESS / LONES)
	// so don't bother doing anything here.
// If other settings are needed, set them once at the beginning of time for each zcull region //GCM_FUNC( cellGcmSetZcullControl, CELL_GCM_ZCULL_LESS, CELL_GCM_ZCULL_LONES ); // These calls do NOT invalidate Zcull GCM_FUNC( cellGcmSetZcullEnable, CELL_GCM_TRUE, CELL_GCM_TRUE ); // when render target changes, and scissor is not enabled, and the target dimensions change, // we need to flush the scissor dimensions because we always maintain scissor ON state, and // the scissor size must conform to surface size (which just changed) m_dirtyStatesMask |= kDirtyScissor; } inline void CGcmDrawState::Helper_IntersectRectsXYWH( uint16 const *a, uint16 const *b, uint16 *result ) // Takes 2 rects a&b specified as top,left,width,height // Produces an intersection also as top,left,width,height // Intersection can have zero width and/or height { result[0] = a[0] > b[0] ? a[0] : b[0]; result[1] = a[1] > b[1] ? a[1] : b[1]; uint16 ca = a[0]+a[2], cb = b[0]+b[2]; ca = ca < cb ? ca : cb; if ( int16(ca) < int16(result[0]) ) ca = result[0]; result[2] = ca - result[0]; ca = a[1]+a[3], cb = b[1]+b[3]; ca = ca < cb ? ca : cb; if ( int16(ca) < int16(result[1]) ) ca = result[1]; result[3] = ca - result[1]; } inline void CGcmDrawState::UnpackClearSurface( DWORD nFlags, D3DCOLOR nColor, float flZ, uint32 nStencil, uint32 nDepthStencilBitDepth ) { uint32 uiGcmClearMask = 0 | ( ( nFlags & D3DCLEAR_STENCIL ) ? CELL_GCM_CLEAR_S : 0 ) | ( ( nFlags & D3DCLEAR_ZBUFFER ) ? CELL_GCM_CLEAR_Z : 0 ) | ( ( nFlags & D3DCLEAR_TARGET ) ? 
(CELL_GCM_CLEAR_R|CELL_GCM_CLEAR_G|CELL_GCM_CLEAR_B|CELL_GCM_CLEAR_A) : 0 )
		;

	if ( nFlags & D3DCLEAR_TARGET )
	{
		GCM_FUNC( cellGcmSetClearColor, nColor );
	}

	if ( nFlags & (D3DCLEAR_STENCIL|D3DCLEAR_ZBUFFER) )
	{
		uint32 nClearValue;
		if ( nDepthStencilBitDepth == 16 )
		{
			// NOTE: for SURFACE_Z16 depth is in lower 16 bits
			nClearValue = ( uint32 )( flZ * 0xFFFF );
		}
		else
		{
			// 24-bit depth in the upper bits, 8-bit stencil in the low byte.
			nClearValue = ( ( ( uint32 )( flZ * 0xFFFFFF ) ) << 8 ) | ( nStencil & 0xFF );
		}
		// if(Z16) GCM_FUNC( cellGcmSetClearDepthStencil, (((uint32)( Z*0xFFFF ))<<8) );
		GCM_FUNC( cellGcmSetClearDepthStencil, nClearValue );
	}

	// Set scissor box to cover the intersection of viewport and scissor
	if ( !m_scissor.enabled )
	{
		GCM_FUNC( cellGcmSetScissor, m_viewportSize[0], m_viewportSize[1], m_viewportSize[2], m_viewportSize[3] );
	}
	else
	{
		uint16 uiScissorCoords[4] = {0};
		// &m_scissor.x is passed as a 4-element x,y,w,h array.
		Helper_IntersectRectsXYWH( m_viewportSize, &m_scissor.x, uiScissorCoords );
		GCM_FUNC( cellGcmSetScissor, uiScissorCoords[0], uiScissorCoords[1], uiScissorCoords[2], uiScissorCoords[3] );
	}

	GCM_FUNC( cellGcmSetClearSurface, uiGcmClearMask );

	// Since we affected the scissor, mark it as dirty
	m_dirtyStatesMask |= kDirtyScissor;
}

// Queues a surface clear as a kDataClearSurface record; see UnpackClearSurface for the
// meaning of the arguments.
inline void CGcmDrawState::ClearSurface( DWORD nFlags, D3DCOLOR nColor, float flZ, uint32 nStencil, uint32 nDepthStencilBitDepth )
{
	PackData(kDataClearSurface, nFlags, nColor, flZ, nStencil, nDepthStencilBitDepth );
}

// Puts the GPU into a fixed default state (culling, blend, alpha/stencil/depth tests, polygon
// mode and offset), forces viewport and scissor to the back-buffer size and resets some
// cached state.
inline void CGcmDrawState::UnpackResetSurfaceToKnownDefaultState()
{
	// Reset to default state:
	GCM_FUNC( cellGcmSetCullFaceEnable, CELL_GCM_TRUE );
	GCM_FUNC( cellGcmSetCullFace, CELL_GCM_BACK );
	GCM_FUNC( cellGcmSetFrontFace, CELL_GCM_CW );

	GCM_FUNC( cellGcmSetBlendEnable, CELL_GCM_FALSE );
	GCM_FUNC( cellGcmSetAlphaTestEnable, CELL_GCM_FALSE );
	GCM_FUNC( cellGcmSetStencilTestEnable, CELL_GCM_FALSE );
	GCM_FUNC( cellGcmSetDepthTestEnable, CELL_GCM_FALSE );

	GCM_FUNC( cellGcmSetFrontPolygonMode, CELL_GCM_POLYGON_MODE_FILL );
	GCM_FUNC( cellGcmSetBackPolygonMode, CELL_GCM_POLYGON_MODE_FILL );

	GCM_FUNC(
cellGcmSetPolygonOffset, 0, 0 ); GCM_FUNC( cellGcmSetPolygonOffsetFillEnable, CELL_GCM_FALSE ); // Force the viewport to match the current back buffer D3DVIEWPORT9 dForcedView = { 0, 0, m_nBackBufferSize[0], m_nBackBufferSize[1], m_viewZ[0], m_viewZ[1] }; SetViewport( &dForcedView ); GCM_FUNC( cellGcmSetScissor, 0, 0, m_nBackBufferSize[0], m_nBackBufferSize[1] ); // Reset some cached gcm state m_userClipPlanesState = 0; m_shaderVxConstants = 0; m_dirtyCachesMask |= ( kDirtyVxConstants | kDirtyVxShader | kDirtyClipPlanes | kDirtyPxShader | kDirtyPxConstants ); } inline void CGcmDrawState::ResetSurfaceToKnownDefaultState() { PackData(kDataResetSurface); } //-------------------------------------------------------------------------------------------------- // Blit //-------------------------------------------------------------------------------------------------- inline void UnpackTransferImage(uint8 mode, uint32 dstOffset, uint32 dstPitch, uint32 dstX, uint32 dstY, uint32 srcOffset, uint32 srcPitch, uint32 srcX, uint32 srcY, uint32 width, uint32 height, uint32 bytesPerPixel ) { GCM_FUNC(cellGcmSetTransferImage, mode, dstOffset, dstPitch, dstX, dstY, srcOffset, srcPitch, srcX, srcY, width, height, bytesPerPixel ); } inline void CGcmDrawState::SetTransferImage(uint8 mode, uint32 dstOffset, uint32 dstPitch, uint32 dstX, uint32 dstY, uint32 srcOffset, uint32 srcPitch, uint32 srcX, uint32 srcY, uint32 width, uint32 height, uint32 bytesPerPixel ) { // return UnpackTransferImage( mode, dstOffset, dstPitch, dstX, dstY, srcOffset, // srcPitch, srcX, srcY, width, height, bytesPerPixel); uint32 aValues[12]; aValues[0] = mode; aValues[1] = dstOffset; aValues[2] = dstPitch; aValues[3] = dstX; aValues[4] = dstY; aValues[5] = srcOffset; aValues[6] = srcPitch; aValues[7] = srcX; aValues[8] = srcY; aValues[9] = width; aValues[10] = height; aValues[11] = bytesPerPixel; PackData(kDataTransferImage, 0, sizeof(aValues), (void*)aValues); } 
//--------------------------------------------------------------------------------------------------
// State Flushing and Pixel Shader Patching
//--------------------------------------------------------------------------------------------------

// Replays every record queued in the per-draw data stream (m_pData .. m_pDataCursor).
// Each record is a DrawData header (m_type, m_size, m_idx) followed by m_size bytes of
// payload. The record type selects the Unpack* handler or direct copy below.
// Afterwards it updates the high-water / average size statistics and rewinds the cursor.
inline void CGcmDrawState::UnpackData()
{
	// Usage statistics, kept across calls.
	static uint32 highWater = 0;
	static float average = 0.0f;
	static uint32 count = 0;
#ifndef SPU
	static int display = 4000;
#endif

	m_nNumECB = 0;

	// Payload bytes seen per record type during this call (only reported on a new high water).
	int aSizes[64];
	memset(aSizes, 0, sizeof(aSizes));

	DrawData* pSrc = (DrawData*)m_pData;

	while ((uint8*)pSrc < m_pDataCursor)
	{
		// The payload starts right after the header; view it both as uint32 and as float.
		uint32* pVals = (uint32*)(pSrc+1);
		float* pfVals = (float*)pVals;

		// NOTE(review): m_type indexes the 64-entry array unchecked - assumes every record
		// type value is below 64; confirm against the kData* enum.
		aSizes[pSrc->m_type] += pSrc->m_size;

		switch (pSrc->m_type)
		{
		case kDataEcbTexture:
			V_memcpy(&m_aBindTexture[pSrc->m_idx], pVals, pSrc->m_size);
			break;

		case kDataSetRenderState:
			UnpackSetRenderState((D3DRENDERSTATETYPE)pVals[0], pVals[1]);
			break;

		case kDataFpuConsts:
			// m_idx is the first register; the payload lands directly in the constant shadow.
			V_memcpy(&g_aFPConst[pSrc->m_idx], pVals, pSrc->m_size);
			break;

		case kDataSetWorldSpaceCameraPosition:
			UnpackSetWorldSpaceCameraPosition(pfVals);
			break;

		case kDataStreamDesc:
			V_memcpy(&g_dxGcmVertexStreamSources[pSrc->m_idx], pVals, pSrc->m_size);
			break;

		case kDataVpuConsts:
			// m_size is in bytes; /16 converts it to a float4 count.
			GCM_FUNC( cellGcmSetVertexProgramParameterBlock, pSrc->m_idx, pSrc->m_size/16, (float*)pVals );
			break;

		case kDataZcullStats:
			GCM_FUNC( cellGcmSetReport, CELL_GCM_ZCULL_STATS, GCM_REPORT_ZCULL_STATS_0 );
			GCM_FUNC( cellGcmSetReport, CELL_GCM_ZCULL_STATS1, GCM_REPORT_ZCULL_STATS_1 );
			break;

		case kDataZcullLimit:
			// NOTE(review): reads pVals[0] and pVals[2], skipping pVals[1], whereas the other
			// two-value records use [0] and [1] - verify against how this record is packed.
			GCM_FUNC(cellGcmSetZcullLimit, pVals[0], pVals[2] );
			break;

		case kDataViewport:
			UnpackSetViewport((D3DVIEWPORT9*) pVals);
			break;

		case kDataScissor:
			UnpackSetScissorRect((DrawScissor_t*) pVals);
			break;

		case kDataSetZpassPixelCountEnable:
			UnpackSetZpassPixelCountEnable(pVals[0]);
			break;

		case kDataSetClearReport:
			UnpackSetClearReport(pVals[0]);
			break;

		case kDataSetReport:
			UnpackSetReport(pVals[0], pVals[1]);
			break;

		case kDataSetWriteBackEndLabel:
			UnpackSetWriteBackEndLabel(pVals[0], pVals[1]);
			break;

		case kDataUpdateSurface:
			UnpackUpdateSurface((CellGcmSurface*)pVals);
			break;

		case kDataResetSurface:
			UnpackResetSurfaceToKnownDefaultState();
			break;

		case kDataClearSurface:
			// Third value is the float depth; the rest are integers.
			UnpackClearSurface(pVals[0], pVals[1], pfVals[2], pVals[3], pVals[4] );
			break;

		case kDataTransferImage:
			UnpackTransferImage(pVals[0], pVals[1], pVals[2], pVals[3], pVals[4], pVals[5],
				pVals[6], pVals[7], pVals[8], pVals[9], pVals[10], pVals[11] );
			break;

		case kDataTexture:
			UnpackSetTexture(pVals[0], pVals[1], pVals[2]);
			break;

		case kDataResetTexture:
			UnpackResetTexture(pVals[0]);
			break;

		case kDataUpdateVtxBufferOffset:
			UnpackUpdateVtxBufferOffset((IDirect3DVertexBuffer9*)pVals[0], pVals[1]);
			break;

		case kDataECB:
			// Command buffers are consumed in queue order from m_aECB and cleared once executed.
			UnpackExecuteCommandBuffer(m_aECB[m_nNumECB]);
			m_aECB[m_nNumECB] = 0;
			m_nNumECB++;
			break;

		case kDataBeginScene:
			m_nDisabledSamplers = 0;
			m_nSetTransformBranchBits = 0;
			break;
		}

		// Advance past this record's header and payload.
		pSrc = (DrawData*)((uint8*)(pSrc+1)+pSrc->m_size);
	}

	m_nNumECB = 0;

	// Record High Water
	uint32 size = m_pDataCursor - m_pData;

	// Running mean of the stream size over all calls.
	average *= count;
	count++;
	average += size;
	average /= count;

#ifndef SPU
	uint32 avgInt = uint32(average + 0.5f);
#endif

	// NOTE(review): avgInt is declared only when SPU is not defined but is referenced below
	// unconditionally - presumably Msg discards its arguments in SPU builds; confirm.
	if (size > highWater)
	{
		highWater = size;
		Msg("\n>>>>>>>>>>>High Water %d (0x%x) : Average %d (0x%x) : Avg plus GcmDrawState = %d (0x%x) : This plus drawstate (%d (0x%x)) \n", highWater, highWater,
			avgInt, avgInt, avgInt + DRAWSTATE_SIZEOFDMA, avgInt + DRAWSTATE_SIZEOFDMA, size + DRAWSTATE_SIZEOFDMA, size + DRAWSTATE_SIZEOFDMA );

		for (int i = 1; i <= kDataTransferImage; i++ )
		{
			Msg( ">>>%d : %d\n", i, aSizes[i]);
		}
	}

	// display--;
	// if ( (display < 1) || ((size+sizeof(CGcmDrawState)) > 0x1800))
	// {
	// 	Msg("\n>>>>>>>>>>>High Water %d (0x%x) : Average %d (0x%x) : Avg plus GcmDrawState = %d (0x%x) : This (%d (0x%x)) \n", highWater, highWater,
	// 		avgInt, avgInt, avgInt + sizeof(CGcmDrawState), avgInt + sizeof(CGcmDrawState), size, size );
	//
	// 	display = 10000;
	// }

	// Reset cursor
	m_pDataCursor = m_pData;
}

// Sends every render state whose dirty bit is set to the GPU. The dirty mask is read into a
// local and cleared before any command is issued.
inline void CGcmDrawState::CommitRenderStates()
{
	uint nMask =
m_dirtyStatesMask;
	m_dirtyStatesMask = 0;

	if ( nMask & kDirtyDepthMask)
	{
		GCM_FUNC(cellGcmSetDepthMask, m_nSetDepthMask);
	}

	if ( nMask & kDirtyZEnable )
	{
		GCM_FUNC( cellGcmSetDepthTestEnable, m_ZEnable );
	}

	if ( nMask & kDirtyZFunc )
	{
		GCM_FUNC( cellGcmSetDepthFunc, m_ZFunc );
	}

	if ( nMask & kDirtyColorWriteEnable )
	{
		GCM_FUNC( cellGcmSetColorMask, m_ColorWriteEnable);
	}

	if ( nMask & kDirtyCullMode )
	{
		// Culling is driven through the front-face winding; the cull face itself is not
		// touched here. Any other m_CullMode value issues nothing.
		switch(m_CullMode)
		{
		case D3DCULL_NONE:
			GCM_FUNC( cellGcmSetCullFaceEnable, CELL_GCM_FALSE );
			break;

		case D3DCULL_CW:
			GCM_FUNC( cellGcmSetCullFaceEnable, CELL_GCM_TRUE );
			GCM_FUNC( cellGcmSetFrontFace, CELL_GCM_CCW ); // opposite from D3D
			break;

		case D3DCULL_CCW:
			GCM_FUNC( cellGcmSetCullFaceEnable, CELL_GCM_TRUE );
			GCM_FUNC( cellGcmSetFrontFace, CELL_GCM_CW ); // opposite from D3D
			break;
		}
	}

	if ( nMask & kDirtyAlphablendEnable )
	{
		GCM_FUNC( cellGcmSetBlendEnable, m_AlphablendEnable );
	}

	if (nMask & kDirtyBlendOp)
	{
		// The D3D blend op is translated here, at commit time; the same equation is used for
		// colour and alpha.
		uint32 Value = m_BlendOp;
		uint16 equation = dxtogl_blendop[ Value ];
		GCM_FUNC( cellGcmSetBlendEquation, equation, equation );
	}

	if ( nMask & kDirtySrgbWriteEnable )
	{
		uint32 Value = m_SrgbWriteEnable;
		GCM_FUNC( cellGcmSetFragmentProgramGammaEnable, !!Value );
	}

	if ( nMask & kDirtyAlphaTestEnable )
	{
		uint32 Value = m_AlphaTestEnable;
		GCM_FUNC( cellGcmSetAlphaTestEnable, !!Value );
	}

	if ( nMask & kDirtyStencilEnable )
	{
		uint32 Value = m_StencilEnable;
		GCM_FUNC( cellGcmSetStencilTestEnable, !!Value );
	}

	if ( nMask & kDirtyStencilWriteMask )
	{
		uint32 Value = m_StencilWriteMask;
		GCM_FUNC( cellGcmSetStencilMask, Value );
	}

	if ( nMask & kDirtyFillMode )
	{
		// Maps the D3D fill mode onto the GCM polygon modes by offset from the POINT entries.
		uint32 Value = m_FillMode;
		uint32 mode = CELL_GCM_POLYGON_MODE_POINT + ( Value - D3DFILL_POINT );
		GCM_FUNC( cellGcmSetFrontPolygonMode, mode );
		GCM_FUNC( cellGcmSetBackPolygonMode, mode );
	}

	if ( nMask & CGcmDrawState::kDirtyBlendFactor )
	{
		// Same source/destination factors for colour and alpha.
		GCM_FUNC( cellGcmSetBlendFunc, m_blends[0], m_blends[1], m_blends[0], m_blends[1] );
	}

	if ( nMask & CGcmDrawState::kDirtyAlphaFunc )
	{
		GCM_FUNC( cellGcmSetAlphaFunc, m_alphaFunc.func, m_alphaFunc.ref );
	}

	if ( nMask & CGcmDrawState::kDirtyStencilOp )
	{
		// Front and back faces share the same stencil ops.
		GCM_FUNC( cellGcmSetStencilOp, m_stencilOp.fail, m_stencilOp.dfail, m_stencilOp.dpass );
		GCM_FUNC( cellGcmSetBackStencilOp, m_stencilOp.fail, m_stencilOp.dfail, m_stencilOp.dpass );
	}

	if ( nMask & CGcmDrawState::kDirtyStencilFunc )
	{
		GCM_FUNC( cellGcmSetStencilFunc, m_stencilFunc.func, m_stencilFunc.ref, m_stencilFunc.mask );
		GCM_FUNC( cellGcmSetBackStencilFunc, m_stencilFunc.func, m_stencilFunc.ref, m_stencilFunc.mask );
	}

	if ( nMask & CGcmDrawState::kDirtyScissor )
	{
		if( m_scissor.enabled )
		{
			GCM_FUNC( cellGcmSetScissor, m_scissor.x, m_scissor.y, m_scissor.w, m_scissor.h );
		}
		else
		{
			GCM_FUNC( cellGcmSetScissor, 0, 0, 4095, 4095 ); // disable scissor
		}
	}

	if ( nMask & CGcmDrawState::kDirtyDepthBias )
	{
		// The members are reinterpreted as floats (UnpackSetRenderState stores the raw DWORD).
		float units = *((float*)&m_depthBias.units);
		GCM_FUNC( cellGcmSetPolygonOffset, *((float*)&m_depthBias.factor), /* NEED 2x here:see PSGL! */ 2.0f * units );

		// NOTE(review): the test below compares the members directly rather than the
		// reinterpreted floats. If they hold raw float bits in an integer type, a -0.0f bias
		// would count as non-zero - confirm the member type.
		if ( ( m_depthBias.factor != 0.0f ) || ( m_depthBias.units != 0.0f ) )
		{
			GCM_FUNC( cellGcmSetPolygonOffsetFillEnable, CELL_GCM_TRUE );
		}
		else
		{
			GCM_FUNC( cellGcmSetPolygonOffsetFillEnable, CELL_GCM_FALSE );
		}
	}
}

// Binds vertex data arrays for the current vertex shader. For every attribute slot the
// shader's input mask enables and the declaration maps to an element, the matching stream is
// bound; other slots are disabled. g_cacheSetVertexDataArray holds the last binding per slot,
// so GCM commands are only issued when something changed.
//   pDecl           - vertex declaration supplying the slot-to-element mapping.
//   baseVertexIndex - passed through to the per-slot cache entry.
inline void CGcmDrawState::CommitVertexBindings(IDirect3DVertexDeclaration9 * pDecl, uint32 baseVertexIndex)
{
	// push vertex buffer state for the current vertex decl

	uint uiVertexSlotMask = m_pVertexShaderData->m_attributeInputMask;

	if ( !uiVertexSlotMask) Error(">>>>Blank vertex shader attr\n");

	// Bit 0 of uiVertexSlotMask always refers to the current slot; the mask shifts right once per iteration.
	for( int nStreamIndex = 0; nStreamIndex < D3D_MAX_STREAMS; ++ nStreamIndex, uiVertexSlotMask >>= 1 )
	{
		SetVertexDataArrayCache_t *pOldCache = &g_cacheSetVertexDataArray[nStreamIndex];

		// Check if this attribute is unused by the shader program
		// and try to find the match in the decl.
		if ( int j = ( uiVertexSlotMask & 1 ) ?
pDecl->m_cgAttrSlots[ nStreamIndex ] : 0 ) { D3DVERTEXELEMENT9_GCM *elem = &pDecl->m_elements[ j - 1 ]; int streamIndex = elem->m_dxdecl.Stream; Assert( streamIndex >= 0 && streamIndex < D3D_MAX_STREAMS ); D3DStreamDesc &dsd = g_dxGcmVertexStreamSources[ streamIndex ]; D3DVERTEXELEMENT9_GCM::GcmDecl_t const &gcmvad = elem->m_gcmdecl; const uint8_t stride = dsd.m_stride; const uint8_t size = gcmvad.m_datasize; const uint8_t type = gcmvad.m_datatype; SetVertexDataArrayCache_t newCache( dsd, gcmvad, baseVertexIndex ); if( *pOldCache != newCache ) { // Msg(">>>>>>>>>> Offset 0x%x <<<<<<<<<<\n\n", newCache.GetLocalOffset()); GCM_FUNC( cellGcmSetVertexDataArray, nStreamIndex, 1, stride, size, type, CELL_GCM_LOCATION_LOCAL, newCache.GetLocalOffset() ); // // if (!newCache.GetLocalOffset()) Error (">>>>>>>>>>>>>>>>>address %x <<<<<<<<<<<<<<<<<<<<<<\n", newCache.GetLocalOffset()); *pOldCache = newCache; } continue; } if( !pOldCache->IsNull() ) { // Disable data slot if we failed to bind proper data stream GCM_FUNC( cellGcmSetVertexDataArray, nStreamIndex, 1, 0, 0, CELL_GCM_VERTEX_F, CELL_GCM_LOCATION_LOCAL, 0 ); pOldCache->SetNull(); // disable } } } inline void CGcmDrawState::CommitSampler(uint32 nSampler) { D3DSamplerDesc const & dxsamp = m_aSamplers[ nSampler ]; #ifdef SPU extern CPs3gcmTextureLayout gaLayout[D3D_MAX_TEXTURES]; CPs3gcmTextureLayout const & texlayout = gaLayout[nSampler]; #else CPs3gcmTextureLayout const & texlayout = *((CPs3gcmTextureLayout const *)m_textures[ nSampler ].m_eaLayout); #endif uint nMips = texlayout.m_mipCount; Assert( nMips > 0 ); CPs3gcmTextureLayout::Format_t & texlayoutFormat = g_ps3texFormats[texlayout.m_nFormat]; // If bReadsRawDepth is true, a depth texture has been set but shadow filtering has NOT been enabled. In this case, the shader is expecting to read // the texture as A8R8G8B8 and manually recover depth (used for depth feathering). 
bool bReadsRawDepth = ( texlayoutFormat.m_gcmFormat == CELL_GCM_TEXTURE_DEPTH24_D8 ) && !dxsamp.m_shadowFilter; // GCM_FUNC( cellGcmReserveMethodSize, 11 ); uint32_t *current = gpGcmContext->current; current[0] = CELL_GCM_METHOD_HEADER_TEXTURE_OFFSET( nSampler, 8 ); current[1] = CELL_GCM_METHOD_DATA_TEXTURE_OFFSET( m_textures[ nSampler ].Offset() ); uint locn; if (current[1] & 1) { locn = CELL_GCM_LOCATION_LOCAL; current[1] &= 0xFFFFFFFE; } else { locn = CELL_GCM_LOCATION_MAIN; } current[2] = CELL_GCM_METHOD_DATA_TEXTURE_FORMAT( locn, texlayout.IsCubeMap() ? CELL_GCM_TRUE : CELL_GCM_FALSE, texlayout.IsVolumeTex() ? CELL_GCM_TEXTURE_DIMENSION_3 : CELL_GCM_TEXTURE_DIMENSION_2, ( bReadsRawDepth ? CELL_GCM_TEXTURE_A8R8G8B8 // bind depth textures as ARGB and reassemble depth in shader : texlayoutFormat.m_gcmFormat ) | ( texlayout.IsSwizzled() ? CELL_GCM_TEXTURE_SZ : CELL_GCM_TEXTURE_LN ), nMips ); current[3] = CELL_GCM_METHOD_DATA_TEXTURE_ADDRESS( dxtogl_addressMode[ dxsamp.m_addressModeU ], dxtogl_addressMode[ dxsamp.m_addressModeV ], dxtogl_addressMode[ dxsamp.m_addressModeW ], CELL_GCM_TEXTURE_UNSIGNED_REMAP_NORMAL, dxsamp.m_shadowFilter ? CELL_GCM_TEXTURE_ZFUNC_GEQUAL : CELL_GCM_TEXTURE_ZFUNC_NEVER, ( ( texlayoutFormat.m_gcmCaps & CPs3gcmTextureLayout::Format_t::kCapSRGB ) && dxsamp.m_srgb ) ? CELL_GCM_TEXTURE_GAMMA_R | CELL_GCM_TEXTURE_GAMMA_G | CELL_GCM_TEXTURE_GAMMA_B : 0, 0 ); current[4] = CELL_GCM_METHOD_DATA_TEXTURE_CONTROL0( CELL_GCM_TRUE, (uint16)( Max( Min( dxsamp.m_maxMipLevel, nMips - 1 ), 0u ) * 256.0f ), (uint16)( Max( nMips - 1, 0u ) * 256.0f ), texlayout.IsVolumeTex() || ( ( dxsamp.m_minFilter != D3DTEXF_ANISOTROPIC ) && ( dxsamp.m_magFilter != D3DTEXF_ANISOTROPIC ) ) ? CELL_GCM_TEXTURE_MAX_ANISO_1 // 3D textures cannot have anisotropic filtering! : CELL_GCM_TEXTURE_MAX_ANISO_4 // dxtogl_anisoIndexHalf[ ( dxsamp.m_maxAniso / 2 ) & ( ARRAYSIZE( dxtogl_anisoIndexHalf ) - 1 ) ] ); current[5] = bReadsRawDepth ? 
CELL_GCM_REMAP_MODE( CELL_GCM_TEXTURE_REMAP_ORDER_XYXY, CELL_GCM_TEXTURE_REMAP_FROM_B, CELL_GCM_TEXTURE_REMAP_FROM_A, CELL_GCM_TEXTURE_REMAP_FROM_R, CELL_GCM_TEXTURE_REMAP_FROM_G, CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP ) : texlayoutFormat.m_gcmRemap; if( bReadsRawDepth ) current[6] = CELL_GCM_METHOD_DATA_TEXTURE_FILTER( 0, CELL_GCM_TEXTURE_NEAREST, CELL_GCM_TEXTURE_NEAREST, CELL_GCM_TEXTURE_CONVOLUTION_QUINCUNX ); else current[6] = CELL_GCM_METHOD_DATA_TEXTURE_FILTER( 0, // 0x1FBE, // 0x1FC0, // corresponding to PSGL 0 mip bias, formula: [( bias - .26 )*256] & 0x1FFF dxtogl_minFilter[ dxsamp.m_minFilter ][ Min( (D3DTEXTUREFILTERTYPE)dxsamp.m_mipFilter, D3DTEXF_LINEAR ) ], dxtogl_magFilter[ dxsamp.m_magFilter ], CELL_GCM_TEXTURE_CONVOLUTION_QUINCUNX ); current[7] = CELL_GCM_METHOD_DATA_TEXTURE_IMAGE_RECT( texlayout.m_key.m_size[1], texlayout.m_key.m_size[0] ); current[8] = CELL_GCM_METHOD_DATA_TEXTURE_BORDER_COLOR( 0 // Border color always 0 ... dxsamp.m_borderColor // R=>>16; G=>>8; B=>>0; A=>>24 (same thing as GCM, see JSGCM_CALC_COLOR_LE_ARGB8) ); current[9] = CELL_GCM_METHOD_HEADER_TEXTURE_CONTROL3( nSampler, 1 ); current[10] = CELL_GCM_METHOD_DATA_TEXTURE_CONTROL3( texlayout.DefaultPitch2( g_ps3texFormats ), texlayout.m_key.m_size[2] ); gpGcmContext->current = ¤t[11]; } inline void CGcmDrawState::CommitSamplers() { // Unpack from Fixed data into m_aSamplers for (uint32 lp = 0; lp < D3D_MAX_SAMPLERS; lp++) { uint32 SamplerIdx = m_pFixed->m_aSamplerIdx[lp]; if (SamplerIdx != 0xFF) m_aSamplers[lp] = m_pFixed->m_aSamplers[SamplerIdx]; } // PS3 is binding textures here uint mask = m_dirtySamplersMask; m_dirtySamplersMask = 0; uint16 uiPixelShaderInputMask = m_pPixelShaderData ? 
m_pPixelShaderData->m_samplerInputMask : 0;
	uint16 uiRunningUpBitMask = 1;

	// Remember which samplers were already disabled, and rebuild the set during this pass.
	uint nDisabledSamplers = m_nDisabledSamplers;
	m_nDisabledSamplers = 0;

	for ( int nSampler = 0; nSampler < 16; ++ nSampler, mask >>= 1, uiPixelShaderInputMask >>= 1, uiRunningUpBitMask <<= 1 )
	{
		if ( ( uiPixelShaderInputMask & 1 ) == 0 ) // The texture will not be sampled by pixel shader, unset it
		{
			// optimization
			if( !( nDisabledSamplers & uiRunningUpBitMask ) )
			{
				GCM_FUNC( cellGcmSetTextureControl, nSampler, CELL_GCM_FALSE, 0, 0, 0 );
			}
			m_dirtySamplersMask |= uiRunningUpBitMask; // Keep the sampler dirty because we might have textures previously set on it
			m_nDisabledSamplers |= uiRunningUpBitMask; // don't disable repeatedly
			continue;
		}

		if ( ( mask & 1 ) == 0 ) // If the sampler is not dirty then don't do anything
			continue;

		if ( m_textures[nSampler].IsNull() ) // The sampler is dirty, but no texture on it, disable the sampler
		{
			// optimization
			if( !( nDisabledSamplers & uiRunningUpBitMask ) )
			{
				GCM_FUNC( cellGcmSetTextureControl, nSampler, CELL_GCM_FALSE, 0, 0, 0 );
			}
			m_nDisabledSamplers |= uiRunningUpBitMask; // don't disable repeatedly
			continue;
		}

		// Dirty, used by the shader and has a texture: emit the full sampler setup.
		CommitSampler(nSampler);
	}

	// Per-draw texture replacement flags are consumed; clear them for the next draw.
	m_pFixed->m_nInstanced = 0;
}

// vec_perm control vectors used by PatchUcodeConstSwap, indexed by (number of 32-bit words to
// patch - 1). Byte selectors 0x00-0x0F pick from the first vec_perm operand (the constant) with
// the two 16-bit halves of each word swapped; selectors 0x10-0x1F pick the matching bytes of the
// second operand (the existing ucode), i.e. leave those words untouched.
// Only four rows are initialised; the array is declared with five entries.
static vector unsigned int g_swap16x32m1[5] =
{
	{0x02030001, 0x14151617, 0x18191A1B, 0x1C1D1E1F},
	{0x02030001, 0x06070405, 0x18191A1B, 0x1C1D1E1F},
	{0x02030001, 0x06070405, 0x0A0B0809, 0x1C1D1E1F},
	{0x02030001, 0x06070405, 0x0A0B0809, 0x0E0F0C0D}
};

// Overwrites the first (nLengthMinus1 + 1) words of the 16-byte quantity at pDestination with
// the halfword-swapped words of f4Source, keeping the remaining words of the destination.
static inline void PatchUcodeConstSwap( void * pDestination, const fltx4 f4Source, int nLengthMinus1 )
{
	*( fltx4* )pDestination = vec_perm( f4Source, *( fltx4* )pDestination, ( vector unsigned char )g_swap16x32m1[nLengthMinus1] );
}

//--------------------------------------------------------------------------------------------------
// PatchUcode
// Applies nPatchCount patch words to fragment program microcode, inserting the current
// pixel shader constants from g_aFPConst.
// Patch word layout as decoded below:
//   bits 31..30 : component count minus one
//   bits 25..16 : constant register index (masked with 0x3FF)
//   bits 15..0  : destination offset into pUCode16, in 16-byte units
//--------------------------------------------------------------------------------------------------
inline void CGcmDrawState::PatchUcode(fltx4 * pUCode16, uint32 * pPatchTable, uint nPatchCount )
{
	for ( uint nPatchIndex = 0; nPatchIndex < nPatchCount; ++nPatchIndex )
	{
		uint nPatchWord = pPatchTable[ nPatchIndex ], nLengthMinus1 = nPatchWord >> 30;
		uint nUcodeOffsetQword = nPatchWord & 0xFFFF;
		uint nRegister =
( nPatchWord >> 16 ) & 0x3FF;
		fltx4 & reg = g_aFPConst[nRegister];
		PatchUcodeConstSwap( pUCode16 + nUcodeOffsetQword, reg, nLengthMinus1 );
	}
}

#ifndef SPU

//--------------------------------------------------------------------------------------------------
// AllocateUcode (PPU only)
// Reserves space in the fragment-program patch ring buffer for a patched copy of pFp's
// microcode and stores the destination address in m_eaOutputUCode.
// The reservation is the ucode size plus 400 bytes, rounded up to a multiple of 128. If it would
// cross a GCM_PATCHSEGSIZE boundary, allocation moves to the start of the next segment, first
// waiting until the segment index read from g_label_fppatch_ring_seg differs from that segment.
//--------------------------------------------------------------------------------------------------
inline void CGcmDrawState::AllocateUcode(FpHeader_t* pFp)
{
	uint32 patchIdx = g_ps3gcmGlobalState.m_nPatchIdx;
	uint32 uCodeSize = pFp->m_nUcodeSize;
	uint32 patchSize = AlignValue(uCodeSize + 400, 128);
	uint32 nEndPos = patchIdx + patchSize;
	uint32 nEndSeg = nEndPos/GCM_PATCHSEGSIZE;
	uint32 writeSeg = patchIdx/GCM_PATCHSEGSIZE;

	// are we out of space and so need to move to the next segment ?
	if (nEndSeg != writeSeg)
	{
		// move to the next segment
		uint32 nextSeg = (writeSeg + 1) % (GCM_PATCHBUFFSIZE/GCM_PATCHSEGSIZE);

		// Wait for RSX not to be in this segment
		// (the cached read segment is tried first; the label is only read if that one collides)
		uint32 readSeg = g_ps3gcmGlobalState.m_nPatchReadSeg;
		if (nextSeg == readSeg) readSeg = *g_label_fppatch_ring_seg;

		// Flush unconditionally once a segment switch is needed, before polling the label.
		gpGcmDrawState->CmdBufferFlush();

		uint32 spins = 0;
		while (nextSeg == readSeg)
		{
			spins++;
			sys_timer_usleep(60); // Not on SPU..
			readSeg = *g_label_fppatch_ring_seg;
		}
		// if (spins > 0) Msg("Patch Spins %d\n", spins);

		// Move to the next segment and record the new readSeg
		patchIdx = (nextSeg * GCM_PATCHSEGSIZE);
		writeSeg = nextSeg;
		g_ps3gcmGlobalState.m_nPatchReadSeg = readSeg;
		// Msg("New Patch Segment 0x%x\n", patchIdx);
	}

	uint8* pDst = g_ps3gcmGlobalState.m_pPatchBuff + patchIdx;
	patchIdx += patchSize;
	g_ps3gcmGlobalState.m_nPatchIdx = patchIdx;
	m_eaOutputUCode = uintp(pDst);
}

#endif

//--------------------------------------------------------------------------------------------------
// CopyUcode
// On PPU, copies the microcode that follows the FpHeader_t into the patch-buffer slot at
// m_eaOutputUCode (nothing is copied here on SPU). On both, emits a write-back-end label
// carrying the index of the patch-buffer segment that contains that slot.
// Returns m_eaOutputUCode as a fltx4 pointer.
//--------------------------------------------------------------------------------------------------
inline fltx4* CGcmDrawState::CopyUcode(FpHeader_t* pFp)
{
	uint8* pDst = (uint8*)m_eaOutputUCode;
	uint32 patchIdx = pDst - g_ps3gcmGlobalState.m_pPatchBuff;
	uint32 uCodeSize = pFp->m_nUcodeSize;
	uint32 writeSeg = patchIdx/GCM_PATCHSEGSIZE;

#ifndef SPU
	V_memcpy(pDst, (uint8*)(pFp+1), uCodeSize);
#endif

	// Set the label to say we're using shaders in this part of the ring buffer now
	GCM_FUNC(cellGcmSetWriteBackEndLabel, GCM_LABEL_FPPATCH_RING_SEG, writeSeg);

	return (fltx4*) pDst;
}

//--------------------------------------------------------------------------------------------------
// BindFragmentProgram
// Produces a patched copy of the current pixel shader's microcode and emits the commands that
// select it, followed by the shader's texture-control words.
// Data following the FpHeader_t, as read below: ucode, then m_nPatchCount patch words, then
// m_nTexControls pairs of texture-control words.
//--------------------------------------------------------------------------------------------------
inline void CGcmDrawState::BindFragmentProgram(uint32
nVertexToFragmentProgramAttributeMask)
{
	FpHeader_t * fpHeader = m_pPixelShaderData->m_eaFp;

	// Copy and Patch Ucode
	uint32* pPatches = (uint32*)((uint8*)(fpHeader + 1) + fpHeader->m_nUcodeSize);
	fltx4* pUcode = CopyUcode(fpHeader);

#ifndef SPU
	// PPU: patch the copy that CopyUcode placed in the patch buffer.
	PatchUcode(pUcode, pPatches, fpHeader->m_nPatchCount );
#else
	// SPU: patch the ucode that follows the header in place, then DMA it to m_eaOutputUCode.
	fltx4* pUcodeSPU = (fltx4*) (fpHeader+1);
	PatchUcode(pUcodeSPU, pPatches, fpHeader->m_nPatchCount );
	gSpuMgr.DmaSync();
	gSpuMgr.DmaPut(m_eaOutputUCode, (void*)pUcodeSPU, fpHeader->m_nUcodeSize, SPU_DMAPUT_TAG);
#endif

	// Set Fragment Shader
	uint32 nFragmentProgramOffset = uintp(pUcode);
	nFragmentProgramOffset += g_ps3gcmGlobalState.m_nIoOffsetDelta;

	// The texture-control table follows the patch table.
	uint32* pTexControls = pPatches + fpHeader->m_nPatchCount;
	uint nTexControls = fpHeader->m_nTexControls;

	// GCM_FUNC( cellGcmReserveMethodSize, 6 + (2 * nTexControls) );
	CELL_GCM_METHOD_SET_SHADER_CONTROL( gpGcmContext->current, fpHeader->m_nShaderControl0 ); // +2
	CELL_GCM_METHOD_SET_SHADER_PROGRAM( gpGcmContext->current, CELL_GCM_LOCATION_MAIN + 1, ( nFragmentProgramOffset & 0x1fffffff ) ); // +2
	CELL_GCM_METHOD_SET_VERTEX_ATTRIB_OUTPUT_MASK( gpGcmContext->current, nVertexToFragmentProgramAttributeMask /*psh->m_attributeInputMask | 0x20*/ ); // +2 - this gets overwritten later, so it's useless here , but GPAD says "unrecognized sequence" if I don't insert this command here

	// Each texture control occupies two command words.
	V_memcpy( gpGcmContext->current, pTexControls, fpHeader->m_nTexControls * sizeof( uint32 ) * 2 );
	gpGcmContext->current += 2 * nTexControls;
}

//--------------------------------------------------------------------------------------------------
// CommitShaders
// Latches and clears m_dirtyCachesMask, then emits cache invalidations, the vertex shader
// command buffer, transform branch bits and the fragment program as required.
// Note that when a pixel shader is set, BindFragmentProgram runs on every call, regardless of
// the dirty bits.
//--------------------------------------------------------------------------------------------------
void CGcmDrawState::CommitShaders()
{
	uint nMask = m_dirtyCachesMask;
	m_dirtyCachesMask = 0;

	if( nMask & kDirtyVxCache )
	{
		GCM_FUNC(cellGcmSetInvalidateVertexCache);
	}
	if( nMask & kDirtyTxCache )
	{
		GCM_FUNC( cellGcmSetInvalidateTextureCache, CELL_GCM_INVALIDATE_TEXTURE );
	}
	if ( nMask & kDirtyVxShader )
	{
		// The vertex shader carries a prebuilt block of command words; copy it verbatim.
		void* pVertexShaderCmdBuffer = (void*)(m_pVertexShaderData->m_pVertexShaderCmdBuffer );
		if( pVertexShaderCmdBuffer )
		{
			uint32 nVertexShaderCmdBufferWords = m_pVertexShaderData->m_nVertexShaderCmdBufferWords;
			// GCM_FUNC( cellGcmReserveMethodSize, nVertexShaderCmdBufferWords );
			// uint32_t *current = gpGcmContext->current;
			V_memcpy(gpGcmContext->current, pVertexShaderCmdBuffer, nVertexShaderCmdBufferWords * sizeof( uint32 ));
			gpGcmContext->current += nVertexShaderCmdBufferWords;
		}
	}
	if ( nMask & kDirtyVxConstants )
	{
		uint nBits = m_shaderVxConstants;
		// Disabling this check because it causes lots of per-vertex dynamic lighting problems in common_vs_fxc.h function DoLighting().
		// NOTE(review): despite the comment above, the redundancy check below is active in
		// the code as it stands - confirm which behaviour is intended.
		if( m_nSetTransformBranchBits != nBits )
		{
			GCM_FUNC( cellGcmSetTransformBranchBits, nBits );
			m_nSetTransformBranchBits = nBits;
		}
	}
	if ( nMask & ( kDirtyVxShader | kDirtyClipPlanes ) )
	{
		// User clip plane control is currently not emitted; the call is kept for reference.
		// GCM_FUNC( cellGcmSetUserClipPlaneControl,
		// ( ( m_pGcmState->vertAttrOutputMask & ( 1 << ( 6 + 0 ) ) ) != 0 ) ? CELL_GCM_USER_CLIP_PLANE_ENABLE_GE : 0,
		// ( ( m_pGcmState->vertAttrOutputMask & ( 1 << ( 6 + 1 ) ) ) != 0 ) ? CELL_GCM_USER_CLIP_PLANE_ENABLE_GE : 0,
		// ( ( m_pGcmState->vertAttrOutputMask & ( 1 << ( 6 + 2 ) ) ) != 0 ) ? CELL_GCM_USER_CLIP_PLANE_ENABLE_GE : 0,
		// ( ( m_pGcmState->vertAttrOutputMask & ( 1 << ( 6 + 3 ) ) ) != 0 ) ? CELL_GCM_USER_CLIP_PLANE_ENABLE_GE : 0,
		// ( ( m_pGcmState->vertAttrOutputMask & ( 1 << ( 6 + 4 ) ) ) != 0 ) ? CELL_GCM_USER_CLIP_PLANE_ENABLE_GE : 0,
		// ( ( m_pGcmState->vertAttrOutputMask & ( 1 << ( 6 + 5 ) ) ) != 0 ) ? CELL_GCM_USER_CLIP_PLANE_ENABLE_GE : 0
		// );
	}

	uint setVertexAttribOutputMask = ( nMask & ( kDirtyVxShader | kDirtyPxShader ) );
	uint nVertexToFragmentProgramAttributeMask = m_pVertexShaderData->m_attributeOutputMask;
	if ( m_pPixelShaderData )
	{
		// With a pixel shader bound, the mask comes from its inputs, with bit 0x20 always set.
		nVertexToFragmentProgramAttributeMask = m_pPixelShaderData->m_attributeInputMask;
		nVertexToFragmentProgramAttributeMask |= 0x20;
		BindFragmentProgram( nVertexToFragmentProgramAttributeMask );
	}
	else
	{
		// we need to set the shader, but no shader specified, so set the default empty shader
		if ( nMask & ( kDirtyPxShader | kDirtyPxConstants ) )
		{
			CELL_GCM_METHOD_SET_SHADER_CONTROL( gpGcmContext->current, g_ps3gcmGlobalState.m_nPsEmptyShaderControl0 ); // +2
			CELL_GCM_METHOD_SET_SHADER_PROGRAM( gpGcmContext->current, CELL_GCM_LOCATION_LOCAL + 1, ( g_ps3gcmGlobalState.m_pShaderPsEmptyBuffer.Offset() & 0x1fffffff ) ); // +2
			CELL_GCM_METHOD_SET_VERTEX_ATTRIB_OUTPUT_MASK( gpGcmContext->current, g_ps3gcmGlobalState.m_nPsEmptyAttributeInputMask | 0x20 );
		}
	}

	if ( setVertexAttribOutputMask )
	{
		GCM_FUNC( cellGcmSetVertexAttribOutputMask, nVertexToFragmentProgramAttributeMask );
	}
}

// Clears the whole shadow array of pixel shader constants (g_aFPConst) to zero.
inline void ZeroFPConsts()
{
	memset(g_aFPConst, 0, sizeof(g_aFPConst));
}

// Uploads all GCM_DS_MAXVPCONST vertex program constants from g_aVPConst, starting at index 0.
// NOTE(review): the name suggests zeroing, but g_aVPConst is sent as-is - presumably it is
// expected to hold zeros; verify against where g_aVPConst is written.
inline void ZeroVPConsts()
{
	GCM_FUNC( cellGcmSetVertexProgramParameterBlock, 0, GCM_DS_MAXVPCONST, (float*)g_aVPConst);
}

#ifndef SPU

// PPU: tags this draw state as an end-of-frame command and hands it to the SPU.
inline void CGcmDrawState::EndFrame()
{
	m_cmd = CmdEndFrame;
	SendToSpu();
}

#endif

#ifndef SPU

// PPU: tags this draw state as a commit-states command and hands it to the SPU.
inline void CGcmDrawState::CommitStates()
{
	m_cmd = CmdCommitStates;
	SendToSpu();
}

#else

// SPU: performs the state-only part of a commit - optional free label, optional RSX reset and
// constant clears, unpacking of the packed per-draw data, then the dirty render states.
inline void CGcmDrawState::CommitStates()
{
	if (m_nFreeLabel) UnpackSetWriteBackEndLabel(GCM_LABEL_MEMORY_FREE, m_nFreeLabel);
	if ( m_dirtyStatesMask & kDirtyResetRsx) UnpackResetRsxState();
	if (m_dirtyStatesMask & kDirtyZeroAllPSConsts) ZeroFPConsts();
	if (m_dirtyStatesMask & kDirtyZeroAllVSConsts) ZeroVPConsts();

	UnpackData(); // Pulls out pixel shader consts and sets vertex shader consts

	CommitRenderStates();
}

#endif

inline void
CGcmDrawState::CommitAll(IDirect3DVertexDeclaration9 * pDecl, uint32 baseVertexIndex)
{
	// Full per-draw commit. Runs the same preamble as the SPU CommitStates (free label, RSX
	// reset, constant clears, UnpackData), then render states, vertex bindings for pDecl /
	// baseVertexIndex, samplers and shaders, in that order.
	if (m_nFreeLabel) UnpackSetWriteBackEndLabel(GCM_LABEL_MEMORY_FREE, m_nFreeLabel);
	if ( m_dirtyStatesMask & kDirtyResetRsx) UnpackResetRsxState();
	if (m_dirtyStatesMask & kDirtyZeroAllPSConsts) ZeroFPConsts();
	if (m_dirtyStatesMask & kDirtyZeroAllVSConsts) ZeroVPConsts();

	UnpackData(); // Pulls out pixel shader consts and sets vertex shader consts

#ifdef SPU
	// On SPU the texture layouts are fetched before the samplers are committed.
	extern void GetTextureLayouts();
	GetTextureLayouts();
#endif

	CommitRenderStates();
	CommitVertexBindings(pDecl, baseVertexIndex);
	CommitSamplers();
	CommitShaders();
}

//--------------------------------------------------------------------------------------------------
// Draw Prim
//--------------------------------------------------------------------------------------------------

#ifndef SPU

// Queues a user-pointer draw: the draw call itself is written to a call buffer by
// g_ps3gcmGlobalState.DrawPrimitiveUP, patch-buffer space is reserved if a pixel shader is set,
// and the draw state is sent to the SPU as CmdDrawPrimUP.
// Parameters written: m_param[0] = pDecl, [1] = call address plus the IO offset delta,
// [2] = vertex stride, [4] = address of g_ps3texFormats. m_param[3] is not written here.
inline void CGcmDrawState::DrawPrimitiveUP( IDirect3DVertexDeclaration9 * pDecl, D3DPRIMITIVETYPE nPrimitiveType,UINT nPrimitiveCount,
											CONST void *pVertexStreamZeroData, UINT nVertexStreamZeroStride )
{
	// Put drawcall into call buffer
	uint32 callAddr = g_ps3gcmGlobalState.DrawPrimitiveUP(nPrimitiveType, nPrimitiveCount, pVertexStreamZeroData, nVertexStreamZeroStride);

	// Allocate space to patch frag prog
	if ( m_pPixelShaderData)
	{
		AllocateUcode((FpHeader_t*)m_pPixelShaderData->m_eaFp);
	}

	// if (m_param[0] > uint32(0xD0000000) )
	// Error("Decl on Stack\n");

	m_cmd = CmdDrawPrimUP;
	m_param[0] = uintp(pDecl);
	m_param[1] = callAddr + g_ps3gcmGlobalState.m_nIoOffsetDelta;
	m_param[2] = nVertexStreamZeroStride;
	m_param[4] = (uint32)&g_ps3texFormats;

	SendToSpu();
}

// Queues an indexed draw as CmdDrawPrim. Calls Error() if the primitive type has no GCM mode;
// nothing is sent when the computed GCM index count is zero.
// MinVertexIndex and NumVertices are not used by this function.
inline void CGcmDrawState::DrawIndexedPrimitive( uint32 offset, IDirect3DVertexDeclaration9 * pDecl, D3DPRIMITIVETYPE Type,INT BaseVertexIndex,UINT MinVertexIndex,
												 UINT NumVertices,UINT startIndex,UINT nDrawPrimCount )
{
	uint8 uiGcmMode = GetGcmMode(Type);
	if( !uiGcmMode )
		Error("PS3 : Unsupported prim type\n");

	uint32 nPartitionStartIndex = startIndex;
	uint nPartitionPrimCount =
nDrawPrimCount; uint32 uiGcmCount = GetGcmCount( Type, nPartitionPrimCount ); uint32 ioMemoryIndexBuffer = offset + nPartitionStartIndex * sizeof( uint16 ) ; if (uiGcmCount) { if ( m_pPixelShaderData) { AllocateUcode((FpHeader_t*)m_pPixelShaderData->m_eaFp); } m_param[0] = uintp(pDecl); m_param[1] = BaseVertexIndex; m_param[2] = uiGcmMode; m_param[3] = ioMemoryIndexBuffer; m_param[4] = (uint32)&g_ps3texFormats; m_param[5] = uiGcmCount; m_cmd = CmdDrawPrim; SendToSpu(); } } #endif //-------------------------------------------------------------------------------------------------- // Execute command shader buffers //-------------------------------------------------------------------------------------------------- template FORCEINLINE T GetData( uint8 *pData ) { return * ( reinterpret_cast< T const *>( pData ) ); } inline void CGcmDrawState::BindTexture2( CPs3BindTexture_t bindTex) { // On SPU, we need to pull in the lmblock to get the correct offset #ifdef SPU extern CPs3gcmLocalMemoryBlock gLmBlock; gSpuMgr.DmaGetUNSAFE(&gLmBlock, uintp(bindTex.m_pLmBlock), sizeof(gLmBlock), SPU_DMAGET_TAG ); #endif // Check for same texture ? // Check for NULL texture ? uint32 stage = bindTex.m_sampler; if(bindTex.m_nLayout) { // Msg("New Bind Flags %d\n", bindTex.m_nBindFlags); // if(gBind != bindTex.m_nBindFlags) DebuggerBreak(); SetSamplerState( stage, D3DSAMP_SRGBTEXTURE, ( bindTex.m_nBindFlags & (TEXTURE_BINDFLAGS_SRGBREAD>>24) ) != 0 ); SetSamplerState( stage, D3DSAMP_SHADOWFILTER, ( bindTex.m_nBindFlags & (TEXTURE_BINDFLAGS_SHADOWDEPTH>>24) ) ? 
1 : 0 ); SetSamplerState( stage, D3DSAMP_ADDRESSU, bindTex.m_UWrap ); SetSamplerState( stage, D3DSAMP_ADDRESSV, bindTex.m_VWrap ); SetSamplerState( stage, D3DSAMP_ADDRESSW, bindTex.m_WWrap ); SetSamplerState( stage, D3DSAMP_MINFILTER, bindTex.m_minFilter ); SetSamplerState( stage, D3DSAMP_MAGFILTER, bindTex.m_magFilter ); SetSamplerState( stage, D3DSAMP_MIPFILTER, bindTex.m_mipFilter ); // if (m_textures[stage].m_nLocalOffset != bindTex.m_pLmBlock->Offset()) DebuggerBreak(); // if (m_textures[stage].m_eaLayout != bindTex.m_nLayout) DebuggerBreak(); #ifdef SPU gSpuMgr.DmaDone(SPU_DMAGET_TAG_WAIT); bindTex.m_pLmBlock = &gLmBlock; #endif m_textures[stage].m_nLocalOffset = bindTex.m_pLmBlock->Offset(); m_textures[stage].m_eaLayout = bindTex.m_nLayout; if (bindTex.m_pLmBlock->IsLocalMemory() ) { m_textures[stage].m_nLocalOffset |= 1; } m_dirtySamplersMask |= ( 1 << stage ); //PackData(kDataTexture, stage, m_textures[stage].m_nLocalOffset, m_textures[stage].m_eaLayout ); UnpackSetTexture(stage, m_textures[stage].m_nLocalOffset, m_textures[stage].m_eaLayout ); } else { #ifdef SPU gSpuMgr.DmaDone(SPU_DMAGET_TAG_WAIT); #endif UnpackResetTexture(stage); } } inline void CGcmDrawState::SetVertexShaderConstantInternal( int var, float const* pVec, int numVecs, bool bForce) { GCM_FUNC( cellGcmSetVertexProgramParameterBlock, var, numVecs, pVec ); } inline void CGcmDrawState::SetPixelShaderConstantInternal( int var, float const* pValues, int nNumConsts, bool bForce) { V_memcpy(&g_aFPConst[var], pValues, nNumConsts * 16); } #ifndef SPU #include "shaderapifast.h" #endif void CGcmDrawState::ExecuteCommandBuffer( uint8 *pCmdBuf ) { #ifndef SPU int* pOffset = (int*) (pCmdBuf + sizeof(int) + (2*sizeof(int))); for ( int i = 0; i < CBCMD_MAX_PS3TEX; i++) { uint32 offset = pOffset[i]; if (!offset) break; CPs3BindParams_t* pBindParams = (CPs3BindParams_t*)(offset + pCmdBuf); CPs3BindTexture_t tex; CPs3BindTexture_t* pTex = &tex; pTex->m_sampler = pBindParams->m_sampler; pTex->m_nBindFlags = 
pBindParams->m_nBindFlags; pTex->m_boundStd = pBindParams->m_boundStd; pTex->m_hTexture = pBindParams->m_hTexture; if (pTex->m_boundStd == -1) { ShaderApiFast( pShaderAPI )->GetPs3Texture(pTex, (ShaderAPITextureHandle_t)pTex->m_hTexture); } else { ShaderApiFast( pShaderAPI )->GetPs3Texture(pTex, (StandardTextureId_t)pTex->m_boundStd); } PackData(kDataEcbTexture, (uint8) i, sizeof(CPs3BindTexture_t), pTex); } #endif m_aECB[m_nNumECB] = pCmdBuf; uint32 size = *((uint32*)(pCmdBuf+4)); m_aSizeECB[m_nNumECB] = size; m_nNumECB++; PackData(kDataECB); } void CGcmDrawState::UnpackExecuteCommandBuffer( uint8 *pCmdBuf ) { uint8* pStart = pCmdBuf; uint8 *pReturnStack[20]; uint8 **pSP = &pReturnStack[ARRAYSIZE(pReturnStack)]; uint8 *pLastCmd; for(;;) { uint8 *pCmd=pCmdBuf; int nCmd = GetData( pCmdBuf ); if (nCmd > CBCMD_SET_VERTEX_SHADER_NEARZFARZ_STATE) DebuggerBreak(); switch( nCmd ) { case CBCMD_END: { if ( pSP == &pReturnStack[ARRAYSIZE(pReturnStack)] ) return; else { // pop pc pCmdBuf = *( pSP ++ ); break; } } case CBCMD_JUMP: pCmdBuf = GetData( pCmdBuf + sizeof( int ) ); break; case CBCMD_JSR: { Assert( pSP > &(pReturnStack[0] ) ); // *(--pSP ) = pCmdBuf + sizeof( int ) + sizeof( uint8 *); // pCmdBuf = GetData( pCmdBuf + sizeof( int ) ); UnpackExecuteCommandBuffer( GetData( pCmdBuf + sizeof( int ) ) ); pCmdBuf = pCmdBuf + sizeof( int ) + sizeof( uint8 *); break; } case CBCMD_SET_PIXEL_SHADER_FLOAT_CONST: { int nStartConst = GetData( pCmdBuf + sizeof( int ) ); int nNumConsts = GetData( pCmdBuf + 2 * sizeof( int ) ); float const *pValues = reinterpret_cast< float const *> ( pCmdBuf + 3 * sizeof( int ) ); pCmdBuf += nNumConsts * 4 * sizeof( float ) + 3 * sizeof( int ); SetPixelShaderConstantInternal( nStartConst, pValues, nNumConsts, false ); break; } case CBCMD_SETPIXELSHADERFOGPARAMS: { Error("Pixel Shader Fog params not supported\n"); break; } case CBCMD_STORE_EYE_POS_IN_PSCONST: { int nReg = GetData( pCmdBuf + sizeof( int ) ); float flWValue = GetData( pCmdBuf + 2 * 
sizeof( int ) ); pCmdBuf += 2 * sizeof( int ) + sizeof( float ); float vecValue[4]; memcpy(vecValue, m_vecWorldSpaceCameraPosition, sizeof(vecValue)); vecValue[3] = flWValue; SetPixelShaderConstantInternal( nReg, vecValue, 1, false ); break; } case CBCMD_SET_DEPTH_FEATHERING_CONST: { // int nConst = GetData( pCmdBuf + sizeof( int ) ); // float fDepthBlendScale = GetData( pCmdBuf + 2 * sizeof( int ) ); pCmdBuf += 2 * sizeof( int ) + sizeof( float ); // SetDepthFeatheringPixelShaderConstant( nConst, fDepthBlendScale ); break; } case CBCMD_SET_VERTEX_SHADER_FLOAT_CONST: { int nStartConst = GetData( pCmdBuf + sizeof( int ) ); int nNumConsts = GetData( pCmdBuf + 2 * sizeof( int ) ); float const *pValues = reinterpret_cast< float const *> ( pCmdBuf + 3 * sizeof( int ) ); pCmdBuf += nNumConsts * 4 * sizeof( float ) + 3 * sizeof( int ); SetVertexShaderConstantInternal( nStartConst, pValues, nNumConsts, false ); break; } case CBCMD_BIND_PS3_TEXTURE: { CPs3BindParams_t params = GetData (pCmdBuf + sizeof( int )); CPs3BindTexture_t tex = m_aBindTexture[params.m_nBindTexIndex]; gpGcmDrawState->BindTexture2( tex ); pCmdBuf += sizeof(int) + sizeof(params); break; } case CBCMD_BIND_PS3_STANDARD_TEXTURE: { CPs3BindParams_t params = GetData (pCmdBuf + sizeof( int )); CPs3BindTexture_t tex = m_aBindTexture[params.m_nBindTexIndex]; if (m_pFixed->m_nInstanced) { uint32 nBindFlags = tex.m_nBindFlags; uint32 nSampler = tex.m_sampler; switch (tex.m_boundStd) { case TEXTURE_LOCAL_ENV_CUBEMAP: if (m_pFixed->m_nInstanced & GCM_DS_INST_ENVMAP) tex = m_pFixed->m_instanceEnvCubemap; break; case TEXTURE_LIGHTMAP: if (m_pFixed->m_nInstanced & GCM_DS_INST_LIGHTMAP) tex = m_pFixed->m_instanceLightmap; break; case TEXTURE_PAINT: if (m_pFixed->m_nInstanced & GCM_DS_INST_PAINTMAP) tex = m_pFixed->m_instancePaintmap; break; } tex.m_nBindFlags = nBindFlags; tex.m_sampler = nSampler; } // Bind texture gpGcmDrawState->BindTexture2( tex ); // Twice more for bumped... 
if ( (tex.m_boundStd == TEXTURE_LIGHTMAP_BUMPED) || (tex.m_boundStd == TEXTURE_LIGHTMAP_BUMPED)) { tex.m_sampler++; gpGcmDrawState->BindTexture2( tex ); tex.m_sampler++; gpGcmDrawState->BindTexture2( tex ); } pCmdBuf += sizeof(int) + sizeof(params); break; } case CBCMD_PS3TEX: { pCmdBuf += sizeof(int) + (CBCMD_MAX_PS3TEX*sizeof(int)); break; } case CBCMD_LENGTH: { pCmdBuf += sizeof(int) *2 ; break; } case CBCMD_SET_PSHINDEX: { // int nIdx = GetData( pCmdBuf + sizeof( int ) ); // ShaderManager()->SetPixelShaderIndex( nIdx ); // pCmdBuf += 2 * sizeof( int ); Error("PSHINDEX Not Supported\n"); break; } case CBCMD_SET_VSHINDEX: { // int nIdx = GetData( pCmdBuf + sizeof( int ) ); // ShaderManager()->SetVertexShaderIndex( nIdx ); pCmdBuf += 2 * sizeof( int ); Error("VSHINDEX Not Supported\n"); break; } case CBCMD_SET_VERTEX_SHADER_FLASHLIGHT_STATE: { // int nStartConst = GetData( pCmdBuf + sizeof( int ) ); // SetVertexShaderConstantInternal( nStartConst, m_FlashlightWorldToTexture.Base(), 4, false ); // pCmdBuf += 2 * sizeof( int ); // Error("Flashlight unsupported\n"); pCmdBuf += 2 * sizeof( int ); break; } case CBCMD_SET_VERTEX_SHADER_NEARZFARZ_STATE: { Error("SetVertexShaderNearAndFarZ NOt SUPPORTED\n"); // int nStartConst = GetData( pCmdBuf + sizeof( int ) ); // // VMatrix m; // // m = m_MaterialProjectionMatrix; // // // GetMatrix( MATERIAL_PROJECTION, m.m[0] ); // // // m[2][2] = F/(N-F) (flip sign if RH) // // m[3][2] = NF/(N-F) // // float vNearFar[4]; // // float N = m[3][2] / m[2][2]; // float F = (m[3][2]*N) / (N + m[3][2]); // // vNearFar[0] = N; // vNearFar[1] = F; // // SetVertexShaderConstantInternal( nStartConst, vNearFar, 1, false ); pCmdBuf += 2 * sizeof( int ); break; } case CBCMD_SET_PIXEL_SHADER_FLASHLIGHT_STATE: { // int nLightSampler = GetData( pCmdBuf + sizeof( int ) ); // int nDepthSampler = GetData( pCmdBuf + 2 * sizeof( int ) ); // int nShadowNoiseSampler = GetData( pCmdBuf + 3 * sizeof( int ) ); // int nColorConst = GetData( pCmdBuf + 4 * 
sizeof( int ) ); // int nAttenConst = GetData( pCmdBuf + 5 * sizeof( int ) ); // int nOriginConst = GetData( pCmdBuf + 6 * sizeof( int ) ); // int nDepthTweakConst = GetData( pCmdBuf + 7 * sizeof( int ) ); // int nScreenScaleConst = GetData( pCmdBuf + 8 * sizeof( int ) ); // int nWorldToTextureConstant = GetData( pCmdBuf + 9 * sizeof( int ) ); // bool bFlashlightNoLambert = GetData( pCmdBuf + 10 * sizeof( int ) ) != 0; // bool bSinglePassFlashlight = GetData( pCmdBuf + 11 * sizeof( int ) ) != 0; // pCmdBuf += 12 * sizeof( int ); // // ShaderAPITextureHandle_t hTexture = g_pShaderUtil->GetShaderAPITextureBindHandle( m_FlashlightState.m_pSpotlightTexture, m_FlashlightState.m_nSpotlightTextureFrame, 0 ); // BindTexture( (Sampler_t)nLightSampler, TEXTURE_BINDFLAGS_SRGBREAD, hTexture ); // !!!BUG!!!srgb or not? // // SetPixelShaderConstantInternal( nAttenConst, m_pFlashlightAtten, 1, false ); // SetPixelShaderConstantInternal( nOriginConst, m_pFlashlightPos, 1, false ); // // m_pFlashlightColor[3] = bFlashlightNoLambert ? 
2.0f : 0.0f; // This will be added to N.L before saturate to force a 1.0 N.L term // // // DX10 hardware and single pass flashlight require a hack scalar since the flashlight is added in linear space // float flashlightColor[4] = { m_pFlashlightColor[0], m_pFlashlightColor[1], m_pFlashlightColor[2], m_pFlashlightColor[3] }; // if ( ( g_pHardwareConfig->UsesSRGBCorrectBlending() ) || ( bSinglePassFlashlight ) ) // { // // Magic number that works well on the 360 and NVIDIA 8800 // flashlightColor[0] *= 2.5f; // flashlightColor[1] *= 2.5f; // flashlightColor[2] *= 2.5f; // } // // SetPixelShaderConstantInternal( nColorConst, flashlightColor, 1, false ); // // if ( nWorldToTextureConstant >= 0 ) // { // SetPixelShaderConstantInternal( nWorldToTextureConstant, m_FlashlightWorldToTexture.Base(), 4, false ); // } // // BindStandardTexture( (Sampler_t)nShadowNoiseSampler, TEXTURE_BINDFLAGS_NONE, TEXTURE_SHADOW_NOISE_2D ); // if( m_pFlashlightDepthTexture && m_FlashlightState.m_bEnableShadows && ShaderUtil()->GetConfig().ShadowDepthTexture() ) // { // ShaderAPITextureHandle_t hDepthTexture = g_pShaderUtil->GetShaderAPITextureBindHandle( m_pFlashlightDepthTexture, 0, 0 ); // BindTexture( (Sampler_t)nDepthSampler, TEXTURE_BINDFLAGS_SHADOWDEPTH, hDepthTexture ); // // SetPixelShaderConstantInternal( nDepthTweakConst, m_pFlashlightTweaks, 1, false ); // // // Dimensions of screen, used for screen-space noise map sampling // float vScreenScale[4] = {1280.0f / 32.0f, 720.0f / 32.0f, 0, 0}; // int nWidth, nHeight; // BaseClass::GetBackBufferDimensions( nWidth, nHeight ); // // int nTexWidth, nTexHeight; // GetStandardTextureDimensions( &nTexWidth, &nTexHeight, TEXTURE_SHADOW_NOISE_2D ); // // vScreenScale[0] = (float) nWidth / nTexWidth; // vScreenScale[1] = (float) nHeight / nTexHeight; // vScreenScale[2] = 1.0f / m_FlashlightState.m_flShadowMapResolution; // vScreenScale[3] = 2.0f / m_FlashlightState.m_flShadowMapResolution; // SetPixelShaderConstantInternal( nScreenScaleConst, 
vScreenScale, 1, false ); // } // else // { // BindStandardTexture( (Sampler_t)nDepthSampler, TEXTURE_BINDFLAGS_NONE, TEXTURE_WHITE ); // } // Error("Flashlight unsupported\n"); pCmdBuf += 12 * sizeof( int ); break; } case CBCMD_SET_PIXEL_SHADER_UBERLIGHT_STATE: { // int iEdge0Const = GetData( pCmdBuf + sizeof( int ) ); // int iEdge1Const = GetData( pCmdBuf + 2 * sizeof( int ) ); // int iEdgeOOWConst = GetData( pCmdBuf + 3 * sizeof( int ) ); // int iShearRoundConst = GetData( pCmdBuf + 4 * sizeof( int ) ); // int iAABBConst = GetData( pCmdBuf + 5 * sizeof( int ) ); // int iWorldToLightConst = GetData( pCmdBuf + 6 * sizeof( int ) ); pCmdBuf += 7 * sizeof( int ); // // SetPixelShaderConstantInternal( iEdge0Const, m_UberlightRenderState.m_vSmoothEdge0.Base(), 1, false ); // SetPixelShaderConstantInternal( iEdge1Const, m_UberlightRenderState.m_vSmoothEdge1.Base(), 1, false ); // SetPixelShaderConstantInternal( iEdgeOOWConst, m_UberlightRenderState.m_vSmoothOneOverW.Base(), 1, false ); // SetPixelShaderConstantInternal( iShearRoundConst, m_UberlightRenderState.m_vShearRound.Base(), 1, false ); // SetPixelShaderConstantInternal( iAABBConst, m_UberlightRenderState.m_vaAbB.Base(), 1, false ); // SetPixelShaderConstantInternal( iWorldToLightConst, m_UberlightRenderState.m_WorldToLight.Base(), 4, false ); Error("Uberlight state unsupported\n"); break; } #ifndef NDEBUG default: Assert(0); break; #endif } pLastCmd = pCmd; } } inline void CGcmDrawState::TextureReplace(uint32 id, CPs3BindTexture_t tex) { switch (id) { case TEXTURE_LOCAL_ENV_CUBEMAP: m_pFixed->m_nInstanced |= GCM_DS_INST_ENVMAP; m_pFixed->m_instanceEnvCubemap = tex; break; case TEXTURE_LIGHTMAP: m_pFixed->m_nInstanced |= GCM_DS_INST_LIGHTMAP; m_pFixed->m_instanceLightmap = tex; break; case TEXTURE_PAINT: m_pFixed->m_nInstanced |= GCM_DS_INST_ENVMAP; m_pFixed->m_instancePaintmap = tex; break; } } #endif // INCLUDED_GCMDRAWSTATE_H