Counter Strike : Global Offensive Source Code
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

3364 lines
104 KiB

  1. //================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
  2. //
  3. // Per draw call Gcm state
  4. // Render states, vo/fp consts
  5. //
  6. //==================================================================================================
  7. #ifndef INCLUDED_GCMDRAWSTATE_H
  8. #define INCLUDED_GCMDRAWSTATE_H
  9. #ifndef SPU
  10. #include "tier0/platform.h"
  11. #include "tier0/dbg.h"
  12. #include "cell\gcm.h"
  13. #include "gcmconfig.h"
  14. #include "ps3gcmmemory.h"
  15. #include "dxabstract_def.h"
  16. #include "dxabstract.h"
  17. #include "shaderapi/commandbuffer.h"
  18. #include "shaderapi/shareddefs.h"
  19. #include "mathlib/vector4d.h"
  20. #include "mathlib/vmatrix.h"
  21. #include <shaderapi/ishaderdynamic.h>
  22. #include <vprof.h>
  23. #include "SpuMgr_ppu.h"
  24. #else
  25. #include "spumgr_spu.h"
  26. #include "cell/gcm_spu.h"
  27. #include "cell/gcm/gcm_method_data.h"
  28. #include "dxabstract_def.h"
  29. #include "gcmtexture.h"
  30. #include "gcmlabels.h"
  31. #include "shaderapi/shareddefs.h"
  32. #include "shaderapi/commandbuffer.h"
  33. #include <shaderapi/ishaderdynamic.h>
  34. #include "gcmdraw_spu.h"
  35. #endif
  36. //--------------------------------------------------------------------------------------------------
  37. // Defines
  38. //--------------------------------------------------------------------------------------------------
  39. //#define GCM_DS_SAFE
  40. #define GCM_DRAWSTATE_MAX 9 // We have this many drawstate structures
  41. // we fill half of them and send to SPU
  42. // then we fill the other half
  43. // Need an odd number since one extra is required in SendToSPU
  44. #define GCM_NUMDRAWCALLS_SPU ((GCM_DRAWSTATE_MAX-1)/2)
  45. #define GCM_DS_FIFOPERDRAW 0x4000 // 16K is our max epr draw call FIFO.
  46. // in practice we see a highwater of 0x2800
  47. // which combines a full RSx reset with a drawcall
  48. #define GCM_DS_MAXDATAPERDRAWCALL 0x2000 // Highwater mark is abt 3K
  49. #define GCM_DS_MAXFPCONST 96
  50. #define GCM_DS_MAXVPCONST 256
  51. #define GCM_DS_MAXDYNECB 0x40000 // 64K ring buffer. if <8K left wraps
  52. #define GCM_DS_INST_ENVMAP 1
  53. #define GCM_DS_INST_LIGHTMAP 2
  54. #define GCM_DS_INST_PAINTMAP 4
  55. #define MAX_SAMPLERS 16
  56. //--------------------------------------------------------------------------------------------------
  57. // Global externs
  58. //--------------------------------------------------------------------------------------------------
  59. extern uint8 g_d3drs_defvalue_indices[D3DRS_VALUE_LIMIT];
  60. extern uint32 g_d3drs_defvalues[11];
  61. extern uint32 dxtogl_stencilmode[10];
  62. extern uint16 dxtogl_blendop[7];
  63. extern uint8 dxtogl_addressMode[6];
  64. extern uint8 dxtogl_anisoIndexHalf[32]; // indexed by [ dxsamp->maxAniso / 2 ]
  65. extern uint8 dxtogl_minFilter[4][4]; // indexed by _D3DTEXTUREFILTERTYPE on both axes: [row is min filter][col is mip filter].
  66. extern uint8 dxtogl_magFilter[4]; // indexed by _D3DTEXTUREFILTERTYPE
  67. extern int g_bZcullAuto;
  68. extern int g_nZcullDefault;
  69. extern int g_nZcullMoveForward;
  70. extern int g_nZcullPushBack;
  71. extern vec_float4 g_aFPConst[GCM_DS_MAXFPCONST];
  72. extern vec_float4 g_aVPConst[GCM_DS_MAXVPCONST];
  73. extern D3DStreamDesc g_dxGcmVertexStreamSources[D3D_MAX_STREAMS];
  74. extern uint32 g_UPHigh;
  75. extern uint32 g_UPFrame;
  76. extern volatile uint32_t * g_label_fppatch_ring_seg;
  77. extern uint8 g_aDynECB[GCM_DS_MAXDYNECB];
  78. extern uint32 g_nDynECBIdx;
  79. extern uint8 gPackData[][GCM_DS_MAXDATAPERDRAWCALL];
  80. //--------------------------------------------------------------------------------------------------
  81. // Structs used as params
  82. //--------------------------------------------------------------------------------------------------
  83. struct DrawScissor_t
  84. {
  85. uint16 x, y, w, h;
  86. };
  87. struct UpdateSurface_t
  88. {
  89. // if the scissor is logically disabled, set scissor to this size
  90. //uint16 m_nRenderTargetWidth, m_nRenderTargetHeight;
  91. CPs3gcmTextureData_t m_texC, m_texZ;
  92. };
  93. struct FpHeader_t
  94. {
  95. uint32 m_nUcodeSize;
  96. uint32 m_nPatchCount;
  97. uint32 m_nShaderControl0;
  98. uint32 m_nTexControls; // Always <= 16; 1 tex control corresponds to 2 words in the tex control table
  99. // data[]
  100. // Allocate memory layout as :
  101. // FpHeader_t
  102. // uCode
  103. // Patches
  104. // Texcontrols
  105. // total size = AlignValue( sizeof( FpHeader_t ) + m_nUcodeSize + (sizeof( uint32 ) * nPatchCount)
  106. // + (2 * sizeof( uint32 ) * nTexControls) , 16);
  107. };
  108. //--------------------------------------------------------------------------------------------------
  109. // Vertex streams
  110. //--------------------------------------------------------------------------------------------------
  111. struct SetVertexDataArrayCache_t
  112. {
  113. union Data_t
  114. {
  115. vector signed int m_vi;
  116. struct Unpacked_t
  117. {
  118. uint32 m_uiLocalMemoryBuffer; // after adding the offset
  119. uint32 m_nSize;
  120. uint32 m_nStride;
  121. uint32 m_nType;
  122. //IDirect3DVertexBuffer9 *m_vtxBuffer; // for debug only
  123. //uint32 m_nBaseVertexOffset; // debug only
  124. } m_unpacked;
  125. } m_data;
  126. SetVertexDataArrayCache_t(){}
  127. SetVertexDataArrayCache_t( D3DStreamDesc &dsd, D3DVERTEXELEMENT9_GCM::GcmDecl_t const &gcmvad, uint nBaseVertexIndex )
  128. {
  129. //m_vtxBuffer = dsd.m_vtxBuffer;
  130. uint nBaseVertexOffset = dsd.m_offset + ( nBaseVertexIndex * dsd.m_stride ) + gcmvad.m_offset;
  131. uint uiLocalMemoryBuffer = dsd.m_nLocalBufferOffset + nBaseVertexOffset;
  132. m_data.m_vi = ( vector signed int ) { uiLocalMemoryBuffer, gcmvad.m_datasize, gcmvad.m_datatype, dsd.m_stride };
  133. // m_stride = dsd.m_stride;
  134. // m_size = gcmvad.m_datasize;
  135. // m_type = gcmvad.m_datatype;
  136. }
  137. uint GetLocalOffset()const { return m_data.m_unpacked.m_uiLocalMemoryBuffer; }
  138. bool IsNull()const { return vec_all_eq( m_data.m_vi, (vector signed int){0,0,0,0} ); }
  139. void SetNull(){ m_data.m_vi = ( vector signed int ){0,0,0,0}; }
  140. void Invalidate(){ m_data.m_vi = (vector signed int){-1,-1,-1,-1};}
  141. bool operator != ( const SetVertexDataArrayCache_t& that ) const { return !vec_all_eq( m_data.m_vi, that.m_data.m_vi ); }
  142. void operator = ( const SetVertexDataArrayCache_t& that ) { m_data.m_vi = that.m_data.m_vi ; }
  143. };
  144. // This is global, since it is only written by the flush code
  145. extern SetVertexDataArrayCache_t g_cacheSetVertexDataArray[ D3D_MAX_STREAMS ]; // Vertex stream setup
  146. //--------------------------------------------------------------------------------------------------
  147. // SPU draw commands
  148. //--------------------------------------------------------------------------------------------------
  149. enum DrawCmd
  150. {
  151. CmdCommitStates = 1,
  152. CmdDrawPrim,
  153. CmdDrawPrimUP,
  154. CmdEndFrame
  155. };
  156. //--------------------------------------------------------------------------------------------------
  157. // GcmDrawState.. Holds data that is commited once a draw, clear etc... is made..
  158. //--------------------------------------------------------------------------------------------------
  159. #define DRAWSTATE_SIZEOFDMA (uintp(&(((CGcmDrawState*)(0))->m_pData)+1)-uintp(&(((CGcmDrawState*)(0))->m_cmd)))
  160. struct CGcmDrawState
  161. {
  162. // DrawData used by DrawPrimUP
  163. struct DrawData { uint8 m_type; uint8 m_idx; uint16 m_size; /*uint8 m_data[m_count];*/ };
  164. //--------------------------------------------------------------------------------------------------
  165. // Enums
  166. //--------------------------------------------------------------------------------------------------
  167. // Data that gets packes and then unpacked as a cmd stream
  168. enum GcmDataType
  169. {
  170. kDataFpuConsts = 1,
  171. kDataVpuConsts,
  172. kDataStreamDesc,
  173. kDataZcullStats,
  174. kDataZcullLimit,
  175. kDataViewport,
  176. kDataSetRenderState,
  177. kDataSetZpassPixelCountEnable,
  178. kDataSetClearReport,
  179. kDataSetReport,
  180. kDataUpdateSurface,
  181. kDataClearSurface,
  182. kDataResetSurface,
  183. kDataTransferImage,
  184. kDataViewPort,
  185. kDataScissor,
  186. kDataTexture,
  187. kDataEcbTexture,
  188. kDataResetTexture,
  189. kDataUpdateVtxBufferOffset,
  190. kDataECB,
  191. kDataBeginScene,
  192. kDataSetWorldSpaceCameraPosition,
  193. kDataSetWriteBackEndLabel
  194. };
  195. // RenderStates
  196. enum GcmDirtyStateFlags_t
  197. {
  198. kDirtyBlendFactor = ( 1 << 0 ),
  199. kDirtyAlphaFunc = ( 1 << 1 ),
  200. kDirtyStencilOp = ( 1 << 2 ),
  201. kDirtyStencilFunc = ( 1 << 3 ),
  202. kDirtyDepthBias = ( 1 << 4 ),
  203. kDirtyScissor = ( 1 << 5 ),
  204. kDirtyDepthMask = ( 1 << 6 ),
  205. kDirtyZEnable = ( 1 << 7 ),
  206. kDirtyZFunc = ( 1 << 8 ),
  207. kDirtyColorWriteEnable = ( 1 << 9 ),
  208. kDirtyCullMode = ( 1 << 10 ),
  209. kDirtyAlphablendEnable = ( 1 << 11 ),
  210. kDirtySrgbWriteEnable = ( 1 << 12 ),
  211. kDirtyAlphaTestEnable = ( 1 << 13 ),
  212. kDirtyStencilEnable = ( 1 << 14 ),
  213. kDirtyStencilWriteMask = ( 1 << 15 ),
  214. kDirtyFillMode = ( 1 << 16 ),
  215. kDirtyBlendOp = ( 1 << 17 ),
  216. kDirtyResetRsx = ( 1 << 18 ),
  217. kDirtyZeroAllPSConsts = ( 1 << 19 ),
  218. kDirtyZeroAllVSConsts = ( 1 << 20)
  219. };
  220. // Dirty flags for caches and other misc settings
  221. enum GcmDirtyCacheFlags_t
  222. {
  223. kDirtyVxConstants = ( 1 << 0 ),
  224. kDirtyClipPlanes = ( 1 << 1 ),
  225. kDirtyVxShader = ( 1 << 2 ),
  226. kDirtyPxShader = ( 1 << 3 ),
  227. kDirtyPxConstants = ( 1 << 4 ),
  228. kDirtyVxCache = ( 1 << 5 ),
  229. kDirtyTxCache = ( 1 << 6 )
  230. };
  231. //--------------------------------------------------------------------------------------------------
  232. // Data we are interested in per draw call
  233. //--------------------------------------------------------------------------------------------------
  234. // Data that is DMA'd to the SPU directly and not packed
  235. uint32 m_cmd;
  236. uint32 m_param[8];
  237. uint32 m_eaOutputFIFO;
  238. uint32 m_eaOutputUCode;
  239. uint32 m_nFreeLabel; // Nonzero values are set
  240. uint16 m_nBackBufferSize[2];
  241. uint16 m_dirtySamplersMask; // Sampler dirty flags
  242. uint16 m_dirtyCachesMask; // Caches reset for Shaders flush
  243. uint32 m_dirtyStatesMask; // Render state dirty flags
  244. uint32 m_shaderVxConstants; // Booleans, go into a SetTransformbranchbits call
  245. PixelShader9Data_t* m_pPixelShaderData;
  246. VertexShader9Data_t* m_pVertexShaderData;
  247. uint32 m_nNumECB;
  248. uint8* m_aECB[3]; // No More than three per draw call (static, semi-static & dynamic)
  249. uint32 m_aSizeECB[3];
  250. struct FixedData
  251. {
  252. uint32 m_nSampler;
  253. uint8 m_aSamplerIdx[D3D_MAX_SAMPLERS];
  254. D3DSamplerDesc m_aSamplers[D3D_MAX_SAMPLERS];
  255. uint32 m_nInstanced;
  256. CPs3BindTexture_t m_instanceEnvCubemap;
  257. CPs3BindTexture_t m_instanceLightmap;
  258. CPs3BindTexture_t m_instancePaintmap;
  259. };
  260. // Unpack pointer and cursors
  261. FixedData* m_pFixed; // Fixed sized data uploaded per call
  262. uint8* m_pDataCursor;
  263. uint8* m_pData;
  264. // Fixed Data that is unpacked
  265. D3DSamplerDesc m_aSamplers[D3D_MAX_SAMPLERS];
  266. // Data that is unpacked, or derived, or code generated somewhere (Init etc...)
  267. CPs3BindTexture_t m_aBindTexture[CBCMD_MAX_PS3TEX]; // Textures that are set from ECBs
  268. float m_vecWorldSpaceCameraPosition[4];
  269. uint32 m_nSetTransformBranchBits; // here for now because they init in begin scene
  270. uint32 m_nDisabledSamplers;
  271. uint16 m_blends[2];
  272. struct { uint32 func, ref; } m_alphaFunc;
  273. struct { uint32 fail, dfail, dpass; } m_stencilOp;
  274. struct { uint32 func, ref, mask; } m_stencilFunc;
  275. struct { uint32 factor, units; } m_depthBias;
  276. struct { uint16 x, y, w, h, enabled; } m_scissor; // kDirtyScissor
  277. uint16 m_nSetDepthMask;
  278. uint32 m_ZEnable;
  279. uint32 m_ZFunc;
  280. uint32 m_ColorWriteEnable;
  281. uint32 m_CullMode;
  282. uint32 m_AlphablendEnable;
  283. uint32 m_SrgbWriteEnable;
  284. uint32 m_AlphaTestEnable;
  285. uint32 m_StencilEnable;
  286. uint32 m_StencilWriteMask;
  287. uint32 m_FillMode;
  288. uint32 m_BlendOp;
  289. uint32 m_userClipPlanesState;
  290. CPs3gcmTextureData_t m_textures[D3D_MAX_TEXTURES];
  291. float m_viewZ[2];
  292. uint16 m_viewportSize[4];
  293. //--------------------------------------------------------------------------------------------------
  294. // Methods
  295. //--------------------------------------------------------------------------------------------------
  296. public:
  297. // Init etc.. (ppu functions...)
  298. #ifndef SPU
  299. inline void Init(IDirect3DDevice9Params *params);
  300. #endif
  301. inline void Init();
  302. void SendToSpu();
  303. inline void Reset(); // Reset for re-use
  304. #ifndef SPU
  305. inline void BeginScene(); // Sets report for Zcull
  306. inline void EndScene(); // Gets report for Zcull
  307. inline void CmdBufferFlush(); // Flush RSX via SPU
  308. inline void CmdBufferFinish(); // Flush RSX and wait for it
  309. #endif
  310. inline void ResetRsxState(); // Lots of GCM_FUNC to default vals
  311. // Dynamic ECB mgmt
  312. inline uint8* OpenDynECB();
  313. inline void CloseDynECB(uint32 size);
  314. // Viewport and scissor
  315. inline void UnpackSetViewport(CONST D3DVIEWPORT9* pViewport);
  316. inline HRESULT SetViewport(CONST D3DVIEWPORT9* pViewport);
  317. inline void UnpackSetScissorRect(DrawScissor_t * pScissor);
  318. inline void SetScissorRect( DrawScissor_t * pScissor );
  319. // Reports, Zpass and labels (all packed)
  320. inline void SetZpassPixelCountEnable(uint32 enable);
  321. inline void SetClearReport(uint32 type);
  322. inline void SetReport(uint32 type, uint32 index);
  323. inline void SetWriteBackEndLabel(uint8 index, uint32 value);
  324. // RenderStates
  325. inline void UnpackSetRenderState( D3DRENDERSTATETYPE State, uint Value );
  326. inline void SetRenderState( D3DRENDERSTATETYPE State, uint Value );
  327. // Texture samplers, textures, texture cache
  328. inline void SetInvalidateTextureCache();
  329. inline void SetSamplerState( uint Sampler,D3DSAMPLERSTATETYPE Type,DWORD Value );
  330. inline void UnpackSetTexture( DWORD Stage, uint32 offset, uint32 eaLayout );
  331. inline void UnpackResetTexture( DWORD Stage );
  332. inline void SetTexture( DWORD Stage, CPs3gcmTexture *tex );
  333. inline void ResetTexture( DWORD Stage );
  334. // Vertex buffers, vertex cache, , vertex constants
  335. inline void SetInvalidateVertexCache();
  336. inline void UnpackUpdateVtxBufferOffset( IDirect3DVertexBuffer9 * vtxBuffer, uint nLocalBufferOffset );
  337. inline void UpdateVtxBufferOffset( IDirect3DVertexBuffer9 * vtxBuffer, uint nLocalBufferOffset );
  338. inline void SetVertexStreamSource(uint nStreamIndex, IDirect3DVertexBuffer9* pStreamData,UINT OffsetInBytes,UINT Stride );
  339. inline void _SetVertexShaderConstantB( UINT StartRegister, uint BoolCount, uint shaderVxConstants );
  340. inline void SetVertexShaderConstantB( UINT StartRegister,CONST BOOL* pConstantData,UINT BoolCount) ;
  341. inline void SetVertexShaderConstantF( UINT StartRegister, void* pUnalignedConstantData, UINT Vector4fCount );
  342. // inline void VertexConstantExtractor( float *pDestStorage, int kRegisterFirst, int kRegisterLength,
  343. // int StartRegister, const float *pConstantData, int Vector4fCount );
  344. // Pixel shader consts
  345. inline void SetPixelShaderConstantF(uint32 StartRegister, float* pConstantData, uint32 Vector4fCount);
  346. inline void UnpackSetWorldSpaceCameraPosition(float* pWCP);
  347. inline void SetWorldSpaceCameraPosition(float* pWCP);
  348. // Surfaces and render targets
  349. inline void Ps3Helper_UpdateSurface( UpdateSurface_t * pSurface );
  350. inline void UnpackUpdateSurface(CellGcmSurface* pSf);
  351. inline void ResetSurfaceToKnownDefaultState();
  352. inline void UnpackResetSurfaceToKnownDefaultState();
  353. inline void Helper_IntersectRectsXYWH( uint16 const *a, uint16 const *b, uint16 *result );
  354. inline void ClearSurface( DWORD nFlags, D3DCOLOR nColor, float flZ, uint32 nStencil,
  355. uint32 nDepthStencilBitDepth );
  356. inline void UnpackClearSurface( DWORD nFlags, D3DCOLOR nColor, float flZ, uint32 nStencil,
  357. uint32 nDepthStencilBitDepth );
  358. // Blit (packed)
  359. inline void SetTransferImage(uint8 mode, uint32 dstOffset, uint32 dstPitch, uint32 dstX, uint32 dstY, uint32 srcOffset,
  360. uint32 srcPitch, uint32 srcX, uint32 srcY, uint32 width, uint32 height, uint32 bytesPerPixel );
  361. // DrawPrim
  362. inline void DrawPrimitiveUP( IDirect3DVertexDeclaration9 * pDecl, D3DPRIMITIVETYPE nPrimitiveType,UINT nPrimitiveCount,
  363. CONST void *pVertexStreamZeroData, UINT nVertexStreamZeroStride );
  364. inline void DrawIndexedPrimitive( uint32 offset, IDirect3DVertexDeclaration9 * pDecl, D3DPRIMITIVETYPE Type,INT BaseVertexIndex,UINT MinVertexIndex,
  365. UINT NumVertices,UINT startIndex,UINT nDrawPrimCount );
  366. inline void ExecuteCommandBuffer( uint8 *pCmdBuf );
  367. inline void UnpackExecuteCommandBuffer( uint8 *pCmdBuf );
  368. void TestCommandBuffer( uint8 *pCmdBuf );
  369. inline void TextureReplace(uint32 id, CPs3BindTexture_t tex);
  370. // Commit, pack etc..
  371. inline void PackData(uint8 type, uint8 idx, uint16 size, void* pSrc);
  372. inline void PackData(uint8 type, uint16 size, void* pSrc);
  373. inline void PackData(uint8 type, uint32 val1, uint32 val2, uint32 val3);
  374. inline void PackData(uint8 type, uint32 val1, uint32 val2);
  375. inline void PackData(uint8 type, uint32 val1);
  376. inline void PackData(uint8 type);
  377. inline void PackData(uint8 type, DWORD nFlags, D3DCOLOR nColor, float flZ, uint32 nStencil,
  378. uint32 nDepthStencilBitDepth ); // used to pack clear surface
  379. inline void UnpackData(); // Unpacks variable sized data and sets vertex consts
  380. inline void CommitStates(); // Currently unused on PPU
  381. inline void EndFrame(); // called by Flip()
  382. inline void CommitAll(IDirect3DVertexDeclaration9 * pDecl, uint32 baseVertexIndex);
  383. inline void CommitRenderStates();
  384. inline void CommitVertexBindings(IDirect3DVertexDeclaration9 * pDecl, uint32 baseVertexIndex);
  385. inline void CommitSampler(uint32 nSampler);
  386. inline void CommitSamplers();
  387. inline void CommitShaders();
  388. inline void BindFragmentProgram(uint32 nVertexToFragmentProgramAttributeMask);
  389. inline void PatchUcode(fltx4 * pUCode16, uint32 * pPatchTable, uint nPatchCount);
  390. inline fltx4* CopyUcode(FpHeader_t* pFp);
  391. #ifndef SPU
  392. inline void AllocateUcode(FpHeader_t* pFp); // Reserves space in the patchbuffer for this
  393. #endif
  394. // ExecuteCommandBuffer Subs
  395. inline void SetVertexShaderConstantInternal( int var, float const* pVec, int numVecs = 1, bool bForce = false );
  396. inline void SetPixelShaderConstantInternal( int var, float const* pValues, int nNumConsts = 1, bool bForce = false );
  397. inline void BindTexture2( CPs3BindTexture_t bindTex );
  398. // Misc
  399. inline int IsLayerRender() { return 1;} // 7LTODO : zprepass !
  400. };
  401. //--------------------------------------------------------------------------------------------------
  402. // Externs
  403. //--------------------------------------------------------------------------------------------------
  404. extern CGcmDrawState* gpGcmDrawState;
  405. extern CGcmDrawState gGcmDrawState[];
  406. extern CGcmDrawState::FixedData gFixedData[];
  407. //--------------------------------------------------------------------------------------------------
  408. // inlines
  409. //--------------------------------------------------------------------------------------------------
  410. //--------------------------------------------------------------------------------------------------
  411. // Generic pack data
  412. //--------------------------------------------------------------------------------------------------
  413. inline void CGcmDrawState::PackData(uint8 type, uint8 idx, uint16 size, void* pSrc)
  414. {
  415. // SNPROF("CGcmDrawState::PackData(uint8 type, uint8 idx, uint16 size, void* pSrc)");
  416. uint32 spacereqd = size + sizeof(DrawData);
  417. #ifdef GCM_DS_SAFE
  418. uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData ));
  419. if(spacereqd > spaceleft) Error("Out of per draw call data\n");
  420. #endif
  421. DrawData* pData = (DrawData*) m_pDataCursor;
  422. pData->m_type = type;
  423. pData->m_idx = idx;
  424. pData->m_size = size;
  425. V_memcpy(pData+1, pSrc, size);
  426. m_pDataCursor += spacereqd;
  427. }
  428. inline void CGcmDrawState::PackData(uint8 type, uint16 size, void* pSrc)
  429. {
  430. // SNPROF("CGcmDrawState::PackData(uint8 type, uint16 size, void* pSrc)");
  431. uint32 spacereqd = size + sizeof(DrawData);
  432. #ifdef GCM_DS_SAFE
  433. uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData ));
  434. if(spacereqd > spaceleft) Error("Out of per draw call data\n");
  435. #endif
  436. DrawData* pData = (DrawData*) m_pDataCursor;
  437. pData->m_type = type;
  438. pData->m_idx = 0;
  439. pData->m_size = size;
  440. V_memcpy(pData+1, pSrc, size);
  441. m_pDataCursor += spacereqd;
  442. }
  443. inline void CGcmDrawState::PackData(uint8 type, uint32 val1, uint32 val2, uint32 val3)
  444. {
  445. // SNPROF("CGcmDrawState::PackData(uint8 type, uint32 val1, uint32 val2, uint32 val3)");
  446. const uint32 size = 12;
  447. uint32 spacereqd = size + sizeof(DrawData);
  448. #ifdef GCM_DS_SAFE
  449. uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData ));
  450. if(spacereqd > spaceleft) Error("Out of per draw call data\n");
  451. #endif
  452. DrawData* pData = (DrawData*) m_pDataCursor;
  453. pData->m_type = type;
  454. pData->m_idx = 0;
  455. pData->m_size = size;
  456. uint32* pDest = (uint32*)(pData + 1);
  457. pDest[0] = val1;
  458. pDest[1] = val2;
  459. pDest[2] = val3;
  460. m_pDataCursor += spacereqd;
  461. }
  462. inline void CGcmDrawState::PackData(uint8 type, uint32 val1, uint32 val2)
  463. {
  464. // SNPROF("CGcmDrawState::PackData(uint8 type, uint32 val1, uint32 val2)");
  465. const uint32 size = 8;
  466. uint32 spacereqd = size + sizeof(DrawData);
  467. #ifdef GCM_DS_SAFE
  468. uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData ));
  469. if(spacereqd > spaceleft) Error("Out of per draw call data\n");
  470. #endif
  471. DrawData* pData = (DrawData*) m_pDataCursor;
  472. pData->m_type = type;
  473. pData->m_idx = 0;
  474. pData->m_size = size;
  475. uint32* pDest = (uint32*)(pData + 1);
  476. pDest[0] = val1;
  477. pDest[1] = val2;
  478. m_pDataCursor += spacereqd;
  479. }
  480. inline void CGcmDrawState::PackData(uint8 type, uint32 val1)
  481. {
  482. // SNPROF("CGcmDrawState::PackData(uint8 type, uint32 val1)");
  483. const uint32 size = 4;
  484. uint32 spacereqd = size + sizeof(DrawData);
  485. #ifdef GCM_DS_SAFE
  486. uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData ));
  487. if(spacereqd > spaceleft) Error("Out of per draw call data\n");
  488. #endif
  489. DrawData* pData = (DrawData*) m_pDataCursor;
  490. pData->m_type = type;
  491. pData->m_idx = 0;
  492. pData->m_size = size;
  493. uint32* pDest = (uint32*)(pData + 1);
  494. pDest[0] = val1;
  495. m_pDataCursor += spacereqd;
  496. }
  497. inline void CGcmDrawState::PackData(uint8 type)
  498. {
  499. // SNPROF("CGcmDrawState::PackData(uint8 type)");
  500. const uint32 size = 0;
  501. uint32 spacereqd = size + sizeof(DrawData);
  502. #ifdef GCM_DS_SAFE
  503. uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData ));
  504. if(spacereqd > spaceleft) Error("Out of per draw call data\n");
  505. #endif
  506. DrawData* pData = (DrawData*) m_pDataCursor;
  507. pData->m_type = type;
  508. pData->m_idx = 0;
  509. pData->m_size = size;
  510. m_pDataCursor += spacereqd;
  511. }
  512. inline void CGcmDrawState::PackData(uint8 type, DWORD nFlags, D3DCOLOR nColor, float flZ, uint32 nStencil, uint32 nDepthStencilBitDepth ) // used to pack clear surface
  513. {
  514. // SNPROF("CGcmDrawState::PackData(uint8 type, DWORD nFlags, D3DCOLOR nColor, float flZ, uint32 nStencil, uint32 nDepthStencilBitDepth )");
  515. const uint32 size = 20;
  516. uint32 spacereqd = size + sizeof(DrawData);
  517. #ifdef GCM_DS_SAFE
  518. uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData ));
  519. if(spacereqd > spaceleft) Error("Out of per draw call data\n");
  520. #endif
  521. DrawData* pData = (DrawData*) m_pDataCursor;
  522. pData->m_type = type;
  523. pData->m_idx = 0;
  524. pData->m_size = size;
  525. uint32* pDest = (uint32*)(pData + 1);
  526. float* pDestf = (float*) pDest;
  527. pDest[0] = nFlags;
  528. pDest[1] = nColor;
  529. pDestf[2] = flZ;
  530. pDest[3] = nStencil;
  531. pDest[4] = nDepthStencilBitDepth;
  532. m_pDataCursor += spacereqd;
  533. }
  534. //--------------------------------------------------------------------------------------------------
  535. // Init, Begin/EndScene. Flush and Finish, ResetRsxState
  536. //--------------------------------------------------------------------------------------------------
  537. inline void CGcmDrawState::Init()
  538. {
  539. // Initialize GCM state to defaults
  540. memset(this, 0, sizeof(CGcmDrawState));
  541. m_scissor.enabled = 1;
  542. m_viewZ[0] = 0.1;
  543. m_viewZ[1] = 1000.0f;
  544. m_blends[0] = CELL_GCM_ONE;
  545. m_blends[1] = CELL_GCM_ZERO;
  546. m_alphaFunc.func = CELL_GCM_ALWAYS;
  547. m_alphaFunc.ref = 0;
  548. m_stencilOp.fail = CELL_GCM_KEEP;
  549. m_stencilOp.dfail = CELL_GCM_KEEP;
  550. m_stencilOp.dpass = CELL_GCM_KEEP;
  551. m_stencilFunc.func = CELL_GCM_ALWAYS;
  552. m_stencilFunc.ref = 0;
  553. m_stencilFunc.mask = 0xFF;
  554. m_depthBias.factor = 0;
  555. m_depthBias.units = 0;
  556. m_userClipPlanesState = 0;
  557. m_shaderVxConstants = 0;
  558. // Init fixed sized data
  559. m_pFixed = &gFixedData[0];
  560. memset(m_pFixed->m_aSamplerIdx, 0xff, sizeof(m_pFixed->m_aSamplerIdx));
  561. m_pFixed->m_nSampler = 0;
  562. m_pFixed->m_nInstanced = 0;
  563. // Init variable sized data....
  564. m_pData = gPackData[0];
  565. m_pDataCursor = m_pData;
  566. }
  567. #ifndef SPU
  568. inline void CGcmDrawState::Init(IDirect3DDevice9Params *params)
  569. {
  570. for (int lp = 0; lp < GCM_DRAWSTATE_MAX; lp++)
  571. {
  572. CGcmDrawState *pGcmDrawState = &gGcmDrawState[lp];
  573. pGcmDrawState->Init();
  574. m_nBackBufferSize[0] = params->m_presentationParameters.BackBufferWidth;
  575. m_nBackBufferSize[1] = params->m_presentationParameters.BackBufferHeight;
  576. pGcmDrawState->m_pData = gPackData[lp];
  577. pGcmDrawState->m_pFixed = &gFixedData[lp];
  578. DrawScissor_t temp;
  579. temp.x = 0;
  580. temp.y = 0;
  581. temp.w = m_nBackBufferSize[0];
  582. temp.h = m_nBackBufferSize[1];
  583. SetScissorRect(&temp);
  584. }
  585. }
  586. #endif
  587. #ifndef SPU
  588. inline void CGcmDrawState::BeginScene()
  589. {
  590. // redundant: will lead to redundant disabling of all samplers at the beginning of the frame, even though they're disabled anyway after flip
  591. PackData(kDataBeginScene);
  592. SetRenderState(D3DRS_ZWRITEENABLE, 1); // CELL_GCM_TRUE
  593. if ( g_bZcullAuto )
  594. {
  595. PackData(kDataZcullStats);
  596. }
  597. PackData(kDataZcullLimit, g_nZcullMoveForward, g_nZcullPushBack);
  598. g_UPFrame = 0;
  599. }
  600. inline void CGcmDrawState::EndScene()
  601. {
  602. int nZcullDefault = g_nZcullDefault;
  603. // Update zcull settings based on metrics
  604. if ( g_bZcullAuto )
  605. {
  606. int nMaxSlope = cellGcmGetReport( CELL_GCM_ZCULL_STATS, GCM_REPORT_ZCULL_STATS_0 );
  607. int nSumSlope = cellGcmGetReport( CELL_GCM_ZCULL_STATS1, GCM_REPORT_ZCULL_STATS_1 );
  608. int nNumTiles, nAvgSlope;
  609. nNumTiles = nMaxSlope & 0xffff;
  610. nMaxSlope = ( nMaxSlope & 0xFFFF0000 ) >> 16;
  611. nAvgSlope = nNumTiles ? nSumSlope / nNumTiles : 0;
  612. g_nZcullMoveForward = ( nAvgSlope + nMaxSlope ) / 2;
  613. g_nZcullPushBack = g_nZcullMoveForward / 2;
  614. if ( g_nZcullMoveForward < 1 || g_nZcullPushBack < 1 )
  615. {
  616. // pick reasonable defaults in the failure case
  617. g_nZcullMoveForward = nZcullDefault;
  618. g_nZcullPushBack = nZcullDefault;
  619. }
  620. }
  621. else
  622. {
  623. g_nZcullMoveForward = nZcullDefault;
  624. g_nZcullPushBack = nZcullDefault;
  625. }
  626. // Msg("DrawPrimUP Frame %d\n", g_UPFrame);
  627. }
  628. inline void CGcmDrawState::CmdBufferFlush()
  629. {
  630. CellGcmControl volatile *control = cellGcmGetControlRegister();
  631. // Out-of-order write protection.
  632. // this needs to be sync, not eieio as command buffer is on main memory(which is cached)
  633. // but control registers are mapped as cache inhibited, eieio doesn't gurantee order
  634. // between cached and cache inhibited region
  635. #ifdef __SNC__
  636. __builtin_sync();
  637. #else
  638. __asm__ volatile("sync");
  639. #endif // __SNC__
  640. uint32_t offsetInBytes = (uint32)gpGcmContext->current - (uint32)g_ps3gcmGlobalState.m_pIoAddress;
  641. control->put = offsetInBytes;
  642. }
  643. inline void CGcmDrawState::CmdBufferFinish()
  644. {
  645. uint32 ref = g_ps3gcmGlobalState.m_finishIdx;
  646. GCM_FUNC(cellGcmSetReferenceCommand, ref);
  647. g_ps3gcmGlobalState.m_finishIdx ^=1;
  648. CmdBufferFlush();
  649. CellGcmControl volatile *control = cellGcmGetControlRegister();
  650. while( control->ref != ref )
  651. {
  652. // Don't be a ppu hog ;)
  653. sys_timer_usleep(30);
  654. }
  655. }
  656. #endif
  657. //--------------------------------------------------------------------------------------------------
  658. // Dynamic ECB management
  659. //--------------------------------------------------------------------------------------------------
  660. inline uint8* CGcmDrawState::OpenDynECB()
  661. {
  662. return &g_aDynECB[g_nDynECBIdx];
  663. }
  664. inline void CGcmDrawState::CloseDynECB(uint32 size)
  665. {
  666. g_nDynECBIdx += AlignValue(size,16);
  667. // If we don't have 8K left then wrap
  668. if (g_nDynECBIdx > (GCM_DS_MAXDYNECB - 0x2000))
  669. g_nDynECBIdx = 0;
  670. }
  671. //--------------------------------------------------------------------------------------------------
  672. // Resets RSX to default state
  673. //--------------------------------------------------------------------------------------------------
  674. inline void UnpackResetRsxState()
  675. {
  676. GCM_FUNC( cellGcmSetAlphaFunc, CELL_GCM_ALWAYS, 0);
  677. GCM_FUNC( cellGcmSetAlphaTestEnable, CELL_GCM_FALSE);
  678. GCM_FUNC( cellGcmSetBackStencilFunc, CELL_GCM_ALWAYS, 0, 0xff);
  679. GCM_FUNC( cellGcmSetBackStencilMask, 0xff);
  680. GCM_FUNC( cellGcmSetBackStencilOp, CELL_GCM_KEEP, CELL_GCM_KEEP, CELL_GCM_KEEP);
  681. GCM_FUNC( cellGcmSetBlendColor, 0, 0);
  682. GCM_FUNC( cellGcmSetBlendEnable, CELL_GCM_FALSE);
  683. GCM_FUNC( cellGcmSetBlendEnableMrt, CELL_GCM_FALSE, CELL_GCM_FALSE, CELL_GCM_FALSE);
  684. GCM_FUNC( cellGcmSetBlendEquation, CELL_GCM_FUNC_ADD, CELL_GCM_FUNC_ADD);
  685. GCM_FUNC( cellGcmSetBlendFunc, CELL_GCM_ONE, CELL_GCM_ZERO, CELL_GCM_ONE, CELL_GCM_ZERO);
  686. // GCM_FUNC( cellGcmSetClearDepthStencil, 0xffffff00);
  687. // GCM_FUNC( cellGcmSetClearSurface, 0);
  688. GCM_FUNC( cellGcmSetColorMask, CELL_GCM_COLOR_MASK_A|CELL_GCM_COLOR_MASK_R|CELL_GCM_COLOR_MASK_G|CELL_GCM_COLOR_MASK_B);
  689. GCM_FUNC( cellGcmSetCullFaceEnable, CELL_GCM_FALSE);
  690. GCM_FUNC( cellGcmSetCullFace, CELL_GCM_BACK);
  691. GCM_FUNC( cellGcmSetDepthBounds, 0.0f, 1.0f);
  692. GCM_FUNC( cellGcmSetDepthBoundsTestEnable, CELL_GCM_FALSE);
  693. GCM_FUNC( cellGcmSetDepthFunc, CELL_GCM_LESS);
  694. GCM_FUNC( cellGcmSetDepthMask, CELL_GCM_TRUE);
  695. GCM_FUNC( cellGcmSetDepthTestEnable, CELL_GCM_FALSE);
  696. GCM_FUNC( cellGcmSetDitherEnable, CELL_GCM_TRUE);
  697. GCM_FUNC( cellGcmSetFragmentProgramGammaEnable, CELL_GCM_FALSE);
  698. GCM_FUNC( cellGcmSetFrequencyDividerOperation, 0);
  699. GCM_FUNC( cellGcmSetFrontFace, CELL_GCM_CCW);
  700. GCM_FUNC( cellGcmSetLineWidth, 8); // fixed point [0:6:3]
  701. GCM_FUNC( cellGcmSetLogicOpEnable, CELL_GCM_FALSE);
  702. GCM_FUNC( cellGcmSetLogicOp, CELL_GCM_COPY);
  703. // GCM_FUNC( cellGcmSetNotifyIndex, -=something invalid=- ); // initial value is an invalid system reserved area
  704. GCM_FUNC( cellGcmSetPointSize, 1.0f);
  705. GCM_FUNC( cellGcmSetPolygonOffsetFillEnable, CELL_GCM_FALSE);
  706. GCM_FUNC( cellGcmSetPolygonOffset, 0.0f, 0.0f);
  707. GCM_FUNC( cellGcmSetRestartIndexEnable, CELL_GCM_FALSE);
  708. GCM_FUNC( cellGcmSetRestartIndex, 0xffffffff);
  709. GCM_FUNC( cellGcmSetScissor, 0,0,4096,4096);
  710. GCM_FUNC( cellGcmSetShadeMode, CELL_GCM_SMOOTH);
  711. GCM_FUNC( cellGcmSetStencilFunc, CELL_GCM_ALWAYS, 0, 0xff);
  712. GCM_FUNC( cellGcmSetStencilMask, 0xff);
  713. GCM_FUNC( cellGcmSetStencilOp, CELL_GCM_KEEP, CELL_GCM_KEEP, CELL_GCM_KEEP);
  714. GCM_FUNC( cellGcmSetStencilTestEnable, CELL_GCM_FALSE);
  715. for( uint nTextureSampler = 0; nTextureSampler < 16; ++nTextureSampler )
  716. {
  717. GCM_FUNC( cellGcmSetTextureAddress, nTextureSampler, CELL_GCM_TEXTURE_WRAP, CELL_GCM_TEXTURE_WRAP,
  718. CELL_GCM_TEXTURE_CLAMP_TO_EDGE, CELL_GCM_TEXTURE_UNSIGNED_REMAP_NORMAL,
  719. CELL_GCM_TEXTURE_ZFUNC_NEVER, 0);
  720. GCM_FUNC( cellGcmSetTextureBorderColor, nTextureSampler, 0);
  721. GCM_FUNC( cellGcmSetTextureControl, nTextureSampler, CELL_GCM_FALSE, 0, 12<<8, CELL_GCM_TEXTURE_MAX_ANISO_1);
  722. GCM_FUNC( cellGcmSetTextureFilter, nTextureSampler, 0, CELL_GCM_TEXTURE_NEAREST_LINEAR,
  723. CELL_GCM_TEXTURE_LINEAR, CELL_GCM_TEXTURE_CONVOLUTION_QUINCUNX);
  724. }
  725. for( uint nVertexAttribute = 0; nVertexAttribute < 16; ++nVertexAttribute )
  726. {
  727. GCM_FUNC( cellGcmSetVertexDataArray, nVertexAttribute, 0, 0, 0, CELL_GCM_VERTEX_F, CELL_GCM_LOCATION_LOCAL, 0);
  728. }
  729. GCM_FUNC( cellGcmSetTwoSidedStencilTestEnable, CELL_GCM_FALSE);
  730. float scale[4] = {2048.0f, 2048.0f, 0.5f, 0.0f};
  731. float offset[4] = {2048.0f, 2048.0f, 0.5f, 0.0f};
  732. GCM_FUNC( cellGcmSetViewport, 0, 0, 4096, 4096, 0.0f, 1.0f, scale, offset);
  733. GCM_FUNC( cellGcmSetZcullStatsEnable, CELL_GCM_FALSE);
  734. GCM_FUNC( cellGcmSetAntiAliasingControl, CELL_GCM_FALSE, CELL_GCM_FALSE, CELL_GCM_FALSE, 0xffff);
  735. GCM_FUNC( cellGcmSetBackPolygonMode, CELL_GCM_POLYGON_MODE_FILL);
  736. GCM_FUNC( cellGcmSetClearColor, 0);
  737. GCM_FUNC( cellGcmSetColorMaskMrt, 0);
  738. GCM_FUNC( cellGcmSetFrontPolygonMode, CELL_GCM_POLYGON_MODE_FILL);
  739. GCM_FUNC( cellGcmSetLineSmoothEnable, CELL_GCM_FALSE);
  740. GCM_FUNC( cellGcmSetLineStippleEnable, CELL_GCM_FALSE);
  741. GCM_FUNC( cellGcmSetPointSpriteControl, CELL_GCM_FALSE, 0, 0);
  742. GCM_FUNC( cellGcmSetPolySmoothEnable, CELL_GCM_FALSE);
  743. GCM_FUNC( cellGcmSetPolygonStippleEnable, CELL_GCM_FALSE);
  744. GCM_FUNC( cellGcmSetRenderEnable, CELL_GCM_TRUE, 0);
  745. GCM_FUNC( cellGcmSetUserClipPlaneControl, CELL_GCM_FALSE,CELL_GCM_FALSE,CELL_GCM_FALSE,CELL_GCM_FALSE,CELL_GCM_FALSE,CELL_GCM_FALSE);
  746. GCM_FUNC( cellGcmSetVertexAttribInputMask, 0xffff);
  747. GCM_FUNC( cellGcmSetZpassPixelCountEnable, CELL_GCM_FALSE);
  748. for( uint i = 0; i < 4 ; ++i )
  749. {
  750. GCM_FUNC( cellGcmSetVertexTextureAddress, i, CELL_GCM_TEXTURE_WRAP, CELL_GCM_TEXTURE_WRAP);
  751. GCM_FUNC( cellGcmSetVertexTextureBorderColor, i, 0);
  752. GCM_FUNC( cellGcmSetVertexTextureControl, i, CELL_GCM_FALSE, 0, 12<<8);
  753. GCM_FUNC( cellGcmSetVertexTextureFilter, i, 0);
  754. }
  755. GCM_FUNC( cellGcmSetTransformBranchBits, CELL_GCM_FALSE);
  756. GCM_FUNC( cellGcmSetTwoSideLightEnable, CELL_GCM_FALSE);
  757. GCM_FUNC( cellGcmSetZMinMaxControl, CELL_GCM_TRUE, CELL_GCM_FALSE, CELL_GCM_FALSE);
  758. // GCM_FUNC( cellGcmSetTextureOptimization, 1<<3); --<sergiy>-- who cares? this won't compile the way it's described in documentation.
  759. // GCM_FUNC( cellGcmSetCylindricalWrap, CELL_GCM_FALSE); --<sergiy>-- who cares? this won't compile the way it's described in documentation.
  760. GCM_FUNC( cellGcmSetTwoSideLightEnable, CELL_GCM_FALSE);
  761. GCM_FUNC( cellGcmSetTransformBranchBits, 0);
  762. GCM_FUNC( cellGcmSetVertexDataBase, 0,0);
  763. // --<sergiy>-- I don't wanna set the surface to the default surface that we never use, as it generates unneeded stall in RSX
  764. /*
  765. CellGcmSurface surface = {
  766. CELL_GCM_SURFACE_PITCH, // type
  767. CELL_GCM_SURFACE_CENTER_1, // antialias
  768. CELL_GCM_SURFACE_X1R5G5B5_Z1R5G5B5,// colorFormat
  769. CELL_GCM_SURFACE_TARGET_0, // colorTarget
  770. {0, 0, 0, 0}, // colorLocation
  771. {0, 0, 0, 0}, // colorOffset
  772. {64, 64, 64, 64}, // colorPitch
  773. CELL_GCM_SURFACE_Z16, // depthFormat
  774. CELL_GCM_LOCATION_LOCAL, // depthLocation
  775. {0,0}, // __padding
  776. 0, // depthOffset
  777. 64, // depthPitch
  778. 1,1, // width,height
  779. 0,0 // x,y
  780. };
  781. GCM_FUNC( cellGcmSetSurface, &surface);
  782. */
  783. // After ^this, the cached vertex array data is worthless....
  784. for( uint i = 0; i < D3D_MAX_STREAMS; ++i )
  785. g_cacheSetVertexDataArray[i].SetNull();
  786. }
  787. inline void CGcmDrawState::ResetRsxState()
  788. {
  789. m_dirtyStatesMask |= kDirtyResetRsx;
  790. }
  791. //--------------------------------------------------------------------------------------------------
  792. // Viewport and scissor
  793. //--------------------------------------------------------------------------------------------------
  794. inline void CGcmDrawState::UnpackSetViewport(CONST D3DVIEWPORT9* pViewport)
  795. {
  796. m_viewZ[0] = pViewport->MinZ;
  797. m_viewZ[1] = pViewport->MaxZ;
  798. m_viewportSize[0] = pViewport->X;
  799. m_viewportSize[1] = pViewport->Y;
  800. m_viewportSize[2] = pViewport->Width;
  801. m_viewportSize[3] = pViewport->Height;
  802. float viewScale[4] = { m_viewportSize[2]/2, m_viewportSize[3]/2,
  803. ( m_viewZ[1] - m_viewZ[0] ) / 2.0f, 0.0f };
  804. float viewOffset[4] = { m_viewportSize[0] + m_viewportSize[2]/2, m_viewportSize[1] + m_viewportSize[3]/2,
  805. ( m_viewZ[1] + m_viewZ[0] ) / 2.0f, 0.0f };
  806. GCM_FUNC ( cellGcmSetViewport, m_viewportSize[0], m_viewportSize[1],
  807. m_viewportSize[2], m_viewportSize[3],
  808. m_viewZ[0], m_viewZ[1],
  809. viewScale, viewOffset );
  810. }
  811. inline HRESULT CGcmDrawState::SetViewport(CONST D3DVIEWPORT9* pViewport)
  812. {
  813. PackData(kDataViewport, sizeof(D3DVIEWPORT9), (void*)pViewport);
  814. return S_OK;
  815. }
  816. inline void CGcmDrawState::UnpackSetScissorRect( DrawScissor_t * pScissor )
  817. {
  818. m_scissor.x = pScissor->x;
  819. m_scissor.y = pScissor->y;
  820. m_scissor.w = pScissor->w;
  821. m_scissor.h = pScissor->h;
  822. m_dirtyStatesMask |= kDirtyScissor;
  823. }
  824. inline void CGcmDrawState::SetScissorRect( DrawScissor_t * pScissor )
  825. {
  826. PackData(kDataScissor, sizeof(DrawScissor_t), pScissor);
  827. }
  828. //--------------------------------------------------------------------------------------------------
  829. // Reports, Zpass and labels
  830. //--------------------------------------------------------------------------------------------------
  831. inline void UnpackSetZpassPixelCountEnable(uint32 enable)
  832. {
  833. GCM_FUNC(cellGcmSetZpassPixelCountEnable, enable);
  834. }
  835. inline void UnpackSetClearReport(uint32 type)
  836. {
  837. GCM_FUNC(cellGcmSetClearReport, type);
  838. }
  839. inline void UnpackSetReport(uint32 type, uint32 index)
  840. {
  841. GCM_FUNC(cellGcmSetReport, type, index);
  842. }
  843. inline void UnpackSetWriteBackEndLabel(uint8 index, uint32 value)
  844. {
  845. GCM_FUNC(cellGcmSetWriteBackEndLabel, index, value);
  846. }
  847. inline void CGcmDrawState::SetZpassPixelCountEnable(uint32 enable)
  848. {
  849. PackData(kDataSetZpassPixelCountEnable, enable);
  850. }
  851. inline void CGcmDrawState::SetClearReport(uint32 type)
  852. {
  853. PackData(kDataSetClearReport, type);
  854. }
  855. inline void CGcmDrawState::SetReport(uint32 type, uint32 index)
  856. {
  857. PackData(kDataSetReport, type, index);
  858. }
  859. inline void CGcmDrawState::SetWriteBackEndLabel(uint8 index, uint32 value)
  860. {
  861. if (index == GCM_LABEL_MEMORY_FREE)
  862. {
  863. m_nFreeLabel = value; // 0 is not valid...
  864. }
  865. else
  866. {
  867. PackData(kDataSetWriteBackEndLabel, index, value);
  868. }
  869. }
  870. //--------------------------------------------------------------------------------------------------
  871. // Renderstates
  872. //--------------------------------------------------------------------------------------------------
  873. inline void CGcmDrawState::UnpackSetRenderState( D3DRENDERSTATETYPE State, uint Value )
  874. {
  875. char ignored = 0;
  876. Assert( State < D3DRS_VALUE_LIMIT );
  877. uint nDefvalueIndex = g_d3drs_defvalue_indices[State];
  878. uint8 nClass = nDefvalueIndex >> 6;
  879. #ifdef DBGFLAG_ASSERT
  880. nDefvalueIndex &= 0077;
  881. Assert( nDefvalueIndex < ARRAYSIZE( g_d3drs_defvalues ) );
  882. uint32 nDefValue = g_d3drs_defvalues[nDefvalueIndex];
  883. #endif
  884. switch( nClass )
  885. {
  886. case 0: // just ignore quietly. example: D3DRS_LIGHTING
  887. ignored = 1;
  888. break;
  889. case 1:
  890. {
  891. // no GL response - and no error as long as the write value matches the default
  892. Assert( Value == nDefValue );
  893. }
  894. break;
  895. case 2:
  896. // provide GL response, but only support known default value
  897. Assert( Value == nDefValue );
  898. // fall through to mode 3
  899. case 3:
  900. // full GL response, support any legal value
  901. // note we're handling the class-2's as well.
  902. switch( State )
  903. {
  904. default:
  905. Msg( "Cannot interpret State %d", (int)State );
  906. break;
  907. case D3DRS_ZENABLE: // kGLDepthTestEnable
  908. m_ZEnable = !!Value;
  909. m_dirtyStatesMask |= kDirtyZEnable;
  910. break;
  911. case D3DRS_ZWRITEENABLE: // kGLDepthMask
  912. {
  913. uint32 newMask = Value ? 1 : 0;
  914. if(m_nSetDepthMask != newMask)
  915. {
  916. m_nSetDepthMask = newMask;
  917. m_dirtyStatesMask |= kDirtyDepthMask;
  918. }
  919. }
  920. break;
  921. case D3DRS_ZFUNC:
  922. {
  923. // kGLDepthFunc
  924. m_ZFunc = D3DCompareFuncToGL( Value );
  925. m_dirtyStatesMask |= kDirtyZFunc;
  926. }
  927. break;
  928. case D3DRS_COLORWRITEENABLE: // kGLColorMaskSingle
  929. if( IsLayerRender() )
  930. {
  931. m_ColorWriteEnable = ( ((Value & D3DCOLORWRITEENABLE_RED) != 0) ? CELL_GCM_COLOR_MASK_R : 0x00 )
  932. | ( ((Value & D3DCOLORWRITEENABLE_GREEN) != 0) ? CELL_GCM_COLOR_MASK_G : 0x00 )
  933. | ( ((Value & D3DCOLORWRITEENABLE_BLUE) != 0) ? CELL_GCM_COLOR_MASK_B : 0x00 )
  934. | ( ((Value & D3DCOLORWRITEENABLE_ALPHA) != 0) ? CELL_GCM_COLOR_MASK_A : 0x00 );
  935. m_dirtyStatesMask |= kDirtyColorWriteEnable;
  936. }
  937. break;
  938. case D3DRS_COLORWRITEENABLE1: // kGLColorMaskMultiple
  939. case D3DRS_COLORWRITEENABLE2: // kGLColorMaskMultiple
  940. case D3DRS_COLORWRITEENABLE3: // kGLColorMaskMultiple
  941. ignored = 1;
  942. break;
  943. case D3DRS_CULLMODE: // kGLCullFaceEnable / kGLCullFrontFace
  944. {
  945. m_CullMode = Value;
  946. m_dirtyStatesMask |= kDirtyCullMode;
  947. }
  948. break;
  949. //-------------------------------------------------------------------------------------------- alphablend stuff
  950. case D3DRS_ALPHABLENDENABLE: // kGLBlendEnable
  951. if( IsLayerRender() )
  952. m_AlphablendEnable = !!Value;
  953. m_dirtyStatesMask |= kDirtyAlphablendEnable;
  954. break;
  955. case D3DRS_BLENDOP: // kGLBlendEquation // D3D blend-op ==> GL blend equation
  956. if( IsLayerRender() )
  957. {
  958. m_BlendOp = Value;
  959. m_dirtyStatesMask |= kDirtyBlendOp;
  960. }
  961. break;
  962. case D3DRS_SRCBLEND: // kGLBlendFactor // D3D blend-factor ==> GL blend factor
  963. case D3DRS_DESTBLEND: // kGLBlendFactor
  964. {
  965. uint16 factor = D3DBlendFactorToGL( Value );
  966. m_blends[!( State == D3DRS_SRCBLEND )] = factor;
  967. m_dirtyStatesMask |= kDirtyBlendFactor;
  968. }
  969. break;
  970. case D3DRS_SEPARATEALPHABLENDENABLE:
  971. case D3DRS_BLENDOPALPHA:
  972. case D3DRS_SRCBLENDALPHA:
  973. case D3DRS_DESTBLENDALPHA:
  974. ignored = 1;
  975. break;
  976. case D3DRS_SRGBWRITEENABLE: // kGLBlendEnableSRGB
  977. if( IsLayerRender() )
  978. {
  979. m_SrgbWriteEnable = Value;
  980. m_dirtyStatesMask |= kDirtySrgbWriteEnable;
  981. }
  982. break;
  983. //-------------------------------------------------------------------------------------------- alphatest stuff
  984. case D3DRS_ALPHATESTENABLE:
  985. m_AlphaTestEnable = Value;
  986. m_dirtyStatesMask |= kDirtyAlphaTestEnable;
  987. break;
  988. case D3DRS_ALPHAREF:
  989. m_alphaFunc.ref = Value;
  990. m_dirtyStatesMask |= kDirtyAlphaFunc;
  991. break;
  992. case D3DRS_ALPHAFUNC:
  993. {
  994. uint32 func = D3DCompareFuncToGL( Value );
  995. m_alphaFunc.func = func;
  996. m_dirtyStatesMask |= kDirtyAlphaFunc;
  997. }
  998. break;
  999. //-------------------------------------------------------------------------------------------- stencil stuff
  1000. case D3DRS_STENCILENABLE: // GLStencilTestEnable_t
  1001. m_StencilEnable = Value;
  1002. m_dirtyStatesMask |= kDirtyStencilEnable;
  1003. break;
  1004. case D3DRS_STENCILFAIL: // GLStencilOp_t "what do you do if stencil test fails"
  1005. {
  1006. m_stencilOp.fail = dxtogl_stencilmode[Value];
  1007. m_dirtyStatesMask |= kDirtyStencilOp;
  1008. }
  1009. break;
  1010. case D3DRS_STENCILZFAIL: // GLStencilOp_t "what do you do if stencil test passes *but* depth test fails, if depth test happened"
  1011. {
  1012. m_stencilOp.dfail = dxtogl_stencilmode[Value];
  1013. m_dirtyStatesMask |= kDirtyStencilOp;
  1014. }
  1015. break;
  1016. case D3DRS_STENCILPASS: // GLStencilOp_t "what do you do if stencil test and depth test both pass"
  1017. {
  1018. m_stencilOp.dpass = dxtogl_stencilmode[Value];
  1019. m_dirtyStatesMask |= kDirtyStencilOp;
  1020. }
  1021. break;
  1022. case D3DRS_STENCILFUNC: // GLStencilFunc_t
  1023. {
  1024. uint32 stencilfunc = D3DCompareFuncToGL( Value );
  1025. m_stencilFunc.func = stencilfunc;
  1026. m_dirtyStatesMask |= kDirtyStencilFunc;
  1027. }
  1028. break;
  1029. case D3DRS_STENCILREF: // GLStencilFunc_t
  1030. m_stencilFunc.ref = (Value & 0xFF);
  1031. m_dirtyStatesMask |= kDirtyStencilFunc;
  1032. break;
  1033. case D3DRS_STENCILMASK: // GLStencilFunc_t
  1034. {
  1035. m_stencilFunc.mask = (Value & 0xFF);
  1036. m_dirtyStatesMask |= kDirtyStencilFunc;
  1037. }
  1038. break;
  1039. case D3DRS_STENCILWRITEMASK: // GLStencilWriteMask_t
  1040. {
  1041. //if (Value==255)
  1042. //{
  1043. // Value = 0xFFFFFFFF; // mask blast
  1044. //}
  1045. m_StencilWriteMask = Value;
  1046. m_dirtyStatesMask |= kDirtyStencilWriteMask;
  1047. }
  1048. break;
  1049. //-------------------------------------------------------------------------------------------- two-sided stencil stuff
  1050. case D3DRS_TWOSIDEDSTENCILMODE: // -> GL_STENCIL_TEST_TWO_SIDE_EXT... not yet implemented ?
  1051. case D3DRS_CCW_STENCILFAIL: // GLStencilOp_t
  1052. case D3DRS_CCW_STENCILZFAIL: // GLStencilOp_t
  1053. case D3DRS_CCW_STENCILPASS: // GLStencilOp_t
  1054. case D3DRS_CCW_STENCILFUNC: // GLStencilFunc_t
  1055. ignored = 1;
  1056. break;
  1057. case D3DRS_FOGENABLE: // none of these are implemented yet... erk
  1058. case D3DRS_FOGCOLOR:
  1059. case D3DRS_FOGTABLEMODE:
  1060. case D3DRS_FOGSTART:
  1061. case D3DRS_FOGEND:
  1062. case D3DRS_FOGDENSITY:
  1063. case D3DRS_RANGEFOGENABLE:
  1064. case D3DRS_FOGVERTEXMODE:
  1065. ignored = 1;
  1066. break;
  1067. case D3DRS_MULTISAMPLEANTIALIAS:
  1068. case D3DRS_MULTISAMPLEMASK:
  1069. ignored = 1;
  1070. break;
  1071. case D3DRS_SCISSORTESTENABLE: // kGLScissorEnable
  1072. {
  1073. m_scissor.enabled = !!Value;
  1074. m_dirtyStatesMask |= kDirtyScissor;
  1075. }
  1076. break;
  1077. case D3DRS_DEPTHBIAS: // kGLDepthBias
  1078. {
  1079. // the value in the dword is actually a float
  1080. m_depthBias.units = Value;
  1081. m_dirtyStatesMask |= kDirtyDepthBias;
  1082. }
  1083. break;
  1084. // good ref on these: http://aras-p.info/blog/2008/06/12/depth-bias-and-the-power-of-deceiving-yourself/
  1085. case D3DRS_SLOPESCALEDEPTHBIAS:
  1086. {
  1087. // the value in the dword is actually a float
  1088. m_depthBias.factor = Value;
  1089. m_dirtyStatesMask |= kDirtyDepthBias;
  1090. }
  1091. break;
  1092. case D3DRS_CLIPPING: // ???? is clipping ever turned off ??
  1093. ignored = 1;
  1094. break;
  1095. case D3DRS_CLIPPLANEENABLE: // kGLClipPlaneEnable
  1096. {
  1097. m_userClipPlanesState = 0;
  1098. for ( uint32 j = 0, uiValueMask = 1, uiClipSetMask = CELL_GCM_USER_CLIP_PLANE_ENABLE_GE;
  1099. j < 6; ++ j, uiValueMask <<= 1, uiClipSetMask <<= 2 )
  1100. {
  1101. m_userClipPlanesState |= ( ( Value & uiValueMask ) != 0 ) ? uiClipSetMask : 0;
  1102. }
  1103. m_dirtyCachesMask |= kDirtyClipPlanes;
  1104. }
  1105. break;
  1106. //-------------------------------------------------------------------------------------------- polygon/fill mode
  1107. case D3DRS_FILLMODE:
  1108. m_FillMode = Value;
  1109. m_dirtyStatesMask |= kDirtyFillMode;
  1110. break;
  1111. }
  1112. break;
  1113. }
  1114. }
  1115. inline void CGcmDrawState::SetRenderState( D3DRENDERSTATETYPE State, uint Value )
  1116. {
  1117. PackData(kDataSetRenderState, State, Value);
  1118. }
  1119. //--------------------------------------------------------------------------------------------------
  1120. // Texture samplers, textures, texture cache
  1121. //--------------------------------------------------------------------------------------------------
  1122. inline void CGcmDrawState::SetSamplerState( uint Sampler,D3DSAMPLERSTATETYPE Type,DWORD Value )
  1123. {
  1124. #ifndef CERT
  1125. if (Sampler>=D3D_MAX_SAMPLERS) Error("Invalid sampler %d, PS3 suppoerts %d\n", Sampler, D3D_MAX_SAMPLERS );
  1126. #endif
  1127. // indirect sampler index
  1128. uint32 SamplerIdx = m_pFixed->m_aSamplerIdx[Sampler];
  1129. if (SamplerIdx == 0xFF)
  1130. {
  1131. SamplerIdx = m_pFixed->m_nSampler;
  1132. m_pFixed->m_nSampler++;
  1133. m_pFixed->m_aSamplerIdx[Sampler] = SamplerIdx;
  1134. }
  1135. // the D3D-to-GL translation has been moved to CommitSamplers since we want to do it at draw time
  1136. // so this call just stuffs values in slots.
  1137. D3DSamplerDesc *samp = m_pFixed->m_aSamplers + SamplerIdx;
  1138. switch( Type )
  1139. {
  1140. // addressing modes can be
  1141. // D3DTADDRESS_WRAP Tile the texture at every integer junction.
  1142. // D3DTADDRESS_MIRROR Similar to D3DTADDRESS_WRAP, except that the texture is flipped at every integer junction.
  1143. // D3DTADDRESS_CLAMP Texture coordinates outside the range [0.0, 1.0] are set to the texture color at 0.0 or 1.0, respectively.
  1144. // D3DTADDRESS_BORDER Texture coordinates outside the range [0.0, 1.0] are set to the border color.
  1145. // D3DTADDRESS_MIRRORONCE Similar to D3DTADDRESS_MIRROR and D3DTADDRESS_CLAMP.
  1146. // Takes the absolute value of the texture coordinate (thus, mirroring around 0),
  1147. // and then clamps to the maximum value. The most common usage is for volume textures,
  1148. // where support for the full D3DTADDRESS_MIRRORONCE texture-addressing mode is not
  1149. // necessary, but the data is symmetric around the one axis.
  1150. case D3DSAMP_ADDRESSU:
  1151. samp->m_addressModeU = Value;
  1152. break;
  1153. case D3DSAMP_ADDRESSV:
  1154. samp->m_addressModeV = Value;
  1155. break;
  1156. case D3DSAMP_ADDRESSW:
  1157. samp->m_addressModeW = Value;
  1158. break;
  1159. case D3DSAMP_BORDERCOLOR:
  1160. // samp->m_borderColor = Value; // Border color always 0
  1161. break;
  1162. case D3DSAMP_MAGFILTER: samp->m_magFilter = (D3DTEXTUREFILTERTYPE)Value; break;
  1163. case D3DSAMP_MINFILTER: samp->m_minFilter = (D3DTEXTUREFILTERTYPE)Value; break;
  1164. case D3DSAMP_MIPFILTER: samp->m_mipFilter = (D3DTEXTUREFILTERTYPE)Value; break;
  1165. case D3DSAMP_MIPMAPLODBIAS: samp->m_mipmapBias = Value; break; // float in sheep's clothing - check this one out
  1166. case D3DSAMP_MAXMIPLEVEL: samp->m_maxMipLevel = Value; break; //FIXME (unsure here)
  1167. case D3DSAMP_MAXANISOTROPY: samp->m_maxAniso = Value; break;
  1168. case D3DSAMP_SRGBTEXTURE: samp->m_srgb = Value; break;
  1169. case D3DSAMP_SHADOWFILTER: samp->m_shadowFilter = Value; break;
  1170. default:
  1171. Msg( "Unknown sampler parameter" );
  1172. DebuggerBreak();
  1173. break;
  1174. }
  1175. m_dirtySamplersMask |= ( 1 << Sampler );
  1176. }
  1177. inline void CGcmDrawState::UnpackSetTexture( DWORD Stage, uint32 offset, uint32 eaLayout )
  1178. {
  1179. // texture sets are finalized in CommitSamplers
  1180. m_textures[Stage].m_nLocalOffset = offset;
  1181. m_textures[Stage].m_eaLayout = eaLayout;
  1182. m_dirtySamplersMask |= ( 1 << Stage );
  1183. }
  1184. inline void CGcmDrawState::UnpackResetTexture( DWORD Stage )
  1185. {
  1186. // texture sets are finalized in CommitSamplers
  1187. m_textures[Stage].Reset();
  1188. m_dirtySamplersMask |= ( 1 << Stage );
  1189. }
  1190. inline void CGcmDrawState::SetTexture( DWORD Stage, CPs3gcmTexture *tex )
  1191. {
  1192. m_textures[Stage].Assign(tex);
  1193. if (tex->m_lmBlock.IsLocalMemory() )
  1194. {
  1195. m_textures[Stage].m_nLocalOffset |= 1;
  1196. }
  1197. PackData(kDataTexture, Stage, m_textures[Stage].m_nLocalOffset, m_textures[Stage].m_eaLayout );
  1198. }
  1199. inline void CGcmDrawState::ResetTexture( DWORD Stage )
  1200. {
  1201. PackData(kDataResetTexture, Stage);
  1202. }
  1203. inline void UnpackSetInvalidateTextureCache()
  1204. {
  1205. GCM_FUNC( cellGcmSetInvalidateTextureCache, CELL_GCM_INVALIDATE_TEXTURE );
  1206. }
  1207. inline void CGcmDrawState::SetInvalidateTextureCache()
  1208. {
  1209. m_dirtyCachesMask |= kDirtyTxCache;
  1210. }
  1211. //--------------------------------------------------------------------------------------------------
// Vertex buffers, vertex cache, vertex constants
  1213. //--------------------------------------------------------------------------------------------------
  1214. #ifndef SPU
  1215. inline void CGcmDrawState::SetVertexStreamSource( uint nStreamIndex, IDirect3DVertexBuffer9* pStreamData,UINT OffsetInBytes,UINT Stride )
  1216. {
  1217. // SNPROF("CGcmDrawState::SetVertexStreamSource( uint nStreamIndex, IDirect3DVertexBuffer9* pStreamData,UINT OffsetInBytes,UINT Stride )");
  1218. // Write stream descriptor into variable data
  1219. #ifdef GCM_DS_SAFE
  1220. uint32 spacereqd = sizeof(D3DStreamDesc) + sizeof(DrawData);
  1221. uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData ));
  1222. if(spacereqd > spaceleft) Error("Out of per draw call data\n");
  1223. #endif
  1224. DrawData* pData = (DrawData*) m_pDataCursor;
  1225. pData->m_type = kDataStreamDesc;
  1226. pData->m_size = sizeof(D3DStreamDesc);
  1227. pData->m_idx = nStreamIndex;
  1228. D3DStreamDesc* pDsd = (D3DStreamDesc*)(pData+1);
  1229. if ( pStreamData && pStreamData->m_pBuffer )
  1230. {
  1231. // we pass this pointer as a BufferBase later to compare, so we need to make sure they're binarily the same
  1232. Assert( uintp( pStreamData ) == uintp( static_cast<IDirect3DGcmBufferBase*>( pStreamData ) ) );
  1233. pDsd->m_offset = OffsetInBytes;
  1234. pDsd->m_stride = Stride;
  1235. pDsd->m_vtxBuffer = pStreamData;
  1236. pDsd->m_nLocalBufferOffset = pStreamData->m_pBuffer->Offset();
  1237. }
  1238. else
  1239. {
  1240. V_memset(pDsd, 0, sizeof( *pDsd ) );
  1241. }
  1242. m_pDataCursor = (uint8*)pDsd + sizeof(D3DStreamDesc);
  1243. }
  1244. #endif
  1245. inline void CGcmDrawState::_SetVertexShaderConstantB( UINT StartRegister, uint BoolCount, uint shaderVxConstants )
  1246. {
  1247. uint nMask = ( 1 << ( StartRegister + BoolCount ) ) - ( 1 << StartRegister ) ;
  1248. m_shaderVxConstants &= ~nMask;
  1249. m_shaderVxConstants |= shaderVxConstants;
  1250. m_dirtyCachesMask |= kDirtyVxConstants;
  1251. }
  1252. inline void CGcmDrawState::SetVertexShaderConstantB(UINT StartRegister,CONST BOOL* pConstantData,UINT BoolCount)
  1253. {
  1254. uint shaderVxConstants = 0;
  1255. for ( uint32 k = MIN( StartRegister, 32 ), kEnd = MIN( StartRegister + BoolCount, 32 ),
  1256. uiConstantBit = ( 1 << StartRegister ), uiDataIdx = 0;
  1257. k < kEnd; ++ k, uiConstantBit <<= 1, ++ uiDataIdx )
  1258. {
  1259. if( pConstantData[ uiDataIdx ] )
  1260. {
  1261. shaderVxConstants |= uiConstantBit;
  1262. }
  1263. }
  1264. _SetVertexShaderConstantB( StartRegister, BoolCount, shaderVxConstants );
  1265. }
  1266. // inline void CGcmDrawState::VertexConstantExtractor(
  1267. // float *pDestStorage, int kRegisterFirst, int kRegisterLength,
  1268. // int StartRegister, const float *pConstantData, int Vector4fCount )
  1269. // {
  1270. // int iMatrixRegister = Max<int>( 0, StartRegister - kRegisterFirst ); // which part of matrix is updated
  1271. // int iConstantDataMatrixStart = Max<int>( StartRegister, kRegisterFirst ); // where in constant data the new values start
  1272. // int numMatrixRegisters = StartRegister + Vector4fCount - iConstantDataMatrixStart; // how many new values can be used
  1273. // numMatrixRegisters = Min<int>( numMatrixRegisters, kRegisterLength - iMatrixRegister ); // we shouldn't use more values than there's room in the matrix
  1274. // if ( numMatrixRegisters > 0 )
  1275. // {
  1276. // iConstantDataMatrixStart -= StartRegister; // constant data values are relative to StartRegister
  1277. // V_memcpy( &pDestStorage[ iMatrixRegister * 4 ], &pConstantData[ iConstantDataMatrixStart * 4 ], numMatrixRegisters * 4 * sizeof( float ) );
  1278. // }
  1279. // }
  1280. inline void CGcmDrawState::SetVertexShaderConstantF( UINT StartRegister, void* pUnalignedConstantData, UINT Vector4fCount )
  1281. {
  1282. // SNPROF("CGcmDrawState::SetVertexShaderConstantF( UINT StartRegister, void* pUnalignedConstantData, UINT Vector4fCount )");
  1283. // // Intercept the vertex constants affecting model-view-projection [ registers C8,C9,C10,C11 ]
  1284. // VertexConstantExtractor( m_matViewProjection, 8, 4, StartRegister, pConstantData, Vector4fCount );
  1285. // // Intercept the vertex constants affecting model matrix [ registers C58,C59,C60 ]
  1286. // VertexConstantExtractor( m_matModel, 58, 3, StartRegister, pConstantData, Vector4fCount );
  1287. uint32 spacereqd = (Vector4fCount*sizeof(vec_float4)) + sizeof(DrawData);
  1288. #ifdef GCM_DS_SAFE
  1289. uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData ));
  1290. if(spacereqd > spaceleft) Error("Out of per draw call data\n");
  1291. #endif
  1292. DrawData* pDest = (DrawData*)m_pDataCursor;
  1293. uint8* pVals = (uint8*)(pDest+1);
  1294. pDest->m_type = kDataVpuConsts;
  1295. pDest->m_size = Vector4fCount * sizeof(vec_float4);
  1296. pDest->m_idx = StartRegister;
  1297. V_memcpy(pVals, pUnalignedConstantData, Vector4fCount * sizeof(vec_float4));
  1298. m_pDataCursor += spacereqd;
  1299. }
  1300. inline void UnpackSetInvalidateVertexCache()
  1301. {
  1302. GCM_FUNC( cellGcmSetInvalidateVertexCache );
  1303. }
  1304. inline void CGcmDrawState::SetInvalidateVertexCache()
  1305. {
  1306. m_dirtyCachesMask |= kDirtyVxCache;
  1307. }
  1308. inline void CGcmDrawState::UnpackUpdateVtxBufferOffset( IDirect3DVertexBuffer9 * vtxBuffer, uint nLocalBufferOffset )
  1309. {
  1310. for( uint i = 0; i < D3D_MAX_STREAMS; ++i )
  1311. {
  1312. if( g_dxGcmVertexStreamSources[i].m_vtxBuffer == vtxBuffer )
  1313. {
  1314. g_dxGcmVertexStreamSources[i].m_nLocalBufferOffset = nLocalBufferOffset; // new local buffer offset
  1315. }
  1316. }
  1317. }
  1318. inline void CGcmDrawState::UpdateVtxBufferOffset( IDirect3DVertexBuffer9 * vtxBuffer, uint nLocalBufferOffset )
  1319. {
  1320. PackData(kDataUpdateVtxBufferOffset, (uint32)vtxBuffer, nLocalBufferOffset);
  1321. }
  1322. //--------------------------------------------------------------------------------------------------
  1323. // Pixel Shader Consts
  1324. //--------------------------------------------------------------------------------------------------
  1325. inline void CGcmDrawState::SetPixelShaderConstantF(uint32 StartRegister, float* pConstantData, uint32 Vector4fCount)
  1326. {
  1327. // SNPROF("CGcmDrawState::SetPixelShaderConstantF(uint32 StartRegister, float* pConstantData, uint32 Vector4fCount)");
  1328. m_dirtyCachesMask |= CGcmDrawState::kDirtyPxConstants;
  1329. uint32 spacereqd = (Vector4fCount*sizeof(vec_float4)) + sizeof(DrawData);
  1330. #ifdef GCM_DS_SAFE
  1331. uint32 spaceleft = (GCM_DS_MAXDATAPERDRAWCALL - (m_pDataCursor - m_pData ));
  1332. if(spacereqd > spaceleft) Error("Out of per draw call data\n");
  1333. #endif
  1334. DrawData* pDest = (DrawData*)m_pDataCursor;
  1335. uint8* pVals = (uint8*)(pDest+1);
  1336. pDest->m_type = kDataFpuConsts;
  1337. pDest->m_size = Vector4fCount * sizeof(vec_float4);
  1338. pDest->m_idx = StartRegister;
  1339. V_memcpy(pVals, pConstantData, Vector4fCount * sizeof(vec_float4));
  1340. m_pDataCursor += spacereqd;
  1341. }
  1342. inline void CGcmDrawState::UnpackSetWorldSpaceCameraPosition(float* pWCP)
  1343. {
  1344. memcpy(m_vecWorldSpaceCameraPosition, pWCP, sizeof(m_vecWorldSpaceCameraPosition));
  1345. }
  1346. inline void CGcmDrawState::SetWorldSpaceCameraPosition(float* pWCP)
  1347. {
  1348. PackData(kDataSetWorldSpaceCameraPosition, (uint16)sizeof(m_vecWorldSpaceCameraPosition), (void*)pWCP);
  1349. }
  1350. //--------------------------------------------------------------------------------------------------
// Surfaces and render targets
  1352. //--------------------------------------------------------------------------------------------------
  1353. inline void CGcmDrawState::Ps3Helper_UpdateSurface( UpdateSurface_t * pSurface )
  1354. {
  1355. const CPs3gcmTextureData_t &texC = pSurface->m_texC, &texZ = pSurface->m_texZ;
  1356. const CPs3gcmTextureData_t *pTexCZ = &texC;
  1357. CPs3gcmTextureLayout texC_layout, texZ_layout, *pTexCZ_layout = &texC_layout;
  1358. if( texZ )
  1359. {
  1360. memcpy (&texZ_layout, (void*)texZ.m_eaLayout, sizeof( texZ_layout ));
  1361. pTexCZ = &texZ;
  1362. pTexCZ_layout = &texZ_layout;
  1363. }
  1364. if( texC )
  1365. {
  1366. memcpy( &texC_layout, (void*)texC.m_eaLayout, sizeof( texC_layout ));
  1367. pTexCZ = &texC;
  1368. pTexCZ_layout = &texC_layout;
  1369. }
  1370. CellGcmSurface sf;
  1371. V_memset( &sf, 0, sizeof( sf ) );
  1372. sf.colorFormat = CELL_GCM_SURFACE_A8R8G8B8;
  1373. sf.colorTarget = texC.NotNull() ? CELL_GCM_SURFACE_TARGET_0 : CELL_GCM_SURFACE_TARGET_NONE;
  1374. sf.colorLocation[0] = CELL_GCM_LOCATION_LOCAL;
  1375. sf.colorOffset[0] = texC ? texC.Offset() : 0;
  1376. sf.colorPitch[0] = texC ? texC_layout.DefaultPitch2( g_ps3texFormats ) : 64;
  1377. sf.colorLocation[1] = CELL_GCM_LOCATION_LOCAL;
  1378. sf.colorLocation[2] = CELL_GCM_LOCATION_LOCAL;
  1379. sf.colorLocation[3] = CELL_GCM_LOCATION_LOCAL;
  1380. sf.colorOffset[1] = 0;
  1381. sf.colorOffset[2] = 0;
  1382. sf.colorOffset[3] = 0;
  1383. sf.colorPitch[1] = 64;
  1384. sf.colorPitch[2] = 64;
  1385. sf.colorPitch[3] = 64;
  1386. sf.depthFormat = CELL_GCM_SURFACE_Z24S8;
  1387. if ( texZ )
  1388. {
  1389. CPs3gcmTextureLayout::Format_t &zFmt = g_ps3texFormats[texZ_layout.m_nFormat];
  1390. if ( ( zFmt.m_gcmFormat == CELL_GCM_TEXTURE_DEPTH16 ) || ( zFmt.m_gcmFormat == CELL_GCM_TEXTURE_DEPTH16_FLOAT ) )
  1391. {
  1392. sf.depthFormat = CELL_GCM_SURFACE_Z16;
  1393. }
  1394. }
  1395. sf.depthLocation = CELL_GCM_LOCATION_LOCAL;
  1396. sf.depthOffset = texZ ? texZ.Offset() : 0;
  1397. sf.depthPitch = texZ ? texZ_layout.DefaultPitch2( g_ps3texFormats ) : 64;
  1398. sf.type = ( texC && texC_layout.IsSwizzled() ) ? CELL_GCM_SURFACE_SWIZZLE : CELL_GCM_SURFACE_PITCH;
  1399. sf.antialias = CELL_GCM_SURFACE_CENTER_1;
  1400. sf.width = *pTexCZ ? pTexCZ_layout->m_key.m_size[0] : g_ps3gcmGlobalState.m_nRenderSize[0];
  1401. sf.height = *pTexCZ ? pTexCZ_layout->m_key.m_size[1] : g_ps3gcmGlobalState.m_nRenderSize[1];
  1402. sf.x = 0;
  1403. sf.y = 0;
  1404. PackData(kDataUpdateSurface, 0, (uint16)sizeof(sf), (void*)&sf);
  1405. }
  // Applies an unpacked surface description: makes *pSf the current GCM surface, turns
  // zcull on, and flags the scissor state dirty so it is re-emitted by the next
  // CommitRenderStates().
  inline void CGcmDrawState::UnpackUpdateSurface(CellGcmSurface* pSf)
  {
      GCM_FUNC( cellGcmSetSurface, pSf );
      // cellGcmSetZcullControl invalidates Zcull, and these are the default settings anyways (LESS / LONES)
      // so don't bother doing anything here.
      // If other settings are needed, set them once at the beginning of time for each zcull region
      //GCM_FUNC( cellGcmSetZcullControl, CELL_GCM_ZCULL_LESS, CELL_GCM_ZCULL_LONES );
      // These calls do NOT invalidate Zcull
      GCM_FUNC( cellGcmSetZcullEnable, CELL_GCM_TRUE, CELL_GCM_TRUE );
      // when render target changes, and scissor is not enabled, and the target dimensions change,
      // we need to flush the scissor dimensions because we always maintain scissor ON state, and
      // the scissor size must conform to surface size (which just changed)
      m_dirtyStatesMask |= kDirtyScissor;
  }
  1420. inline void CGcmDrawState::Helper_IntersectRectsXYWH( uint16 const *a, uint16 const *b, uint16 *result )
  1421. // Takes 2 rects a&b specified as top,left,width,height
  1422. // Produces an intersection also as top,left,width,height
  1423. // Intersection can have zero width and/or height
  1424. {
  1425. result[0] = a[0] > b[0] ? a[0] : b[0];
  1426. result[1] = a[1] > b[1] ? a[1] : b[1];
  1427. uint16 ca = a[0]+a[2], cb = b[0]+b[2];
  1428. ca = ca < cb ? ca : cb;
  1429. if ( int16(ca) < int16(result[0]) )
  1430. ca = result[0];
  1431. result[2] = ca - result[0];
  1432. ca = a[1]+a[3], cb = b[1]+b[3];
  1433. ca = ca < cb ? ca : cb;
  1434. if ( int16(ca) < int16(result[1]) )
  1435. ca = result[1];
  1436. result[3] = ca - result[1];
  1437. }
  1438. inline void CGcmDrawState::UnpackClearSurface( DWORD nFlags, D3DCOLOR nColor, float flZ, uint32 nStencil,
  1439. uint32 nDepthStencilBitDepth )
  1440. {
  1441. uint32 uiGcmClearMask = 0
  1442. | ( ( nFlags & D3DCLEAR_STENCIL ) ? CELL_GCM_CLEAR_S : 0 )
  1443. | ( ( nFlags & D3DCLEAR_ZBUFFER ) ? CELL_GCM_CLEAR_Z : 0 )
  1444. | ( ( nFlags & D3DCLEAR_TARGET ) ? (CELL_GCM_CLEAR_R|CELL_GCM_CLEAR_G|CELL_GCM_CLEAR_B|CELL_GCM_CLEAR_A) : 0 )
  1445. ;
  1446. if ( nFlags & D3DCLEAR_TARGET )
  1447. {
  1448. GCM_FUNC( cellGcmSetClearColor, nColor );
  1449. }
  1450. if ( nFlags & (D3DCLEAR_STENCIL|D3DCLEAR_ZBUFFER) )
  1451. {
  1452. uint32 nClearValue;
  1453. if ( nDepthStencilBitDepth == 16 )
  1454. {
  1455. // NOTE: for SURFACE_Z16 depth is in lower 16 bits
  1456. nClearValue = ( uint32 )( flZ * 0xFFFF );
  1457. }
  1458. else
  1459. {
  1460. nClearValue = ( ( ( uint32 )( flZ * 0xFFFFFF ) ) << 8 ) | ( nStencil & 0xFF );
  1461. }
  1462. // if(Z16) GCM_FUNC( cellGcmSetClearDepthStencil, (((uint32)( Z*0xFFFF ))<<8) );
  1463. GCM_FUNC( cellGcmSetClearDepthStencil, nClearValue );
  1464. }
  1465. // Set scissor box to cover the intersection of viewport and scissor
  1466. if ( !m_scissor.enabled )
  1467. {
  1468. GCM_FUNC( cellGcmSetScissor, m_viewportSize[0], m_viewportSize[1], m_viewportSize[2], m_viewportSize[3] );
  1469. }
  1470. else
  1471. {
  1472. uint16 uiScissorCoords[4] = {0};
  1473. Helper_IntersectRectsXYWH( m_viewportSize, &m_scissor.x, uiScissorCoords );
  1474. GCM_FUNC( cellGcmSetScissor, uiScissorCoords[0], uiScissorCoords[1], uiScissorCoords[2], uiScissorCoords[3] );
  1475. }
  1476. GCM_FUNC( cellGcmSetClearSurface, uiGcmClearMask );
  1477. // Since we affected the scissor, mark it as dirty
  1478. m_dirtyStatesMask |= kDirtyScissor;
  1479. }
  // Queues a kDataClearSurface record carrying the five clear parameters, in the same order
  // in which UnpackClearSurface() takes them.
  inline void CGcmDrawState::ClearSurface( DWORD nFlags, D3DCOLOR nColor, float flZ, uint32 nStencil,
      uint32 nDepthStencilBitDepth )
  {
      PackData(kDataClearSurface, nFlags, nColor, flZ, nStencil, nDepthStencilBitDepth );
  }
  // Emits a fixed set of default render states (culling on / back faces / CW front face;
  // blend, alpha test, stencil test and depth test off; filled polygons; no polygon offset),
  // forces viewport and scissor to the full back buffer, and invalidates the cached shader
  // and constant state so it is re-sent.
  inline void CGcmDrawState::UnpackResetSurfaceToKnownDefaultState()
  {
      // Reset to default state:
      GCM_FUNC( cellGcmSetCullFaceEnable, CELL_GCM_TRUE );
      GCM_FUNC( cellGcmSetCullFace, CELL_GCM_BACK );
      GCM_FUNC( cellGcmSetFrontFace, CELL_GCM_CW );
      GCM_FUNC( cellGcmSetBlendEnable, CELL_GCM_FALSE );
      GCM_FUNC( cellGcmSetAlphaTestEnable, CELL_GCM_FALSE );
      GCM_FUNC( cellGcmSetStencilTestEnable, CELL_GCM_FALSE );
      GCM_FUNC( cellGcmSetDepthTestEnable, CELL_GCM_FALSE );
      GCM_FUNC( cellGcmSetFrontPolygonMode, CELL_GCM_POLYGON_MODE_FILL );
      GCM_FUNC( cellGcmSetBackPolygonMode, CELL_GCM_POLYGON_MODE_FILL );
      GCM_FUNC( cellGcmSetPolygonOffset, 0, 0 );
      GCM_FUNC( cellGcmSetPolygonOffsetFillEnable, CELL_GCM_FALSE );
      // Force the viewport to match the current back buffer
      // (origin 0,0; back-buffer width/height; current m_viewZ depth range)
      D3DVIEWPORT9 dForcedView =
      {
          0, 0,
          m_nBackBufferSize[0],
          m_nBackBufferSize[1],
          m_viewZ[0],
          m_viewZ[1]
      };
      // NOTE(review): this calls SetViewport() rather than UnpackSetViewport(), which is what
      // UnpackData() uses for kDataViewport records - confirm that is intended on this path.
      SetViewport( &dForcedView );
      GCM_FUNC( cellGcmSetScissor, 0, 0, m_nBackBufferSize[0], m_nBackBufferSize[1] );
      // Reset some cached gcm state
      m_userClipPlanesState = 0;
      m_shaderVxConstants = 0;
      m_dirtyCachesMask |= ( kDirtyVxConstants | kDirtyVxShader |
          kDirtyClipPlanes | kDirtyPxShader |
          kDirtyPxConstants );
  }
  // Queues a kDataResetSurface record (no payload); UnpackData() answers it by calling
  // UnpackResetSurfaceToKnownDefaultState().
  inline void CGcmDrawState::ResetSurfaceToKnownDefaultState()
  {
      PackData(kDataResetSurface);
  }
  1521. //--------------------------------------------------------------------------------------------------
  1522. // Blit
  1523. //--------------------------------------------------------------------------------------------------
  // Forwards all twelve arguments unchanged to cellGcmSetTransferImage.
  // Unlike the other Unpack* routines in this file this is defined as a free function,
  // not as a CGcmDrawState member.
  inline void UnpackTransferImage(uint8 mode, uint32 dstOffset, uint32 dstPitch, uint32 dstX, uint32 dstY, uint32 srcOffset,
      uint32 srcPitch, uint32 srcX, uint32 srcY, uint32 width, uint32 height, uint32 bytesPerPixel )
  {
      GCM_FUNC(cellGcmSetTransferImage, mode, dstOffset, dstPitch, dstX, dstY, srcOffset,
          srcPitch, srcX, srcY, width, height, bytesPerPixel );
  }
  1530. inline void CGcmDrawState::SetTransferImage(uint8 mode, uint32 dstOffset, uint32 dstPitch, uint32 dstX, uint32 dstY, uint32 srcOffset,
  1531. uint32 srcPitch, uint32 srcX, uint32 srcY, uint32 width, uint32 height, uint32 bytesPerPixel )
  1532. {
  1533. // return UnpackTransferImage( mode, dstOffset, dstPitch, dstX, dstY, srcOffset,
  1534. // srcPitch, srcX, srcY, width, height, bytesPerPixel);
  1535. uint32 aValues[12];
  1536. aValues[0] = mode;
  1537. aValues[1] = dstOffset;
  1538. aValues[2] = dstPitch;
  1539. aValues[3] = dstX;
  1540. aValues[4] = dstY;
  1541. aValues[5] = srcOffset;
  1542. aValues[6] = srcPitch;
  1543. aValues[7] = srcX;
  1544. aValues[8] = srcY;
  1545. aValues[9] = width;
  1546. aValues[10] = height;
  1547. aValues[11] = bytesPerPixel;
  1548. PackData(kDataTransferImage, 0, sizeof(aValues), (void*)aValues);
  1549. }
  1550. //--------------------------------------------------------------------------------------------------
  1551. // State Flushing and Pixel Shader Patching
  1552. //--------------------------------------------------------------------------------------------------
  // Replays the packed per-draw data stream that lies between m_pData and m_pDataCursor.
  // The stream is a sequence of records: a DrawData header (m_type, m_idx, m_size)
  // immediately followed by m_size bytes of payload. Each record is dispatched on m_type.
  // Afterwards the stream size is folded into running high-water / average statistics
  // (function-local statics) and the cursor is rewound to m_pData.
  inline void CGcmDrawState::UnpackData()
  {
      // Usage statistics; these persist across calls
      static uint32 highWater = 0;
      static float average = 0.0f;
      static uint32 count = 0;
  #ifndef SPU
      // Only referenced by the commented-out periodic report further down
      static int display = 4000;
  #endif
      m_nNumECB = 0;
      // Bytes of payload seen per record type, for the high-water report.
      // NOTE(review): indexed by m_type without a range check - presumably every kData*
      // value is below 64; confirm against the enum.
      int aSizes[64];
      memset(aSizes, 0, sizeof(aSizes));
      DrawData* pSrc = (DrawData*)m_pData;
      while ((uint8*)pSrc < m_pDataCursor)
      {
          // The payload starts right behind the header; view it as uint32s and as floats
          uint32* pVals = (uint32*)(pSrc+1);
          float* pfVals = (float*)pVals;
          aSizes[pSrc->m_type] += pSrc->m_size;
          switch (pSrc->m_type)
          {
          case kDataEcbTexture:
              V_memcpy(&m_aBindTexture[pSrc->m_idx], pVals, pSrc->m_size);
              break;
          case kDataSetRenderState:
              UnpackSetRenderState((D3DRENDERSTATETYPE)pVals[0], pVals[1]);
              break;
          case kDataFpuConsts:
              // Payload is copied into g_aFPConst starting at register m_idx
              V_memcpy(&g_aFPConst[pSrc->m_idx], pVals, pSrc->m_size);
              break;
          case kDataSetWorldSpaceCameraPosition:
              UnpackSetWorldSpaceCameraPosition(pfVals);
              break;
          case kDataStreamDesc:
              V_memcpy(&g_dxGcmVertexStreamSources[pSrc->m_idx], pVals, pSrc->m_size);
              break;
          case kDataVpuConsts:
              // m_size is in bytes; /16 turns it into a count of 16-byte vectors
              GCM_FUNC( cellGcmSetVertexProgramParameterBlock, pSrc->m_idx, pSrc->m_size/16, (float*)pVals );
              break;
          case kDataZcullStats:
              GCM_FUNC( cellGcmSetReport, CELL_GCM_ZCULL_STATS, GCM_REPORT_ZCULL_STATS_0 );
              GCM_FUNC( cellGcmSetReport, CELL_GCM_ZCULL_STATS1, GCM_REPORT_ZCULL_STATS_1 );
              break;
          case kDataZcullLimit:
              // NOTE(review): reads pVals[0] and pVals[2], while the other two-value records
              // here read pVals[0] and pVals[1] - confirm against the matching PackData call.
              GCM_FUNC(cellGcmSetZcullLimit, pVals[0], pVals[2] );
              break;
          case kDataViewport:
              UnpackSetViewport((D3DVIEWPORT9*) pVals);
              break;
          case kDataScissor:
              UnpackSetScissorRect((DrawScissor_t*) pVals);
              break;
          case kDataSetZpassPixelCountEnable:
              UnpackSetZpassPixelCountEnable(pVals[0]);
              break;
          case kDataSetClearReport:
              UnpackSetClearReport(pVals[0]);
              break;
          case kDataSetReport:
              UnpackSetReport(pVals[0], pVals[1]);
              break;
          case kDataSetWriteBackEndLabel:
              UnpackSetWriteBackEndLabel(pVals[0], pVals[1]);
              break;
          case kDataUpdateSurface:
              UnpackUpdateSurface((CellGcmSurface*)pVals);
              break;
          case kDataResetSurface:
              UnpackResetSurfaceToKnownDefaultState();
              break;
          case kDataClearSurface:
              // Third value (flZ) is read through the float view of the payload
              UnpackClearSurface(pVals[0], pVals[1], pfVals[2], pVals[3], pVals[4] );
              break;
          case kDataTransferImage:
              UnpackTransferImage(pVals[0], pVals[1], pVals[2], pVals[3],
                  pVals[4], pVals[5], pVals[6], pVals[7],
                  pVals[8], pVals[9], pVals[10], pVals[11] );
              break;
          case kDataTexture:
              UnpackSetTexture(pVals[0], pVals[1], pVals[2]);
              break;
          case kDataResetTexture:
              UnpackResetTexture(pVals[0]);
              break;
          case kDataUpdateVtxBufferOffset:
              // The first payload word is reinterpreted as a vertex buffer pointer
              UnpackUpdateVtxBufferOffset((IDirect3DVertexBuffer9*)pVals[0], pVals[1]);
              break;
          case kDataECB:
              // Command buffers are consumed from m_aECB in the order their records appear;
              // each slot is cleared once executed
              UnpackExecuteCommandBuffer(m_aECB[m_nNumECB]);
              m_aECB[m_nNumECB] = 0;
              m_nNumECB++;
              break;
          case kDataBeginScene:
              m_nDisabledSamplers = 0;
              m_nSetTransformBranchBits = 0;
              break;
          }
          // Step over this record's header and payload to reach the next header
          pSrc = (DrawData*)((uint8*)(pSrc+1)+pSrc->m_size);
      }
      m_nNumECB = 0;
      // Record High Water
      uint32 size = m_pDataCursor - m_pData;
      // Running mean: expand back to a sum, add this sample, divide by the new count
      average *= count;
      count++;
      average += size;
      average /= count;
  #ifndef SPU
      uint32 avgInt = uint32(average + 0.5f);
  #endif
      if (size > highWater)
      {
          highWater = size;
          // NOTE(review): avgInt is declared only when SPU is not defined but is used here
          // unconditionally - presumably Msg compiles away on SPU; confirm.
          Msg("\n>>>>>>>>>>>High Water %d (0x%x) : Average %d (0x%x) : Avg plus GcmDrawState = %d (0x%x) : This plus drawstate (%d (0x%x)) \n", highWater, highWater,
              avgInt, avgInt, avgInt + DRAWSTATE_SIZEOFDMA, avgInt + DRAWSTATE_SIZEOFDMA, size + DRAWSTATE_SIZEOFDMA, size + DRAWSTATE_SIZEOFDMA );
          // Per-type byte totals for record types 1..kDataTransferImage
          for (int i = 1; i <= kDataTransferImage; i++ )
          {
              Msg( ">>>%d : %d\n", i, aSizes[i]);
          }
      }
      // display--;
      // if ( (display < 1) || ((size+sizeof(CGcmDrawState)) > 0x1800))
      // {
      // Msg("\n>>>>>>>>>>>High Water %d (0x%x) : Average %d (0x%x) : Avg plus GcmDrawState = %d (0x%x) : This (%d (0x%x)) \n", highWater, highWater,
      // avgInt, avgInt, avgInt + sizeof(CGcmDrawState), avgInt + sizeof(CGcmDrawState), size, size );
      //
      // display = 10000;
      // }
      // Reset cursor
      m_pDataCursor = m_pData;
  }
  // Emits GCM commands for every render state whose bit is set in m_dirtyStatesMask.
  // The mask is captured and cleared up front; each dirty bit then sends the cached value
  // of that state to the GPU.
  inline void CGcmDrawState::CommitRenderStates()
  {
      uint nMask = m_dirtyStatesMask;
      m_dirtyStatesMask = 0;
      if ( nMask & kDirtyDepthMask)
      {
          GCM_FUNC(cellGcmSetDepthMask, m_nSetDepthMask);
      }
      if ( nMask & kDirtyZEnable )
      {
          GCM_FUNC( cellGcmSetDepthTestEnable, m_ZEnable );
      }
      if ( nMask & kDirtyZFunc )
      {
          GCM_FUNC( cellGcmSetDepthFunc, m_ZFunc );
      }
      if ( nMask & kDirtyColorWriteEnable )
      {
          GCM_FUNC( cellGcmSetColorMask, m_ColorWriteEnable);
      }
      if ( nMask & kDirtyCullMode )
      {
          // Only the front-face winding and the cull enable are touched here; which face
          // gets culled is not set in this function.
          switch(m_CullMode)
          {
          case D3DCULL_NONE:
              GCM_FUNC( cellGcmSetCullFaceEnable, CELL_GCM_FALSE );
              break;
          case D3DCULL_CW:
              GCM_FUNC( cellGcmSetCullFaceEnable, CELL_GCM_TRUE );
              GCM_FUNC( cellGcmSetFrontFace, CELL_GCM_CCW ); // opposite from D3D
              break;
          case D3DCULL_CCW:
              GCM_FUNC( cellGcmSetCullFaceEnable, CELL_GCM_TRUE );
              GCM_FUNC( cellGcmSetFrontFace, CELL_GCM_CW ); // opposite from D3D
              break;
          }
      }
      if ( nMask & kDirtyAlphablendEnable )
      {
          GCM_FUNC( cellGcmSetBlendEnable, m_AlphablendEnable );
      }
      if (nMask & kDirtyBlendOp)
      {
          // Same equation is used for color and alpha
          uint32 Value = m_BlendOp;
          uint16 equation = dxtogl_blendop[ Value ];
          GCM_FUNC( cellGcmSetBlendEquation, equation, equation );
      }
      if ( nMask & kDirtySrgbWriteEnable )
      {
          uint32 Value = m_SrgbWriteEnable;
          GCM_FUNC( cellGcmSetFragmentProgramGammaEnable, !!Value );
      }
      if ( nMask & kDirtyAlphaTestEnable )
      {
          uint32 Value = m_AlphaTestEnable;
          GCM_FUNC( cellGcmSetAlphaTestEnable, !!Value );
      }
      if ( nMask & kDirtyStencilEnable )
      {
          uint32 Value = m_StencilEnable;
          GCM_FUNC( cellGcmSetStencilTestEnable, !!Value );
      }
      if ( nMask & kDirtyStencilWriteMask )
      {
          uint32 Value = m_StencilWriteMask;
          GCM_FUNC( cellGcmSetStencilMask, Value );
      }
      if ( nMask & kDirtyFillMode )
      {
          // Maps the D3D fill mode onto the GCM polygon mode by offset from the POINT value
          uint32 Value = m_FillMode;
          uint32 mode = CELL_GCM_POLYGON_MODE_POINT + ( Value - D3DFILL_POINT );
          GCM_FUNC( cellGcmSetFrontPolygonMode, mode );
          GCM_FUNC( cellGcmSetBackPolygonMode, mode );
      }
      if ( nMask & CGcmDrawState::kDirtyBlendFactor )
      {
          // m_blends[0] / m_blends[1] are passed for both the first and the second pair of arguments
          GCM_FUNC( cellGcmSetBlendFunc,
              m_blends[0], m_blends[1],
              m_blends[0], m_blends[1] );
      }
      if ( nMask & CGcmDrawState::kDirtyAlphaFunc )
      {
          GCM_FUNC( cellGcmSetAlphaFunc, m_alphaFunc.func, m_alphaFunc.ref );
      }
      if ( nMask & CGcmDrawState::kDirtyStencilOp )
      {
          // Front and back faces get identical stencil ops
          GCM_FUNC( cellGcmSetStencilOp, m_stencilOp.fail, m_stencilOp.dfail, m_stencilOp.dpass );
          GCM_FUNC( cellGcmSetBackStencilOp, m_stencilOp.fail, m_stencilOp.dfail, m_stencilOp.dpass );
      }
      if ( nMask & CGcmDrawState::kDirtyStencilFunc )
      {
          // Front and back faces get identical stencil functions
          GCM_FUNC( cellGcmSetStencilFunc, m_stencilFunc.func, m_stencilFunc.ref, m_stencilFunc.mask );
          GCM_FUNC( cellGcmSetBackStencilFunc, m_stencilFunc.func, m_stencilFunc.ref, m_stencilFunc.mask );
      }
      if ( nMask & CGcmDrawState::kDirtyScissor )
      {
          if( m_scissor.enabled )
          {
              GCM_FUNC( cellGcmSetScissor, m_scissor.x, m_scissor.y, m_scissor.w, m_scissor.h );
          }
          else
          {
              GCM_FUNC( cellGcmSetScissor, 0, 0, 4095, 4095 ); // disable scissor
          }
      }
      if ( nMask & CGcmDrawState::kDirtyDepthBias )
      {
          // factor / units are read through a float* cast of the stored members
          float units = *((float*)&m_depthBias.units);
          GCM_FUNC( cellGcmSetPolygonOffset, *((float*)&m_depthBias.factor), /* NEED 2x here:see PSGL! */ 2.0f * units );
          // Polygon offset fill is enabled only while either value is non-zero.
          // NOTE(review): this comparison uses the members directly rather than the float
          // reinterpretation used above - confirm the member type makes both equivalent.
          if ( ( m_depthBias.factor != 0.0f ) || ( m_depthBias.units != 0.0f ) )
          {
              GCM_FUNC( cellGcmSetPolygonOffsetFillEnable, CELL_GCM_TRUE );
          }
          else
          {
              GCM_FUNC( cellGcmSetPolygonOffsetFillEnable, CELL_GCM_FALSE );
          }
      }
  }
  1800. inline void CGcmDrawState::CommitVertexBindings(IDirect3DVertexDeclaration9 * pDecl, uint32 baseVertexIndex)
  1801. {
  1802. // push vertex buffer state for the current vertex decl
  1803. uint uiVertexSlotMask = m_pVertexShaderData->m_attributeInputMask;
  1804. if ( !uiVertexSlotMask) Error(">>>>Blank vertex shader attr\n");
  1805. for( int nStreamIndex = 0; nStreamIndex < D3D_MAX_STREAMS; ++ nStreamIndex, uiVertexSlotMask >>= 1 )
  1806. {
  1807. SetVertexDataArrayCache_t *pOldCache = &g_cacheSetVertexDataArray[nStreamIndex];
  1808. // Check if this attribute is unused by the shader program
  1809. // and try to find the match in the decl.
  1810. if ( int j = ( uiVertexSlotMask & 1 ) ? pDecl->m_cgAttrSlots[ nStreamIndex ] : 0 )
  1811. {
  1812. D3DVERTEXELEMENT9_GCM *elem = &pDecl->m_elements[ j - 1 ];
  1813. int streamIndex = elem->m_dxdecl.Stream;
  1814. Assert( streamIndex >= 0 && streamIndex < D3D_MAX_STREAMS );
  1815. D3DStreamDesc &dsd = g_dxGcmVertexStreamSources[ streamIndex ];
  1816. D3DVERTEXELEMENT9_GCM::GcmDecl_t const &gcmvad = elem->m_gcmdecl;
  1817. const uint8_t stride = dsd.m_stride;
  1818. const uint8_t size = gcmvad.m_datasize;
  1819. const uint8_t type = gcmvad.m_datatype;
  1820. SetVertexDataArrayCache_t newCache( dsd, gcmvad, baseVertexIndex );
  1821. if( *pOldCache != newCache )
  1822. {
  1823. // Msg(">>>>>>>>>> Offset 0x%x <<<<<<<<<<\n\n", newCache.GetLocalOffset());
  1824. GCM_FUNC( cellGcmSetVertexDataArray, nStreamIndex, 1, stride, size, type,
  1825. CELL_GCM_LOCATION_LOCAL, newCache.GetLocalOffset() ); //
  1826. // if (!newCache.GetLocalOffset()) Error (">>>>>>>>>>>>>>>>>address %x <<<<<<<<<<<<<<<<<<<<<<\n", newCache.GetLocalOffset());
  1827. *pOldCache = newCache;
  1828. }
  1829. continue;
  1830. }
  1831. if( !pOldCache->IsNull() )
  1832. {
  1833. // Disable data slot if we failed to bind proper data stream
  1834. GCM_FUNC( cellGcmSetVertexDataArray, nStreamIndex, 1, 0, 0, CELL_GCM_VERTEX_F, CELL_GCM_LOCATION_LOCAL, 0 );
  1835. pOldCache->SetNull(); // disable
  1836. }
  1837. }
  1838. }
  // Writes the full texture/sampler setup for one sampler straight into the command buffer.
  // Eleven words are stored at gpGcmContext->current: a TEXTURE_OFFSET method header followed
  // by eight data words (offset, format, address, control0, remap, filter, image rect, border
  // color), then a TEXTURE_CONTROL3 header with one data word. The write pointer is advanced
  // by 11 at the end. No space is reserved here (the reserve call is commented out).
  inline void CGcmDrawState::CommitSampler(uint32 nSampler)
  {
      D3DSamplerDesc const & dxsamp = m_aSamplers[ nSampler ];
      // The texture layout comes from a per-sampler array on SPU and from the texture's
      // m_eaLayout pointer otherwise
  #ifdef SPU
      extern CPs3gcmTextureLayout gaLayout[D3D_MAX_TEXTURES];
      CPs3gcmTextureLayout const & texlayout = gaLayout[nSampler];
  #else
      CPs3gcmTextureLayout const & texlayout = *((CPs3gcmTextureLayout const *)m_textures[ nSampler ].m_eaLayout);
  #endif
      uint nMips = texlayout.m_mipCount;
      Assert( nMips > 0 );
      CPs3gcmTextureLayout::Format_t & texlayoutFormat = g_ps3texFormats[texlayout.m_nFormat];
      // If bReadsRawDepth is true, a depth texture has been set but shadow filtering has NOT been enabled. In this case, the shader is expecting to read
      // the texture as A8R8G8B8 and manually recover depth (used for depth feathering).
      bool bReadsRawDepth = ( texlayoutFormat.m_gcmFormat == CELL_GCM_TEXTURE_DEPTH24_D8 ) && !dxsamp.m_shadowFilter;
      // GCM_FUNC( cellGcmReserveMethodSize, 11 );
      uint32_t *current = gpGcmContext->current;
      current[0] = CELL_GCM_METHOD_HEADER_TEXTURE_OFFSET( nSampler, 8 );
      current[1] = CELL_GCM_METHOD_DATA_TEXTURE_OFFSET( m_textures[ nSampler ].Offset() );
      // Bit 0 of the stored offset word selects the memory location: set = local memory,
      // clear = main memory. The bit is stripped from the offset when set.
      uint locn;
      if (current[1] & 1)
      {
          locn = CELL_GCM_LOCATION_LOCAL;
          current[1] &= 0xFFFFFFFE;
      }
      else
      {
          locn = CELL_GCM_LOCATION_MAIN;
      }
      current[2] = CELL_GCM_METHOD_DATA_TEXTURE_FORMAT(
          locn,
          texlayout.IsCubeMap() ? CELL_GCM_TRUE : CELL_GCM_FALSE,
          texlayout.IsVolumeTex() ? CELL_GCM_TEXTURE_DIMENSION_3 : CELL_GCM_TEXTURE_DIMENSION_2,
          ( bReadsRawDepth
              ? CELL_GCM_TEXTURE_A8R8G8B8 // bind depth textures as ARGB and reassemble depth in shader
              : texlayoutFormat.m_gcmFormat
          ) |
          ( texlayout.IsSwizzled() ? CELL_GCM_TEXTURE_SZ : CELL_GCM_TEXTURE_LN ),
          nMips
      );
      // Address modes, depth compare function (GEQUAL only with shadow filtering) and
      // sRGB gamma (only when both the format supports it and the sampler asks for it)
      current[3] = CELL_GCM_METHOD_DATA_TEXTURE_ADDRESS(
          dxtogl_addressMode[ dxsamp.m_addressModeU ],
          dxtogl_addressMode[ dxsamp.m_addressModeV ],
          dxtogl_addressMode[ dxsamp.m_addressModeW ],
          CELL_GCM_TEXTURE_UNSIGNED_REMAP_NORMAL,
          dxsamp.m_shadowFilter ? CELL_GCM_TEXTURE_ZFUNC_GEQUAL : CELL_GCM_TEXTURE_ZFUNC_NEVER,
          ( ( texlayoutFormat.m_gcmCaps & CPs3gcmTextureLayout::Format_t::kCapSRGB ) && dxsamp.m_srgb )
              ? CELL_GCM_TEXTURE_GAMMA_R | CELL_GCM_TEXTURE_GAMMA_G | CELL_GCM_TEXTURE_GAMMA_B : 0,
          0
      );
      // Mip range is passed scaled by 256; the first bound is the sampler's m_maxMipLevel
      // limited to nMips - 1, the second is nMips - 1. Anisotropy is fixed at 4 when either
      // filter is anisotropic and the texture is not a volume texture, otherwise 1.
      current[4] = CELL_GCM_METHOD_DATA_TEXTURE_CONTROL0( CELL_GCM_TRUE,
          (uint16)( Max<uint>( Min<uint>( dxsamp.m_maxMipLevel, nMips - 1 ), 0u ) * 256.0f ),
          (uint16)( Max<uint>( nMips - 1, 0u ) * 256.0f ),
          texlayout.IsVolumeTex() || ( ( dxsamp.m_minFilter != D3DTEXF_ANISOTROPIC ) && ( dxsamp.m_magFilter != D3DTEXF_ANISOTROPIC ) )
              ? CELL_GCM_TEXTURE_MAX_ANISO_1 // 3D textures cannot have anisotropic filtering!
              : CELL_GCM_TEXTURE_MAX_ANISO_4 // dxtogl_anisoIndexHalf[ ( dxsamp.m_maxAniso / 2 ) & ( ARRAYSIZE( dxtogl_anisoIndexHalf ) - 1 ) ]
      );
      // Raw depth reads use a fixed channel remap; everything else uses the format's own remap
      current[5] = bReadsRawDepth ?
          CELL_GCM_REMAP_MODE( CELL_GCM_TEXTURE_REMAP_ORDER_XYXY, CELL_GCM_TEXTURE_REMAP_FROM_B, CELL_GCM_TEXTURE_REMAP_FROM_A, CELL_GCM_TEXTURE_REMAP_FROM_R, CELL_GCM_TEXTURE_REMAP_FROM_G,
              CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP )
          : texlayoutFormat.m_gcmRemap;
      // Raw depth reads are point sampled; otherwise the D3D filters are translated through
      // the dxtogl_* tables (the mip filter is capped at D3DTEXF_LINEAR)
      if( bReadsRawDepth )
          current[6] = CELL_GCM_METHOD_DATA_TEXTURE_FILTER( 0, CELL_GCM_TEXTURE_NEAREST, CELL_GCM_TEXTURE_NEAREST, CELL_GCM_TEXTURE_CONVOLUTION_QUINCUNX );
      else
          current[6] = CELL_GCM_METHOD_DATA_TEXTURE_FILTER(
              0, // 0x1FBE, // 0x1FC0, // corresponding to PSGL 0 mip bias, formula: [( bias - .26 )*256] & 0x1FFF
              dxtogl_minFilter[ dxsamp.m_minFilter ][ Min( (D3DTEXTUREFILTERTYPE)dxsamp.m_mipFilter, D3DTEXF_LINEAR ) ],
              dxtogl_magFilter[ dxsamp.m_magFilter ],
              CELL_GCM_TEXTURE_CONVOLUTION_QUINCUNX
          );
      // Note the argument order: m_size[1] first, then m_size[0]
      current[7] = CELL_GCM_METHOD_DATA_TEXTURE_IMAGE_RECT(
          texlayout.m_key.m_size[1],
          texlayout.m_key.m_size[0]
      );
      current[8] = CELL_GCM_METHOD_DATA_TEXTURE_BORDER_COLOR(
          0 // Border color always 0 ... dxsamp.m_borderColor // R=>>16; G=>>8; B=>>0; A=>>24 (same thing as GCM, see JSGCM_CALC_COLOR_LE_ARGB8)
      );
      current[9] = CELL_GCM_METHOD_HEADER_TEXTURE_CONTROL3( nSampler, 1 );
      current[10] = CELL_GCM_METHOD_DATA_TEXTURE_CONTROL3(
          texlayout.DefaultPitch2( g_ps3texFormats ),
          texlayout.m_key.m_size[2]
      );
      gpGcmContext->current = &current[11];
  }
  1923. inline void CGcmDrawState::CommitSamplers()
  1924. {
  1925. // Unpack from Fixed data into m_aSamplers
  1926. for (uint32 lp = 0; lp < D3D_MAX_SAMPLERS; lp++)
  1927. {
  1928. uint32 SamplerIdx = m_pFixed->m_aSamplerIdx[lp];
  1929. if (SamplerIdx != 0xFF)
  1930. m_aSamplers[lp] = m_pFixed->m_aSamplers[SamplerIdx];
  1931. }
  1932. // PS3 is binding textures here
  1933. uint mask = m_dirtySamplersMask;
  1934. m_dirtySamplersMask = 0;
  1935. uint16 uiPixelShaderInputMask = m_pPixelShaderData ? m_pPixelShaderData->m_samplerInputMask : 0;
  1936. uint16 uiRunningUpBitMask = 1;
  1937. uint nDisabledSamplers = m_nDisabledSamplers;
  1938. m_nDisabledSamplers = 0;
  1939. for ( int nSampler = 0; nSampler < 16; ++ nSampler, mask >>= 1, uiPixelShaderInputMask >>= 1, uiRunningUpBitMask <<= 1 )
  1940. {
  1941. if ( ( uiPixelShaderInputMask & 1 ) == 0 ) // The texture will not be sampled by pixel shader, unset it
  1942. {
  1943. // optimization
  1944. if( !( nDisabledSamplers & uiRunningUpBitMask ) )
  1945. {
  1946. GCM_FUNC( cellGcmSetTextureControl, nSampler, CELL_GCM_FALSE, 0, 0, 0 );
  1947. }
  1948. m_dirtySamplersMask |= uiRunningUpBitMask; // Keep the sampler dirty because we might have textures previously set on it
  1949. m_nDisabledSamplers |= uiRunningUpBitMask; // don't disable repeatedly
  1950. continue;
  1951. }
  1952. if ( ( mask & 1 ) == 0 ) // If the sampler is not dirty then don't do anything
  1953. continue;
  1954. if ( m_textures[nSampler].IsNull() ) // The sampler is dirty, but no texture on it, disable the sampler
  1955. {
  1956. // optimization
  1957. if( !( nDisabledSamplers & uiRunningUpBitMask ) )
  1958. {
  1959. GCM_FUNC( cellGcmSetTextureControl, nSampler, CELL_GCM_FALSE, 0, 0, 0 );
  1960. }
  1961. m_nDisabledSamplers |= uiRunningUpBitMask; // don't disable repeatedly
  1962. continue;
  1963. }
  1964. CommitSampler(nSampler);
  1965. }
  1966. m_pFixed->m_nInstanced = 0;
  1967. }
  // vec_perm control vectors used by PatchUcodeConstSwap, indexed by "length minus 1".
  // Selector bytes 0x00-0x0F pick from the first vec_perm operand (the constant value) and
  // 0x10-0x1F from the second (the existing ucode qword). Entry N therefore takes the first
  // N+1 32-bit words from the constant, with the two 16-bit halves of each word exchanged
  // (byte order 2,3,0,1), and keeps the remaining words of the existing qword unchanged.
  // NOTE(review): declared with 5 elements but only 4 are initialized; PatchUcode derives
  // the index from a 2-bit field (0..3), so the zero-filled fifth entry looks unused.
  static vector unsigned int g_swap16x32m1[5] =
  {
      {0x02030001, 0x14151617, 0x18191A1B, 0x1C1D1E1F},
      {0x02030001, 0x06070405, 0x18191A1B, 0x1C1D1E1F},
      {0x02030001, 0x06070405, 0x0A0B0809, 0x1C1D1E1F},
      {0x02030001, 0x06070405, 0x0A0B0809, 0x0E0F0C0D}
  };
  1975. static inline void PatchUcodeConstSwap( void * pDestination, const fltx4 f4Source, int nLengthMinus1 )
  1976. {
  1977. *( fltx4* )pDestination = vec_perm( f4Source, *( fltx4* )pDestination, ( vector unsigned char )g_swap16x32m1[nLengthMinus1] );
  1978. }
  1979. inline void CGcmDrawState::PatchUcode(fltx4 * pUCode16, uint32 * pPatchTable, uint nPatchCount )
  1980. {
  1981. for ( uint nPatchIndex = 0; nPatchIndex < nPatchCount; ++nPatchIndex )
  1982. {
  1983. uint nPatchWord = pPatchTable[ nPatchIndex ], nLengthMinus1 = nPatchWord >> 30;
  1984. uint nUcodeOffsetQword = nPatchWord & 0xFFFF;
  1985. uint nRegister = ( nPatchWord >> 16 ) & 0x3FF;
  1986. fltx4 & reg = g_aFPConst[nRegister];
  1987. PatchUcodeConstSwap( pUCode16 + nUcodeOffsetQword, reg, nLengthMinus1 );
  1988. }
  1989. }
  #ifndef SPU
  // Reserves room in the fragment program patch ring buffer (g_ps3gcmGlobalState.m_pPatchBuff)
  // for a patched copy of pFp's microcode and stores the resulting address in m_eaOutputUCode.
  // The buffer is divided into segments of GCM_PATCHSEGSIZE bytes. When the allocation would
  // end in a different segment than it starts in, it is moved to the start of the next
  // segment (wrapping around), after waiting until the segment index read from
  // g_label_fppatch_ring_seg is no longer that segment.
  inline void CGcmDrawState::AllocateUcode(FpHeader_t* pFp)
  {
      uint32 patchIdx = g_ps3gcmGlobalState.m_nPatchIdx;
      uint32 uCodeSize = pFp->m_nUcodeSize;
      // Microcode size plus 400 bytes, rounded up to a multiple of 128
      // (the purpose of the extra 400 bytes is not visible in this function)
      uint32 patchSize = AlignValue(uCodeSize + 400, 128);
      uint32 nEndPos = patchIdx + patchSize;
      uint32 nEndSeg = nEndPos/GCM_PATCHSEGSIZE;
      uint32 writeSeg = patchIdx/GCM_PATCHSEGSIZE;
      // are we out of space and so need to move to the next segment ?
      if (nEndSeg != writeSeg)
      {
          // move to the next segment
          uint32 nextSeg = (writeSeg + 1) % (GCM_PATCHBUFFSIZE/GCM_PATCHSEGSIZE);
          // Wait for RSX not to be in this segment
          // Start from the cached read segment and only fetch the live label value when
          // the cached one says the next segment is still in use
          uint32 readSeg = g_ps3gcmGlobalState.m_nPatchReadSeg;
          if (nextSeg == readSeg) readSeg = *g_label_fppatch_ring_seg;
          gpGcmDrawState->CmdBufferFlush();
          // spins is only counted for the commented-out diagnostic below
          uint32 spins = 0;
          while (nextSeg == readSeg)
          {
              spins++;
              sys_timer_usleep(60); // Not on SPU..
              readSeg = *g_label_fppatch_ring_seg;
          }
          // if (spins > 0) Msg("Patch Spins %d\n", spins);
          // Move to the next segment and record the new readSeg
          patchIdx = (nextSeg * GCM_PATCHSEGSIZE);
          writeSeg = nextSeg;
          g_ps3gcmGlobalState.m_nPatchReadSeg = readSeg;
          // Msg("New Patch Segment 0x%x\n", patchIdx);
      }
      // Hand out [patchIdx, patchIdx + patchSize) and advance the global write index
      uint8* pDst = g_ps3gcmGlobalState.m_pPatchBuff + patchIdx;
      patchIdx += patchSize;
      g_ps3gcmGlobalState.m_nPatchIdx = patchIdx;
      m_eaOutputUCode = uintp(pDst);
  }
  #endif
  2028. inline fltx4* CGcmDrawState::CopyUcode(FpHeader_t* pFp)
  2029. {
  2030. uint8* pDst = (uint8*)m_eaOutputUCode;
  2031. uint32 patchIdx = pDst - g_ps3gcmGlobalState.m_pPatchBuff;
  2032. uint32 uCodeSize = pFp->m_nUcodeSize;
  2033. uint32 writeSeg = patchIdx/GCM_PATCHSEGSIZE;
  2034. #ifndef SPU
  2035. V_memcpy(pDst, (uint8*)(pFp+1), uCodeSize);
  2036. #endif
  2037. // Set the label to say we're using shaders in this part of the ring buffer now
  2038. GCM_FUNC(cellGcmSetWriteBackEndLabel, GCM_LABEL_FPPATCH_RING_SEG, writeSeg);
  2039. return (fltx4*) pDst;
  2040. }
  // Binds the current pixel shader: produces a constant-patched copy of its microcode at
  // m_eaOutputUCode and writes the commands that select it into the command buffer.
  // Data layout behind the FpHeader_t, as read here: microcode (m_nUcodeSize bytes), then
  // m_nPatchCount patch words, then m_nTexControls pairs of uint32 command words.
  // Command words are written at gpGcmContext->current without reserving space (the reserve
  // call is commented out).
  inline void CGcmDrawState::BindFragmentProgram(uint32 nVertexToFragmentProgramAttributeMask)
  {
      FpHeader_t * fpHeader = m_pPixelShaderData->m_eaFp;
      // Copy and Patch Ucode
      uint32* pPatches = (uint32*)((uint8*)(fpHeader + 1) + fpHeader->m_nUcodeSize);
      fltx4* pUcode = CopyUcode(fpHeader);
  #ifndef SPU
      // Patch the copy that CopyUcode() placed in the output slot
      PatchUcode(pUcode, pPatches, fpHeader->m_nPatchCount );
  #else
      // On SPU the microcode behind the header is patched in place and then DMA'd out to
      // m_eaOutputUCode
      fltx4* pUcodeSPU = (fltx4*) (fpHeader+1);
      PatchUcode(pUcodeSPU, pPatches, fpHeader->m_nPatchCount );
      gSpuMgr.DmaSync();
      gSpuMgr.DmaPut(m_eaOutputUCode, (void*)pUcodeSPU, fpHeader->m_nUcodeSize, SPU_DMAPUT_TAG);
  #endif
      // Set Fragment Shader
      // The program offset is the output address shifted by m_nIoOffsetDelta
      uint32 nFragmentProgramOffset = uintp(pUcode);
      nFragmentProgramOffset += g_ps3gcmGlobalState.m_nIoOffsetDelta;
      uint32* pTexControls = pPatches + fpHeader->m_nPatchCount;
      uint nTexControls = fpHeader->m_nTexControls;
      // GCM_FUNC( cellGcmReserveMethodSize, 6 + (2 * nTexControls) );
      CELL_GCM_METHOD_SET_SHADER_CONTROL( gpGcmContext->current, fpHeader->m_nShaderControl0 ); // +2
      CELL_GCM_METHOD_SET_SHADER_PROGRAM( gpGcmContext->current, CELL_GCM_LOCATION_MAIN + 1, ( nFragmentProgramOffset & 0x1fffffff ) ); // +2
      CELL_GCM_METHOD_SET_VERTEX_ATTRIB_OUTPUT_MASK( gpGcmContext->current, nVertexToFragmentProgramAttributeMask /*psh->m_attributeInputMask | 0x20*/ ); // +2 - this gets overwritten later, so it's useless here , but GPAD says "unrecognized sequence" if I don't insert this command here
      // Append the texture control words verbatim: two uint32 per texture control entry
      V_memcpy( gpGcmContext->current, pTexControls, fpHeader->m_nTexControls * sizeof( uint32 ) * 2 );
      gpGcmContext->current += 2 * nTexControls;
  }
  2067. void CGcmDrawState::CommitShaders()
  2068. {
  2069. uint nMask = m_dirtyCachesMask;
  2070. m_dirtyCachesMask = 0;
  2071. if( nMask & kDirtyVxCache )
  2072. {
  2073. GCM_FUNC(cellGcmSetInvalidateVertexCache);
  2074. }
  2075. if( nMask & kDirtyTxCache )
  2076. {
  2077. GCM_FUNC( cellGcmSetInvalidateTextureCache, CELL_GCM_INVALIDATE_TEXTURE );
  2078. }
  2079. if ( nMask & kDirtyVxShader )
  2080. {
  2081. void* pVertexShaderCmdBuffer = (void*)(m_pVertexShaderData->m_pVertexShaderCmdBuffer );
  2082. if( pVertexShaderCmdBuffer )
  2083. {
  2084. uint32 nVertexShaderCmdBufferWords = m_pVertexShaderData->m_nVertexShaderCmdBufferWords;
  2085. // GCM_FUNC( cellGcmReserveMethodSize, nVertexShaderCmdBufferWords );
  2086. // uint32_t *current = gpGcmContext->current;
  2087. V_memcpy(gpGcmContext->current, pVertexShaderCmdBuffer, nVertexShaderCmdBufferWords * sizeof( uint32 ));
  2088. gpGcmContext->current += nVertexShaderCmdBufferWords;
  2089. }
  2090. }
  2091. if ( nMask & kDirtyVxConstants )
  2092. {
  2093. uint nBits = m_shaderVxConstants;
  2094. // Disabling this check because it causes lots of per-vertex dynamic lighting problems in common_vs_fxc.h function DoLighting().
  2095. if( m_nSetTransformBranchBits != nBits )
  2096. {
  2097. GCM_FUNC( cellGcmSetTransformBranchBits, nBits );
  2098. m_nSetTransformBranchBits = nBits;
  2099. }
  2100. }
  2101. if ( nMask & ( kDirtyVxShader | kDirtyClipPlanes ) )
  2102. {
  2103. // GCM_FUNC( cellGcmSetUserClipPlaneControl,
  2104. // ( ( m_pGcmState->vertAttrOutputMask & ( 1 << ( 6 + 0 ) ) ) != 0 ) ? CELL_GCM_USER_CLIP_PLANE_ENABLE_GE : 0,
  2105. // ( ( m_pGcmState->vertAttrOutputMask & ( 1 << ( 6 + 1 ) ) ) != 0 ) ? CELL_GCM_USER_CLIP_PLANE_ENABLE_GE : 0,
  2106. // ( ( m_pGcmState->vertAttrOutputMask & ( 1 << ( 6 + 2 ) ) ) != 0 ) ? CELL_GCM_USER_CLIP_PLANE_ENABLE_GE : 0,
  2107. // ( ( m_pGcmState->vertAttrOutputMask & ( 1 << ( 6 + 3 ) ) ) != 0 ) ? CELL_GCM_USER_CLIP_PLANE_ENABLE_GE : 0,
  2108. // ( ( m_pGcmState->vertAttrOutputMask & ( 1 << ( 6 + 4 ) ) ) != 0 ) ? CELL_GCM_USER_CLIP_PLANE_ENABLE_GE : 0,
  2109. // ( ( m_pGcmState->vertAttrOutputMask & ( 1 << ( 6 + 5 ) ) ) != 0 ) ? CELL_GCM_USER_CLIP_PLANE_ENABLE_GE : 0
  2110. // );
  2111. }
  2112. uint setVertexAttribOutputMask = ( nMask & ( kDirtyVxShader | kDirtyPxShader ) );
  2113. uint nVertexToFragmentProgramAttributeMask = m_pVertexShaderData->m_attributeOutputMask;
  2114. if ( m_pPixelShaderData )
  2115. {
  2116. nVertexToFragmentProgramAttributeMask = m_pPixelShaderData->m_attributeInputMask;
  2117. nVertexToFragmentProgramAttributeMask |= 0x20;
  2118. BindFragmentProgram( nVertexToFragmentProgramAttributeMask );
  2119. }
  2120. else
  2121. {
  2122. // we need to set the shader, but no shader specified, so set the default empty shader
  2123. if ( nMask & ( kDirtyPxShader | kDirtyPxConstants ) )
  2124. {
  2125. CELL_GCM_METHOD_SET_SHADER_CONTROL( gpGcmContext->current, g_ps3gcmGlobalState.m_nPsEmptyShaderControl0 ); // +2
  2126. CELL_GCM_METHOD_SET_SHADER_PROGRAM( gpGcmContext->current, CELL_GCM_LOCATION_LOCAL + 1,
  2127. ( g_ps3gcmGlobalState.m_pShaderPsEmptyBuffer.Offset() & 0x1fffffff ) ); // +2
  2128. CELL_GCM_METHOD_SET_VERTEX_ATTRIB_OUTPUT_MASK( gpGcmContext->current, g_ps3gcmGlobalState.m_nPsEmptyAttributeInputMask | 0x20 );
  2129. }
  2130. }
  2131. if ( setVertexAttribOutputMask )
  2132. {
  2133. GCM_FUNC( cellGcmSetVertexAttribOutputMask, nVertexToFragmentProgramAttributeMask );
  2134. }
  2135. }
  2136. inline void ZeroFPConsts()
  2137. {
  2138. memset(g_aFPConst, 0, sizeof(g_aFPConst));
  2139. }
  2140. inline void ZeroVPConsts()
  2141. {
  2142. GCM_FUNC( cellGcmSetVertexProgramParameterBlock, 0, GCM_DS_MAXVPCONST, (float*)g_aVPConst);
  2143. }
  2144. #ifndef SPU
  2145. inline void CGcmDrawState::EndFrame()
  2146. {
  2147. m_cmd = CmdEndFrame;
  2148. SendToSpu();
  2149. }
  2150. #endif
  2151. #ifndef SPU
  2152. inline void CGcmDrawState::CommitStates()
  2153. {
  2154. m_cmd = CmdCommitStates;
  2155. SendToSpu();
  2156. }
  2157. #else
  2158. inline void CGcmDrawState::CommitStates()
  2159. {
  2160. if (m_nFreeLabel) UnpackSetWriteBackEndLabel(GCM_LABEL_MEMORY_FREE, m_nFreeLabel);
  2161. if ( m_dirtyStatesMask & kDirtyResetRsx) UnpackResetRsxState();
  2162. if (m_dirtyStatesMask & kDirtyZeroAllPSConsts) ZeroFPConsts();
  2163. if (m_dirtyStatesMask & kDirtyZeroAllVSConsts) ZeroVPConsts();
  2164. UnpackData(); // Pulls out pixel shader consts and sets vertex shader consts
  2165. CommitRenderStates();
  2166. }
  2167. #endif
  2168. inline void CGcmDrawState::CommitAll(IDirect3DVertexDeclaration9 * pDecl, uint32 baseVertexIndex)
  2169. {
  2170. if (m_nFreeLabel) UnpackSetWriteBackEndLabel(GCM_LABEL_MEMORY_FREE, m_nFreeLabel);
  2171. if ( m_dirtyStatesMask & kDirtyResetRsx) UnpackResetRsxState();
  2172. if (m_dirtyStatesMask & kDirtyZeroAllPSConsts) ZeroFPConsts();
  2173. if (m_dirtyStatesMask & kDirtyZeroAllVSConsts) ZeroVPConsts();
  2174. UnpackData(); // Pulls out pixel shader consts and sets vertex shader consts
  2175. #ifdef SPU
  2176. extern void GetTextureLayouts();
  2177. GetTextureLayouts();
  2178. #endif
  2179. CommitRenderStates();
  2180. CommitVertexBindings(pDecl, baseVertexIndex);
  2181. CommitSamplers();
  2182. CommitShaders();
  2183. }
  2184. //--------------------------------------------------------------------------------------------------
  2185. // Draw Prim
  2186. //--------------------------------------------------------------------------------------------------
  2187. #ifndef SPU
  2188. inline void CGcmDrawState::DrawPrimitiveUP( IDirect3DVertexDeclaration9 * pDecl, D3DPRIMITIVETYPE nPrimitiveType,UINT nPrimitiveCount,
  2189. CONST void *pVertexStreamZeroData, UINT nVertexStreamZeroStride )
  2190. {
  2191. // Put drawcall into call buffer
  2192. uint32 callAddr = g_ps3gcmGlobalState.DrawPrimitiveUP(nPrimitiveType, nPrimitiveCount, pVertexStreamZeroData, nVertexStreamZeroStride);
  2193. // Allocate space to patch frag prog
  2194. if ( m_pPixelShaderData)
  2195. {
  2196. AllocateUcode((FpHeader_t*)m_pPixelShaderData->m_eaFp);
  2197. }
  2198. // if (m_param[0] > uint32(0xD0000000) )
  2199. // Error("Decl on Stack\n");
  2200. m_cmd = CmdDrawPrimUP;
  2201. m_param[0] = uintp(pDecl);
  2202. m_param[1] = callAddr + g_ps3gcmGlobalState.m_nIoOffsetDelta;
  2203. m_param[2] = nVertexStreamZeroStride;
  2204. m_param[4] = (uint32)&g_ps3texFormats;
  2205. SendToSpu();
  2206. }
  2207. inline void CGcmDrawState::DrawIndexedPrimitive( uint32 offset, IDirect3DVertexDeclaration9 * pDecl, D3DPRIMITIVETYPE Type,INT BaseVertexIndex,UINT MinVertexIndex,
  2208. UINT NumVertices,UINT startIndex,UINT nDrawPrimCount )
  2209. {
  2210. uint8 uiGcmMode = GetGcmMode(Type);
  2211. if( !uiGcmMode ) Error("PS3 : Unsupported prim type\n");
  2212. uint32 nPartitionStartIndex = startIndex;
  2213. uint nPartitionPrimCount = nDrawPrimCount;
  2214. uint32 uiGcmCount = GetGcmCount( Type, nPartitionPrimCount );
  2215. uint32 ioMemoryIndexBuffer = offset + nPartitionStartIndex * sizeof( uint16 ) ;
  2216. if (uiGcmCount)
  2217. {
  2218. if ( m_pPixelShaderData)
  2219. {
  2220. AllocateUcode((FpHeader_t*)m_pPixelShaderData->m_eaFp);
  2221. }
  2222. m_param[0] = uintp(pDecl);
  2223. m_param[1] = BaseVertexIndex;
  2224. m_param[2] = uiGcmMode;
  2225. m_param[3] = ioMemoryIndexBuffer;
  2226. m_param[4] = (uint32)&g_ps3texFormats;
  2227. m_param[5] = uiGcmCount;
  2228. m_cmd = CmdDrawPrim;
  2229. SendToSpu();
  2230. }
  2231. }
  2232. #endif
  2233. //--------------------------------------------------------------------------------------------------
  2234. // Execute command shader buffers
  2235. //--------------------------------------------------------------------------------------------------
  2236. template<class T> FORCEINLINE T GetData( uint8 *pData )
  2237. {
  2238. return * ( reinterpret_cast< T const *>( pData ) );
  2239. }
  2240. inline void CGcmDrawState::BindTexture2( CPs3BindTexture_t bindTex)
  2241. {
  2242. // On SPU, we need to pull in the lmblock to get the correct offset
  2243. #ifdef SPU
  2244. extern CPs3gcmLocalMemoryBlock gLmBlock;
  2245. gSpuMgr.DmaGetUNSAFE(&gLmBlock, uintp(bindTex.m_pLmBlock), sizeof(gLmBlock), SPU_DMAGET_TAG );
  2246. #endif
  2247. // Check for same texture ?
  2248. // Check for NULL texture ?
  2249. uint32 stage = bindTex.m_sampler;
  2250. if(bindTex.m_nLayout)
  2251. {
  2252. // Msg("New Bind Flags %d\n", bindTex.m_nBindFlags);
  2253. // if(gBind != bindTex.m_nBindFlags) DebuggerBreak();
  2254. SetSamplerState( stage, D3DSAMP_SRGBTEXTURE, ( bindTex.m_nBindFlags & (TEXTURE_BINDFLAGS_SRGBREAD>>24) ) != 0 );
  2255. SetSamplerState( stage, D3DSAMP_SHADOWFILTER, ( bindTex.m_nBindFlags & (TEXTURE_BINDFLAGS_SHADOWDEPTH>>24) ) ? 1 : 0 );
  2256. SetSamplerState( stage, D3DSAMP_ADDRESSU, bindTex.m_UWrap );
  2257. SetSamplerState( stage, D3DSAMP_ADDRESSV, bindTex.m_VWrap );
  2258. SetSamplerState( stage, D3DSAMP_ADDRESSW, bindTex.m_WWrap );
  2259. SetSamplerState( stage, D3DSAMP_MINFILTER, bindTex.m_minFilter );
  2260. SetSamplerState( stage, D3DSAMP_MAGFILTER, bindTex.m_magFilter );
  2261. SetSamplerState( stage, D3DSAMP_MIPFILTER, bindTex.m_mipFilter );
  2262. // if (m_textures[stage].m_nLocalOffset != bindTex.m_pLmBlock->Offset()) DebuggerBreak();
  2263. // if (m_textures[stage].m_eaLayout != bindTex.m_nLayout) DebuggerBreak();
  2264. #ifdef SPU
  2265. gSpuMgr.DmaDone(SPU_DMAGET_TAG_WAIT);
  2266. bindTex.m_pLmBlock = &gLmBlock;
  2267. #endif
  2268. m_textures[stage].m_nLocalOffset = bindTex.m_pLmBlock->Offset();
  2269. m_textures[stage].m_eaLayout = bindTex.m_nLayout;
  2270. if (bindTex.m_pLmBlock->IsLocalMemory() )
  2271. {
  2272. m_textures[stage].m_nLocalOffset |= 1;
  2273. }
  2274. m_dirtySamplersMask |= ( 1 << stage );
  2275. //PackData(kDataTexture, stage, m_textures[stage].m_nLocalOffset, m_textures[stage].m_eaLayout );
  2276. UnpackSetTexture(stage, m_textures[stage].m_nLocalOffset, m_textures[stage].m_eaLayout );
  2277. }
  2278. else
  2279. {
  2280. #ifdef SPU
  2281. gSpuMgr.DmaDone(SPU_DMAGET_TAG_WAIT);
  2282. #endif
  2283. UnpackResetTexture(stage);
  2284. }
  2285. }
  2286. inline void CGcmDrawState::SetVertexShaderConstantInternal( int var, float const* pVec, int numVecs, bool bForce)
  2287. {
  2288. GCM_FUNC( cellGcmSetVertexProgramParameterBlock, var, numVecs, pVec );
  2289. }
  2290. inline void CGcmDrawState::SetPixelShaderConstantInternal( int var, float const* pValues, int nNumConsts, bool bForce)
  2291. {
  2292. V_memcpy(&g_aFPConst[var], pValues, nNumConsts * 16);
  2293. }
  2294. #ifndef SPU
  2295. #include "shaderapifast.h"
  2296. #endif
  2297. void CGcmDrawState::ExecuteCommandBuffer( uint8 *pCmdBuf )
  2298. {
  2299. #ifndef SPU
  2300. int* pOffset = (int*) (pCmdBuf + sizeof(int) + (2*sizeof(int)));
  2301. for ( int i = 0; i < CBCMD_MAX_PS3TEX; i++)
  2302. {
  2303. uint32 offset = pOffset[i];
  2304. if (!offset) break;
  2305. CPs3BindParams_t* pBindParams = (CPs3BindParams_t*)(offset + pCmdBuf);
  2306. CPs3BindTexture_t tex;
  2307. CPs3BindTexture_t* pTex = &tex;
  2308. pTex->m_sampler = pBindParams->m_sampler;
  2309. pTex->m_nBindFlags = pBindParams->m_nBindFlags;
  2310. pTex->m_boundStd = pBindParams->m_boundStd;
  2311. pTex->m_hTexture = pBindParams->m_hTexture;
  2312. if (pTex->m_boundStd == -1)
  2313. {
  2314. ShaderApiFast( pShaderAPI )->GetPs3Texture(pTex, (ShaderAPITextureHandle_t)pTex->m_hTexture);
  2315. }
  2316. else
  2317. {
  2318. ShaderApiFast( pShaderAPI )->GetPs3Texture(pTex, (StandardTextureId_t)pTex->m_boundStd);
  2319. }
  2320. PackData(kDataEcbTexture, (uint8) i, sizeof(CPs3BindTexture_t), pTex);
  2321. }
  2322. #endif
  2323. m_aECB[m_nNumECB] = pCmdBuf;
  2324. uint32 size = *((uint32*)(pCmdBuf+4));
  2325. m_aSizeECB[m_nNumECB] = size;
  2326. m_nNumECB++;
  2327. PackData(kDataECB);
  2328. }
  2329. void CGcmDrawState::UnpackExecuteCommandBuffer( uint8 *pCmdBuf )
  2330. {
  2331. uint8* pStart = pCmdBuf;
  2332. uint8 *pReturnStack[20];
  2333. uint8 **pSP = &pReturnStack[ARRAYSIZE(pReturnStack)];
  2334. uint8 *pLastCmd;
  2335. for(;;)
  2336. {
  2337. uint8 *pCmd=pCmdBuf;
  2338. int nCmd = GetData<int>( pCmdBuf );
  2339. if (nCmd > CBCMD_SET_VERTEX_SHADER_NEARZFARZ_STATE) DebuggerBreak();
  2340. switch( nCmd )
  2341. {
  2342. case CBCMD_END:
  2343. {
  2344. if ( pSP == &pReturnStack[ARRAYSIZE(pReturnStack)] )
  2345. return;
  2346. else
  2347. {
  2348. // pop pc
  2349. pCmdBuf = *( pSP ++ );
  2350. break;
  2351. }
  2352. }
  2353. case CBCMD_JUMP:
  2354. pCmdBuf = GetData<uint8 *>( pCmdBuf + sizeof( int ) );
  2355. break;
  2356. case CBCMD_JSR:
  2357. {
  2358. Assert( pSP > &(pReturnStack[0] ) );
  2359. // *(--pSP ) = pCmdBuf + sizeof( int ) + sizeof( uint8 *);
  2360. // pCmdBuf = GetData<uint8 *>( pCmdBuf + sizeof( int ) );
  2361. UnpackExecuteCommandBuffer( GetData<uint8 *>( pCmdBuf + sizeof( int ) ) );
  2362. pCmdBuf = pCmdBuf + sizeof( int ) + sizeof( uint8 *);
  2363. break;
  2364. }
  2365. case CBCMD_SET_PIXEL_SHADER_FLOAT_CONST:
  2366. {
  2367. int nStartConst = GetData<int>( pCmdBuf + sizeof( int ) );
  2368. int nNumConsts = GetData<int>( pCmdBuf + 2 * sizeof( int ) );
  2369. float const *pValues = reinterpret_cast< float const *> ( pCmdBuf + 3 * sizeof( int ) );
  2370. pCmdBuf += nNumConsts * 4 * sizeof( float ) + 3 * sizeof( int );
  2371. SetPixelShaderConstantInternal( nStartConst, pValues, nNumConsts, false );
  2372. break;
  2373. }
  2374. case CBCMD_SETPIXELSHADERFOGPARAMS:
  2375. {
  2376. Error("Pixel Shader Fog params not supported\n");
  2377. break;
  2378. }
  2379. case CBCMD_STORE_EYE_POS_IN_PSCONST:
  2380. {
  2381. int nReg = GetData<int>( pCmdBuf + sizeof( int ) );
  2382. float flWValue = GetData<float>( pCmdBuf + 2 * sizeof( int ) );
  2383. pCmdBuf += 2 * sizeof( int ) + sizeof( float );
  2384. float vecValue[4];
  2385. memcpy(vecValue, m_vecWorldSpaceCameraPosition, sizeof(vecValue));
  2386. vecValue[3] = flWValue;
  2387. SetPixelShaderConstantInternal( nReg, vecValue, 1, false );
  2388. break;
  2389. }
  2390. case CBCMD_SET_DEPTH_FEATHERING_CONST:
  2391. {
  2392. // int nConst = GetData<int>( pCmdBuf + sizeof( int ) );
  2393. // float fDepthBlendScale = GetData<float>( pCmdBuf + 2 * sizeof( int ) );
  2394. pCmdBuf += 2 * sizeof( int ) + sizeof( float );
  2395. // SetDepthFeatheringPixelShaderConstant( nConst, fDepthBlendScale );
  2396. break;
  2397. }
  2398. case CBCMD_SET_VERTEX_SHADER_FLOAT_CONST:
  2399. {
  2400. int nStartConst = GetData<int>( pCmdBuf + sizeof( int ) );
  2401. int nNumConsts = GetData<int>( pCmdBuf + 2 * sizeof( int ) );
  2402. float const *pValues = reinterpret_cast< float const *> ( pCmdBuf + 3 * sizeof( int ) );
  2403. pCmdBuf += nNumConsts * 4 * sizeof( float ) + 3 * sizeof( int );
  2404. SetVertexShaderConstantInternal( nStartConst, pValues, nNumConsts, false );
  2405. break;
  2406. }
  2407. case CBCMD_BIND_PS3_TEXTURE:
  2408. {
  2409. CPs3BindParams_t params = GetData<CPs3BindParams_t> (pCmdBuf + sizeof( int ));
  2410. CPs3BindTexture_t tex = m_aBindTexture[params.m_nBindTexIndex];
  2411. gpGcmDrawState->BindTexture2( tex );
  2412. pCmdBuf += sizeof(int) + sizeof(params);
  2413. break;
  2414. }
  2415. case CBCMD_BIND_PS3_STANDARD_TEXTURE:
  2416. {
  2417. CPs3BindParams_t params = GetData<CPs3BindParams_t> (pCmdBuf + sizeof( int ));
  2418. CPs3BindTexture_t tex = m_aBindTexture[params.m_nBindTexIndex];
  2419. if (m_pFixed->m_nInstanced)
  2420. {
  2421. uint32 nBindFlags = tex.m_nBindFlags;
  2422. uint32 nSampler = tex.m_sampler;
  2423. switch (tex.m_boundStd)
  2424. {
  2425. case TEXTURE_LOCAL_ENV_CUBEMAP:
  2426. if (m_pFixed->m_nInstanced & GCM_DS_INST_ENVMAP) tex = m_pFixed->m_instanceEnvCubemap;
  2427. break;
  2428. case TEXTURE_LIGHTMAP:
  2429. if (m_pFixed->m_nInstanced & GCM_DS_INST_LIGHTMAP) tex = m_pFixed->m_instanceLightmap;
  2430. break;
  2431. case TEXTURE_PAINT:
  2432. if (m_pFixed->m_nInstanced & GCM_DS_INST_PAINTMAP) tex = m_pFixed->m_instancePaintmap;
  2433. break;
  2434. }
  2435. tex.m_nBindFlags = nBindFlags;
  2436. tex.m_sampler = nSampler;
  2437. }
  2438. // Bind texture
  2439. gpGcmDrawState->BindTexture2( tex );
  2440. // Twice more for bumped...
  2441. if ( (tex.m_boundStd == TEXTURE_LIGHTMAP_BUMPED) || (tex.m_boundStd == TEXTURE_LIGHTMAP_BUMPED))
  2442. {
  2443. tex.m_sampler++;
  2444. gpGcmDrawState->BindTexture2( tex );
  2445. tex.m_sampler++;
  2446. gpGcmDrawState->BindTexture2( tex );
  2447. }
  2448. pCmdBuf += sizeof(int) + sizeof(params);
  2449. break;
  2450. }
  2451. case CBCMD_PS3TEX:
  2452. {
  2453. pCmdBuf += sizeof(int) + (CBCMD_MAX_PS3TEX*sizeof(int));
  2454. break;
  2455. }
  2456. case CBCMD_LENGTH:
  2457. {
  2458. pCmdBuf += sizeof(int) *2 ;
  2459. break;
  2460. }
  2461. case CBCMD_SET_PSHINDEX:
  2462. {
  2463. // int nIdx = GetData<int>( pCmdBuf + sizeof( int ) );
  2464. // ShaderManager()->SetPixelShaderIndex( nIdx );
  2465. // pCmdBuf += 2 * sizeof( int );
  2466. Error("PSHINDEX Not Supported\n");
  2467. break;
  2468. }
  2469. case CBCMD_SET_VSHINDEX:
  2470. {
  2471. // int nIdx = GetData<int>( pCmdBuf + sizeof( int ) );
  2472. // ShaderManager()->SetVertexShaderIndex( nIdx );
  2473. pCmdBuf += 2 * sizeof( int );
  2474. Error("VSHINDEX Not Supported\n");
  2475. break;
  2476. }
  2477. case CBCMD_SET_VERTEX_SHADER_FLASHLIGHT_STATE:
  2478. {
  2479. // int nStartConst = GetData<int>( pCmdBuf + sizeof( int ) );
  2480. // SetVertexShaderConstantInternal( nStartConst, m_FlashlightWorldToTexture.Base(), 4, false );
  2481. // pCmdBuf += 2 * sizeof( int );
  2482. // Error("Flashlight unsupported\n");
  2483. pCmdBuf += 2 * sizeof( int );
  2484. break;
  2485. }
  2486. case CBCMD_SET_VERTEX_SHADER_NEARZFARZ_STATE:
  2487. {
  2488. Error("SetVertexShaderNearAndFarZ NOt SUPPORTED\n");
  2489. // int nStartConst = GetData<int>( pCmdBuf + sizeof( int ) );
  2490. //
  2491. // VMatrix m;
  2492. //
  2493. // m = m_MaterialProjectionMatrix;
  2494. //
  2495. // // GetMatrix( MATERIAL_PROJECTION, m.m[0] );
  2496. //
  2497. // // m[2][2] = F/(N-F) (flip sign if RH)
  2498. // // m[3][2] = NF/(N-F)
  2499. //
  2500. // float vNearFar[4];
  2501. //
  2502. // float N = m[3][2] / m[2][2];
  2503. // float F = (m[3][2]*N) / (N + m[3][2]);
  2504. //
  2505. // vNearFar[0] = N;
  2506. // vNearFar[1] = F;
  2507. //
  2508. // SetVertexShaderConstantInternal( nStartConst, vNearFar, 1, false );
  2509. pCmdBuf += 2 * sizeof( int );
  2510. break;
  2511. }
  2512. case CBCMD_SET_PIXEL_SHADER_FLASHLIGHT_STATE:
  2513. {
  2514. // int nLightSampler = GetData<int>( pCmdBuf + sizeof( int ) );
  2515. // int nDepthSampler = GetData<int>( pCmdBuf + 2 * sizeof( int ) );
  2516. // int nShadowNoiseSampler = GetData<int>( pCmdBuf + 3 * sizeof( int ) );
  2517. // int nColorConst = GetData<int>( pCmdBuf + 4 * sizeof( int ) );
  2518. // int nAttenConst = GetData<int>( pCmdBuf + 5 * sizeof( int ) );
  2519. // int nOriginConst = GetData<int>( pCmdBuf + 6 * sizeof( int ) );
  2520. // int nDepthTweakConst = GetData<int>( pCmdBuf + 7 * sizeof( int ) );
  2521. // int nScreenScaleConst = GetData<int>( pCmdBuf + 8 * sizeof( int ) );
  2522. // int nWorldToTextureConstant = GetData<int>( pCmdBuf + 9 * sizeof( int ) );
  2523. // bool bFlashlightNoLambert = GetData<int>( pCmdBuf + 10 * sizeof( int ) ) != 0;
  2524. // bool bSinglePassFlashlight = GetData<int>( pCmdBuf + 11 * sizeof( int ) ) != 0;
  2525. // pCmdBuf += 12 * sizeof( int );
  2526. //
  2527. // ShaderAPITextureHandle_t hTexture = g_pShaderUtil->GetShaderAPITextureBindHandle( m_FlashlightState.m_pSpotlightTexture, m_FlashlightState.m_nSpotlightTextureFrame, 0 );
  2528. // BindTexture( (Sampler_t)nLightSampler, TEXTURE_BINDFLAGS_SRGBREAD, hTexture ); // !!!BUG!!!srgb or not?
  2529. //
  2530. // SetPixelShaderConstantInternal( nAttenConst, m_pFlashlightAtten, 1, false );
  2531. // SetPixelShaderConstantInternal( nOriginConst, m_pFlashlightPos, 1, false );
  2532. //
  2533. // m_pFlashlightColor[3] = bFlashlightNoLambert ? 2.0f : 0.0f; // This will be added to N.L before saturate to force a 1.0 N.L term
  2534. //
  2535. // // DX10 hardware and single pass flashlight require a hack scalar since the flashlight is added in linear space
  2536. // float flashlightColor[4] = { m_pFlashlightColor[0], m_pFlashlightColor[1], m_pFlashlightColor[2], m_pFlashlightColor[3] };
  2537. // if ( ( g_pHardwareConfig->UsesSRGBCorrectBlending() ) || ( bSinglePassFlashlight ) )
  2538. // {
  2539. // // Magic number that works well on the 360 and NVIDIA 8800
  2540. // flashlightColor[0] *= 2.5f;
  2541. // flashlightColor[1] *= 2.5f;
  2542. // flashlightColor[2] *= 2.5f;
  2543. // }
  2544. //
  2545. // SetPixelShaderConstantInternal( nColorConst, flashlightColor, 1, false );
  2546. //
  2547. // if ( nWorldToTextureConstant >= 0 )
  2548. // {
  2549. // SetPixelShaderConstantInternal( nWorldToTextureConstant, m_FlashlightWorldToTexture.Base(), 4, false );
  2550. // }
  2551. //
  2552. // BindStandardTexture( (Sampler_t)nShadowNoiseSampler, TEXTURE_BINDFLAGS_NONE, TEXTURE_SHADOW_NOISE_2D );
  2553. // if( m_pFlashlightDepthTexture && m_FlashlightState.m_bEnableShadows && ShaderUtil()->GetConfig().ShadowDepthTexture() )
  2554. // {
  2555. // ShaderAPITextureHandle_t hDepthTexture = g_pShaderUtil->GetShaderAPITextureBindHandle( m_pFlashlightDepthTexture, 0, 0 );
  2556. // BindTexture( (Sampler_t)nDepthSampler, TEXTURE_BINDFLAGS_SHADOWDEPTH, hDepthTexture );
  2557. //
  2558. // SetPixelShaderConstantInternal( nDepthTweakConst, m_pFlashlightTweaks, 1, false );
  2559. //
  2560. // // Dimensions of screen, used for screen-space noise map sampling
  2561. // float vScreenScale[4] = {1280.0f / 32.0f, 720.0f / 32.0f, 0, 0};
  2562. // int nWidth, nHeight;
  2563. // BaseClass::GetBackBufferDimensions( nWidth, nHeight );
  2564. //
  2565. // int nTexWidth, nTexHeight;
  2566. // GetStandardTextureDimensions( &nTexWidth, &nTexHeight, TEXTURE_SHADOW_NOISE_2D );
  2567. //
  2568. // vScreenScale[0] = (float) nWidth / nTexWidth;
  2569. // vScreenScale[1] = (float) nHeight / nTexHeight;
  2570. // vScreenScale[2] = 1.0f / m_FlashlightState.m_flShadowMapResolution;
  2571. // vScreenScale[3] = 2.0f / m_FlashlightState.m_flShadowMapResolution;
  2572. // SetPixelShaderConstantInternal( nScreenScaleConst, vScreenScale, 1, false );
  2573. // }
  2574. // else
  2575. // {
  2576. // BindStandardTexture( (Sampler_t)nDepthSampler, TEXTURE_BINDFLAGS_NONE, TEXTURE_WHITE );
  2577. // }
  2578. // Error("Flashlight unsupported\n");
  2579. pCmdBuf += 12 * sizeof( int );
  2580. break;
  2581. }
  2582. case CBCMD_SET_PIXEL_SHADER_UBERLIGHT_STATE:
  2583. {
  2584. // int iEdge0Const = GetData<int>( pCmdBuf + sizeof( int ) );
  2585. // int iEdge1Const = GetData<int>( pCmdBuf + 2 * sizeof( int ) );
  2586. // int iEdgeOOWConst = GetData<int>( pCmdBuf + 3 * sizeof( int ) );
  2587. // int iShearRoundConst = GetData<int>( pCmdBuf + 4 * sizeof( int ) );
  2588. // int iAABBConst = GetData<int>( pCmdBuf + 5 * sizeof( int ) );
  2589. // int iWorldToLightConst = GetData<int>( pCmdBuf + 6 * sizeof( int ) );
  2590. pCmdBuf += 7 * sizeof( int );
  2591. //
  2592. // SetPixelShaderConstantInternal( iEdge0Const, m_UberlightRenderState.m_vSmoothEdge0.Base(), 1, false );
  2593. // SetPixelShaderConstantInternal( iEdge1Const, m_UberlightRenderState.m_vSmoothEdge1.Base(), 1, false );
  2594. // SetPixelShaderConstantInternal( iEdgeOOWConst, m_UberlightRenderState.m_vSmoothOneOverW.Base(), 1, false );
  2595. // SetPixelShaderConstantInternal( iShearRoundConst, m_UberlightRenderState.m_vShearRound.Base(), 1, false );
  2596. // SetPixelShaderConstantInternal( iAABBConst, m_UberlightRenderState.m_vaAbB.Base(), 1, false );
  2597. // SetPixelShaderConstantInternal( iWorldToLightConst, m_UberlightRenderState.m_WorldToLight.Base(), 4, false );
  2598. Error("Uberlight state unsupported\n");
  2599. break;
  2600. }
  2601. #ifndef NDEBUG
  2602. default:
  2603. Assert(0);
  2604. break;
  2605. #endif
  2606. }
  2607. pLastCmd = pCmd;
  2608. }
  2609. }
  2610. inline void CGcmDrawState::TextureReplace(uint32 id, CPs3BindTexture_t tex)
  2611. {
  2612. switch (id)
  2613. {
  2614. case TEXTURE_LOCAL_ENV_CUBEMAP:
  2615. m_pFixed->m_nInstanced |= GCM_DS_INST_ENVMAP;
  2616. m_pFixed->m_instanceEnvCubemap = tex;
  2617. break;
  2618. case TEXTURE_LIGHTMAP:
  2619. m_pFixed->m_nInstanced |= GCM_DS_INST_LIGHTMAP;
  2620. m_pFixed->m_instanceLightmap = tex;
  2621. break;
  2622. case TEXTURE_PAINT:
  2623. m_pFixed->m_nInstanced |= GCM_DS_INST_ENVMAP;
  2624. m_pFixed->m_instancePaintmap = tex;
  2625. break;
  2626. }
  2627. }
  2628. #endif // INCLUDED_GCMDRAWSTATE_H