Counter Strike : Global Offensive Source Code
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

317 lines
16 KiB

  1. //========== Copyright (c) Valve Corporation, All rights reserved. ==========//
  2. // PC cascaded shadow mapping
  // Shadow depth texture resolutions (in texels) for each quality tier, each written as twice a base size.
  // These defines must be kept in sync with the CSM_DEFAULT_DEPTH_TEXTURE_RESOLUTION, etc. macros in
  // c_env_cascade_light.cpp - otherwise you'll get subtle filtering artifacts.
  #define CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW ( 2 * 640 )
  #define CSM_DEPTH_TEXTURE_RESOLUTION_LOW ( 2 * 768 )
  #define CSM_DEPTH_TEXTURE_RESOLUTION_MEDIUM_OR_HIGH ( 2 * 1024 )
  // Bilinear Percentage Closer Filtering with ATI Fetch4.
  //
  // vPositionLs       - coordinate used to address CSMDepthAtlasSampler.
  // flComparisonDepth - reference depth each fetched value is compared against.
  //
  // Reads the four channels returned by a single tex2D lookup, turns each into 1 or 0 depending on
  // whether it is greater than flComparisonDepth, and blends those four results with bilinear
  // weights taken from the fractional texel position. Returns the weighted sum.
  //
  // NOTE(review): the fractional position is always computed against
  // CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW - confirm the Fetch4 path is only used with a depth
  // texture of that size.
  float CSMSampleShadowBuffer1TapATIBilinear( float2 vPositionLs, float flComparisonDepth )
  {
      // Position inside the current texel, in [0,1) on each axis.
      float2 vFracPositionLs = frac( vPositionLs * float2( CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW, CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW ) );

  #if 1
      // This works on real ATI X1000-series hardware that uses a DX9-style FETCH4 swizzle.
      // The lookup is issued at the unmodified coordinate.
      float4 vCmpSamples = tex2D( CSMDepthAtlasSampler, vPositionLs.xy ).argb;
  #else
      // This works properly on recent ATI hardware that uses DX 10.1+ style GATHER4 swizzles.
      // The coordinate is moved back to the start of its texel and then nudged by a small constant
      // before the lookup; the channel order also differs from the FETCH4 variant.
      float2 vSamplePositionLs = vPositionLs - vFracPositionLs * float2( 1.0f / CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW, 1.0f / CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW );
      vSamplePositionLs += .00125f / CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW;
      float4 vCmpSamples = tex2D( CSMDepthAtlasSampler, vSamplePositionLs.xy ).abrg;
  #endif

      // Per-channel depth test; the boolean results are stored back as 0/1 floats.
      vCmpSamples = vCmpSamples > flComparisonDepth;

      // Standard bilinear weights: (1-fx)(1-fy), fx(1-fy), (1-fx)fy, fx*fy.
      float2 vInvFrac = float2( 1.0f, 1.0f ) - vFracPositionLs;
      float4 vFactors = float4( vInvFrac.x * vInvFrac.y,
                                vFracPositionLs.x * vInvFrac.y,
                                vInvFrac.x * vFracPositionLs.y,
                                vFracPositionLs.x * vFracPositionLs.y );
      return dot( vCmpSamples, vFactors );
  }
  // Single shadow lookup (non-gameconsole path).
  // Packs vPositionLs into .xy, flComparisonDepth into .z and an explicit LOD of 0 into .w of the
  // lookup coordinate, and returns the .x channel of the result.
  // NOTE(review): presumably the sampler performs the depth comparison against .z in hardware -
  // confirm against the sampler setup.
  float CSMSampleShadowBuffer1Tap( float2 vPositionLs, float flComparisonDepth )
  {
      float4 vLookup = float4( vPositionLs.x, vPositionLs.y, flComparisonDepth, 0.0f );
      return tex2Dlod( CSMDepthAtlasSampler, vLookup ).x;
  }
  // 3x3 weighted shadow filter.
  //
  // shadowMapCenter - lookup coordinate of the centre tap.
  // objDepth        - depth value passed in .z of every lookup.
  //
  // Taps are spaced 1 / CSM_DEPTH_TEXTURE_RESOLUTION_MEDIUM_OR_HIGH apart. Weights are 1/16 for the
  // four diagonal taps, 2/16 for the four axis-aligned taps and 4/16 for the centre, so they sum to 1.
  float CSMSampleShadowBuffer9Taps( float2 shadowMapCenter, float objDepth )
  {
      float flStep = 1.0f / CSM_DEPTH_TEXTURE_RESOLUTION_MEDIUM_OR_HIGH;

      float4 vCenter = float4( shadowMapCenter, objDepth, 0.0f );
      float4 vStepX = float4( flStep, 0, 0, 0 );
      float4 vStepY = float4( 0, flStep, 0, 0 );

      // Diagonal neighbours: (+,+), (-,+), (+,-), (-,-).
      float4 vDiagonal;
      vDiagonal.x = tex2Dlod( CSMDepthAtlasSampler, vCenter + vStepX + vStepY ).x;
      vDiagonal.y = tex2Dlod( CSMDepthAtlasSampler, vCenter - vStepX + vStepY ).x;
      vDiagonal.z = tex2Dlod( CSMDepthAtlasSampler, vCenter + vStepX - vStepY ).x;
      vDiagonal.w = tex2Dlod( CSMDepthAtlasSampler, vCenter - vStepX - vStepY ).x;
      float flDiagonalWeight = 1.0f / 16.0f;
      float flDiagonalSum = dot( vDiagonal, float4( flDiagonalWeight, flDiagonalWeight, flDiagonalWeight, flDiagonalWeight ) );

      // Axis-aligned neighbours: +x, -x, -y, +y.
      float4 vAxis;
      vAxis.x = tex2Dlod( CSMDepthAtlasSampler, vCenter + vStepX ).x;
      vAxis.y = tex2Dlod( CSMDepthAtlasSampler, vCenter - vStepX ).x;
      vAxis.z = tex2Dlod( CSMDepthAtlasSampler, vCenter - vStepY ).x;
      vAxis.w = tex2Dlod( CSMDepthAtlasSampler, vCenter + vStepY ).x;
      float flAxisWeight = 2.0f / 16.0f;
      float flAxisSum = dot( vAxis, float4( flAxisWeight, flAxisWeight, flAxisWeight, flAxisWeight ) );

      // Centre tap carries the largest weight.
      float flCenterSum = tex2Dlod( CSMDepthAtlasSampler, vCenter ).x * ( 4.0f / 16.0f );

      return flDiagonalSum + flAxisSum + flCenterSum;
  }
  // 5x5 weighted shadow filter. 25 taps is crazy expensive, just here for comparison purposes.
  //
  // shadowMapCenter - lookup coordinate of the centre tap.
  // objDepth        - depth value passed in .z of every lookup.
  //
  // Taps are spaced 1 / CSM_DEPTH_TEXTURE_RESOLUTION_MEDIUM_OR_HIGH apart. Weights, in 331ths:
  //      1   4   7   4   1
  //      4  20  33  20   4
  //      7  33  55  33   7
  //      4  20  33  20   4
  //      1   4   7   4   1
  // (4*1 + 8*4 + 4*7 + 4*20 + 4*33 + 55 = 331, so the kernel is normalised.)
  float CSMSampleShadowBuffer25Taps( float2 shadowMapCenter, float objDepth )
  {
      float e1 = 1.0f / CSM_DEPTH_TEXTURE_RESOLUTION_MEDIUM_OR_HIGH;  // one tap spacing
      float e2 = 2.0f * e1;                                           // two tap spacings

      float w1  = 1.0f / 331.0f;
      float w7  = 7.0f / 331.0f;
      float w4  = 4.0f / 331.0f;
      float w20 = 20.0f / 331.0f;
      float w33 = 33.0f / 331.0f;
      float w55 = 55.0f / 331.0f;

      // Outer corners (+-2, +-2), weight 1.
      float4 vCorners;
      vCorners.x = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2(  e2,  e2 ), objDepth, 0 ) ).x; //  2  2
      vCorners.y = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2( -e2,  e2 ), objDepth, 0 ) ).x; // -2  2
      vCorners.z = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2(  e2, -e2 ), objDepth, 0 ) ).x; //  2 -2
      vCorners.w = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2( -e2, -e2 ), objDepth, 0 ) ).x; // -2 -2
      float flSum = dot( vCorners, float4( w1, w1, w1, w1 ) );

      // Outer axis taps (+-2, 0) and (0, +-2), weight 7.
      float4 vFarAxis;
      vFarAxis.x = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2(  e2, 0.0f ), objDepth, 0 ) ).x; //  2  0
      vFarAxis.y = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2( -e2, 0.0f ), objDepth, 0 ) ).x; // -2  0
      vFarAxis.z = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2( 0.0f,  e2 ), objDepth, 0 ) ).x; //  0  2
      vFarAxis.w = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2( 0.0f, -e2 ), objDepth, 0 ) ).x; //  0 -2
      flSum += dot( vFarAxis, float4( w7, w7, w7, w7 ) );

      // Remaining outer-ring taps (one axis at +-2, the other at +-1), weight 4, in two groups of four.
      float4 vOuterA;
      vOuterA.x = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2(  e2,  e1 ), objDepth, 0 ) ).x; //  2  1
      vOuterA.y = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2(  e1,  e2 ), objDepth, 0 ) ).x; //  1  2
      vOuterA.z = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2( -e1,  e2 ), objDepth, 0 ) ).x; // -1  2
      vOuterA.w = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2( -e2,  e1 ), objDepth, 0 ) ).x; // -2  1
      float4 vOuterB;
      vOuterB.x = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2( -e2, -e1 ), objDepth, 0 ) ).x; // -2 -1
      vOuterB.y = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2( -e1, -e2 ), objDepth, 0 ) ).x; // -1 -2
      vOuterB.z = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2(  e1, -e2 ), objDepth, 0 ) ).x; //  1 -2
      vOuterB.w = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2(  e2, -e1 ), objDepth, 0 ) ).x; //  2 -1
      flSum += dot( vOuterA, float4( w4, w4, w4, w4 ) );
      flSum += dot( vOuterB, float4( w4, w4, w4, w4 ) );

      // Inner corners (+-1, +-1), weight 20.
      float4 vNearDiag;
      vNearDiag.x = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2(  e1,  e1 ), objDepth, 0 ) ).x; //  1  1
      vNearDiag.y = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2( -e1,  e1 ), objDepth, 0 ) ).x; // -1  1
      vNearDiag.z = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2(  e1, -e1 ), objDepth, 0 ) ).x; //  1 -1
      vNearDiag.w = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2( -e1, -e1 ), objDepth, 0 ) ).x; // -1 -1
      flSum += dot( vNearDiag, float4( w20, w20, w20, w20 ) );

      // Inner axis taps (+-1, 0) and (0, +-1), weight 33.
      float4 vNearAxis;
      vNearAxis.x = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2(  e1, 0.0f ), objDepth, 0 ) ).x; //  1  0
      vNearAxis.y = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2( -e1, 0.0f ), objDepth, 0 ) ).x; // -1  0
      vNearAxis.z = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2( 0.0f, -e1 ), objDepth, 0 ) ).x; //  0 -1
      vNearAxis.w = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2( 0.0f,  e1 ), objDepth, 0 ) ).x; //  0  1
      flSum += dot( vNearAxis, float4( w33, w33, w33, w33 ) );

      // Centre tap, weight 55.
      flSum += tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter, objDepth, 0 ) ).x * w55;

      return flSum;
  }
  // Compile-time dispatch to the shadow filter that matches CSM_MODE:
  //   CSM_MODE_ATI_FETCH4       -> CSMSampleShadowBuffer1TapATIBilinear
  //   CSM_MODE_VERY_LOW_OR_LOW  -> CSMSampleShadowBuffer1Tap
  //   anything else             -> CSMSampleShadowBuffer9Taps
  // Both arguments are forwarded unchanged to the selected filter.
  float CSMSampleShadowBuffer( float2 vPositionLs, float flComparisonDepth )
  {
      float flResult;
  #if ( CSM_MODE == CSM_MODE_ATI_FETCH4 )
      flResult = CSMSampleShadowBuffer1TapATIBilinear( vPositionLs, flComparisonDepth );
  #elif ( CSM_MODE == CSM_MODE_VERY_LOW_OR_LOW )
      flResult = CSMSampleShadowBuffer1Tap( vPositionLs, flComparisonDepth );
  #else
      flResult = CSMSampleShadowBuffer9Taps( vPositionLs, flComparisonDepth );
  #endif
      return flResult;
  }
  // Returns non-zero if both coordinates are within [.02,.98] - purposely a little sloppy to prevent
  // the shadow filter kernel from leaking outside the cascade's portion of the atlas.
  //
  // The coordinates are first remapped so that [.02,.98] becomes [0,1]; a remapped coordinate is in
  // range exactly when saturate() leaves it unchanged.
  int CSMRangeTestExpanded( float2 vCoords )
  {
      vCoords = vCoords * ( 1.0f / .96f ) - float2( .02f / .96f, .02f / .96f );

      // Sum the absolute per-axis clamp distances. Summing the signed differences would let an
      // undershoot on one axis cancel an equal overshoot on the other (e.g. (-0.5, 1.5)) and report
      // an out-of-range coordinate as in range.
      float2 vClampDistance = abs( saturate( vCoords.xy ) - vCoords.xy );
      return ( dot( vClampDistance, float2( 1, 1 ) ) == 0.0f );
  }
  // Returns non-zero if both coordinates are within [0,1], i.e. saturate() leaves them unchanged.
  int CSMRangeTestNonExpanded( float2 vCoords )
  {
      // Sum the absolute per-axis clamp distances. Summing the signed differences would let an
      // undershoot on one axis cancel an equal overshoot on the other (e.g. (-0.5, 1.5)) and report
      // an out-of-range coordinate as in range.
      float2 vClampDistance = abs( saturate( vCoords.xy ) - vCoords.xy );
      return ( dot( vClampDistance, float2( 1, 1 ) ) == 0.0f );
  }
  // Blend factor derived from how far a coordinate lies from the centre (.5,.5).
  //
  // Per axis: 1 - saturate( ( |p - .5| - g_flSunShadowingSplitLerpFactorBase ) * g_flSunShadowingSplitLerpFactorInvRange ).
  // The result is the product of the two per-axis terms, so it lies in [0,1].
  // NOTE(review): presumably InvRange is positive, so the factor is 1 near the centre and falls
  // towards 0 near the edges - confirm against where the constants are set.
  float CSMComputeSplitLerpFactor( float2 vPositionToSampleLs )
  {
      float2 vDistFromCenter = abs( vPositionToSampleLs.xy - float2( .5f, .5f ) );
      float2 vBase = float2( g_flSunShadowingSplitLerpFactorBase, g_flSunShadowingSplitLerpFactorBase );
      float2 vInvRange = float2( g_flSunShadowingSplitLerpFactorInvRange, g_flSunShadowingSplitLerpFactorInvRange );

      float2 vAxisFactor = float2( 1.0f, 1.0f ) - saturate( ( vDistFromCenter - vBase ) * vInvRange );
      return vAxisFactor.x * vAxisFactor.y;
  }
  // Transforms pos by mat using mul( pos, mat ) (vector on the left) and returns the full float4 result.
  float4 CSMTransformLightToTexture( float4 pos, float4x4 mat )
  {
      float4 vTransformed = mul( pos, mat );
      return vTransformed;
  }
  145. #if ( CASCADE_SIZE == 0 )
  // Variant compiled when CASCADE_SIZE == 0: ignores the position and always returns 1.0f.
  float CSMComputeShadowing( float3 vPositionWs )
  {
      float flConstantResult = 1.0f;
      return flConstantResult;
  }
  150. #elif ( CSM_MODE == CSM_MODE_HIGH )
  // HIGH: Each cascade is 1024x1024, sample from up to 2 cascades, 9 tap filtering for each sample,
  // smoothly lerp between each, 3 total cascades.
  //
  // vPositionWs - position to shade; it is extended with w = 1 and multiplied by the
  //               g_matWorldToShadowTexMatrices entries.
  // Returns the filtered shadow term, blended towards 1.0f as the squared distance from
  // g_vCamPosition grows.
  float CSMComputeShadowing( float3 vPositionWs )
  {
      float4 vWorldPos = float4( vPositionWs.xyz, 1.0f );

      // Matrix 0 supplies the first candidate UV and also the depth used for every lookup below.
      float4 vCascade0Ls = mul( vWorldPos.xyzw, g_matWorldToShadowTexMatrices[0] );
      float flDepthLs = vCascade0Ls.z;

      // Pick the first cascade (0, 1, then 2) whose UV lies inside [0,1]; cascade 2 is the fallback.
      // Non-expanded texcoord range tests because the 2D lerp will have faded to the next cascade
      // long before the filter kernel leaks outside the cascade's atlas region.
      int nCascade = 0;
      float2 vUvLs = vCascade0Ls.xy;
      [flatten]
      if ( !CSMRangeTestNonExpanded( vUvLs.xy ) )
      {
          nCascade = 1;
          vUvLs = mul( vWorldPos.xyzw, g_matWorldToShadowTexMatrices[1] ).xy;
          [flatten]
          if ( !CSMRangeTestNonExpanded( vUvLs.xy ) )
          {
              nCascade = 2;
              vUvLs = mul( vWorldPos.xyzw, g_matWorldToShadowTexMatrices[2] ).xy;
          }
      }

      // The split factor is taken from the cascade-local UV, before it is remapped into the atlas.
      float flSplitLerpFactor = CSMComputeSplitLerpFactor( vUvLs.xy );

      // Remap into the atlas: .zw of the per-cascade entry scales, .xy offsets.
      float2 vAtlasUv = saturate( vUvLs.xy ) * g_vCascadeAtlasUVOffsets[nCascade].zw + g_vCascadeAtlasUVOffsets[nCascade].xy;
      float flShadowScalar = CSMSampleShadowBuffer( vAtlasUv.xy, flDepthLs );

      // Only pay for a second lookup when the split factor says a blend is needed.
      [branch]
      if ( flSplitLerpFactor < 1.0f )
      {
          // With no further cascade to blend with (nCascade == 2) the blend target stays 1.0f.
          float flNextShadowScalar = 1.0f;
          [flatten]
          if ( nCascade < 2 )
          {
              float2 vNextUv = mul( vWorldPos.xyzw, g_matWorldToShadowTexMatrices[nCascade + 1] ).xy;
              vNextUv.xy = saturate( vNextUv.xy ) * g_vCascadeAtlasUVOffsets[nCascade + 1].zw + g_vCascadeAtlasUVOffsets[nCascade + 1].xy;
              flNextShadowScalar = CSMSampleShadowBuffer( vNextUv.xy, flDepthLs );
          }
          flShadowScalar = lerp( flNextShadowScalar, flShadowScalar, saturate( flSplitLerpFactor ) );
      }

      // Distance fade: squared camera distance, scaled and biased, blends the result towards 1.0f.
      float3 vToCamera = vPositionWs - g_vCamPosition.xyz;
      float flZLerpFactor = saturate( dot( vToCamera, vToCamera ) * g_flSunShadowingZLerpFactorRange + g_flSunShadowingZLerpFactorBase );
      return lerp( flShadowScalar, 1.0f, flZLerpFactor );
  }
  194. #elif ( ( CSM_MODE == CSM_MODE_VERY_LOW_OR_LOW ) || ( CSM_MODE == CSM_MODE_ATI_FETCH4 ) )
  // VERY_LOW = Each cascade is 640x640, sample from 1 cascade only, 2 total cascades
  // LOW = Each cascade is 768x768, sample from 1 cascade only, 2 total cascades
  // (This variant is also compiled for CSM_MODE_ATI_FETCH4.)
  //
  // vPositionWs - position to shade; it is extended with w = 1 and transformed by the
  //               g_matWorldToShadowTexMatrices entries.
  // Returns the shadow term, blended towards 1.0f as the squared distance from g_vCamPosition grows.
  float CSMComputeShadowing( float3 vPositionWs )
  {
      float4 vWorldPos = float4( vPositionWs.xyz, 1.0f );

      // Depth always comes from matrix 0, even though only entries 1 and 2 are used for UVs here.
      float flDepthLs = CSMTransformLightToTexture( vWorldPos.xyzw, g_matWorldToShadowTexMatrices[0] ).z;

      // Start with entry 1 (an old inline note suggested its offset is float2( .5f, 0.0f ) - unverified).
      float2 vAtlasOffset = g_vCascadeAtlasUVOffsets[1].xy;
      float2 vUvLs = CSMTransformLightToTexture( vWorldPos.xyzw, g_matWorldToShadowTexMatrices[1] ).xy;

      // Fall back to entry 2 when the UV is outside the slightly shrunken [.02,.98] window.
      [flatten]
      if ( !CSMRangeTestExpanded( vUvLs.xy ) )
      {
          vAtlasOffset = g_vCascadeAtlasUVOffsets[2].xy;
          vUvLs = CSMTransformLightToTexture( vWorldPos.xyzw, g_matWorldToShadowTexMatrices[2] ).xy;
      }

      // The cascade-local UV is clamped, scaled by a fixed .5 and offset into the atlas.
      float2 vAtlasUv = saturate( vUvLs.xy ) * .5f + vAtlasOffset;
      float flShadowScalar = CSMSampleShadowBuffer( vAtlasUv, flDepthLs );

      // Distance fade: squared camera distance, scaled and biased, blends the result towards 1.0f.
      float3 vToCamera = vPositionWs - g_vCamPosition.xyz;
      float flZLerpFactor = saturate( dot( vToCamera, vToCamera ) * g_flSunShadowingZLerpFactorRange + g_flSunShadowingZLerpFactorBase );
      return lerp( flShadowScalar, 1.0f, flZLerpFactor );
  }
  215. #elif ( CSM_MODE == CSM_MODE_MEDIUM )
  // MEDIUM = Each cascade is 1024x1024, sample from 1 cascade only, 9 tap filtering, 3 cascades on
  // vertexlit/phong, 2 cascades on lightmappedgeneric, 3 total cascades
  //
  // vPositionWs - position to shade; it is extended with w = 1 and transformed by the
  //               g_matWorldToShadowTexMatrices entries.
  // Returns the filtered shadow term. It is faded to 1.0f by the split factor only when entry 2 was
  // selected, and always blended towards 1.0f as the squared distance from g_vCamPosition grows.
  float CSMComputeShadowing( float3 vPositionWs )
  {
      float4 vPosition4Ws = float4( vPositionWs.xyz, 1.0f );

      // Depth always comes from matrix 0, whichever entry ends up supplying the UV.
      float flDepthLs = CSMTransformLightToTexture( vPosition4Ws.xyzw, g_matWorldToShadowTexMatrices[0] ).z;

      float2 vUvLs = float2( 0.0f, 0.0f );
      float2 vCascadeAtlasUVOffset = g_vCascadeAtlasUVOffsets[0].xy;

      // 1 keeps the split fade switched off; it is cleared only when entry 2 is selected.
      float flLerpFactorDisable = 1.0f;

      // When CSM_LIGHTMAPPEDGENERIC is defined, the entry-0 test is compiled out and the braces below
      // become an unconditional block, so selection starts at entry 1.
  #if !defined( CSM_LIGHTMAPPEDGENERIC )
      vUvLs = CSMTransformLightToTexture( vPosition4Ws.xyzw, g_matWorldToShadowTexMatrices[0] ).xy;
      [flatten]
      if ( !CSMRangeTestExpanded( vUvLs.xy ) )
  #endif
      {
          vCascadeAtlasUVOffset = g_vCascadeAtlasUVOffsets[1].xy;
          vUvLs = CSMTransformLightToTexture( vPosition4Ws.xyzw, g_matWorldToShadowTexMatrices[1] ).xy;
          [flatten]
          if ( !CSMRangeTestExpanded( vUvLs.xy ) )
          {
              flLerpFactorDisable = 0.0f;
              vCascadeAtlasUVOffset = g_vCascadeAtlasUVOffsets[2].xy;
              vUvLs = CSMTransformLightToTexture( vPosition4Ws.xyzw, g_matWorldToShadowTexMatrices[2] ).xy;
          }
      }

      // The cascade-local UV is clamped, scaled by a fixed .5 and offset into the atlas.
      float flShadowScalar = CSMSampleShadowBuffer( saturate( vUvLs.xy ) * .5f + vCascadeAtlasUVOffset, flDepthLs );

      // Shared helper replaces the previously duplicated inline split-factor expression.
      float flSplitLerpFactor = CSMComputeSplitLerpFactor( vUvLs.xy );

      // With flLerpFactorDisable == 1 the saturate() yields 1 and the sample passes through unchanged;
      // with 0 the sample is faded towards 1.0f by the split factor.
      flShadowScalar = lerp( 1.0f, flShadowScalar, saturate( flSplitLerpFactor + flLerpFactorDisable ) );

      // Distance fade: squared camera distance, scaled and biased, blends the result towards 1.0f.
      float3 vCamDelta = vPositionWs - g_vCamPosition.xyz;
      float flZLerpFactor = saturate( dot( vCamDelta, vCamDelta ) * g_flSunShadowingZLerpFactorRange + g_flSunShadowingZLerpFactorBase );
      flShadowScalar = lerp( flShadowScalar, 1.0f, flZLerpFactor );
      return flShadowScalar;
  }
  #else
  // Any CSM_MODE not matched by the branches above is a configuration error.
  // (CSM_MODE_ATI_FETCH4 is already handled together with CSM_MODE_VERY_LOW_OR_LOW earlier in this
  // chain, so testing for it again here could never be reached.)
  #error Invalid CSM_MODE
  251. #endif