//========== Copyright (c) Valve Corporation, All rights reserved. ==========// // PC cascaded shadow mapping // This defines must be kept in sync with the CSM_DEFAULT_DEPTH_TEXTURE_RESOLUTION, etc. macros in c_env_cascade_light.cpp - otherwise you'll get subtle filtering artifacts. #define CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW ( 640*2 ) #define CSM_DEPTH_TEXTURE_RESOLUTION_LOW ( 768*2 ) #define CSM_DEPTH_TEXTURE_RESOLUTION_MEDIUM_OR_HIGH ( 1024*2 ) // Bilinear Percentage Closer Filtering with ATI Fetch4 #if 1 // This works on real ATI X1000-series hardware that uses a DX9-style FETCH4 swizzle. float CSMSampleShadowBuffer1TapATIBilinear( float2 vPositionLs, float flComparisonDepth ) { float flSunShadowingShadowTextureWidth = CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW; float flSunShadowingShadowTextureHeight = CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW; float flSunShadowingInvShadowTextureWidth = 1.0f / CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW; float flSunShadowingInvShadowTextureHeight = 1.0f / CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW; float2 vFracPositionLs = frac( vPositionLs * float2( flSunShadowingShadowTextureWidth, flSunShadowingShadowTextureHeight ) ); //float2 vSamplePositionLs = vPositionLs - vFracPositionLs * float2( flSunShadowingInvShadowTextureWidth, flSunShadowingInvShadowTextureHeight ); //vSamplePositionLs += .00125f/CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW; float2 vSamplePositionLs = vPositionLs; float4 vCmpSamples = tex2D( CSMDepthAtlasSampler, vSamplePositionLs.xy ).argb; vCmpSamples = vCmpSamples > flComparisonDepth; float4 vFactors = float4( ( 1.0f - vFracPositionLs.x ) * ( 1.0f - vFracPositionLs.y ), vFracPositionLs.x * ( 1.0f - vFracPositionLs.y ), ( 1.0f - vFracPositionLs.x ) * vFracPositionLs.y, vFracPositionLs.x * vFracPositionLs.y ); return dot( vCmpSamples, vFactors ); } #else // This works properly on recent ATI hardware that uses DX 10.1+ style GATHER4 swizzles. Argh. float CSMSampleShadowBuffer1TapATIBilinear( float2 vPositionLs, float flComparisonDepth ) { float flSunShadowingShadowTextureWidth = CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW; float flSunShadowingShadowTextureHeight = CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW; float flSunShadowingInvShadowTextureWidth = 1.0f / CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW; float flSunShadowingInvShadowTextureHeight = 1.0f / CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW; float2 vFracPositionLs = frac( vPositionLs * float2( flSunShadowingShadowTextureWidth, flSunShadowingShadowTextureHeight ) ); float2 vSamplePositionLs = vPositionLs - vFracPositionLs * float2( flSunShadowingInvShadowTextureWidth, flSunShadowingInvShadowTextureHeight ); vSamplePositionLs += .00125f/CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW; float4 vCmpSamples = tex2D( CSMDepthAtlasSampler, vSamplePositionLs.xy ).abrg; vCmpSamples = vCmpSamples > flComparisonDepth; float4 vFactors = float4( ( 1.0f - vFracPositionLs.x ) * ( 1.0f - vFracPositionLs.y ), vFracPositionLs.x * ( 1.0f - vFracPositionLs.y ), ( 1.0f - vFracPositionLs.x ) * vFracPositionLs.y, vFracPositionLs.x * vFracPositionLs.y ); return dot( vCmpSamples, vFactors ); } #endif float CSMSampleShadowBuffer1Tap( float2 vPositionLs, float flComparisonDepth ) { // Non-gameconsole return tex2Dlod( CSMDepthAtlasSampler, float4( vPositionLs.x, vPositionLs.y, flComparisonDepth, 0.0f ) ).x; } float CSMSampleShadowBuffer9Taps( float2 shadowMapCenter, float objDepth ) { float fTexelEpsilon = 1.0f / CSM_DEPTH_TEXTURE_RESOLUTION_MEDIUM_OR_HIGH; float4 vSampleBase = float4( shadowMapCenter, objDepth, 0.0f ); float4 vOneTaps; vOneTaps.x = tex2Dlod( CSMDepthAtlasSampler, vSampleBase + float4( fTexelEpsilon, fTexelEpsilon, 0, 0 ) ).x; vOneTaps.y = tex2Dlod( CSMDepthAtlasSampler, vSampleBase + float4( -fTexelEpsilon, fTexelEpsilon, 0, 0 ) ).x; vOneTaps.z = tex2Dlod( CSMDepthAtlasSampler, vSampleBase + float4( fTexelEpsilon, -fTexelEpsilon, 0, 0 ) ).x; vOneTaps.w = tex2Dlod( CSMDepthAtlasSampler, vSampleBase + float4( -fTexelEpsilon, -fTexelEpsilon, 0, 0 ) ).x; float flOneTaps = dot( vOneTaps, float4(1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f)); float4 vTwoTaps; vTwoTaps.x = tex2Dlod( CSMDepthAtlasSampler, vSampleBase + float4( fTexelEpsilon, 0, 0, 0 ) ).x; vTwoTaps.y = tex2Dlod( CSMDepthAtlasSampler, vSampleBase + float4( -fTexelEpsilon, 0, 0, 0 ) ).x; vTwoTaps.z = tex2Dlod( CSMDepthAtlasSampler, vSampleBase + float4( 0, -fTexelEpsilon, 0, 0 ) ).x; vTwoTaps.w = tex2Dlod( CSMDepthAtlasSampler, vSampleBase + float4( 0, fTexelEpsilon, 0, 0 ) ).x; float flTwoTaps = dot( vTwoTaps, float4(2.0f / 16.0f, 2.0f / 16.0f, 2.0f / 16.0f, 2.0f / 16.0f)); float flCenterTap = tex2Dlod( CSMDepthAtlasSampler, vSampleBase ).x * float(4.0f / 16.0f); // Sum all 9 Taps return flOneTaps + flTwoTaps + flCenterTap; } // 25 taps is crazy expensive, just here for comparison purposes. float CSMSampleShadowBuffer25Taps( float2 shadowMapCenter, float objDepth ) { float flTexelEpsilon = 1.0f / CSM_DEPTH_TEXTURE_RESOLUTION_MEDIUM_OR_HIGH; float flTwoTexelEpsilon = 2.0f * flTexelEpsilon; float4 c0 = float4( 1.0f / 331.0f, 7.0f / 331.0f, 4.0f / 331.0f, 20.0f / 331.0f ); float4 c1 = float4( 33.0f / 331.0f, 55.0f / 331.0f, -flTexelEpsilon, 0.0f ); float4 c2 = float4( flTwoTexelEpsilon, -flTwoTexelEpsilon, 0.0f, flTexelEpsilon ); float4 c3 = float4( flTexelEpsilon, -flTexelEpsilon, flTwoTexelEpsilon, -flTwoTexelEpsilon ); float4 vOneTaps; vOneTaps.x = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c2.xx, objDepth, 0 ) ).x; // 2 2 vOneTaps.y = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c2.yx, objDepth, 0 ) ).x; // -2 2 vOneTaps.z = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c2.xy, objDepth, 0 ) ).x; // 2 -2 vOneTaps.w = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c2.yy, objDepth, 0 ) ).x; // -2 -2 float flSum = dot( vOneTaps, c0.xxxx ); float4 vSevenTaps; vSevenTaps.x = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c2.xz, objDepth, 0 ) ).x; // 2 0 vSevenTaps.y = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c2.yz, objDepth, 0 ) ).x; // -2 0 vSevenTaps.z = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c2.zx, objDepth, 0 ) ).x; // 0 2 vSevenTaps.w = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c2.zy, objDepth, 0 ) ).x; // 0 -2 flSum += dot( vSevenTaps, c0.yyyy ); float4 vFourTapsA, vFourTapsB; vFourTapsA.x = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c2.xw, objDepth, 0 ) ).x; // 2 1 vFourTapsA.y = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c2.wx, objDepth, 0 ) ).x; // 1 2 vFourTapsA.z = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c3.yz, objDepth, 0 ) ).x; // -1 2 vFourTapsA.w = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c3.wx, objDepth, 0 ) ).x; // -2 1 vFourTapsB.x = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c3.wy, objDepth, 0 ) ).x; // -2 -1 vFourTapsB.y = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c3.yw, objDepth, 0 ) ).x; // -1 -2 vFourTapsB.z = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c3.xw, objDepth, 0 ) ).x; // 1 -2 vFourTapsB.w = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c3.zy, objDepth, 0 ) ).x; // 2 -1 flSum += dot( vFourTapsA, c0.zzzz ); flSum += dot( vFourTapsB, c0.zzzz ); float4 v20Taps; v20Taps.x = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c3.xx, objDepth, 0 ) ).x; // 1 1 v20Taps.y = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c3.yx, objDepth, 0 ) ).x; // -1 1 v20Taps.z = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c3.xy, objDepth, 0 ) ).x; // 1 -1 v20Taps.w = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c3.yy, objDepth, 0 ) ).x; // -1 -1 flSum += dot( v20Taps, c0.wwww ); float4 v33Taps; v33Taps.x = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c2.wz, objDepth, 0 ) ).x; // 1 0 v33Taps.y = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c1.zw, objDepth, 0 ) ).x; // -1 0 v33Taps.z = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c1.wz, objDepth, 0 ) ).x; // 0 -1 v33Taps.w = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + c2.zw, objDepth, 0 ) ).x; // 0 1 flSum += dot( v33Taps, c1.xxxx ); flSum += tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter, objDepth, 0 ) ).x * c1.y; return flSum; } float CSMSampleShadowBuffer( float2 vPositionLs, float flComparisonDepth ) { #if (CSM_MODE == CSM_MODE_ATI_FETCH4 ) return CSMSampleShadowBuffer1TapATIBilinear( vPositionLs, flComparisonDepth ); #elif ( CSM_MODE == CSM_MODE_VERY_LOW_OR_LOW ) return CSMSampleShadowBuffer1Tap( vPositionLs, flComparisonDepth ); #else return CSMSampleShadowBuffer9Taps( vPositionLs, flComparisonDepth ); #endif } int CSMRangeTestExpanded( float2 vCoords ) { // Returns true if the coordinates are within [.02,.98] - purposely a little sloppy to prevent the shadow filter kernel from leaking outside the cascade's portion of the atlas. vCoords = vCoords * ( 1.0f / .96f ) - float2( .02f / .96f, .02f / .96f ); return ( dot( saturate( vCoords.xy ) - vCoords.xy, float2( 1, 1 ) ) == 0.0f ); } int CSMRangeTestNonExpanded( float2 vCoords ) { return ( dot( saturate( vCoords.xy ) - vCoords.xy, float2( 1, 1 ) ) == 0.0f ); } float CSMComputeSplitLerpFactor( float2 vPositionToSampleLs ) { float2 vSplitLerpFactorTemp = float2( 1.0f, 1.0f ) - saturate( ( abs( vPositionToSampleLs.xy - float2( .5f, .5f ) ) - float2( g_flSunShadowingSplitLerpFactorBase, g_flSunShadowingSplitLerpFactorBase ) ) * float2( g_flSunShadowingSplitLerpFactorInvRange, g_flSunShadowingSplitLerpFactorInvRange ) ); return vSplitLerpFactorTemp.x * vSplitLerpFactorTemp.y; } float4 CSMTransformLightToTexture( float4 pos, float4x4 mat ) { return mul( pos, mat ); } #if ( CASCADE_SIZE == 0 ) float CSMComputeShadowing( float3 vPositionWs ) { return 1.0f; } #elif ( CSM_MODE == CSM_MODE_HIGH ) // Each cascade is 1024x1024, sample from up to 2 cascades, 9 tap filtering for each sample, smoothly lerp between each, 3 total cascades float CSMComputeShadowing( float3 vPositionWs ) { float flShadowScalar = 1.0f; float4 vPosition4Ws = float4( vPositionWs.xyz, 1.0f ); float3 vPositionToSampleLs = float3( 0.0f, 0.0f, 0.0f ); int nCascadeIndex = 0; vPositionToSampleLs.xy = mul( vPosition4Ws.xyzw, g_matWorldToShadowTexMatrices[0] ).xy; // Non-expanded texcoord range tests because the 2D lerp will haved faded to the next cascade long before the filter kernels leaks outside the cascade's atlas region [flatten] if ( !CSMRangeTestNonExpanded( vPositionToSampleLs.xy ) ) { nCascadeIndex = 1; vPositionToSampleLs.xy = mul( vPosition4Ws.xyzw, g_matWorldToShadowTexMatrices[1] ).xy; [flatten] if ( !CSMRangeTestNonExpanded( vPositionToSampleLs.xy ) ) { nCascadeIndex = 2; vPositionToSampleLs.xy = mul( vPosition4Ws.xyzw, g_matWorldToShadowTexMatrices[2] ).xy; } } vPositionToSampleLs.z = mul( vPosition4Ws.xyzw, g_matWorldToShadowTexMatrices[0] ).z; float flSplitLerpFactor = CSMComputeSplitLerpFactor( vPositionToSampleLs.xy ); vPositionToSampleLs.xy = saturate( vPositionToSampleLs.xy ) * g_vCascadeAtlasUVOffsets[nCascadeIndex].zw + g_vCascadeAtlasUVOffsets[nCascadeIndex].xy; flShadowScalar = CSMSampleShadowBuffer( vPositionToSampleLs.xy, vPositionToSampleLs.z ); [branch] if ( flSplitLerpFactor < 1.0f ) { float flShadowScalar1 = 1.0f; [flatten] if ( nCascadeIndex < 2 ) { float2 vPosition1Ls = mul( vPosition4Ws.xyzw, g_matWorldToShadowTexMatrices[nCascadeIndex + 1] ).xy; vPosition1Ls.xy = saturate( vPosition1Ls.xy ) * g_vCascadeAtlasUVOffsets[nCascadeIndex + 1].zw + g_vCascadeAtlasUVOffsets[nCascadeIndex + 1].xy; flShadowScalar1 = CSMSampleShadowBuffer( vPosition1Ls.xy, vPositionToSampleLs.z ); } flShadowScalar = lerp( flShadowScalar1, flShadowScalar, saturate( flSplitLerpFactor ) ); } float3 vCamDelta = vPositionWs - g_vCamPosition.xyz; float flZLerpFactor = saturate( dot( vCamDelta, vCamDelta ) * g_flSunShadowingZLerpFactorRange + g_flSunShadowingZLerpFactorBase ); flShadowScalar = lerp( flShadowScalar, 1.0f, flZLerpFactor ); return flShadowScalar; } #elif ( ( CSM_MODE == CSM_MODE_VERY_LOW_OR_LOW ) || ( CSM_MODE == CSM_MODE_ATI_FETCH4 ) ) // VERY_LOW = Each cascade is 640x640, sample from 1 cascade only, 2 total cascades // LOW = Each cascade is 768x768, sample from 1 cascade only, 2 total cascades float CSMComputeShadowing( float3 vPositionWs ) { float4 vPosition4Ws = float4( vPositionWs.xyz, 1.0f ); float3 vPositionToSampleLs = float3( 0.0f, 0.0f, CSMTransformLightToTexture( vPosition4Ws.xyzw, g_matWorldToShadowTexMatrices[0] ).z ); float2 vCascadeUVOffset = g_vCascadeAtlasUVOffsets[1].xy;//float2( .5f, 0.0f ); vPositionToSampleLs.xy = CSMTransformLightToTexture( vPosition4Ws.xyzw, g_matWorldToShadowTexMatrices[1] ).xy; [flatten] if ( !CSMRangeTestExpanded( vPositionToSampleLs.xy ) ) { vCascadeUVOffset = g_vCascadeAtlasUVOffsets[2].xy; vPositionToSampleLs.xy = CSMTransformLightToTexture( vPosition4Ws.xyzw, g_matWorldToShadowTexMatrices[2] ).xy; } float flShadowScalar = CSMSampleShadowBuffer( saturate( vPositionToSampleLs.xy ) * .5f + vCascadeUVOffset, vPositionToSampleLs.z ); float3 vCamDelta = vPositionWs - g_vCamPosition.xyz; float flZLerpFactor = saturate( dot( vCamDelta, vCamDelta ) * g_flSunShadowingZLerpFactorRange + g_flSunShadowingZLerpFactorBase ); flShadowScalar = lerp( flShadowScalar, 1.0f, flZLerpFactor ); return flShadowScalar; } #elif ( CSM_MODE == CSM_MODE_MEDIUM ) // MEDIUM = Each cascade is 1024x1024, sample from 1 cascade only, 9 tap filtering, 3 cascades on vertexlit/phong, 2 cascades on lightmappedgeneric, 3 total cascades float CSMComputeShadowing( float3 vPositionWs ) { float flShadowScalar = 1.0f; float4 vPosition4Ws = float4( vPositionWs.xyz, 1.0f ); float3 vPositionToSampleLs = float3( 0.0f, 0.0f, CSMTransformLightToTexture( vPosition4Ws.xyzw, g_matWorldToShadowTexMatrices[0] ).z ); float2 vCascadeAtlasUVOffset = g_vCascadeAtlasUVOffsets[0].xy; float flLerpFactorDisable = 1.0f; #if !defined( CSM_LIGHTMAPPEDGENERIC ) vPositionToSampleLs.xy = CSMTransformLightToTexture( vPosition4Ws.xyzw, g_matWorldToShadowTexMatrices[0] ).xy; [flatten] if ( !CSMRangeTestExpanded( vPositionToSampleLs.xy ) ) #endif { vCascadeAtlasUVOffset = g_vCascadeAtlasUVOffsets[1].xy; vPositionToSampleLs.xy = CSMTransformLightToTexture( vPosition4Ws.xyzw, g_matWorldToShadowTexMatrices[1] ).xy; [flatten] if ( !CSMRangeTestExpanded( vPositionToSampleLs.xy ) ) { flLerpFactorDisable = 0.0f; vCascadeAtlasUVOffset = g_vCascadeAtlasUVOffsets[2].xy; vPositionToSampleLs.xy = CSMTransformLightToTexture( vPosition4Ws.xyzw, g_matWorldToShadowTexMatrices[2] ).xy; } } flShadowScalar = CSMSampleShadowBuffer( saturate( vPositionToSampleLs.xy ) * .5f + vCascadeAtlasUVOffset, vPositionToSampleLs.z ); float2 vSplitLerpFactorTemp = float2( 1.0f, 1.0f ) - saturate( ( abs( vPositionToSampleLs.xy - float2( .5f, .5f ) ) - float2( g_flSunShadowingSplitLerpFactorBase, g_flSunShadowingSplitLerpFactorBase ) ) * float2( g_flSunShadowingSplitLerpFactorInvRange, g_flSunShadowingSplitLerpFactorInvRange ) ); float flSplitLerpFactor = vSplitLerpFactorTemp.x * vSplitLerpFactorTemp.y; flShadowScalar = lerp( 1.0f, flShadowScalar, saturate( flSplitLerpFactor + flLerpFactorDisable ) ); float3 vCamDelta = vPositionWs - g_vCamPosition.xyz; float flZLerpFactor = saturate( dot( vCamDelta, vCamDelta ) * g_flSunShadowingZLerpFactorRange + g_flSunShadowingZLerpFactorBase ); flShadowScalar = lerp( flShadowScalar, 1.0f, flZLerpFactor ); return flShadowScalar; } #elif ( CSM_MODE == CSM_MODE_ATI_FETCH4 ) #error Invalid CSM_MODE #endif