|
|
//========== Copyright (c) Valve Corporation, All rights reserved. ==========//
// PC cascaded shadow mapping
// These defines must be kept in sync with the CSM_DEFAULT_DEPTH_TEXTURE_RESOLUTION, etc. macros in c_env_cascade_light.cpp - otherwise you'll get subtle filtering artifacts.
// Shadow depth texture sizes, in texels, for each quality tier. Each is written as
// ( per-cascade size * 2 ); the 640 / 768 / 1024 figures match the per-cascade sizes quoted in the
// CSMComputeShadowing comments in this file. The filter functions in this file use these values
// (or their reciprocals) as texel counts / texel step sizes.
#define CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW ( 640*2 )
#define CSM_DEPTH_TEXTURE_RESOLUTION_LOW ( 768*2 )
#define CSM_DEPTH_TEXTURE_RESOLUTION_MEDIUM_OR_HIGH ( 1024*2 )
// Bilinear percentage-closer filtering with ATI Fetch4.
// One tex2D fetch supplies four depth values (one per channel). Each is compared against the receiver
// depth and the four pass/fail results are blended with bilinear weights taken from the sub-texel
// position of the lookup.
//   vPositionLs       - lookup position in atlas UV space.
//   flComparisonDepth - receiver depth to compare the stored depths against.
// Returns the weighted fraction of the four depth values that are greater than flComparisonDepth.
// NOTE: both variants hard-code the VERY_LOW atlas resolution.
#if 1
// This variant works on real ATI X1000-series hardware that uses a DX9-style FETCH4 swizzle.
float CSMSampleShadowBuffer1TapATIBilinear( float2 vPositionLs, float flComparisonDepth )
{
	float flAtlasTexels = CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW;

	// Fractional position inside the texel; drives the bilinear weights below.
	float2 vSubTexel = frac( vPositionLs * float2( flAtlasTexels, flAtlasTexels ) );
	float2 vInvSubTexel = float2( 1.0f, 1.0f ) - vSubTexel;

	// The lookup uses the incoming position unmodified. (The texel-snap-plus-nudge used by the
	// GATHER4 variant is disabled for this swizzle.)
	float4 vDepthTests = tex2D( CSMDepthAtlasSampler, vPositionLs.xy ).argb;
	vDepthTests = vDepthTests > flComparisonDepth;

	float4 vBilinearWeights = float4(
		vInvSubTexel.x * vInvSubTexel.y,
		vSubTexel.x * vInvSubTexel.y,
		vInvSubTexel.x * vSubTexel.y,
		vSubTexel.x * vSubTexel.y );

	return dot( vDepthTests, vBilinearWeights );
}
#else
// This variant works properly on recent ATI hardware that uses DX 10.1+ style GATHER4 swizzles. Argh.
float CSMSampleShadowBuffer1TapATIBilinear( float2 vPositionLs, float flComparisonDepth )
{
	float flAtlasTexels = CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW;
	float flInvAtlasTexels = 1.0f / CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW;

	// Fractional position inside the texel; drives the bilinear weights below.
	float2 vSubTexel = frac( vPositionLs * float2( flAtlasTexels, flAtlasTexels ) );
	float2 vInvSubTexel = float2( 1.0f, 1.0f ) - vSubTexel;

	// Snap the lookup back to the start of its texel, then nudge it by a small constant offset.
	float2 vLookupLs = vPositionLs - vSubTexel * float2( flInvAtlasTexels, flInvAtlasTexels );
	vLookupLs += .00125f/CSM_DEPTH_TEXTURE_RESOLUTION_VERY_LOW;

	float4 vDepthTests = tex2D( CSMDepthAtlasSampler, vLookupLs.xy ).abrg;
	vDepthTests = vDepthTests > flComparisonDepth;

	float4 vBilinearWeights = float4(
		vInvSubTexel.x * vInvSubTexel.y,
		vSubTexel.x * vInvSubTexel.y,
		vInvSubTexel.x * vSubTexel.y,
		vSubTexel.x * vSubTexel.y );

	return dot( vDepthTests, vBilinearWeights );
}
#endif
// Single-lookup shadow test (non-gameconsole path).
// Packs the atlas UV into xy and the comparison depth into z of one tex2Dlod lookup at mip 0 and
// returns the x channel of the result.
// NOTE(review): presumably CSMDepthAtlasSampler is a depth-compare sampler, so the result is already a
// filtered lit/shadowed value - confirm against the sampler setup.
float CSMSampleShadowBuffer1Tap( float2 vPositionLs, float flComparisonDepth )
{
	float4 vLookup = float4( vPositionLs, flComparisonDepth, 0.0f );
	return tex2Dlod( CSMDepthAtlasSampler, vLookup ).x;
}
// 3x3 weighted shadow filter.
// Takes nine tex2Dlod lookups spaced one MEDIUM_OR_HIGH texel apart around shadowMapCenter and
// combines them with the kernel
//     1 2 1
//     2 4 2   / 16
//     1 2 1
//   shadowMapCenter - center of the kernel in atlas UV space.
//   objDepth        - comparison depth, passed in z of every lookup.
// Returns the weighted sum of the nine lookup results.
float CSMSampleShadowBuffer9Taps( float2 shadowMapCenter, float objDepth )
{
	float flTexel = 1.0f / CSM_DEPTH_TEXTURE_RESOLUTION_MEDIUM_OR_HIGH;

	// Diagonal neighbours, weight 1/16 each.
	float4 vCornerTaps;
	vCornerTaps.x = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2(  flTexel,  flTexel ), objDepth, 0.0f ) ).x;
	vCornerTaps.y = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2( -flTexel,  flTexel ), objDepth, 0.0f ) ).x;
	vCornerTaps.z = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2(  flTexel, -flTexel ), objDepth, 0.0f ) ).x;
	vCornerTaps.w = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2( -flTexel, -flTexel ), objDepth, 0.0f ) ).x;
	float flCornerSum = dot( vCornerTaps, float4( 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f ) );

	// Horizontal / vertical neighbours, weight 2/16 each.
	float4 vEdgeTaps;
	vEdgeTaps.x = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2(  flTexel, 0 ), objDepth, 0.0f ) ).x;
	vEdgeTaps.y = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2( -flTexel, 0 ), objDepth, 0.0f ) ).x;
	vEdgeTaps.z = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2( 0, -flTexel ), objDepth, 0.0f ) ).x;
	vEdgeTaps.w = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter + float2( 0,  flTexel ), objDepth, 0.0f ) ).x;
	float flEdgeSum = dot( vEdgeTaps, float4( 2.0f / 16.0f, 2.0f / 16.0f, 2.0f / 16.0f, 2.0f / 16.0f ) );

	// Center tap, weight 4/16.
	float flCenter = tex2Dlod( CSMDepthAtlasSampler, float4( shadowMapCenter, objDepth, 0.0f ) ).x * float( 4.0f / 16.0f );

	// Sum all 9 taps.
	return flCornerSum + flEdgeSum + flCenter;
}
// 5x5 weighted shadow filter. 25 taps is crazy expensive, just here for comparison purposes.
// Taps are spaced one MEDIUM_OR_HIGH texel apart and weighted by the kernel
//      1  4  7  4  1
//      4 20 33 20  4
//      7 33 55 33  7   / 331
//      4 20 33 20  4
//      1  4  7  4  1
//   shadowMapCenter - center of the kernel in atlas UV space.
//   objDepth        - comparison depth, passed in z of every lookup.
// Returns the weighted sum of the 25 lookup results.
float CSMSampleShadowBuffer25Taps( float2 shadowMapCenter, float objDepth )
{
	float flStep1 = 1.0f / CSM_DEPTH_TEXTURE_RESOLUTION_MEDIUM_OR_HIGH;
	float flStep2 = 2.0f * flStep1;

	float4 vBase = float4( shadowMapCenter, objDepth, 0 );

	float flW1  = 1.0f / 331.0f;
	float flW7  = 7.0f / 331.0f;
	float flW4  = 4.0f / 331.0f;
	float flW20 = 20.0f / 331.0f;
	float flW33 = 33.0f / 331.0f;
	float flW55 = 55.0f / 331.0f;

	// Outer corners (+-2, +-2), weight 1.
	float4 vTapsW1;
	vTapsW1.x = tex2Dlod( CSMDepthAtlasSampler, vBase + float4(  flStep2,  flStep2, 0, 0 ) ).x;
	vTapsW1.y = tex2Dlod( CSMDepthAtlasSampler, vBase + float4( -flStep2,  flStep2, 0, 0 ) ).x;
	vTapsW1.z = tex2Dlod( CSMDepthAtlasSampler, vBase + float4(  flStep2, -flStep2, 0, 0 ) ).x;
	vTapsW1.w = tex2Dlod( CSMDepthAtlasSampler, vBase + float4( -flStep2, -flStep2, 0, 0 ) ).x;
	float flSum = dot( vTapsW1, float4( flW1, flW1, flW1, flW1 ) );

	// Outer ring, on-axis (+-2, 0) and (0, +-2), weight 7.
	float4 vTapsW7;
	vTapsW7.x = tex2Dlod( CSMDepthAtlasSampler, vBase + float4(  flStep2, 0.0f, 0, 0 ) ).x;
	vTapsW7.y = tex2Dlod( CSMDepthAtlasSampler, vBase + float4( -flStep2, 0.0f, 0, 0 ) ).x;
	vTapsW7.z = tex2Dlod( CSMDepthAtlasSampler, vBase + float4( 0.0f,  flStep2, 0, 0 ) ).x;
	vTapsW7.w = tex2Dlod( CSMDepthAtlasSampler, vBase + float4( 0.0f, -flStep2, 0, 0 ) ).x;
	flSum += dot( vTapsW7, float4( flW7, flW7, flW7, flW7 ) );

	// Outer ring, off-axis (+-2, +-1) and (+-1, +-2), weight 4 - eight taps in two groups.
	float4 vTapsW4A;
	vTapsW4A.x = tex2Dlod( CSMDepthAtlasSampler, vBase + float4(  flStep2,  flStep1, 0, 0 ) ).x;
	vTapsW4A.y = tex2Dlod( CSMDepthAtlasSampler, vBase + float4(  flStep1,  flStep2, 0, 0 ) ).x;
	vTapsW4A.z = tex2Dlod( CSMDepthAtlasSampler, vBase + float4( -flStep1,  flStep2, 0, 0 ) ).x;
	vTapsW4A.w = tex2Dlod( CSMDepthAtlasSampler, vBase + float4( -flStep2,  flStep1, 0, 0 ) ).x;
	float4 vTapsW4B;
	vTapsW4B.x = tex2Dlod( CSMDepthAtlasSampler, vBase + float4( -flStep2, -flStep1, 0, 0 ) ).x;
	vTapsW4B.y = tex2Dlod( CSMDepthAtlasSampler, vBase + float4( -flStep1, -flStep2, 0, 0 ) ).x;
	vTapsW4B.z = tex2Dlod( CSMDepthAtlasSampler, vBase + float4(  flStep1, -flStep2, 0, 0 ) ).x;
	vTapsW4B.w = tex2Dlod( CSMDepthAtlasSampler, vBase + float4(  flStep2, -flStep1, 0, 0 ) ).x;
	flSum += dot( vTapsW4A, float4( flW4, flW4, flW4, flW4 ) );
	flSum += dot( vTapsW4B, float4( flW4, flW4, flW4, flW4 ) );

	// Inner corners (+-1, +-1), weight 20.
	float4 vTapsW20;
	vTapsW20.x = tex2Dlod( CSMDepthAtlasSampler, vBase + float4(  flStep1,  flStep1, 0, 0 ) ).x;
	vTapsW20.y = tex2Dlod( CSMDepthAtlasSampler, vBase + float4( -flStep1,  flStep1, 0, 0 ) ).x;
	vTapsW20.z = tex2Dlod( CSMDepthAtlasSampler, vBase + float4(  flStep1, -flStep1, 0, 0 ) ).x;
	vTapsW20.w = tex2Dlod( CSMDepthAtlasSampler, vBase + float4( -flStep1, -flStep1, 0, 0 ) ).x;
	flSum += dot( vTapsW20, float4( flW20, flW20, flW20, flW20 ) );

	// Inner ring, on-axis (+-1, 0) and (0, +-1), weight 33.
	float4 vTapsW33;
	vTapsW33.x = tex2Dlod( CSMDepthAtlasSampler, vBase + float4(  flStep1, 0.0f, 0, 0 ) ).x;
	vTapsW33.y = tex2Dlod( CSMDepthAtlasSampler, vBase + float4( -flStep1, 0.0f, 0, 0 ) ).x;
	vTapsW33.z = tex2Dlod( CSMDepthAtlasSampler, vBase + float4( 0.0f, -flStep1, 0, 0 ) ).x;
	vTapsW33.w = tex2Dlod( CSMDepthAtlasSampler, vBase + float4( 0.0f,  flStep1, 0, 0 ) ).x;
	flSum += dot( vTapsW33, float4( flW33, flW33, flW33, flW33 ) );

	// Center tap, weight 55.
	flSum += tex2Dlod( CSMDepthAtlasSampler, vBase ).x * flW55;

	return flSum;
}
// Shadow-atlas lookup front end: forwards to the filter selected at compile time by CSM_MODE.
//   CSM_MODE_ATI_FETCH4       -> CSMSampleShadowBuffer1TapATIBilinear
//   CSM_MODE_VERY_LOW_OR_LOW  -> CSMSampleShadowBuffer1Tap
//   anything else             -> CSMSampleShadowBuffer9Taps
//   vPositionLs       - lookup position in atlas UV space.
//   flComparisonDepth - receiver depth to compare against.
float CSMSampleShadowBuffer( float2 vPositionLs, float flComparisonDepth )
{
	float flResult;
#if ( CSM_MODE == CSM_MODE_ATI_FETCH4 )
	flResult = CSMSampleShadowBuffer1TapATIBilinear( vPositionLs, flComparisonDepth );
#elif ( CSM_MODE == CSM_MODE_VERY_LOW_OR_LOW )
	flResult = CSMSampleShadowBuffer1Tap( vPositionLs, flComparisonDepth );
#else
	flResult = CSMSampleShadowBuffer9Taps( vPositionLs, flComparisonDepth );
#endif
	return flResult;
}
// Returns true (non-zero) if both coordinates are within [.02,.98] - purposely a little sloppy to
// prevent the shadow filter kernel from leaking outside the cascade's portion of the atlas.
//   vCoords - cascade-local texture coordinates to test.
int CSMRangeTestExpanded( float2 vCoords )
{
	// Remap [.02,.98] onto [0,1] so the range check reduces to "does saturate() change the value?".
	vCoords = vCoords * ( 1.0f / .96f ) - float2( .02f / .96f, .02f / .96f );

	// Per-axis distance outside [0,1]. The abs() is required: the raw differences are signed
	// (positive below 0, negative above 1), so without it a point that is out of range on both axes
	// by equal and opposite amounts would sum to exactly zero and be reported as in range.
	float2 vOvershoot = abs( saturate( vCoords.xy ) - vCoords.xy );

	return ( dot( vOvershoot, float2( 1, 1 ) ) == 0.0f );
}
// Returns true (non-zero) if both coordinates are within [0,1].
//   vCoords - cascade-local texture coordinates to test.
int CSMRangeTestNonExpanded( float2 vCoords )
{
	// Per-axis distance outside [0,1]. The abs() is required: the raw differences are signed
	// (positive below 0, negative above 1), so without it a point that is out of range on both axes
	// by equal and opposite amounts would sum to exactly zero and be reported as in range.
	float2 vOvershoot = abs( saturate( vCoords.xy ) - vCoords.xy );

	return ( dot( vOvershoot, float2( 1, 1 ) ) == 0.0f );
}
// Computes the cascade-edge blend factor for a cascade-local texture coordinate.
// Per axis: take the distance from the cascade center (.5,.5), subtract
// g_flSunShadowingSplitLerpFactorBase, scale by g_flSunShadowingSplitLerpFactorInvRange, clamp to [0,1]
// and invert. The result is the product of the two per-axis factors, so it is 1 while the distance is
// at or below the base value on both axes and reaches 0 once either axis is fully past the fade range.
float CSMComputeSplitLerpFactor( float2 vPositionToSampleLs )
{
	float2 vDistFromCenter = abs( vPositionToSampleLs.xy - float2( .5f, .5f ) );

	float2 vFadeStart = float2( g_flSunShadowingSplitLerpFactorBase, g_flSunShadowingSplitLerpFactorBase );
	float2 vFadeScale = float2( g_flSunShadowingSplitLerpFactorInvRange, g_flSunShadowingSplitLerpFactorInvRange );

	float2 vFadedOut = saturate( ( vDistFromCenter - vFadeStart ) * vFadeScale );
	float2 vRemaining = float2( 1.0f, 1.0f ) - vFadedOut;

	return vRemaining.x * vRemaining.y;
}
// Transforms a position by a matrix using row-vector multiplication: mul( pos, mat ).
//   pos - position to transform (callers in this file pass a world-space position with w = 1).
//   mat - transform to apply (callers pass one of g_matWorldToShadowTexMatrices).
float4 CSMTransformLightToTexture( float4 pos, float4x4 mat )
{
	float4 vTransformed = mul( pos, mat );
	return vTransformed;
}
#if ( CASCADE_SIZE == 0 )
// CASCADE_SIZE == 0: no shadow lookup is performed; the shadow scalar is always 1.0
// (the same value the other variants fade towards with distance).
float CSMComputeShadowing( float3 vPositionWs )
{
	float flShadowScalar = 1.0f;
	return flShadowScalar;
}
#elif ( CSM_MODE == CSM_MODE_HIGH )
// HIGH: each cascade is 1024x1024, sample from up to 2 cascades, 9 tap filtering for each sample,
// smoothly lerp between each, 3 total cascades.
//   vPositionWs - world-space position to shadow.
// Returns the shadow scalar after cascade blending and distance fade (1.0 is the faded-out value).
float CSMComputeShadowing( float3 vPositionWs )
{
	float4 vWorldPos = float4( vPositionWs.xyz, 1.0f );

	// Pick the first cascade (0, then 1, falling back to 2) whose [0,1] range contains the point.
	// Non-expanded texcoord range tests are used because the 2D lerp will have faded to the next
	// cascade long before the filter kernel leaks outside the cascade's atlas region.
	int nCascade = 0;
	float2 vCascadeUv = CSMTransformLightToTexture( vWorldPos.xyzw, g_matWorldToShadowTexMatrices[0] ).xy;
	[flatten] if ( !CSMRangeTestNonExpanded( vCascadeUv.xy ) )
	{
		nCascade = 1;
		vCascadeUv = CSMTransformLightToTexture( vWorldPos.xyzw, g_matWorldToShadowTexMatrices[1] ).xy;
		[flatten] if ( !CSMRangeTestNonExpanded( vCascadeUv.xy ) )
		{
			nCascade = 2;
			vCascadeUv = CSMTransformLightToTexture( vWorldPos.xyzw, g_matWorldToShadowTexMatrices[2] ).xy;
		}
	}

	// The comparison depth always comes from cascade 0's matrix, whichever cascade was picked.
	float flDepthLs = CSMTransformLightToTexture( vWorldPos.xyzw, g_matWorldToShadowTexMatrices[0] ).z;

	// Edge blend factor, computed from the cascade-local UV before it is remapped into the atlas.
	float flSplitLerpFactor = CSMComputeSplitLerpFactor( vCascadeUv.xy );

	// Remap the clamped cascade-local UV into this cascade's atlas rectangle (xy = offset, zw = scale).
	float4 vAtlasRect = g_vCascadeAtlasUVOffsets[nCascade];
	float2 vAtlasUv = saturate( vCascadeUv.xy ) * vAtlasRect.zw + vAtlasRect.xy;
	float flShadow = CSMSampleShadowBuffer( vAtlasUv.xy, flDepthLs );

	// Near the cascade edge, blend towards the next cascade's result; past the last cascade the
	// blend target is 1.0.
	[branch] if ( flSplitLerpFactor < 1.0f )
	{
		float flNextShadow = 1.0f;
		[flatten] if ( nCascade < 2 )
		{
			float4 vNextAtlasRect = g_vCascadeAtlasUVOffsets[nCascade + 1];
			float2 vNextUv = CSMTransformLightToTexture( vWorldPos.xyzw, g_matWorldToShadowTexMatrices[nCascade + 1] ).xy;
			vNextUv.xy = saturate( vNextUv.xy ) * vNextAtlasRect.zw + vNextAtlasRect.xy;
			flNextShadow = CSMSampleShadowBuffer( vNextUv.xy, flDepthLs );
		}
		flShadow = lerp( flNextShadow, flShadow, saturate( flSplitLerpFactor ) );
	}

	// Fade the result to 1.0 based on squared distance from the camera.
	float3 vToPoint = vPositionWs - g_vCamPosition.xyz;
	float flDistanceFade = saturate( dot( vToPoint, vToPoint ) * g_flSunShadowingZLerpFactorRange + g_flSunShadowingZLerpFactorBase );

	return lerp( flShadow, 1.0f, flDistanceFade );
}
#elif ( ( CSM_MODE == CSM_MODE_VERY_LOW_OR_LOW ) || ( CSM_MODE == CSM_MODE_ATI_FETCH4 ) )
// VERY_LOW = Each cascade is 640x640, sample from 1 cascade only, 2 total cascades
// LOW = Each cascade is 768x768, sample from 1 cascade only, 2 total cascades
// (This variant is also compiled for CSM_MODE_ATI_FETCH4.)
// Only the cascades at index 1 and 2 are used here; no blending is done between them.
//   vPositionWs - world-space position to shadow.
// Returns the shadow scalar after distance fade (1.0 is the faded-out value).
float CSMComputeShadowing( float3 vPositionWs )
{
	float4 vWorldPos = float4( vPositionWs.xyz, 1.0f );

	// The comparison depth always comes from cascade 0's matrix.
	float flDepthLs = CSMTransformLightToTexture( vWorldPos.xyzw, g_matWorldToShadowTexMatrices[0] ).z;

	// Try cascade 1 first; fall back to cascade 2 if the point fails the expanded range test.
	float2 vAtlasOffset = g_vCascadeAtlasUVOffsets[1].xy;
	float2 vCascadeUv = CSMTransformLightToTexture( vWorldPos.xyzw, g_matWorldToShadowTexMatrices[1] ).xy;
	[flatten] if ( !CSMRangeTestExpanded( vCascadeUv.xy ) )
	{
		vAtlasOffset = g_vCascadeAtlasUVOffsets[2].xy;
		vCascadeUv = CSMTransformLightToTexture( vWorldPos.xyzw, g_matWorldToShadowTexMatrices[2] ).xy;
	}

	// Each cascade covers half of the atlas on each axis, hence the fixed .5 scale.
	float flShadow = CSMSampleShadowBuffer( saturate( vCascadeUv.xy ) * .5f + vAtlasOffset, flDepthLs );

	// Fade the result to 1.0 based on squared distance from the camera.
	float3 vToPoint = vPositionWs - g_vCamPosition.xyz;
	float flDistanceFade = saturate( dot( vToPoint, vToPoint ) * g_flSunShadowingZLerpFactorRange + g_flSunShadowingZLerpFactorBase );

	return lerp( flShadow, 1.0f, flDistanceFade );
}
#elif ( CSM_MODE == CSM_MODE_MEDIUM )
// MEDIUM = Each cascade is 1024x1024, sample from 1 cascade only, 9 tap filtering, 3 cascades on
// vertexlit/phong, 2 cascades on lightmappedgeneric, 3 total cascades
//   vPositionWs - world-space position to shadow.
// Returns the shadow scalar after last-cascade edge fade and distance fade (1.0 is the faded-out value).
float CSMComputeShadowing( float3 vPositionWs )
{
	float4 vPosition4Ws = float4( vPositionWs.xyz, 1.0f );

	// z (comparison depth) always comes from cascade 0's matrix; xy is filled in by the cascade selection below.
	float3 vPositionToSampleLs = float3( 0.0f, 0.0f, CSMTransformLightToTexture( vPosition4Ws.xyzw, g_matWorldToShadowTexMatrices[0] ).z );
	float2 vCascadeAtlasUVOffset = g_vCascadeAtlasUVOffsets[0].xy;

	// 1 = keep the sampled result as-is; set to 0 only when the last cascade is used, which lets the
	// edge fade below take effect.
	float flLerpFactorDisable = 1.0f;

	// When CSM_LIGHTMAPPEDGENERIC is defined, cascade 0 is skipped: the test is compiled out and the
	// braces below run unconditionally, so selection starts at cascade 1.
#if !defined( CSM_LIGHTMAPPEDGENERIC )
	vPositionToSampleLs.xy = CSMTransformLightToTexture( vPosition4Ws.xyzw, g_matWorldToShadowTexMatrices[0] ).xy;
	[flatten] if ( !CSMRangeTestExpanded( vPositionToSampleLs.xy ) )
#endif
	{
		vCascadeAtlasUVOffset = g_vCascadeAtlasUVOffsets[1].xy;
		vPositionToSampleLs.xy = CSMTransformLightToTexture( vPosition4Ws.xyzw, g_matWorldToShadowTexMatrices[1] ).xy;
		[flatten] if ( !CSMRangeTestExpanded( vPositionToSampleLs.xy ) )
		{
			flLerpFactorDisable = 0.0f;
			vCascadeAtlasUVOffset = g_vCascadeAtlasUVOffsets[2].xy;
			vPositionToSampleLs.xy = CSMTransformLightToTexture( vPosition4Ws.xyzw, g_matWorldToShadowTexMatrices[2] ).xy;
		}
	}

	// Each cascade covers half of the atlas on each axis, hence the fixed .5 scale.
	float flShadowScalar = CSMSampleShadowBuffer( saturate( vPositionToSampleLs.xy ) * .5f + vCascadeAtlasUVOffset, vPositionToSampleLs.z );

	// Fade to 1.0 towards the edge of the last cascade. For the earlier cascades flLerpFactorDisable
	// pushes the lerp parameter to 1, leaving the sampled value untouched. The shared helper computes
	// exactly the expression that was previously duplicated inline here.
	float flSplitLerpFactor = CSMComputeSplitLerpFactor( vPositionToSampleLs.xy );
	flShadowScalar = lerp( 1.0f, flShadowScalar, saturate( flSplitLerpFactor + flLerpFactorDisable ) );

	// Fade the result to 1.0 based on squared distance from the camera.
	float3 vCamDelta = vPositionWs - g_vCamPosition.xyz;
	float flZLerpFactor = saturate( dot( vCamDelta, vCamDelta ) * g_flSunShadowingZLerpFactorRange + g_flSunShadowingZLerpFactorBase );
	flShadowScalar = lerp( flShadowScalar, 1.0f, flZLerpFactor );

	return flShadowScalar;
}
#else
// Catch-all for unrecognized CSM_MODE values. This was previously guarded by
// "#elif ( CSM_MODE == CSM_MODE_ATI_FETCH4 )", which could never be reached because an earlier
// branch of this chain already handles CSM_MODE_ATI_FETCH4.
#error Invalid CSM_MODE
#endif
|