Counter Strike : Global Offensive Source Code
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1391 lines
66 KiB

  1. //========== Copyright (c) Valve Corporation, All rights reserved. ==========//
  2. //
  3. // Purpose: Common pixel shader code specific to flashlights
  4. //
  5. // $NoKeywords: $
  6. //
  7. //=============================================================================//
  8. #ifndef COMMON_FLASHLIGHT_FXC_H_
  9. #define COMMON_FLASHLIGHT_FXC_H_
  10. #include "common_ps_fxc.h"
  // Superellipse soft clipping
  //
  // Vectorized variant of the RenderMan superellipse clip: both superellipses
  // are evaluated together and the final smoothstep is left to the caller
  // (see uberlight(), which uses the two results as smoothstep edges).
  //
  // Input:
  // - Q:      point on the x-y plane
  // - aAbB:   superellipse dimensions packed as ( a, A, b, B ); a,b are the
  //           axes of the inner ellipse and A,B those of the outer ellipse
  // - rounds: roundness terms shared by both ellipses
  //           ( rounds.x = 2 / roundness, rounds.y = -roundness / 2 )
  //
  // Return value:
  // - .x: value computed from the inner ellipse axes ( a, b )
  // - .y: value computed from the outer ellipse axes ( A, B )
  float2 ClipSuperellipse( float2 Q,        // Point on the xy plane
                           float4 aAbB,     // Dimensions of superellipses
                           float2 rounds )  // Same roundness for both ellipses
  {
      // Project to +x +y quadrant
      float2 vAbsQ = abs( Q );

      // ( b*x, B*x ) and ( a*y, A*y )
      float2 vScaledX = vAbsQ.x * aAbB.zw;
      float2 vScaledY = vAbsQ.y * aAbB.xy;

      // Component-wise pow evaluates the inner (.x) and outer (.y) ellipse at once
      float2 vPowSum = pow( vScaledX, rounds.xx ) + pow( vScaledY, rounds.xx );
      float2 vQR = pow( vPowSum, rounds.yy );

      return vQR * aAbB.xy * aAbB.zw;
  }
  // Volumetric light shaping
  //
  // Inputs:
  // - PL:                 the point being shaded, in the local light space
  // - smoothEdge0/1:      edge0 / edge1 for three smooth steps; the .x components
  //                       are overwritten here with the superellipse results
  // - smoothOneOverWidth: reciprocal widths of the three smooth steps; .x is
  //                       recomputed here from the superellipse results
  // - shear:              shear in X and Y, subtracted from the projected point
  // - aAbB, rounds:       superellipse dimensions and roundness terms, passed
  //                       straight through to ClipSuperellipse()
  // Return value:
  // - attenuation factor: ( 1 - step.x ) * step.y * ( 1 - step.z )
  float uberlight( float3 PL,                  // Point in light space
                   float3 smoothEdge0,         // edge0 for three smooth steps
                   float3 smoothEdge1,         // edge1 for three smooth steps
                   float3 smoothOneOverWidth,  // width of three smooth steps
                   float2 shear,               // shear in X and Y
                   float4 aAbB,                // Superellipse dimensions
                   float2 rounds )             // two functions of roundness packed together
  {
      // Perspective-divide the point onto the xy plane and remove the shear
      float2 vPlanePoint = ( PL.xy / PL.z ) - shear;
      float2 vEllipseEdges = ClipSuperellipse( vPlanePoint, aAbB, rounds );

      // Fill in the dynamic parts of the smoothsteps.
      // The other components are pre-computed outside of the shader.
      smoothEdge0.x = vEllipseEdges.x;
      smoothEdge1.x = vEllipseEdges.y;
      smoothOneOverWidth.x = 1.0f / ( vEllipseEdges.y - vEllipseEdges.x );

      // The superellipse step is evaluated at 1, the other two at the light-space depth
      float3 vStepInputs = float3( 1, PL.z, PL.z );
      float3 vSteps = smoothstep3( smoothEdge0, smoothEdge1, smoothOneOverWidth, vStepInputs );

      // Modulate the three resulting attenuations (flipping the sense of the
      // attenuation from the superellipse and the far clip)
      return ( 1.0f - vSteps.x ) * vSteps.y * ( 1.0f - vSteps.z );
  }
  // Flashlight shadow depth texture resolution, selected per platform.
  // The filters below use 1 / resolution as the texel step in both U and V.
  #if defined( _X360 )
      #define FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION ( 720.0f )
  #elif defined( _PS3 )
      #define FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION ( 864.0f )
  #else
      #define FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION ( 1024.0f )
  #endif
  // JasonM - TODO: remove this simpleton version
  //
  // Four-tap shadow test. Samples the depth texture at the four diagonal
  // half-texel offsets around the projected coordinate and compares each stored
  // depth (plus a fixed bias) against the projected depth of the point.
  //
  // Inputs:
  // - DepthSampler: shadow depth texture
  // - texCoord:     homogeneous shadow-space position (divided by .w here)
  // Return value:
  // - 0.25 for every tap where the projected depth is <= stored depth + bias,
  //   i.e. a value in { 0, 0.25, 0.5, 0.75, 1 }
  float DoShadow( sampler DepthSampler, float4 texCoord )
  {
      const float flDepthBias = 0.0005f;
      const float flHalfTexel = 0.5f / FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION;

      float2 vStepU = float2( flHalfTexel, 0.0f );
      float2 vStepV = float2( 0.0f, flHalfTexel );

      // Perspective divide: .xy is the lookup coordinate, .z the depth to test
      float3 vProj = texCoord.xyz / texCoord.w;

      float4 vStoredDepth;
      vStoredDepth.x = tex2D( DepthSampler, vProj.xy + vStepU + vStepV ).x;
      vStoredDepth.y = tex2D( DepthSampler, vProj.xy + vStepU - vStepV ).x;
      vStoredDepth.z = tex2D( DepthSampler, vProj.xy - vStepU + vStepV ).x;
      vStoredDepth.w = tex2D( DepthSampler, vProj.xy - vStepU - vStepV ).x;

  #if ( defined( REVERSE_DEPTH_ON_X360 ) )
      vStoredDepth = 1.0f - vStoredDepth;
  #endif

      float4 vBias = float4( flDepthBias, flDepthBias, flDepthBias, flDepthBias );
      float4 vDelta = float4( vProj.z, vProj.z, vProj.z, vProj.z ) - ( vStoredDepth + vBias );

      // 1 for each tap that passes the depth test, 0 otherwise
      float4 vPassed = ( vDelta <= float4( 0.0f, 0.0f, 0.0f, 0.0f ) );

      return dot( vPassed, float4( 0.25f, 0.25f, 0.25f, 0.25f ) );
  }
  // Single-tap shadow test against a depth value reconstructed from the
  // .a, .r and .g channels of the depth texture with fixed weights
  // (presumably the NVIDIA RAWZ packed-depth format, going by the function name).
  //
  // Inputs:
  // - DepthSampler: shadow depth texture
  // - shadowMapPos: homogeneous shadow-space position (divided by .w here)
  // Return value:
  // - 1 if the reconstructed stored depth is greater than the object depth, else 0
  float DoShadowNvidiaRAWZOneTap( sampler DepthSampler, const float4 shadowMapPos )
  {
      // Weights applied to the .arg channels to rebuild a scalar depth
      const float3 vChannelWeights = float3( 0.996093809371817670572857294849, 0.0038909914428586627756752238080039, 1.5199185323666651467481343000015e-5 );

      float flInvW = 1.0f / shadowMapPos.w;
      float3 vProj = shadowMapPos.xyz * flInvW;   // .xy = filter center, .z = object depth

      float flStoredDepth = dot( tex2D( DepthSampler, vProj.xy ).arg, vChannelWeights );

      return flStoredDepth > vProj.z;
  }
  // Four-tap version of DoShadowNvidiaRAWZOneTap: reconstructs a depth from the
  // .a, .r and .g channels at the four diagonal one-texel offsets and averages
  // the results of the four depth comparisons.
  //
  // Inputs:
  // - DepthSampler: shadow depth texture
  // - shadowMapPos: homogeneous shadow-space position (divided by .w here)
  // Return value:
  // - fraction of the four taps whose stored depth is greater than the object depth
  float DoShadowNvidiaRAWZ( sampler DepthSampler, const float4 shadowMapPos )
  {
      // Weights applied to the .arg channels to rebuild a scalar depth
      const float3 vChannelWeights = float3( 0.996093809371817670572857294849, 0.0038909914428586627756752238080039, 1.5199185323666651467481343000015e-5 );

      float flTexel = 1.0f / FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION;

      float flInvW = 1.0f / shadowMapPos.w;
      float3 vProj = shadowMapPos.xyz * flInvW;   // .xy = filter center, .z = object depth
      float2 vCenter = vProj.xy;

      float4 vStoredDepths;
      vStoredDepths.x = dot( tex2D( DepthSampler, vCenter + float2(  flTexel,  flTexel ) ).arg, vChannelWeights );
      vStoredDepths.y = dot( tex2D( DepthSampler, vCenter + float2( -flTexel,  flTexel ) ).arg, vChannelWeights );
      vStoredDepths.z = dot( tex2D( DepthSampler, vCenter + float2(  flTexel, -flTexel ) ).arg, vChannelWeights );
      vStoredDepths.w = dot( tex2D( DepthSampler, vCenter + float2( -flTexel, -flTexel ) ).arg, vChannelWeights );

      return dot( vStoredDepths > vProj.zzzz, float4( 0.25, 0.25, 0.25, 0.25 ) );
  }
  // Four-tap shadow filter using tex2Dproj lookups at the four diagonal
  // one-texel offsets, averaged with equal weights.
  // NOTE(review): the function name suggests the sampler performs the depth
  // comparison in hardware - confirm against the sampler setup.
  //
  // Inputs:
  // - DepthSampler: shadow depth texture
  // - shadowMapPos: homogeneous shadow-space position (divided by .w here)
  // Return value:
  // - average of the four tap results
  float DoShadowNvidiaCheap( sampler DepthSampler, const float4 shadowMapPos )
  {
      float flTexel = 1.0f / FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION;

      float flInvW = 1.0f / shadowMapPos.w;
      float3 vProj = shadowMapPos.xyz * flInvW;   // Do both projections at once
      float2 vCenter = vProj.xy;                  // Center of shadow filter
      float flObjDepth = vProj.z;                 // Object depth in shadow space

      float4 vCornerTaps;
      vCornerTaps.x = tex2Dproj( DepthSampler, float4( vCenter + float2(  flTexel,  flTexel ), flObjDepth, 1 ) ).x;
      vCornerTaps.y = tex2Dproj( DepthSampler, float4( vCenter + float2( -flTexel,  flTexel ), flObjDepth, 1 ) ).x;
      vCornerTaps.z = tex2Dproj( DepthSampler, float4( vCenter + float2(  flTexel, -flTexel ), flObjDepth, 1 ) ).x;
      vCornerTaps.w = tex2Dproj( DepthSampler, float4( vCenter + float2( -flTexel, -flTexel ), flObjDepth, 1 ) ).x;

      return dot( vCornerTaps, float4( 0.25, 0.25, 0.25, 0.25 ) );
  }
  // 3x3 box shadow filter: nine tex2Dproj taps on a one-texel grid around the
  // projected coordinate, each weighted 1/9.
  //
  // Inputs:
  // - DepthSampler: shadow depth texture
  // - vProjCoords:  already-projected shadow-space position
  //                 ( .xy = filter center, .z = object depth )
  // Return value:
  // - sum of the nine tap results, each scaled by 1/9
  float DoShadowNvidiaPCF3x3Box( sampler DepthSampler, const float3 vProjCoords )
  {
      float fTexelEpsilon = 1.0f / FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION;

      float2 shadowMapCenter = vProjCoords.xy;   // Center of shadow filter
      float objDepth = vProjCoords.z;            // Object depth in shadow space

      const float4 vNinth = float4( 1.0f / 9.0f, 1.0f / 9.0f, 1.0f / 9.0f, 1.0f / 9.0f );

      // Four diagonal taps
      float4 vOneTaps;
      vOneTaps.x = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2(  fTexelEpsilon,  fTexelEpsilon ), objDepth, 1 ) ).x;
      vOneTaps.y = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( -fTexelEpsilon,  fTexelEpsilon ), objDepth, 1 ) ).x;
      vOneTaps.z = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2(  fTexelEpsilon, -fTexelEpsilon ), objDepth, 1 ) ).x;
      vOneTaps.w = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( -fTexelEpsilon, -fTexelEpsilon ), objDepth, 1 ) ).x;
      float flOneTaps = dot( vOneTaps, vNinth );

      // Four axis-aligned taps: +x, -x, -y, +y.
      // The last tap previously repeated ( 0, -fTexelEpsilon ), which sampled the
      // -y neighbor twice and never sampled the +y neighbor.
      float4 vTwoTaps;
      vTwoTaps.x = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2(  fTexelEpsilon,  0 ), objDepth, 1 ) ).x;
      vTwoTaps.y = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( -fTexelEpsilon,  0 ), objDepth, 1 ) ).x;
      vTwoTaps.z = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2(  0, -fTexelEpsilon ), objDepth, 1 ) ).x;
      vTwoTaps.w = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2(  0,  fTexelEpsilon ), objDepth, 1 ) ).x;
      float flTwoTaps = dot( vTwoTaps, vNinth );

      // Center tap
      float flCenterTap = tex2Dproj( DepthSampler, float4( shadowMapCenter, objDepth, 1 ) ).x * ( 1.0f / 9.0f );

      // Sum all 9 Taps
      return flOneTaps + flTwoTaps + flCenterTap;
  }
  157. // 1 2 1
  158. // 2 4 2
  159. // 1 2 1
  160. #ifdef _PS3
  // Tweaked for good code gen with the SCE Cg compiler.
  //
  // 3x3 shadow filter with the 1-2-1 / 2-4-2 / 1-2-1 kernel (normalized by 16).
  // Tap coordinates are built as packed float4s and picked apart with swizzles;
  // each tap reads a different component of the fetched value, exactly as the
  // original did.
  //
  // Inputs:
  // - DepthSampler: shadow depth texture
  // - shadowMapPos: already-projected shadow-space position, passed whole
  //                 ( .xyz ) to the texture lookups
  // Return value:
  // - weighted sum of the nine taps, scaled by 1/16
  half DoShadowNvidiaPCF3x3Gaussian( sampler DepthSampler, const float3 shadowMapPos )
  {
      float flTexel = 1.0f / FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION;

      float3 vCenter = shadowMapPos.xyz;   // Center of shadow filter

      // Diagonal taps (weight 1): .xyz = ( x+e, y+-e, z ), .w = x-e
      float4 vDiagPosV = vCenter.xyzx + float4( flTexel,  flTexel, 0.0f, -flTexel );
      float4 vDiagNegV = vCenter.xyzx + float4( flTexel, -flTexel, 0.0f, -flTexel );

      half4 vDiagTaps;
      vDiagTaps.x = h4tex2D( DepthSampler, vDiagPosV.xyz ).x;
      vDiagTaps.y = h4tex2D( DepthSampler, vDiagPosV.wyz ).y;
      vDiagTaps.z = h4tex2D( DepthSampler, vDiagNegV.xyz ).z;
      vDiagTaps.w = h4tex2D( DepthSampler, vDiagNegV.wyz ).w;

      half flSum = dot( vDiagTaps, half4( 1.0f, 1.0f, 1.0f, 1.0f ) );

      // Axis taps (weight 2): horizontal pair packs x-e in .w, vertical pair packs y+e in .w
      float4 vAxisU = vCenter.xyzx + float4( flTexel, 0.0f, 0.0f, -flTexel );
      float4 vAxisV = vCenter.xyzy + float4( 0.0f, -flTexel, 0.0f, flTexel );

      half4 vAxisTaps;
      vAxisTaps.x = h4tex2D( DepthSampler, vAxisU.xyz ).x;
      vAxisTaps.y = h4tex2D( DepthSampler, vAxisU.wyz ).y;
      vAxisTaps.z = h4tex2D( DepthSampler, vAxisV.xyz ).z;
      vAxisTaps.w = h4tex2D( DepthSampler, vAxisV.xwz ).w;

      flSum += dot( vAxisTaps, half4( 2.0f, 2.0f, 2.0f, 2.0f ) );

      // Center tap (weight 4)
      flSum += tex2D( DepthSampler, vCenter ).x * half( 4.0f );

      // Normalize the sum of all 9 taps by the kernel total
      return flSum * ( 1.0h / 16.0h );
  }
  187. #else
  // 3x3 shadow filter with the 1-2-1 / 2-4-2 / 1-2-1 kernel (normalized by 16),
  // built from nine tex2Dproj taps on a one-texel grid.
  //
  // Inputs:
  // - DepthSampler: shadow depth texture
  // - shadowMapPos: already-projected shadow-space position
  //                 ( .xy = filter center, .z = object depth )
  // Return value:
  // - weighted sum of the nine tap results
  float DoShadowNvidiaPCF3x3Gaussian( sampler DepthSampler, const float3 shadowMapPos )
  {
      float flTexel = 1.0f / FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION;

      float2 vCenter = shadowMapPos.xy;    // Center of shadow filter
      float flObjDepth = shadowMapPos.z;   // Object depth in shadow space

      // Diagonal taps, weight 1/16 each
      float4 vDiagTaps;
      vDiagTaps.x = tex2Dproj( DepthSampler, float4( vCenter + float2(  flTexel,  flTexel ), flObjDepth, 1 ) ).x;
      vDiagTaps.y = tex2Dproj( DepthSampler, float4( vCenter + float2( -flTexel,  flTexel ), flObjDepth, 1 ) ).x;
      vDiagTaps.z = tex2Dproj( DepthSampler, float4( vCenter + float2(  flTexel, -flTexel ), flObjDepth, 1 ) ).x;
      vDiagTaps.w = tex2Dproj( DepthSampler, float4( vCenter + float2( -flTexel, -flTexel ), flObjDepth, 1 ) ).x;
      float flDiagSum = dot( vDiagTaps, float4( 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f ) );

      // Axis-aligned taps ( +x, -x, -y, +y ), weight 2/16 each
      float4 vAxisTaps;
      vAxisTaps.x = tex2Dproj( DepthSampler, float4( vCenter + float2(  flTexel,  0 ), flObjDepth, 1 ) ).x;
      vAxisTaps.y = tex2Dproj( DepthSampler, float4( vCenter + float2( -flTexel,  0 ), flObjDepth, 1 ) ).x;
      vAxisTaps.z = tex2Dproj( DepthSampler, float4( vCenter + float2(  0, -flTexel ), flObjDepth, 1 ) ).x;
      vAxisTaps.w = tex2Dproj( DepthSampler, float4( vCenter + float2(  0,  flTexel ), flObjDepth, 1 ) ).x;
      float flAxisSum = dot( vAxisTaps, float4( 2.0f / 16.0f, 2.0f / 16.0f, 2.0f / 16.0f, 2.0f / 16.0f ) );

      // Center tap, weight 4/16
      float flCenter = tex2Dproj( DepthSampler, float4( vCenter, flObjDepth, 1 ) ).x * float( 4.0f / 16.0f );

      // Sum all 9 Taps
      return flDiagSum + flAxisSum + flCenter;
  }
  210. #endif
  211. //
  212. // 1 4 7 4 1
  213. // 4 20 33 20 4
  214. // 7 33 55 33 7
  215. // 4 20 33 20 4
  216. // 1 4 7 4 1
  217. //
  218. #ifdef _PS3
  // Tweaked for good code gen with the SCE Cg compiler.
  //
  // 5x5 shadow filter using the 25-tap kernel drawn above (weights sum to 331).
  // Weights and texel offsets are packed into four-component constants and
  // selected with swizzles; each tap reads a different component of the fetched
  // value, exactly as the original did.
  //
  // Inputs:
  // - DepthSampler: shadow depth texture
  // - vProjCoords:  already-projected shadow-space position
  //                 ( .xy = filter center, .z = object depth )
  // Return value:
  // - weighted sum of the 25 tap results
  float DoShadowNvidiaPCF5x5Gaussian( sampler DepthSampler, const float3 vProjCoords )
  {
      float flTexel = 1.0f / FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION;
      float flTwoTexels = 2.0f * flTexel;

      float2 vCenter = vProjCoords.xy;    // Center of shadow filter
      float flObjDepth = vProjCoords.z;   // Object depth in shadow space

      // vWeightsLo = weights 1, 7, 4, 20; vWeightsHi.xy = weights 33, 55,
      // vWeightsHi.zw doubles as the ( -1 texel, 0 ) offset pair
      half4 vWeightsLo = half4( 1.0f / 331.0f, 7.0f / 331.0f, 4.0f / 331.0f, 20.0f / 331.0f );
      half4 vWeightsHi = half4( 33.0f / 331.0f, 55.0f / 331.0f, -flTexel, 0.0f );
      // vOfsA = ( +2, -2, 0, +1 ) texels; vOfsB = ( +1, -1, +2, -2 ) texels
      float4 vOfsA = float4( flTwoTexels, -flTwoTexels, 0.0f, flTexel );
      float4 vOfsB = float4( flTexel, -flTexel, flTwoTexels, -flTwoTexels );

      // Weight 1: outer corners
      half4 vTaps1;
      vTaps1.x = tex2D( DepthSampler, float3( vCenter + vOfsA.xx, flObjDepth ) ).x;   //  2  2
      vTaps1.y = tex2D( DepthSampler, float3( vCenter + vOfsA.yx, flObjDepth ) ).y;   // -2  2
      vTaps1.z = tex2D( DepthSampler, float3( vCenter + vOfsA.xy, flObjDepth ) ).z;   //  2 -2
      vTaps1.w = tex2D( DepthSampler, float3( vCenter + vOfsA.yy, flObjDepth ) ).w;   // -2 -2
      half flSum = dot( vTaps1, vWeightsLo.xxxx );

      // Weight 7: outer ring, on the axes
      half4 vTaps7;
      vTaps7.x = tex2D( DepthSampler, float3( vCenter + vOfsA.xz, flObjDepth ) ).x;   //  2  0
      vTaps7.y = tex2D( DepthSampler, float3( vCenter + vOfsA.yz, flObjDepth ) ).y;   // -2  0
      vTaps7.z = tex2D( DepthSampler, float3( vCenter + vOfsA.zx, flObjDepth ) ).z;   //  0  2
      vTaps7.w = tex2D( DepthSampler, float3( vCenter + vOfsA.zy, flObjDepth ) ).w;   //  0 -2
      flSum += dot( vTaps7, vWeightsLo.yyyy );

      // Weight 4: remaining eight outer-ring taps
      half4 vTaps4A, vTaps4B;
      vTaps4A.x = tex2D( DepthSampler, float3( vCenter + vOfsA.xw, flObjDepth ) ).x;  //  2  1
      vTaps4A.y = tex2D( DepthSampler, float3( vCenter + vOfsA.wx, flObjDepth ) ).y;  //  1  2
      vTaps4A.z = tex2D( DepthSampler, float3( vCenter + vOfsB.yz, flObjDepth ) ).z;  // -1  2
      vTaps4A.w = tex2D( DepthSampler, float3( vCenter + vOfsB.wx, flObjDepth ) ).w;  // -2  1
      vTaps4B.x = tex2D( DepthSampler, float3( vCenter + vOfsB.wy, flObjDepth ) ).x;  // -2 -1
      vTaps4B.y = tex2D( DepthSampler, float3( vCenter + vOfsB.yw, flObjDepth ) ).y;  // -1 -2
      vTaps4B.z = tex2D( DepthSampler, float3( vCenter + vOfsB.xw, flObjDepth ) ).z;  //  1 -2
      vTaps4B.w = tex2D( DepthSampler, float3( vCenter + vOfsB.zy, flObjDepth ) ).w;  //  2 -1
      flSum += dot( vTaps4A, vWeightsLo.zzzz );
      flSum += dot( vTaps4B, vWeightsLo.zzzz );

      // Weight 20: inner corners
      half4 vTaps20;
      vTaps20.x = tex2D( DepthSampler, float3( vCenter + vOfsB.xx, flObjDepth ) ).x;  //  1  1
      vTaps20.y = tex2D( DepthSampler, float3( vCenter + vOfsB.yx, flObjDepth ) ).y;  // -1  1
      vTaps20.z = tex2D( DepthSampler, float3( vCenter + vOfsB.xy, flObjDepth ) ).z;  //  1 -1
      vTaps20.w = tex2D( DepthSampler, float3( vCenter + vOfsB.yy, flObjDepth ) ).w;  // -1 -1
      flSum += dot( vTaps20, vWeightsLo.wwww );

      // Weight 33: inner ring, on the axes
      half4 vTaps33;
      vTaps33.x = tex2D( DepthSampler, float3( vCenter + vOfsA.wz, flObjDepth ) ).x;       //  1  0
      vTaps33.y = tex2D( DepthSampler, float3( vCenter + vWeightsHi.zw, flObjDepth ) ).y;  // -1  0
      vTaps33.z = tex2D( DepthSampler, float3( vCenter + vWeightsHi.wz, flObjDepth ) ).z;  //  0 -1
      vTaps33.w = tex2D( DepthSampler, float3( vCenter + vOfsA.zw, flObjDepth ) ).w;       //  0  1
      flSum += dot( vTaps33, vWeightsHi.xxxx );

      // Weight 55: center tap
      flSum += tex2D( DepthSampler, float3( vCenter, flObjDepth ) ).x * vWeightsHi.y;

      return flSum;
  }
  270. #else
  // 5x5 shadow filter using the 25-tap kernel drawn above (weights sum to 331),
  // built from tex2Dproj taps on a one-texel grid. The filtered result is raised
  // to the power 1.4 before being returned.
  //
  // Inputs:
  // - DepthSampler: shadow depth texture
  // - vProjCoords:  already-projected shadow-space position
  //                 ( .xy = filter center, .z = object depth )
  // Return value:
  // - pow( weighted sum of the 25 tap results, 1.4 )
  float DoShadowNvidiaPCF5x5GaussianPC( sampler DepthSampler, const float3 vProjCoords )
  {
      float flTexel = 1.0f / FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION;
      float flTwoTexels = 2.0f * flTexel;

      float2 vCenter = vProjCoords.xy;    // Center of shadow filter
      float flObjDepth = vProjCoords.z;   // Object depth in shadow space

      // vWeightsLo = weights 1, 7, 4, 20; vWeightsHi.xy = weights 33, 55,
      // vWeightsHi.zw doubles as the ( -1 texel, 0 ) offset pair
      float4 vWeightsLo = float4( 1.0f / 331.0f, 7.0f / 331.0f, 4.0f / 331.0f, 20.0f / 331.0f );
      float4 vWeightsHi = float4( 33.0f / 331.0f, 55.0f / 331.0f, -flTexel, 0.0f );
      // vOfsA = ( +2, -2, 0, +1 ) texels; vOfsB = ( +1, -1, +2, -2 ) texels
      float4 vOfsA = float4( flTwoTexels, -flTwoTexels, 0.0f, flTexel );
      float4 vOfsB = float4( flTexel, -flTexel, flTwoTexels, -flTwoTexels );

      // Weight 1: outer corners
      float4 vTaps1;
      vTaps1.x = tex2Dproj( DepthSampler, float4( vCenter + vOfsA.xx, flObjDepth, 1 ) ).x;   //  2  2
      vTaps1.y = tex2Dproj( DepthSampler, float4( vCenter + vOfsA.yx, flObjDepth, 1 ) ).x;   // -2  2
      vTaps1.z = tex2Dproj( DepthSampler, float4( vCenter + vOfsA.xy, flObjDepth, 1 ) ).x;   //  2 -2
      vTaps1.w = tex2Dproj( DepthSampler, float4( vCenter + vOfsA.yy, flObjDepth, 1 ) ).x;   // -2 -2
      float flSum = dot( vTaps1, vWeightsLo.xxxx );

      // Weight 7: outer ring, on the axes
      float4 vTaps7;
      vTaps7.x = tex2Dproj( DepthSampler, float4( vCenter + vOfsA.xz, flObjDepth, 1 ) ).x;   //  2  0
      vTaps7.y = tex2Dproj( DepthSampler, float4( vCenter + vOfsA.yz, flObjDepth, 1 ) ).x;   // -2  0
      vTaps7.z = tex2Dproj( DepthSampler, float4( vCenter + vOfsA.zx, flObjDepth, 1 ) ).x;   //  0  2
      vTaps7.w = tex2Dproj( DepthSampler, float4( vCenter + vOfsA.zy, flObjDepth, 1 ) ).x;   //  0 -2
      flSum += dot( vTaps7, vWeightsLo.yyyy );

      // Weight 4: remaining eight outer-ring taps
      float4 vTaps4A, vTaps4B;
      vTaps4A.x = tex2Dproj( DepthSampler, float4( vCenter + vOfsA.xw, flObjDepth, 1 ) ).x;  //  2  1
      vTaps4A.y = tex2Dproj( DepthSampler, float4( vCenter + vOfsA.wx, flObjDepth, 1 ) ).x;  //  1  2
      vTaps4A.z = tex2Dproj( DepthSampler, float4( vCenter + vOfsB.yz, flObjDepth, 1 ) ).x;  // -1  2
      vTaps4A.w = tex2Dproj( DepthSampler, float4( vCenter + vOfsB.wx, flObjDepth, 1 ) ).x;  // -2  1
      vTaps4B.x = tex2Dproj( DepthSampler, float4( vCenter + vOfsB.wy, flObjDepth, 1 ) ).x;  // -2 -1
      vTaps4B.y = tex2Dproj( DepthSampler, float4( vCenter + vOfsB.yw, flObjDepth, 1 ) ).x;  // -1 -2
      vTaps4B.z = tex2Dproj( DepthSampler, float4( vCenter + vOfsB.xw, flObjDepth, 1 ) ).x;  //  1 -2
      vTaps4B.w = tex2Dproj( DepthSampler, float4( vCenter + vOfsB.zy, flObjDepth, 1 ) ).x;  //  2 -1
      flSum += dot( vTaps4A, vWeightsLo.zzzz );
      flSum += dot( vTaps4B, vWeightsLo.zzzz );

      // Weight 20: inner corners
      float4 vTaps20;
      vTaps20.x = tex2Dproj( DepthSampler, float4( vCenter + vOfsB.xx, flObjDepth, 1 ) ).x;  //  1  1
      vTaps20.y = tex2Dproj( DepthSampler, float4( vCenter + vOfsB.yx, flObjDepth, 1 ) ).x;  // -1  1
      vTaps20.z = tex2Dproj( DepthSampler, float4( vCenter + vOfsB.xy, flObjDepth, 1 ) ).x;  //  1 -1
      vTaps20.w = tex2Dproj( DepthSampler, float4( vCenter + vOfsB.yy, flObjDepth, 1 ) ).x;  // -1 -1
      flSum += dot( vTaps20, vWeightsLo.wwww );

      // Weight 33: inner ring, on the axes
      float4 vTaps33;
      vTaps33.x = tex2Dproj( DepthSampler, float4( vCenter + vOfsA.wz, flObjDepth, 1 ) ).x;       //  1  0
      vTaps33.y = tex2Dproj( DepthSampler, float4( vCenter + vWeightsHi.zw, flObjDepth, 1 ) ).x;  // -1  0
      vTaps33.z = tex2Dproj( DepthSampler, float4( vCenter + vWeightsHi.wz, flObjDepth, 1 ) ).x;  //  0 -1
      vTaps33.w = tex2Dproj( DepthSampler, float4( vCenter + vOfsA.zw, flObjDepth, 1 ) ).x;       //  0  1
      flSum += dot( vTaps33, vWeightsHi.xxxx );

      // Weight 55: center tap
      flSum += tex2Dproj( DepthSampler, float4( vCenter, flObjDepth, 1 ) ).x * vWeightsHi.y;

      // Reshape the filtered result with a fixed exponent
      flSum = pow( flSum, 1.4f );

      return flSum;
  }
  322. #endif
  // Single-tap shadow test: fetches one depth from the shadow texture and
  // compares it against the projected object depth.
  //
  // Inputs:
  // - DepthSampler: shadow depth texture
  // - shadowMapPos: homogeneous shadow-space position (divided by .w here)
  // Return value:
  // - 1 if the stored depth is greater than the (clamped) object depth, else 0
  float DoShadowATICheap( sampler DepthSampler, const float4 shadowMapPos )
  {
      float3 vProj = shadowMapPos.xyz / shadowMapPos.w;

      float flStoredDepth = tex2D( DepthSampler, vProj.xy ).x;

      //HACKHACK: On 360, surfaces at or past the far flashlight plane have an abrupt cutoff.
      // This is temp until a smooth falloff is implemented
      float flObjDepth = min( vProj.z, 0.99999 );

      return flStoredDepth > flObjDepth;
  }
  // Smooth filter using ATI Fetch 4
  //
  // Reads four lookups at the diagonal one-texel offsets, treating every
  // component of each fetched float4 as a separate depth, compares all 16 depths
  // against the biased object depth and blends the results with weights built
  // from the fractional texel position. The weights always total 9.
  //
  // Inputs:
  // - DepthSampler: shadow depth texture
  // - vProjCoords:  already-projected shadow-space position
  //                 ( .xy = filter center, .z = object depth )
  // Return value:
  // - weighted fraction of depths that pass the test;
  //   always 1 when SHADER_MODEL_PS_3_0 is not defined
  float DoShadowATIFetch4( sampler DepthSampler, const float3 vProjCoords )
  {
      // This should only ever get run on a ps_3_0 part
  #if ( !defined( SHADER_MODEL_PS_3_0 ) )
      return 1.0f;
  #endif

      float2 vCenter = vProjCoords.xy;    // Center of shadow filter
      float flObjDepth = vProjCoords.z;   // Object depth in shadow space

      float2 vTexel = float2( 1.0 / FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION, 1.0 / FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION );
      float2 vTexRes = float2( FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION, FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION );

      // Important: This shader was originally in DoTA. To get this shader working in Portal2,
      // the -.5f offsets and an unusual swizzle had to be eliminated. The original author was
      // not positive why, but suspected the min/mag filter settings differ between the two titles.

      // Fetch 4 2x2 quads
      float4 vQuadNN = tex2D( DepthSampler, vCenter + ( vTexel * float2( -1.0f, -1.0f ) ) );
      float4 vQuadPN = tex2D( DepthSampler, vCenter + ( vTexel * float2(  1.0f, -1.0f ) ) );
      float4 vQuadNP = tex2D( DepthSampler, vCenter + ( vTexel * float2( -1.0f,  1.0f ) ) );
      float4 vQuadPP = tex2D( DepthSampler, vCenter + ( vTexel * float2(  1.0f,  1.0f ) ) );

      // Fraction component of projected coordinates
      float2 vFrac = frac( vCenter * vTexRes );

      // Per-axis weight patterns; each quad's weights are the product of its x and y pattern
      float4 vWeightXNeg = float4( 1, 1 - vFrac.x, 1, 1 - vFrac.x );
      float4 vWeightXPos = float4( vFrac.x, 1, vFrac.x, 1 );
      float4 vWeightYNeg = float4( 1 - vFrac.y, 1, 1, 1 - vFrac.y );
      float4 vWeightYPos = float4( 1, vFrac.y, vFrac.y, 1 );

      float4 vWeightsNN = vWeightXNeg * vWeightYNeg;
      float4 vWeightsPN = vWeightXPos * vWeightYNeg;
      float4 vWeightsNP = vWeightXNeg * vWeightYPos;
      float4 vWeightsPP = vWeightXPos * vWeightYPos;

      // Projective distance from z plane in view coords
      float flBiasedDepth = flObjDepth - 0.005;
      float4 vBiasedDepth = float4( flBiasedDepth, flBiasedDepth, flBiasedDepth, flBiasedDepth );

      float4 vInLight = ( vBiasedDepth < vQuadNN );
      float flPercentInLight = dot( vInLight, vWeightsNN );

      vInLight = ( vBiasedDepth < vQuadPN );
      flPercentInLight += dot( vInLight, vWeightsPN );

      vInLight = ( vBiasedDepth < vQuadNP );
      flPercentInLight += dot( vInLight, vWeightsNP );

      vInLight = ( vBiasedDepth < vQuadPP );
      flPercentInLight += dot( vInLight, vWeightsPP );

      // Sum of weights is 9 since border taps are bilinearly filtered
      return ( 1.0f / 9.0f ) * flPercentInLight;
  }
  // Bilinear Percentage Closer Filtering, fetching depths and manually doing the
  // four compares and the bilerp.
  // Emulates bilinear PCF - shader originally from source2
  // (src/shaders/include/sun_shadowing.fxc).
  //
  // Inputs:
  // - DepthSampler: shadow depth texture
  // - vProjCoords:  already-projected shadow-space position
  //                 ( .xy = lookup position, .z = comparison depth )
  // Return value:
  // - bilinear blend of the four "stored depth > comparison depth" results
  float DoShadowATIBilinear( sampler DepthSampler, const float3 vProjCoords )
  {
      float2 vLookupPos = vProjCoords.xy;
      float flCompareDepth = vProjCoords.z;

      float2 vTexSize = float2( FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION, FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION );
      float2 vInvTexSize = float2( 1.0f / FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION, 1.0f / FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION );

      // Split the position into a texel-aligned base and the fraction within that texel
      float2 vFrac = frac( vLookupPos * vTexSize );
      float2 vBasePos = vLookupPos - vFrac * vInvTexSize;

      // Fetch the 2x2 neighborhood starting at the base texel
      float4 vStoredDepths;
      vStoredDepths.x = tex2D( DepthSampler, vBasePos + float2( 0.0f,          0.0f          ) ).x;
      vStoredDepths.y = tex2D( DepthSampler, vBasePos + float2( vInvTexSize.x, 0.0f          ) ).x;
      vStoredDepths.z = tex2D( DepthSampler, vBasePos + float2( 0.0f,          vInvTexSize.y ) ).x;
      vStoredDepths.w = tex2D( DepthSampler, vBasePos + float2( vInvTexSize.x, vInvTexSize.y ) ).x;

      // 1 where the stored depth is beyond the comparison depth, 0 otherwise
      float4 vPassed = ( vStoredDepths > flCompareDepth );

      // Standard bilinear weights for the 2x2 neighborhood
      float2 vOneMinusFrac = float2( 1.0f - vFrac.x, 1.0f - vFrac.y );
      float4 vBilinearWeights = float4( vOneMinusFrac.x * vOneMinusFrac.y,
                                        vFrac.x * vOneMinusFrac.y,
                                        vOneMinusFrac.x * vFrac.y,
                                        vFrac.x * vFrac.y );

      return dot( vPassed, vBilinearWeights );
  }
  // Poisson disc, randomly rotated at different UVs
  //
  // Filters the shadow map with eight Poisson-disc offsets that are rotated by a
  // per-pixel 2x2 matrix read from RandomRotationSampler and scaled by
  // vShadowTweaks.x * 2. Three sampling paths are selected by the bool arguments.
  //
  // Inputs:
  // - DepthSampler:          shadow depth texture
  // - RandomRotationSampler: texture supplying the rotation (read as .xy * 2 - 1)
  // - vProjCoords:           already-projected shadow-space position
  //                          ( .xy = filter center, .z = object depth, clamped to 0.99999 )
  // - vScreenPos:            screen position, remapped with * 0.5 + 0.5 for the rotation lookup
  // - vShadowTweaks:         .x = kernel scale (doubled here), .zw = offset added to the rotation lookup UV
  // - bNvidiaHardwarePCF:    use tex2Dproj taps
  // - bFetch4:               otherwise, compare all four components of each fetched texel
  // Return value:
  // - filtered shadow term (see the note in the tex2Dproj path about its weighting)
  float DoShadowPoisson16Sample( sampler DepthSampler, sampler RandomRotationSampler, const float3 vProjCoords, const float2 vScreenPos, const float4 vShadowTweaks, bool bNvidiaHardwarePCF, bool bFetch4 )
  {
      float2 vPoissonOffset[8] = { float2(  0.3475f,  0.0042f ), float2(  0.8806f,  0.3430f ),
                                   float2( -0.0041f, -0.6197f ), float2(  0.0472f,  0.4964f ),
                                   float2( -0.3730f,  0.0874f ), float2( -0.9217f, -0.3177f ),
                                   float2( -0.6289f,  0.7388f ), float2(  0.5744f, -0.7741f ) };

      float flKernelScale = vShadowTweaks.x * 2;          // Tweak parameters to shader
      float2 vNoiseUVOffset = vShadowTweaks.zw;

      float2 vCenter = vProjCoords.xy;                    // Center of shadow filter
      float flObjDepth = min( vProjCoords.z, 0.99999 );   // Object depth in shadow space

      // 2D rotation matrix setup: .xy holds a matrix row, .z the translation to the filter center
      float3 vRotRow0 = 0, vRotRow1 = 0;

  #if defined(SHADER_MODEL_PS_2_0) || defined(SHADER_MODEL_PS_2_B) || defined(SHADER_MODEL_PS_3_0)
      vRotRow0.xy = tex2D( RandomRotationSampler, cFlashlightScreenScale.xy * (vScreenPos * 0.5 + 0.5) + vNoiseUVOffset).xy * 2.0 - 1.0;
      vRotRow1.xy = float2(-1.0, 1.0) * vRotRow0.yx;      // 2x2 rotation matrix in 4-tuple
  #endif

      // Scale up kernel while accounting for texture resolution
      vRotRow0 *= flKernelScale;
      vRotRow1 *= flKernelScale;

      // Translation terms, added after the rotation of each offset
      vRotRow0.z = vCenter.x;
      vRotRow1.z = vCenter.y;

      float4 vTaps = 0, vAccum = 0.0f;
      float2 vTapUV = 0;

      if ( bNvidiaHardwarePCF )
      {
          // Taps 0-3 and 4-7 accumulate into .x/.y/.z/.w respectively
          for( int nGroup=0; nGroup<2; nGroup++ )
          {
              vTapUV.x = dot( vRotRow0.xy, vPoissonOffset[4*nGroup+0].xy ) + vRotRow0.z;
              vTapUV.y = dot( vRotRow1.xy, vPoissonOffset[4*nGroup+0].xy ) + vRotRow1.z;
              vTaps.x += tex2Dproj( DepthSampler, float4(vTapUV, flObjDepth, 1) ).x;

              vTapUV.x = dot( vRotRow0.xy, vPoissonOffset[4*nGroup+1].xy ) + vRotRow0.z;
              vTapUV.y = dot( vRotRow1.xy, vPoissonOffset[4*nGroup+1].xy ) + vRotRow1.z;
              vTaps.y += tex2Dproj( DepthSampler, float4(vTapUV, flObjDepth, 1) ).x;

              vTapUV.x = dot( vRotRow0.xy, vPoissonOffset[4*nGroup+2].xy ) + vRotRow0.z;
              vTapUV.y = dot( vRotRow1.xy, vPoissonOffset[4*nGroup+2].xy ) + vRotRow1.z;
              vTaps.z += tex2Dproj( DepthSampler, float4(vTapUV, flObjDepth, 1) ).x;

              vTapUV.x = dot( vRotRow0.xy, vPoissonOffset[4*nGroup+3].xy ) + vRotRow0.z;
              vTapUV.y = dot( vRotRow1.xy, vPoissonOffset[4*nGroup+3].xy ) + vRotRow1.z;
              vTaps.w += tex2Dproj( DepthSampler, float4(vTapUV, flObjDepth, 1) ).x;
          }

          // This should actually be float4( 0.125, 0.125, 0.125, 0.125) but we've tuned so many shots in the SFM
          // relying on this bug that it doesn't seem right to fix until we have done something like move
          // this code out to a staging branch for a shipping game.
          // This is certainly one source of difference between ATI and nVidia in SFM layoffs
          return dot( vTaps, float4( 0.25, 0.25, 0.25, 0.25) );
      }
      else if ( bFetch4 )
      {
          // Each lookup yields four depths, so 8 taps give 32 comparisons
          for( int nTap=0; nTap<8; nTap++ )
          {
              vTapUV.x = dot( vRotRow0.xy, vPoissonOffset[nTap].xy ) + vRotRow0.z;
              vTapUV.y = dot( vRotRow1.xy, vPoissonOffset[nTap].xy ) + vRotRow1.z;
              vTaps = tex2D( DepthSampler, vTapUV.xy );
              vAccum += (vTaps > flObjDepth.xxxx);
          }

          return dot( vAccum, float4( 1.0f/32.0f, 1.0f/32.0f, 1.0f/32.0f, 1.0f/32.0f) );
      }
      else // ATI vanilla hardware shadow mapping
      {
          // Two groups of four single-depth taps, compared four at a time
          for( int nGroup=0; nGroup<2; nGroup++ )
          {
              vTapUV.x = dot( vRotRow0.xy, vPoissonOffset[4*nGroup+0].xy ) + vRotRow0.z;
              vTapUV.y = dot( vRotRow1.xy, vPoissonOffset[4*nGroup+0].xy ) + vRotRow1.z;
              vTaps.x = tex2D( DepthSampler, vTapUV.xy ).x;

              vTapUV.x = dot( vRotRow0.xy, vPoissonOffset[4*nGroup+1].xy ) + vRotRow0.z;
              vTapUV.y = dot( vRotRow1.xy, vPoissonOffset[4*nGroup+1].xy ) + vRotRow1.z;
              vTaps.y = tex2D( DepthSampler, vTapUV.xy ).x;

              vTapUV.x = dot( vRotRow0.xy, vPoissonOffset[4*nGroup+2].xy ) + vRotRow0.z;
              vTapUV.y = dot( vRotRow1.xy, vPoissonOffset[4*nGroup+2].xy ) + vRotRow1.z;
              vTaps.z = tex2D( DepthSampler, vTapUV.xy ).x;

              vTapUV.x = dot( vRotRow0.xy, vPoissonOffset[4*nGroup+3].xy ) + vRotRow0.z;
              vTapUV.y = dot( vRotRow1.xy, vPoissonOffset[4*nGroup+3].xy ) + vRotRow1.z;
              vTaps.w = tex2D( DepthSampler, vTapUV.xy ).x;

              vAccum += (vTaps > flObjDepth.xxxx);
          }

          return dot( vAccum, float4( 0.125, 0.125, 0.125, 0.125 ) );
      }
  }
  493. #if defined( _X360 )
  494. // Poisson disc, randomly rotated at different UVs
  495. float DoShadow360Simple( sampler DepthSampler, const float3 vProjCoords )
  496. {
  497. float fLOD;
  498. float2 shadowMapCenter = vProjCoords.xy; // Center of shadow filter
  499. float objDepth = min( vProjCoords.z, 0.99999 ); // Object depth in shadow space
  500. #if defined( REVERSE_DEPTH_ON_X360 )
  501. objDepth = 1.0f - objDepth;
  502. #endif
  503. float4 vSampledDepths, vWeights;
  504. asm
  505. {
  506. tfetch2D vSampledDepths.x___, shadowMapCenter, DepthSampler, OffsetX = -0.5, OffsetY = -0.5, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  507. tfetch2D vSampledDepths._x__, shadowMapCenter, DepthSampler, OffsetX = 0.5, OffsetY = -0.5, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  508. tfetch2D vSampledDepths.__x_, shadowMapCenter, DepthSampler, OffsetX = -0.5, OffsetY = 0.5, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  509. tfetch2D vSampledDepths.___x, shadowMapCenter, DepthSampler, OffsetX = 0.5, OffsetY = 0.5, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  510. };
  511. asm
  512. {
  513. getWeights2D vWeights, shadowMapCenter.xy, DepthSampler, MagFilter=linear, MinFilter=linear, UseComputedLOD=false, UseRegisterLOD=false
  514. };
  515. vWeights = float4( (1-vWeights.x)*(1-vWeights.y), vWeights.x*(1-vWeights.y), (1-vWeights.x)*vWeights.y, vWeights.x*vWeights.y );
  516. #if defined( REVERSE_DEPTH_ON_X360 )
  517. float4 vCompare = (vSampledDepths < objDepth.xxxx);
  518. #else
  519. float4 vCompare = (vSampledDepths > objDepth.xxxx);
  520. #endif
  521. return dot( vCompare, vWeights );
  522. }
  523. float DoShadowXbox4x4Samples( sampler DepthSampler, const float3 vProjCoords, float NdotL )
  524. {
  525. float2 vShadowMapCenter = vProjCoords.xy + float2( .5f / FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION, .5f / FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION ); // Center of shadow filter
  526. // This shader assumes REVERSE_DEPTH_ON_X360 is always defined.
  527. float flObjDepth = 1.0f - min( vProjCoords.z, 0.99999f ); // Object depth in shadow space
  528. // projective distance from z plane in view coords
  529. float4 vDist4 = float4( flObjDepth, flObjDepth, flObjDepth, flObjDepth );
  530. //fraction component of projected coordinates; here FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION represents the shadowmap size
  531. float2 vTexRes = float2( FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION, FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION );
  532. float2 vFrac = frac( vShadowMapCenter * vTexRes );
  533. float4 vWeights = float4( vFrac.x, vFrac.y, 1.0f - vFrac.x, 1.0f - vFrac.y );
  534. float flPercentInLight;
  535. [isolate]
  536. {
  537. float4 vShadowMapVals, vInLight;
  538. asm
  539. {
  540. tfetch2D vShadowMapVals.x___, vShadowMapCenter, DepthSampler, OffsetX = -1.0, OffsetY = -2.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  541. tfetch2D vShadowMapVals._x__, vShadowMapCenter, DepthSampler, OffsetX = -2.0, OffsetY = -1.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  542. tfetch2D vShadowMapVals.__x_, vShadowMapCenter, DepthSampler, OffsetX = -1.0, OffsetY = -1.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  543. tfetch2D vShadowMapVals.___x, vShadowMapCenter, DepthSampler, OffsetX = -2.0, OffsetY = -2.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  544. sgt vInLight, vDist4, vShadowMapVals
  545. };
  546. float4 vShadowMapWeights = float4( vWeights.w, vWeights.z, 1, vWeights.z * vWeights.w );
  547. flPercentInLight = dot( vInLight, vShadowMapWeights );
  548. }
  549. [isolate]
  550. {
  551. float4 vShadowMapVals, vInLight;
  552. asm
  553. {
  554. tfetch2D vShadowMapVals.x___, vShadowMapCenter, DepthSampler, OffsetX = 1.0, OffsetY = -2.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  555. tfetch2D vShadowMapVals._x__, vShadowMapCenter, DepthSampler, OffsetX = 0.0, OffsetY = -1.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  556. tfetch2D vShadowMapVals.__x_, vShadowMapCenter, DepthSampler, OffsetX = 1.0, OffsetY = -1.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  557. tfetch2D vShadowMapVals.___x, vShadowMapCenter, DepthSampler, OffsetX = 0.0, OffsetY = -2.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  558. sgt vInLight, vDist4, vShadowMapVals
  559. };
  560. float4 vShadowMapWeights = float4( vWeights.x * vWeights.w, 1, vWeights.x, vWeights.w );
  561. flPercentInLight += dot( vInLight, vShadowMapWeights );
  562. }
  563. [isolate]
  564. {
  565. float4 vShadowMapVals, vInLight;
  566. asm
  567. {
  568. tfetch2D vShadowMapVals.x___, vShadowMapCenter, DepthSampler, OffsetX = -1.0, OffsetY = 0.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  569. tfetch2D vShadowMapVals._x__, vShadowMapCenter, DepthSampler, OffsetX = -2.0, OffsetY = 1.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  570. tfetch2D vShadowMapVals.__x_, vShadowMapCenter, DepthSampler, OffsetX = -1.0, OffsetY = 1.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  571. tfetch2D vShadowMapVals.___x, vShadowMapCenter, DepthSampler, OffsetX = -2.0, OffsetY = 0.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  572. sgt vInLight, vDist4, vShadowMapVals
  573. };
  574. float4 vShadowMapWeights = float4( 1, vWeights.z * vWeights.y, vWeights.y, vWeights.z );
  575. flPercentInLight += dot( vInLight, vShadowMapWeights );
  576. }
  577. [isolate]
  578. {
  579. float4 vShadowMapVals, vInLight;
  580. asm
  581. {
  582. tfetch2D vShadowMapVals.x___, vShadowMapCenter, DepthSampler, OffsetX = 1.0, OffsetY = 0.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  583. tfetch2D vShadowMapVals._x__, vShadowMapCenter, DepthSampler, OffsetX = 0.0, OffsetY = 1.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  584. tfetch2D vShadowMapVals.__x_, vShadowMapCenter, DepthSampler, OffsetX = 1.0, OffsetY = 1.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  585. tfetch2D vShadowMapVals.___x, vShadowMapCenter, DepthSampler, OffsetX = 0.0, OffsetY = 0.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  586. sgt vInLight, vDist4, vShadowMapVals
  587. };
  588. float4 vShadowMapWeights = float4( vWeights.x, vWeights.y, vWeights.x * vWeights.y, 1 );
  589. flPercentInLight += dot( vInLight, vShadowMapWeights );
  590. }
  591. //sum of weights is 9 since border taps are bilinearly filtered
  592. return ( 1.0f / 9.0f ) * flPercentInLight;
  593. }
  594. // Return value: x = dZ/dX, y=dZ/dY (dX/dY in normalized shadow texture space)
  595. float2 ComputeReceiverPlaneDepthBiasGradients( float3 vProjCoords, float NdotL )
  596. {
  597. // See http://developer.amd.com/media/gpu_assets/Isidoro-ShadowMapping.pdf
  598. float3 vDUVDistDX = ddx( vProjCoords );
  599. float3 vDUVDistDY = ddy( vProjCoords );
  600. float flDet = ( ( vDUVDistDX.x * vDUVDistDY.y ) - ( vDUVDistDX.y * vDUVDistDY.x ) );
  601. float flInvDet = ( flDet != 0.0f ) ? ( 1.0f / flDet ) : 0.0f;
  602. vDUVDistDY *= flInvDet;
  603. float2 vDDistDUV;
  604. vDDistDUV.x = vDUVDistDY.y * vDUVDistDX.z - vDUVDistDX.y * vDUVDistDY.z;
  605. vDDistDUV.y = vDUVDistDX.x * vDUVDistDY.z - vDUVDistDY.x * vDUVDistDX.z;
  606. // Stable work around for when abs(flDet) gets extremely small - fade out receiver plane bias as NdotL approaches 0.
  607. NdotL = saturate( NdotL);
  608. vDDistDUV = lerp(float2(0.0f, 0.0f), vDDistDUV, NdotL * NdotL );
  609. return vDDistDUV;
  610. }
  611. float DoShadow360BilinearX( sampler DepthSampler, float3 vProjCoords, float2 vCenterOfs, float2 vDDistDuv, float4 vSampledDepths, float4 vWeights )
  612. {
  613. vCenterOfs *= ( 1.0f / FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION );
  614. float flReceiverPlaneBias = vCenterOfs.x * vDDistDuv.x + vCenterOfs.y * vDDistDuv.y;
  615. float flDepthComparisonValue = vProjCoords.z + flReceiverPlaneBias;
  616. float4 vCompare = vSampledDepths < float4( flDepthComparisonValue, flDepthComparisonValue, flDepthComparisonValue, flDepthComparisonValue );
  617. return dot( vCompare, vWeights );
  618. }
  619. float DoShadowXbox4x4SamplesX( sampler DepthSampler, float3 vProjCoords, float NdotL )
  620. {
  621. [branch]
  622. if ( NdotL <= 0.0f)
  623. return 0.0f;
  624. vProjCoords.z = 1.0f - min( vProjCoords.z, 0.99999f ); // Object depth in shadow space
  625. float2 vDDistDUV = ComputeReceiverPlaneDepthBiasGradients( vProjCoords, NdotL );
  626. float4 vWeights;
  627. asm
  628. {
  629. getWeights2D vWeights, vProjCoords.xy, DepthSampler, MagFilter=linear, MinFilter=linear, UseComputedLOD=false, UseRegisterLOD=false
  630. };
  631. vWeights = float4( (1-vWeights.x)*(1-vWeights.y), vWeights.x*(1-vWeights.y), (1-vWeights.x)*vWeights.y, vWeights.x*vWeights.y );
  632. float4 vr00, vr10, vr01, vr11;
  633. asm
  634. {
  635. // r00=(-1, -1)
  636. tfetch2D vr00.x___, vProjCoords, DepthSampler, OffsetX = -1.5, OffsetY = -1.5, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  637. tfetch2D vr00._x__, vProjCoords, DepthSampler, OffsetX = -0.5, OffsetY = -1.5, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  638. tfetch2D vr00.__x_, vProjCoords, DepthSampler, OffsetX = -1.5, OffsetY = -0.5, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  639. tfetch2D vr00.___x, vProjCoords, DepthSampler, OffsetX = -0.5, OffsetY = -0.5, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  640. };
  641. asm
  642. {
  643. // r10=(+1, -1)
  644. tfetch2D vr10.x___, vProjCoords, DepthSampler, OffsetX = 0.5, OffsetY = -1.5, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  645. tfetch2D vr10._x__, vProjCoords, DepthSampler, OffsetX = 1.5, OffsetY = -1.5, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  646. tfetch2D vr10.__x_, vProjCoords, DepthSampler, OffsetX = 0.5, OffsetY = -0.5, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  647. tfetch2D vr10.___x, vProjCoords, DepthSampler, OffsetX = 1.5, OffsetY = -0.5, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  648. };
  649. asm
  650. {
  651. // r01=(-1, +1)
  652. tfetch2D vr01.x___, vProjCoords, DepthSampler, OffsetX = -1.5, OffsetY = 0.5, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  653. tfetch2D vr01._x__, vProjCoords, DepthSampler, OffsetX = -0.5, OffsetY = 0.5, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  654. tfetch2D vr01.__x_, vProjCoords, DepthSampler, OffsetX = -1.5, OffsetY = 1.5, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  655. tfetch2D vr01.___x, vProjCoords, DepthSampler, OffsetX = -0.5, OffsetY = 1.5, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  656. };
  657. asm
  658. {
  659. // r11=(+1, +1)
  660. tfetch2D vr11.x___, vProjCoords, DepthSampler, OffsetX = 0.5, OffsetY = 0.5, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  661. tfetch2D vr11._x__, vProjCoords, DepthSampler, OffsetX = 1.5, OffsetY = 0.5, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  662. tfetch2D vr11.__x_, vProjCoords, DepthSampler, OffsetX = 0.5, OffsetY = 1.5, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  663. tfetch2D vr11.___x, vProjCoords, DepthSampler, OffsetX = 1.5, OffsetY = 1.5, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  664. };
  665. float4 vOneTaps;
  666. vOneTaps.x = DoShadow360BilinearX( DepthSampler, vProjCoords, float2( 1.0f, 1.0f ), vDDistDUV, vr11, vWeights );
  667. vOneTaps.y = DoShadow360BilinearX( DepthSampler, vProjCoords, float2( -1.0f, 1.0f ), vDDistDUV, vr01, vWeights );
  668. vOneTaps.z = DoShadow360BilinearX( DepthSampler, vProjCoords, float2( 1.0f, -1.0f ), vDDistDUV, vr10, vWeights );
  669. vOneTaps.w = DoShadow360BilinearX( DepthSampler, vProjCoords, float2( -1.0f, -1.0f ), vDDistDUV, vr00, vWeights );
  670. float flOneTaps = dot( vOneTaps, float4(1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f, 1.0f / 16.0f));
  671. float4 vTwoTaps;
  672. vTwoTaps.x = DoShadow360BilinearX( DepthSampler, vProjCoords, float2( 1.0f, 0 ), vDDistDUV, float4( vr10.z, vr10.w, vr11.x, vr11.y ), vWeights );
  673. vTwoTaps.y = DoShadow360BilinearX( DepthSampler, vProjCoords, float2( -1.0f, 0 ), vDDistDUV, float4( vr00.z, vr00.w, vr01.x, vr01.y ), vWeights );
  674. vTwoTaps.z = DoShadow360BilinearX( DepthSampler, vProjCoords, float2( 0, -1.0f ), vDDistDUV, float4( vr00.y, vr10.x, vr00.w, vr10.z ), vWeights );
  675. vTwoTaps.w = DoShadow360BilinearX( DepthSampler, vProjCoords, float2( 0, 1.0f ), vDDistDUV, float4( vr01.y, vr11.x, vr01.w, vr11.z), vWeights );
  676. float flTwoTaps = dot( vTwoTaps, float4(2.0f / 16.0f, 2.0f / 16.0f, 2.0f / 16.0f, 2.0f / 16.0f));
  677. float flCenterTap = DoShadow360BilinearX( DepthSampler, vProjCoords, float2( 0.0f, 0.0f ), vDDistDUV, float4( vr00.w, vr10.z, vr01.y, vr11.x ), vWeights ) * float(4.0f / 16.0f);
  678. // Sum all 9 Taps
  679. float flShadowFactor = saturate( flOneTaps + flTwoTaps + flCenterTap );
  680. // Complete hack here, but it looks good (falloff is more circular/less blocky).
  681. flShadowFactor = pow( flShadowFactor, 1.45f );
  682. return flShadowFactor;
  683. }
  684. float DoShadowXbox3x3Samples( sampler DepthSampler, const float3 vProjCoords )
  685. {
  686. float2 vShadowMapCenter = vProjCoords.xy; // Center of shadow filter
  687. // This shader assumes REVERSE_DEPTH_ON_X360 is always defined.
  688. float flObjDepth = 1.0f - min( vProjCoords.z, 0.99999f ); // Object depth in shadow space
  689. // projective distance from z plane in view coords
  690. float4 vDist4 = float4( flObjDepth, flObjDepth, flObjDepth, flObjDepth );
  691. //fraction component of projected coordinates; here FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION represents the shadowmap size
  692. float2 vTexRes = float2( FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION, FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION );
  693. float2 vFrac = frac( vShadowMapCenter * vTexRes );
  694. float4 vWeights = float4( vFrac.x, vFrac.y, 1.0f - vFrac.x, 1.0f - vFrac.y );
  695. float flPercentInLight = 1.0f;
  696. [isolate]
  697. {
  698. float4 vShadowMapVals, vInLight;
  699. asm
  700. {
  701. tfetch2D vShadowMapVals.x___, vShadowMapCenter, DepthSampler, OffsetX = -1.0, OffsetY = -1.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  702. tfetch2D vShadowMapVals._x__, vShadowMapCenter, DepthSampler, OffsetX = 0.0, OffsetY = -1.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  703. tfetch2D vShadowMapVals.__x_, vShadowMapCenter, DepthSampler, OffsetX = 1.0, OffsetY = -1.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  704. sgt vInLight, vDist4, vShadowMapVals
  705. };
  706. float3 vShadowMapWeights = float3( vWeights.z * vWeights.w, vWeights.w, vWeights.x * vWeights.w );
  707. flPercentInLight = dot( vInLight, vShadowMapWeights );
  708. }
  709. [isolate]
  710. {
  711. float4 vShadowMapVals, vInLight;
  712. asm
  713. {
  714. tfetch2D vShadowMapVals.x___, vShadowMapCenter, DepthSampler, OffsetX = -1.0, OffsetY = 0.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  715. tfetch2D vShadowMapVals._x__, vShadowMapCenter, DepthSampler, OffsetX = 0.0, OffsetY = 0.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  716. tfetch2D vShadowMapVals.__x_, vShadowMapCenter, DepthSampler, OffsetX = 1.0, OffsetY = 0.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  717. sgt vInLight, vDist4, vShadowMapVals
  718. };
  719. float3 vShadowMapWeights = float3( vWeights.z, 1.0f, vWeights.x );
  720. flPercentInLight += dot( vInLight, vShadowMapWeights );
  721. }
  722. [isolate]
  723. {
  724. float4 vShadowMapVals, vInLight;
  725. asm
  726. {
  727. tfetch2D vShadowMapVals.x___, vShadowMapCenter, DepthSampler, OffsetX = -1.0, OffsetY = 1.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  728. tfetch2D vShadowMapVals._x__, vShadowMapCenter, DepthSampler, OffsetX = 0.0, OffsetY = 1.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  729. tfetch2D vShadowMapVals.__x_, vShadowMapCenter, DepthSampler, OffsetX = 1.0, OffsetY = 1.0, UseComputedLOD=false, UseRegisterLOD=false, MagFilter = point, MinFilter = point
  730. sgt vInLight, vDist4, vShadowMapVals
  731. };
  732. float3 vShadowMapWeights = float3( vWeights.z * vWeights.y, vWeights.y, vWeights.x * vWeights.y );
  733. flPercentInLight += dot( vInLight, vShadowMapWeights );
  734. }
  735. return ( 1.0f / 4.0f ) * flPercentInLight;
  736. }
  737. float Do360PCFFetch( sampler DepthSampler, float2 tc, float objDepth )
  738. {
  739. float fLOD;
  740. float4 vSampledDepths, vWeights;
  741. asm {
  742. getCompTexLOD2D fLOD.x, tc.xy, DepthSampler, AnisoFilter=max16to1
  743. setTexLOD fLOD.x
  744. tfetch2D vSampledDepths.x___, tc, DepthSampler, OffsetX = -0.5, OffsetY = -0.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
  745. tfetch2D vSampledDepths._x__, tc, DepthSampler, OffsetX = 0.5, OffsetY = -0.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
  746. tfetch2D vSampledDepths.__x_, tc, DepthSampler, OffsetX = -0.5, OffsetY = 0.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
  747. tfetch2D vSampledDepths.___x, tc, DepthSampler, OffsetX = 0.5, OffsetY = 0.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
  748. getWeights2D vWeights, tc.xy, DepthSampler, MagFilter=linear, MinFilter=linear, UseComputedLOD=false, UseRegisterLOD=true
  749. };
  750. vWeights = float4( (1-vWeights.x)*(1-vWeights.y), vWeights.x*(1-vWeights.y), (1-vWeights.x)*vWeights.y, vWeights.x*vWeights.y );
  751. #if defined( REVERSE_DEPTH_ON_X360 )
  752. float4 vCompare = (vSampledDepths < objDepth.xxxx);
  753. #else
  754. float4 vCompare = (vSampledDepths > objDepth.xxxx);
  755. #endif
  756. return dot( vCompare, vWeights );
  757. }
  758. float Do360NearestFetch( sampler DepthSampler, float2 tc, float objDepth )
  759. {
  760. float fLOD;
  761. float4 vSampledDepth;
  762. asm {
  763. getCompTexLOD2D fLOD.x, tc.xy, DepthSampler, AnisoFilter=max16to1
  764. setTexLOD fLOD.x
  765. tfetch2D vSampledDepth.x___, tc, DepthSampler, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
  766. };
  767. #if defined( REVERSE_DEPTH_ON_X360 )
  768. return (vSampledDepth.x < objDepth.x);
  769. #else
  770. return (vSampledDepth.x > objDepth.x);
  771. #endif
  772. }
  773. float AmountShadowed_8Tap_360( sampler DepthSampler, float2 tc, float objDepth )
  774. {
  775. float fLOD;
  776. float4 vSampledDepthsA, vSampledDepthsB;
  777. // Optimal 8 rooks pattern to get an idea about whether we're at a penumbra or not
  778. // From [Kallio07] "Scanline Edge-Flag Algorithm for Antialiasing"
  779. //
  780. // +---+---+---+---+---+---+---+---+
  781. // | | | | | | o | | |
  782. // +---+---+---+---+---+---+---+---+
  783. // | o | | | | | | | |
  784. // +---+---+---+---+---+---+---+---+
  785. // | | | | o | | | | |
  786. // +---+---+---+---+---+---+---+---+
  787. // | | | | | | | o | |
  788. // +---+---+---+---+---+---+---+---+
  789. // | | o | | | | | | |
  790. // +---+---+---+---+---+---+---+---+
  791. // | | | | | o | | | |
  792. // +---+---+---+---+---+---+---+---+
  793. // | | | | | | | | o |
  794. // +---+---+---+---+---+---+---+---+
  795. // | | | o | | | | | |
  796. // +---+---+---+---+---+---+---+---+
  797. //
  798. asm {
  799. getCompTexLOD2D fLOD.x, tc.xy, DepthSampler, AnisoFilter=max16to1
  800. setTexLOD fLOD.x
  801. tfetch2D vSampledDepthsA.x___, tc, DepthSampler, OffsetX = -2.0, OffsetY = -1.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
  802. tfetch2D vSampledDepthsA._x__, tc, DepthSampler, OffsetX = -1.5, OffsetY = 0.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
  803. tfetch2D vSampledDepthsA.__x_, tc, DepthSampler, OffsetX = -1.0, OffsetY = 2.0, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
  804. tfetch2D vSampledDepthsA.___x, tc, DepthSampler, OffsetX = -0.5, OffsetY = -1.0, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
  805. tfetch2D vSampledDepthsB.x___, tc, DepthSampler, OffsetX = 0.5, OffsetY = 1.0, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
  806. tfetch2D vSampledDepthsB._x__, tc, DepthSampler, OffsetX = 1.0, OffsetY = -2.0, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
  807. tfetch2D vSampledDepthsB.__x_, tc, DepthSampler, OffsetX = 1.5, OffsetY = -0.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
  808. tfetch2D vSampledDepthsB.___x, tc, DepthSampler, OffsetX = 2.0, OffsetY = 1.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
  809. };
  810. #if defined( REVERSE_DEPTH_ON_X360 )
  811. float4 vCompareA = (vSampledDepthsA < objDepth.xxxx);
  812. float4 vCompareB = (vSampledDepthsB < objDepth.xxxx);
  813. #else
  814. float4 vCompareA = (vSampledDepthsA > objDepth.xxxx);
  815. float4 vCompareB = (vSampledDepthsB > objDepth.xxxx);
  816. #endif
  817. return dot( vCompareA, float4(0.125,0.125,0.125,0.125) ) + dot( vCompareB, float4(0.125,0.125,0.125,0.125) );
  818. }
  819. float AmountShadowed_4Tap_360( sampler DepthSampler, float2 tc, float objDepth )
  820. {
  821. float fLOD;
  822. float4 vSampledDepths;
  823. // Rotated grid pattern to get an idea about whether we're at a penumbra or not
  824. asm {
  825. getCompTexLOD2D fLOD.x, tc.xy, DepthSampler, AnisoFilter=max16to1
  826. setTexLOD fLOD.x
  827. tfetch2D vSampledDepths.x___, tc, DepthSampler, OffsetX = -1.0, OffsetY = 0.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
  828. tfetch2D vSampledDepths._x__, tc, DepthSampler, OffsetX = -0.5, OffsetY = -1.0, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
  829. tfetch2D vSampledDepths.__x_, tc, DepthSampler, OffsetX = 0.5, OffsetY = 1.0, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
  830. tfetch2D vSampledDepths.___x, tc, DepthSampler, OffsetX = 1.0, OffsetY = -0.5, UseComputedLOD=false, UseRegisterLOD=true, MagFilter = point, MinFilter = point
  831. };
  832. #if defined( REVERSE_DEPTH_ON_X360 )
  833. float4 vCompare = (vSampledDepths < objDepth.xxxx);
  834. #else
  835. float4 vCompare = (vSampledDepths > objDepth.xxxx);
  836. #endif
  837. return dot( vCompare, float4(0.25,0.25,0.25,0.25) );
  838. }
  839. // Poisson disc, randomly rotated at different UVs
  840. float DoShadowPoisson360( sampler DepthSampler, sampler RandomRotationSampler, const float3 vProjCoords, const float2 vScreenPos, const float4 vShadowTweaks )
  841. {
  842. float2 vPoissonOffset[8] = { float2( 0.3475f, 0.0042f ), float2( 0.8806f, 0.3430f ),
  843. float2( -0.0041f, -0.6197f ), float2( 0.0472f, 0.4964f ),
  844. float2( -0.3730f, 0.0874f ), float2( -0.9217f, -0.3177f ),
  845. float2( -0.6289f, 0.7388f ), float2( 0.5744f, -0.7741f ) };
  846. float2 shadowMapCenter = vProjCoords.xy; // Center of shadow filter
  847. float objDepth = min( vProjCoords.z, 0.99999 ); // Object depth in shadow space
  848. #if defined( REVERSE_DEPTH_ON_X360 )
  849. objDepth = 1.0f - objDepth;
  850. #endif
  851. float fAmountShadowed = AmountShadowed_4Tap_360( DepthSampler, shadowMapCenter, objDepth );
  852. if ( fAmountShadowed >= 1.0f ) // Fully in light
  853. {
  854. return 1.0f;
  855. }
  856. else // Do the expensive filtering since we're at least partially shadowed
  857. {
  858. float flScaleOverMapSize = 1.7f / FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION; // Tweak parameters to shader
  859. // 2D Rotation Matrix setup
  860. float3 RMatTop = 0, RMatBottom = 0;
  861. #if defined(SHADER_MODEL_PS_2_0) || defined(SHADER_MODEL_PS_2_B) || defined(SHADER_MODEL_PS_3_0)
  862. RMatTop.xy = tex2D( RandomRotationSampler, cFlashlightScreenScale.xy * (vScreenPos * 0.5 + 0.5)) * 2.0 - 1.0;
  863. RMatBottom.xy = float2(-1.0, 1.0) * RMatTop.yx; // 2x2 rotation matrix in 4-tuple
  864. #endif
  865. RMatTop *= flScaleOverMapSize; // Scale up kernel while accounting for texture resolution
  866. RMatBottom *= flScaleOverMapSize;
  867. RMatTop.z = shadowMapCenter.x; // To be added in d2adds generated below
  868. RMatBottom.z = shadowMapCenter.y;
  869. float2 rotOffset = float2(0,0);
  870. float4 vAccum = 0;
  871. rotOffset.x = dot( RMatTop.xy, vPoissonOffset[0].xy) + RMatTop.z;
  872. rotOffset.y = dot( RMatBottom.xy, vPoissonOffset[0].xy) + RMatBottom.z;
  873. vAccum.x = Do360NearestFetch( DepthSampler, rotOffset, objDepth );
  874. rotOffset.x = dot( RMatTop.xy, vPoissonOffset[1].xy) + RMatTop.z;
  875. rotOffset.y = dot( RMatBottom.xy, vPoissonOffset[1].xy) + RMatBottom.z;
  876. vAccum.y = Do360NearestFetch( DepthSampler, rotOffset, objDepth );
  877. rotOffset.x = dot( RMatTop.xy, vPoissonOffset[2].xy) + RMatTop.z;
  878. rotOffset.y = dot( RMatBottom.xy, vPoissonOffset[2].xy) + RMatBottom.z;
  879. vAccum.z = Do360NearestFetch( DepthSampler, rotOffset, objDepth );
  880. rotOffset.x = dot( RMatTop.xy, vPoissonOffset[3].xy) + RMatTop.z;
  881. rotOffset.y = dot( RMatBottom.xy, vPoissonOffset[3].xy) + RMatBottom.z;
  882. vAccum.w = Do360NearestFetch( DepthSampler, rotOffset, objDepth );
  883. rotOffset.x = dot( RMatTop.xy, vPoissonOffset[4].xy) + RMatTop.z;
  884. rotOffset.y = dot( RMatBottom.xy, vPoissonOffset[4].xy) + RMatBottom.z;
  885. vAccum.x += Do360NearestFetch( DepthSampler, rotOffset, objDepth );
  886. rotOffset.x = dot( RMatTop.xy, vPoissonOffset[5].xy) + RMatTop.z;
  887. rotOffset.y = dot( RMatBottom.xy, vPoissonOffset[5].xy) + RMatBottom.z;
  888. vAccum.y += Do360NearestFetch( DepthSampler, rotOffset, objDepth );
  889. rotOffset.x = dot( RMatTop.xy, vPoissonOffset[6].xy) + RMatTop.z;
  890. rotOffset.y = dot( RMatBottom.xy, vPoissonOffset[6].xy) + RMatBottom.z;
  891. vAccum.z += Do360NearestFetch( DepthSampler, rotOffset, objDepth );
  892. rotOffset.x = dot( RMatTop.xy, vPoissonOffset[7].xy) + RMatTop.z;
  893. rotOffset.y = dot( RMatBottom.xy, vPoissonOffset[7].xy) + RMatBottom.z;
  894. vAccum.w += Do360NearestFetch( DepthSampler, rotOffset, objDepth );
  895. return dot( vAccum, float4( 0.25, 0.25, 0.25, 0.25) );
  896. }
  897. }
  898. #endif // _X360
  899. float AmountShadowed_1Tap_NVidiaPCF( sampler DepthSampler, float3 vProjPos )
  900. {
  901. float2 shadowMapCenter = vProjPos.xy;
  902. float objDepth = vProjPos.z;
  903. return tex2Dproj( DepthSampler, float4( shadowMapCenter, objDepth, 1 ) ).x;
  904. }
  905. float AmountShadowed_4Tap_NVidiaPCF( sampler DepthSampler, float3 vProjPos )
  906. {
  907. float fTexelEpsilon = 1.0f / FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION;
  908. float2 shadowMapCenter = vProjPos.xy;
  909. float objDepth = vProjPos.z;
  910. float4 s;
  911. s.x = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( -1.0f, .5f ) * fTexelEpsilon, objDepth, 1 ) ).x;
  912. s.y = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( -.5f, -1.0f ) * fTexelEpsilon, objDepth, 1 ) ).x;
  913. s.z = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( .5f, -1.0f ) * fTexelEpsilon, objDepth, 1 ) ).x;
  914. s.w = tex2Dproj( DepthSampler, float4( shadowMapCenter + float2( 1.0f, -.5f ) * fTexelEpsilon, objDepth, 1 ) ).x;
  915. return dot( s, float4( .25f, .25f, .25f, .25f ) );
  916. }
  917. float AmountShadowed_5Tap_NVidiaPCF( sampler DepthSampler, float3 vProjPos )
  918. {
  919. float2 vShadowMapCenter = vProjPos.xy;
  920. float4 vTexelEpsilon = float4( -1.0f / FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION, 1.0f / FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION, 0.0f, 1.0f );
  921. HALF4 r;
  922. r.x = tex2Dproj( DepthSampler, float4( vShadowMapCenter + vTexelEpsilon.xz, vProjPos.z, 1.0f ) ).x;
  923. r.y = tex2Dproj( DepthSampler, float4( vShadowMapCenter + vTexelEpsilon.yz, vProjPos.z, 1.0f ) ).y;
  924. r.z = tex2Dproj( DepthSampler, float4( vShadowMapCenter + vTexelEpsilon.zx, vProjPos.z, 1.0f ) ).z;
  925. r.w = tex2Dproj( DepthSampler, float4( vShadowMapCenter + vTexelEpsilon.zy, vProjPos.z, 1.0f ) ).w;
  926. HALF flSum = dot( r, HALF4( .175f, .175f, .175f, .175f ) );
  927. flSum += (HALF)( tex2Dproj( DepthSampler, float4( vShadowMapCenter, vProjPos.z, 1.0f ) ).x * (HALF)( .3f ) );
  928. return flSum;
  929. }
  // Flashlight shadow lookup
  //
  // Picks the shadow-map filter for the current platform / nShadowLevel and
  // returns that filter's result.
  //
  // Inputs:
  // - DepthSampler: shadow depth texture, handed to the selected filter
  // - vProjCoords:  projected flashlight-space coordinates of the shaded point
  // - nShadowLevel: filter selector (compared against the *_PCF / ATI_* constants)
  // - nDotL:        only forwarded to the Xbox 360 multi-sample filter
  // - RandomRotationSampler, vScreenPos, vShadowTweaks: accepted for the callers'
  //   benefit but not read here
  //
  // Return value:
  // - the selected filter's result; on PC, 1.0 when nShadowLevel matches none of
  //   the handled modes
  HALF DoFlashlightShadow( sampler DepthSampler, sampler RandomRotationSampler, float3 vProjCoords, float2 vScreenPos, int nShadowLevel, float4 vShadowTweaks, float nDotL = 1.0f )
  {
  #if defined( _PS3 )

  #if 0
      // Original code, which samples an A8R8G8B8 texture and recovers depth.
      float4 flUnpackedDepth = tex2Dproj( DepthSampler, float4( vProjCoords, 1 ) );
      float flDepth = ( flUnpackedDepth.r * 256.0f * 255.0f + flUnpackedDepth.g * 255.0f + flUnpackedDepth.b ) / ( 255.0f * 256.0f );
      return ( vProjCoords.z < flDepth ) ? 1.0f : 0.0f;
  #else
      if ( nShadowLevel == GAMECONSOLE_SINGLE_TAP_PCF )
      {
          // Single fetch; the .x channel of the sample is the shadow term
          return h4tex2D( DepthSampler, vProjCoords.xyz ).x;
      }

      // Alternatives left switched off by the original author:
      //   AmountShadowed_4Tap_NVidiaPCF( DepthSampler, vProjCoords )
      //   AmountShadowed_5Tap_NVidiaPCF( DepthSampler, vProjCoords )
      //   DoShadowNvidiaPCF5x5Gaussian( DepthSampler, vProjCoords )
      return DoShadowNvidiaPCF3x3Gaussian( DepthSampler, vProjCoords );
  #endif

  #elif !defined( _X360 ) // PC

      // Stays at 1.0 if nShadowLevel is not one of the modes below
      HALF flResult = 1.0h;

      if ( nShadowLevel == NVIDIA_PCF )
      {
  #if defined( SHADER_MODEL_PS_2_0 ) || defined( SHADER_MODEL_PS_2_B )
          // This is pretty much just high-end Macs
          flResult = AmountShadowed_4Tap_NVidiaPCF( DepthSampler, vProjCoords );
  #else
          // NVIDIA ps_3 parts and ATI DX10 parts
          flResult = DoShadowNvidiaPCF5x5GaussianPC( DepthSampler, vProjCoords );
  #endif
      }
      else if ( nShadowLevel == ATI_NO_PCF_FETCH4 )
      {
          // ATI DX9 ps_3_0 parts
          flResult = DoShadowATIFetch4( DepthSampler, vProjCoords );
      }
      else if ( nShadowLevel == NVIDIA_PCF_CHEAP )
      {
          // Low-end NVIDIA parts and low-end Macs
          flResult = AmountShadowed_1Tap_NVidiaPCF( DepthSampler, vProjCoords );
      }
      else if ( nShadowLevel == ATI_NOPCF )
      {
          // ATI ps_2_b parts
          flResult = DoShadowATIBilinear( DepthSampler, vProjCoords );
      }

      return flResult;

  #else // 360

      if ( nShadowLevel == GAMECONSOLE_SINGLE_TAP_PCF )
      {
          return DoShadow360Simple( DepthSampler, vProjCoords );
      }

      // Every other level uses the multi-sample filter, which also takes N.L
      return DoShadowXbox4x4SamplesX( DepthSampler, vProjCoords, nDotL );

  #endif // PS3 / PC / 360
  }
  // Phong-style specular term for a single light
  //
  // Inputs:
  // - vWorldNormal, vLightDir, vEyeDir: vectors used for the reflection test
  //   (NOTE(review): presumably all normalized by the caller -- confirm)
  // - fSpecularExponent: power applied to the clamped L.R term
  // - bDoSpecularWarp / specularWarpSampler / fFresnel: when enabled, the result
  //   is tinted by a lookup into the warp texture at { (L.R)^k, fresnel }
  //
  // Return value:
  // - (L.R)^k replicated to rgb, optionally multiplied by the warp texture colour
  float3 SpecularLight( const float3 vWorldNormal, const float3 vLightDir, const float fSpecularExponent,
                        const float3 vEyeDir, const bool bDoSpecularWarp, in sampler specularWarpSampler, float fFresnel )
  {
      // Reflect view through normal
      float3 vReflect = reflect( -vEyeDir, vWorldNormal );

      // L.R clamped to [0,1] (use half-angle instead?), raised to the specular power.
      // Kept as a scalar: it is a single value until the final broadcast.
      float flSpecular = pow( saturate( dot( vReflect, vLightDir ) ), fSpecularExponent );

      float3 vSpecular = float3( flSpecular, flSpecular, flSpecular );

      // Optionally warp as function of scalar specular and fresnel
      if ( bDoSpecularWarp )
      {
          // Sample at { (L.R)^k, fresnel }
          vSpecular *= tex2D( specularWarpSampler, float2( flSpecular, fFresnel ) ).rgb;
      }

      return vSpecular;
  }
  // Linearly remaps val so that A maps to 0 and B maps to 1, then clamps the
  // result to [0,1]. A may be greater than B, which gives a descending ramp.
  // There is no guard for A == B; that case divides by zero.
  float RemapNormalizedValClamped( float val, float A, float B)
  {
      float flOffset = val - A;
      float flSpan = B - A;
      return saturate( flOffset / flSpan );
  }
  // Diffuse + specular flashlight lighting
  //
  // Inputs:
  // - flashlightPos / worldPos / worldNormal: used for the light vector and N.L
  // - flashlightSpacePosition: position in flashlight projection space (pre-divide)
  // - attenuationFactors: weights dotted with ( 1, 1/dist, 1/dist^2 )
  // - farZ: light fades to zero between 0.6 * farZ and farZ
  // - FlashlightSampler: projected flashlight texture
  // - FlashlightDepthSampler / RandomRotationSampler / nShadowLevel / vScreenPos /
  //   vShadowTweaks: forwarded to DoFlashlightShadow when bDoShadows is set
  //   (ps2b and ps3 only); vShadowTweaks.y also blends the shadow towards unshadowed
  // - fSpecularExponent / vEyeDir / bDoSpecularWarp / specularWarpSampler / fFresnel:
  //   forwarded to SpecularLight
  // - bDoDiffuseWarp / DiffuseWarpSampler: only referenced by a disabled experiment
  //
  // Outputs:
  // - diffuseLighting:  shadowed, attenuated, coloured Lambert term
  // - specularLighting: diffuseLighting multiplied by the specular term
  void DoSpecularFlashlight( float3 flashlightPos, float3 worldPos, float4 flashlightSpacePosition, float3 worldNormal,
      float3 attenuationFactors, float farZ, sampler FlashlightSampler, sampler FlashlightDepthSampler, sampler RandomRotationSampler,
      int nShadowLevel, bool bDoShadows, const float2 vScreenPos, const float fSpecularExponent, const float3 vEyeDir,
      const bool bDoDiffuseWarp, sampler DiffuseWarpSampler, const bool bDoSpecularWarp, sampler specularWarpSampler, float fFresnel, float4 vShadowTweaks,
      // Outputs of this shader...separate shadowed diffuse and specular from the flashlight
      out float3 diffuseLighting, out float3 specularLighting )
  {
      // Perspective divide into the flashlight's projected space
      float3 vProjCoords = flashlightSpacePosition.xyz / flashlightSpacePosition.w;

      // Start from the projected flashlight texture
      float3 vLightColor = tex2D( FlashlightSampler, vProjCoords.xy ).rgb;

  #if defined(SHADER_MODEL_PS_2_B) || defined(SHADER_MODEL_PS_3_0)
      // Catch back projection (ps2b and up): zero the colour when w <= 0
      vLightColor *= flashlightSpacePosition.www > float3(0,0,0);
  #endif

  #if defined(SHADER_MODEL_PS_2_0) || defined(SHADER_MODEL_PS_2_B) || defined(SHADER_MODEL_PS_3_0)
      // Flashlight color
      vLightColor *= cFlashlightColor.xyz;
  #endif

      // Vector from the surface to the light, plus its length and squared length
      float3 vToLight = flashlightPos - worldPos;
      float flDistSq = dot( vToLight, vToLight );
      float flDist = sqrt( flDistSq );
      float3 vLightDir = normalize( vToLight );

      // 1 up to 0.6 * farZ, ramping down to 0 at farZ
      float flEndFalloff = RemapNormalizedValClamped( flDist, farZ, 0.6f * farZ );

      // Attenuation for light and to fade out shadow over distance
      float flAtten = saturate( dot( attenuationFactors, float3( 1.0f, 1.0f/flDist, 1.0f/flDistSq ) ) );

      float flNDotL = dot( vLightDir.xyz, worldNormal.xyz );

      // Shadowing and coloring terms
  #if (defined(SHADER_MODEL_PS_2_B) || defined(SHADER_MODEL_PS_3_0))
      if ( bDoShadows )
      {
          float flRawShadow = DoFlashlightShadow( FlashlightDepthSampler, RandomRotationSampler, vProjCoords, vScreenPos, nShadowLevel, vShadowTweaks, flNDotL );

          // Blend between fully attenuated and not attenuated
          float flRelaxedShadow = lerp( flRawShadow, 1.0f, vShadowTweaks.y );

          // Blend between shadow and above, according to light attenuation,
          // then apply as the shadow term
          vLightColor *= saturate( lerp( flRelaxedShadow, flRawShadow, flAtten ) );
      }
  #endif

      diffuseLighting = flAtten;

      // JasonM - experimenting with light-warping the flashlight
      if ( false ) // bDoDiffuseWarp -- the warp path is switched off
      {
          float flWarpCoord = saturate( flNDotL * 0.5f + 0.5f );                           // 0..1
          diffuseLighting *= tex2D( DiffuseWarpSampler, float2( flWarpCoord, 0.0f ) ).rgb; // Look up warped light
      }
      else // common path
      {
          float flLambert = flNDotL;
  #if defined(SHADER_MODEL_PS_2_0) || defined(SHADER_MODEL_PS_2_B) || defined(SHADER_MODEL_PS_3_0)
          // Bias applied after the shadow lookup, so the shadow filter saw the raw N.L
          flLambert += flFlashlightNoLambertValue;
  #endif
          diffuseLighting *= saturate( flLambert ); // Lambertian term
      }

      diffuseLighting *= vLightColor;
      diffuseLighting *= flEndFalloff;

      // Specular term (masked by diffuse)
      specularLighting = diffuseLighting * SpecularLight( worldNormal, vLightDir, fSpecularExponent, vEyeDir, bDoSpecularWarp, specularWarpSampler, fFresnel );
  }
  // Diffuse only version
  //
  // Inputs:
  // - flashlightPos / worldPos / worldNormal: used for the light vector and N.L
  // - flashlightSpacePosition: position in flashlight projection space (pre-divide)
  // - attenuationFactors: weights dotted with ( 1, 1/dist, 1/dist^2 )
  // - farZ: light fades to zero between 0.6 * farZ and farZ
  // - FlashlightSampler: projected flashlight texture
  // - FlashlightDepthSampler / RandomRotationSampler / nShadowLevel / vScreenPos /
  //   vShadowTweaks: forwarded to DoFlashlightShadow when bDoShadows is set
  //   (ps2b and ps3 only); vShadowTweaks.y also blends the shadow towards unshadowed
  // - bClip: X360 only; clip the pixel when it lies outside the projection
  // - bHasNormal: when false, N.L is taken to be 1
  //
  // Return value:
  // - shadowed, attenuated, coloured Lambert term; on X360, zero when the
  //   projected position is outside [0,1] in x/y or beyond 1 in z
  HALF3 DoFlashlight( float3 flashlightPos, float3 worldPos, float4 flashlightSpacePosition, float3 worldNormal,
      float3 attenuationFactors, float farZ, sampler FlashlightSampler, sampler FlashlightDepthSampler,
      sampler RandomRotationSampler, int nShadowLevel, bool bDoShadows,
      const float2 vScreenPos, bool bClip, float4 vShadowTweaks = float4( 3.0f / FLASHLIGHT_SHADOW_TEXTURE_RESOLUTION, 0.0005f, 0.0f, 0.0f), bool bHasNormal = true )
  {
      // Perspective divide into the flashlight's projected space
      float3 vProjCoords = flashlightSpacePosition.xyz / flashlightSpacePosition.w;

      // rg - Always fetching the flashlight texture on X360 so the GPU computes the LOD factors used to select the mipmap level properly.
      // Otherwise, if we fetch after the [branch] we'll sometimes get edge artifacts because the GPU fetches from a lower mipmap level when it shouldn't.
      HALF3 vLightColor = h3tex2D( FlashlightSampler, vProjCoords.xy ).rgb;

  #if ( defined( _X360 ) )
      {
          // Per-component flags: 1 where the coordinate is below 0 / above 1
          float3 vBelowZero = vProjCoords.xyz < float3( 0.0f, 0.0f, 0.0f );
          vBelowZero.z = 0.0f; // don't clip the near plane per pixel since we don't do that on the PC.
          float3 vAboveOne = vProjCoords.xyz > float3( 1.0f, 1.0f, 1.0f );

          [branch]
          if ( dot( vBelowZero + vAboveOne, float3(1,1,1) ) > 0 )
          {
              if ( bClip )
              {
                  clip(-1);
              }
              return float3(0,0,0);
          }
      }
  #endif

  #if ( defined(SHADER_MODEL_PS_2_B) || defined(SHADER_MODEL_PS_3_0) )
      // Catch back projection (ps2b and up)
      vLightColor *= (HALF)(flashlightSpacePosition.www > float3(0,0,0));
  #endif

  #if defined(SHADER_MODEL_PS_2_0) || defined(SHADER_MODEL_PS_2_B) || defined(SHADER_MODEL_PS_3_0)
      // Flashlight color
      vLightColor *= (HALF3)cFlashlightColor.xyz;
  #endif

      // Vector from the surface to the light, plus its length and squared length
      float3 vToLight = flashlightPos - worldPos;
      float flDistSq = dot( vToLight, vToLight );
      float flDist = sqrt( flDistSq );
      HALF3 vLightDir = normalize( vToLight );

      // 1 up to 0.6 * farZ, ramping down to 0 at farZ
      HALF flEndFalloff = RemapNormalizedValClamped( flDist, farZ, 0.6f * farZ );

      // Attenuation for light and to fade out shadow over distance
      HALF flAtten = saturate( dot( attenuationFactors, float3( 1.0f, 1.0f/flDist, 1.0f/flDistSq ) ) );

      // N.L, or 1 when the caller has no normal to offer
      HALF flLDotWorldNormal = 1.0h;
      if ( bHasNormal )
      {
          flLDotWorldNormal = dot( vLightDir.xyz, (HALF3)worldNormal.xyz );
      }

      // Shadowing and coloring terms
  #if (defined(SHADER_MODEL_PS_2_B) || defined(SHADER_MODEL_PS_3_0))
      if ( bDoShadows )
      {
          HALF flRawShadow = DoFlashlightShadow( FlashlightDepthSampler, RandomRotationSampler, vProjCoords, vScreenPos, nShadowLevel, vShadowTweaks, flLDotWorldNormal );

          // Blend between fully attenuated and not attenuated
          HALF flRelaxedShadow = lerp( saturate( flRawShadow ), 1.0h, (HALF)vShadowTweaks.y );

          // Blend between shadow and above, according to light attenuation
          HALF flShadowTerm = saturate( lerp( flRelaxedShadow, flRawShadow, (HALF)flAtten ) );

          vLightColor *= flShadowTerm; // Shadow term
      }
  #endif

  #if defined(SHADER_MODEL_PS_2_0) || defined(SHADER_MODEL_PS_2_B) || defined(SHADER_MODEL_PS_3_0)
      // Lambertian term, biased by the no-Lambert constant
      HALF flLambert = saturate( flLDotWorldNormal + (HALF)flFlashlightNoLambertValue );
  #else
      // Lambertian (not Half-Lambert) term
      HALF flLambert = saturate( flLDotWorldNormal );
  #endif

      HALF3 vDiffuse = flAtten;
      vDiffuse *= flLambert;
      vDiffuse *= vLightColor;
      vDiffuse *= flEndFalloff;
      return vDiffuse;
  }
  1126. #endif //#ifndef COMMON_FLASHLIGHT_FXC_H_