Team Fortress 2 Source Code as on 22/4/2020
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

433 lines
17 KiB

  // Shader combo declarations.
  // NOTE(review): the combo lines below look like build-tool directives rather than ordinary
  // comments - confirm before rewording or reordering them.
  // STATIC: "CONVERT_TO_SRGB" "0..1" [ps20b][= g_pHardwareConfig->NeedsShaderSRGBConversion()] [PC]
  // STATIC: "CONVERT_TO_SRGB" "0..0" [= 0] [XBOX]
  // STATIC: "LINEAR_INPUT" "0..1" [ps20b]
  // STATIC: "LINEAR_OUTPUT" "0..1" [ps20b]
  // DYNAMIC: "AA_ENABLE" "0..1"
  // rem DYNAMIC: "AA_DEBUG_MODE" "0..3"
  #define AA_DEBUG_MODE 0
  // DYNAMIC: "AA_QUALITY_MODE" "0..0" [ps20]
  // DYNAMIC: "AA_QUALITY_MODE" "0..1" [ps20b]
  // DYNAMIC: "AA_QUALITY_MODE" "0..1" [ps30]
  // DYNAMIC: "AA_REDUCE_ONE_PIXEL_LINE_BLUR" "0..0" [ps20]
  // DYNAMIC: "AA_REDUCE_ONE_PIXEL_LINE_BLUR" "0..1" [ps20b]
  // DYNAMIC: "AA_REDUCE_ONE_PIXEL_LINE_BLUR" "0..1" [ps30]
  // DYNAMIC: "COL_CORRECT_NUM_LOOKUPS" "0..4"

  #define HDRTYPE HDR_TYPE_NONE
  #include "common_ps_fxc.h"

  #if !defined( SHADER_MODEL_PS_2_B ) && !defined( SHADER_MODEL_PS_3_0 )
  // Below ps2b there are not enough instruction slots for the debug modes or the
  // high-quality mode, so debug output is forced off.
  #undef AA_DEBUG_MODE
  #define AA_DEBUG_MODE 0
  #endif
  22. /*
  23. * Engine_Post combines bloom (the final simple addition) with software anti-aliasing
  24. * and colour-correction. Combining them has these benefits:
  25. * (a) saves fillrate+bandwidth (big on PC)
  26. * (b) saves calls to UpdateScreenEffectTexture (big on 360)
  27. * (c) reduces quantization errors caused by multiple passes
  28. * (d) improves AA quality (it works better on sRGB values than linear)
  29. *
  30. *
  31. * Software AA Summary
  32. * -------------------
  33. *
  34. * This AA process works by sampling neighbour pixels (4 or 8 of them):
  35. *
  *        5-tap filter:   #           9-tap filter:  ###
  *  (AA_QUALITY_MODE 0)  ###    (AA_QUALITY_MODE 1)  ###
  *                        #                          ###
  39. *
  40. * It then figures out which of these neighbours are 'unlike' the centre pixel.
  41. * This is based on RGB distance, weighted by the maximum luminance of the samples
  42. * (so the difference between 0.1 and 0.2 is the same as between 0.5 and 1.0).
  43. * This detects high-contrast edges in both dark and bright scenes.
  44. *
  45. * It then counts how many 'unlike' samples there are. Some example cases for 5-tap:
  46. *
  *    O         #         #         #         #         #
  *   OOO       OOO       #OO       OOO       #O#       #O#
  *    O         O         O         #         O         #
  *   Zero      One       TwoA      TwoB      Three     Four
  51. *
  52. * We then blend towards the average of the unlike neighbours, based on how many
  53. * unlike neighbours there are. The key case is 'TwoA' - this detects stairstep pixels
  54. * on non-axis-aligned edges. In that case, we blend the output colour towards the
  55. * average of the unlike samples by 33%. This yields a 3-pixel transition (0->33->66->100)
  56. * where before there was a 1-pixel transition (0->100).
  57. *
  58. * The 9-tap filter (which works the same as 5-tap, just with more samples and different
  59. * weights) has two advantages over the 5-tap filter:
  60. * - it can differentiate between stairsteps on 45-degree edges and near-horizontal edges
  61. * (so the 5-tap version smudges 45-degree edges more than you want, e.g. chain-link fences)
  62. * - it blurs less texture detail, by virtue of averaging out noise over more samples
  63. *
  64. * One problem case that both filters have to consider is one-pixel-thick lines (this is
  65. * case 'TwoB' above). Sometimes you do want to soften these lines (for slivers of brightly-lit
  66. * geometry in a dark area, e.g. a window frame), but sometimes you do NOT want to soften them
  67. * (for thin geometry which is alternating between 1-pixel and 2-pixel thickness, e.g. cables,
  68. * and also where 1-pixel lines appear in textures, e.g. roof tiles). So, blurring of 1-pixel
  69. * lines is tunable (it defaults to half-blurred as a compromise between the want/don't cases),
  70. * in the 'AA_REDUCE_ONE_PIXEL_LINE_BLUR' section below. Case TwoB is differentiated from TwoA by
  71. * computing the centroid of the unlike samples (the centroid will be at zero for case TwoB,
  72. * but not for TwoA).
  73. *
  74. */
  // ---- Texture samplers ----
  sampler BaseTextureSampler            : register( s0 );
  sampler FBTextureSampler              : register( s1 );
  sampler ColorCorrectionVolumeTexture0 : register( s2 );
  sampler ColorCorrectionVolumeTexture1 : register( s3 );
  sampler ColorCorrectionVolumeTexture2 : register( s4 );
  sampler ColorCorrectionVolumeTexture3 : register( s5 );

  // ---- Pixel shader constants ----

  // 1-pixel tap offsets, packed as: ( +dX, 0, +dY, -dX )
  float4 psTapOffs_Packed               : register( c0 );

  // AA tuning values:
  //   x - AA strength/unused
  //   y - reduction of 1-pixel-line blur
  //   z - edge threshold multiplier
  //   w - tap offset multiplier
  float4 tweakables                     : register( c1 );

  // Transforms BaseTexture UVs for use with the FBTexture
  float4 uvTransform                    : register( c2 );

  float  ColorCorrectionDefaultWeight   : register( c3 );
  float4 ColorCorrectionVolumeWeights   : register( c4 );
  float  BloomFactor                    : register( c5 );
  // Fetches the bloom colour from BaseTextureSampler at 'bloomUV'.
  // When LINEAR_INPUT is 1 the sample is passed through LinearToGamma() before being returned;
  // otherwise the raw sample is returned.
  float4 GetBloomColor( float2 bloomUV )
  {
      float4 bloomSample = tex2D( BaseTextureSampler, bloomUV );

  #if ( LINEAR_INPUT == 1 )
      // This path is only used on OpenGL, where the tex2D is forced to apply an sRGB
      // conversion. We want sRGB data here, so that conversion has to be undone.
      bloomSample = LinearToGamma( bloomSample );
  #endif

      return bloomSample;
  }
  // Applies up to four weighted 3D colour-correction lookups to 'outColor'.
  //
  // With COL_CORRECT_NUM_LOOKUPS == 0 the colour is returned untouched. Otherwise the RGB
  // result is the input scaled by ColorCorrectionDefaultWeight plus each enabled volume
  // lookup scaled by its component of ColorCorrectionVolumeWeights. Alpha is passed through.
  // 'fbTexCoord' is not read by this function.
  float4 PerformColorCorrection( float4 outColor, float2 fbTexCoord )
  {
  #if ( COL_CORRECT_NUM_LOOKUPS > 0 )
      // NOTE: This code requires the color correction texture to be 32 units to be correct.
      //       (0,0,0) is read from 0.5f/32 and (1,1,1) is read from 31.5f/32.
      float3 lookupCoord = outColor.rgb*(31.0f/32.0f) + (0.5f/32.0f);

      float3 corrected = outColor.rgb * ColorCorrectionDefaultWeight;
      corrected += tex3D( ColorCorrectionVolumeTexture0, lookupCoord ).rgb * ColorCorrectionVolumeWeights.x;
  #if ( COL_CORRECT_NUM_LOOKUPS > 1 )
      corrected += tex3D( ColorCorrectionVolumeTexture1, lookupCoord ).rgb * ColorCorrectionVolumeWeights.y;
  #endif
  #if ( COL_CORRECT_NUM_LOOKUPS > 2 )
      corrected += tex3D( ColorCorrectionVolumeTexture2, lookupCoord ).rgb * ColorCorrectionVolumeWeights.z;
  #endif
  #if ( COL_CORRECT_NUM_LOOKUPS > 3 )
      corrected += tex3D( ColorCorrectionVolumeTexture3, lookupCoord ).rgb * ColorCorrectionVolumeWeights.w;
  #endif

      outColor.rgb = corrected;
  #endif

      return outColor;
  }
  // Software anti-aliasing for a single pixel.
  //
  // Samples the 4 (AA_QUALITY_MODE 0) or 8 (AA_QUALITY_MODE 1) neighbours of the pixel at
  // 'fbTexCoord' from FBTextureSampler, gives each neighbour a weight in [0,1] describing how
  // 'unlike' it is to 'baseColor', and blends 'baseColor' towards the weighted average of the
  // neighbours.
  //
  //   baseColor  - colour of the centre pixel
  //   fbTexCoord - UV of the centre pixel in FBTextureSampler
  //   unlike     - (out) weighted average colour of the neighbours
  //   unlikeSum  - (out) sum of the neighbour weights (after the one-pixel-line reduction
  //                when AA_REDUCE_ONE_PIXEL_LINE_BLUR is enabled)
  //   lerpFactor - (out) blend factor that was applied (never more than MAX_LERP_FACTOR)
  //
  // Returns lerp( baseColor, unlike, lerpFactor ).
  //
  // NOTE(review): the neighbour taps are used exactly as sampled. When LINEAR_INPUT == 1,
  //               main() passes in a baseColor that has been through LinearToGamma() -
  //               confirm the taps and baseColor are in the same colour space on that path.
  float3 PerformAA( float3 baseColor, float2 fbTexCoord, out float3 unlike, out float unlikeSum, out float lerpFactor )
  {
      float3 a, b, c, d, e, f, g, h;
      float3 dA, dB, dC, dD, dE, dF, dG, dH;
      float4 deltas, deltas2;
      float4 weights, weights2;
      float4 lumS;
      float maxLumS;

      // Set FAST_DELTAS to '1' to use Manhattan distance (in colour-space) rather than Euclidean distance.
      // FAST_DELTAS is a shader constant, NOT a preprocessor macro, so it must be tested with a
      // regular 'if' (the compiler folds the constant branch). Testing it with '#if' would see an
      // undefined identifier, which evaluates to 0, and would always select the Euclidean path
      // while the thresholds below are selected for the Manhattan path.
      const int FAST_DELTAS = 1;

  #if AA_QUALITY_MODE == 0
      const float COLOUR_DELTA_BASE = (FAST_DELTAS == 0) ? 0.11f : 0.5f;
      const float COLOUR_DELTA_CONTRAST = 100;
      // Scaling down colour deltas (DELTA_SCALE) reduces the over-blurring of 45-degree edges
      // by the 5-tap filter. Conversely, increasing it smooths stairsteps more strongly.
      const float DELTA_SCALE = 0.75f;
  #else // AA_QUALITY_MODE == 0
      const float COLOUR_DELTA_BASE = (FAST_DELTAS == 0) ? 0.24f : 0.65f;
      const float COLOUR_DELTA_CONTRAST = 100;
      const float DELTA_SCALE = 1.0f;
  #endif // AA_QUALITY_MODE == 0

      const float MAX_LERP_FACTOR = 0.66f;

      float onePixelLineBlurReduction = tweakables.y;

      // psTapOffs_Packed contains 1-pixel offsets: ( +dX, 0, +dY, -dX )
      float4 texelDelta = psTapOffs_Packed*tweakables.w;

      // Allowed ps20 swizzles:
      //   .xyzw on (+dX,0,+dY,-dX) gives: (+dX, 0) & (-dX, 0) (former with 'add', latter with 'sub')
      //   .yzxw on (+dX,0,+dY,-dX) gives: ( 0,+dY) & ( 0,-dY)
      //   .wzyx on (+dX,0,+dY,-dX) gives: (-dX,+dY) & (+dX,-dY)
      //   .zxyw on (not used)
      // NOTE: These don't give us (+dX,+dY) and (-dX,-dY), we need to copy +dY: ( +dX, 0, +dY, -dX ) -> ( +dX, +dY, +dY, -dX )
      // NOTE: tex2D() can't swizzle the source register in ps2x, so we have no
      //       choice but to add each float2 offset to fbTexCoord one at a time :o/
      a = tex2D( FBTextureSampler, fbTexCoord + texelDelta.yz ).rgb; // ( 0,+1)
      b = tex2D( FBTextureSampler, fbTexCoord + texelDelta.xy ).rgb; // (+1, 0)
      c = tex2D( FBTextureSampler, fbTexCoord - texelDelta.yz ).rgb; // ( 0,-1)
      d = tex2D( FBTextureSampler, fbTexCoord - texelDelta.xy ).rgb; // (-1, 0)
  #if AA_QUALITY_MODE == 1
      // 9-tap method (do diagonal neighbours too)
      e = tex2D( FBTextureSampler, fbTexCoord + texelDelta.wz ).rgb; // (-1,+1)
      f = tex2D( FBTextureSampler, fbTexCoord - texelDelta.wz ).rgb; // (+1,-1)
      texelDelta.y = texelDelta.z; // Can't quite get all 8 sample offsets from a single float4 with the allowed swizzles! :o/
      g = tex2D( FBTextureSampler, fbTexCoord + texelDelta.xy ).rgb; // (+1,+1)
      h = tex2D( FBTextureSampler, fbTexCoord - texelDelta.xy ).rgb; // (-1,-1)
  #endif // AA_QUALITY_MODE == 1

      // Compute the like<-->unlike weights
      dA = a - baseColor;
      dB = b - baseColor;
      dC = c - baseColor;
      dD = d - baseColor;
  #if AA_QUALITY_MODE == 1
      dE = e - baseColor;
      dF = f - baseColor;
      dG = g - baseColor;
      dH = h - baseColor;
  #endif // AA_QUALITY_MODE == 1

      if ( FAST_DELTAS == 0 )
      {
          // Colour-space Euclidean distance
          deltas = float4( dot(dA, dA), dot(dB, dB), dot(dC, dC), dot(dD, dD) );
          deltas = DELTA_SCALE*DELTA_SCALE*(deltas / 3);
          deltas = sqrt(deltas);
      }
      else
      {
          // Colour-space Manhattan distance
          // OPT: to avoid the 'abs', try dividing colours by maxLumS then dotprodding w/ baseColor
          deltas.x = dot( abs( dA ), 1 );
          deltas.y = dot( abs( dB ), 1 );
          deltas.z = dot( abs( dC ), 1 );
          deltas.w = dot( abs( dD ), 1 );
          deltas *= DELTA_SCALE;
      }
      weights = deltas;

  #if AA_QUALITY_MODE == 1
      if ( FAST_DELTAS == 0 )
      {
          deltas2 = float4( dot(dE, dE), dot(dF, dF), dot(dG, dG), dot(dH, dH) );
          deltas2 = DELTA_SCALE*DELTA_SCALE*(deltas2 / 3);
          deltas2 = sqrt(deltas2);
      }
      else
      {
          deltas2.x = dot( abs( dE ), 1);
          deltas2.y = dot( abs( dF ), 1);
          deltas2.z = dot( abs( dG ), 1);
          deltas2.w = dot( abs( dH ), 1);
          deltas2 *= DELTA_SCALE;
      }
      weights2 = deltas2;
  #endif // AA_QUALITY_MODE == 1

      // Adjust weights relative to maximum sample luminance (local, relative contrast: 0.1 Vs 0.2 is the same as 0.5 Vs 1.0)
      lumS = float4( dot(a, a), dot(b, b), dot(c, c), dot(d, d) );
      lumS.xy = max( lumS.xy, lumS.wz );
      lumS.x = max( lumS.x, lumS.y );
      maxLumS = max( lumS.x, dot( baseColor, baseColor ) );
  #if AA_QUALITY_MODE == 1
      lumS = float4( dot(e, e), dot(f, f), dot(g, g), dot(h, h) );
      lumS.xy = max( lumS.xy, lumS.wz );
      lumS.x = max( lumS.x, lumS.y );
      maxLumS = max( lumS.x, maxLumS );
  #endif // AA_QUALITY_MODE == 1
      // NOTE(review): if every sample is pure black, maxLumS is 0 and this divides by zero -
      //               confirm the result is acceptable on the target hardware.
      float lumScale = 1.0f / sqrt( maxLumS );
      weights *= lumScale;
  #if AA_QUALITY_MODE == 1
      weights2 *= lumScale;
  #endif // AA_QUALITY_MODE == 1

      // Contrast-adjust weights such that only large contrast differences are taken into account
      // (pushes weights to 0.0 for 'like' neighbours and to 1.0 for 'unlike' neighbours)
      float colourDeltaBase = tweakables.z*COLOUR_DELTA_BASE;
      weights = saturate(colourDeltaBase + COLOUR_DELTA_CONTRAST*(weights - colourDeltaBase));
  #if AA_QUALITY_MODE == 1
      weights2 = saturate(colourDeltaBase + COLOUR_DELTA_CONTRAST*(weights2 - colourDeltaBase));
  #endif // AA_QUALITY_MODE == 1

      // Determine the average 'unlike' colour
      unlikeSum = dot(weights, 1);
      unlike = weights.x*a + weights.y*b + weights.z*c + weights.w*d;
  #if AA_QUALITY_MODE == 1
      unlikeSum += dot(weights2, 1);
      unlike += weights2.x*e + weights2.y*f + weights2.z*g + weights2.w*h;
  #endif // AA_QUALITY_MODE == 1
      // NOTE: this can cause div-by-zero, but lerpFactor ends up at zero in that case so it doesn't matter
      unlike = unlike / unlikeSum;

  #if AA_REDUCE_ONE_PIXEL_LINE_BLUR
      // Reduce lerpFactor for 1-pixel-thick lines - otherwise you lose texture detail, and it looks
      // really weird where geometry (e.g. cables) alternates between being 1 and 2 pixels thick.
      // [ The "*2" below is because the values here were tuned to reduce blurring of 1-pixel lines
      //   by about half (which is a good compromise between the bad cases at either end). So you
      //   want the controlling convar to default to 0.5 ]
      const float ONE_PIXEL_LINE_BIAS_BASE = 0.4f;
      const float ONE_PIXEL_LINE_BIAS_CONTRAST = 16.0f;
      // Centroid of the neighbour offsets, each weighted by (1 - weight)
      float2 unlikeCentroid = 0;
      unlikeCentroid.x += dot( 1-weights, float4( 0, +1, 0, -1 ) ); // This 2x4 matrix is the transpose of
      unlikeCentroid.y += dot( 1-weights, float4( +1, 0, -1, 0 ) ); // the neighbour sample texel offsets
  #if AA_QUALITY_MODE == 0
      unlikeCentroid /= 4 - unlikeSum;
  #else // AA_QUALITY_MODE == 0
      unlikeCentroid.x += dot( 1-weights2, float4( -1, +1, +1, -1 ) );
      unlikeCentroid.y += dot( 1-weights2, float4( +1, -1, +1, -1 ) );
      unlikeCentroid /= 8 - unlikeSum;
  #endif // AA_QUALITY_MODE == 0
      float onePixelLineBias = 1 - saturate( length(unlikeCentroid) ); // OPTIMIZE: try using distSquared, remove this sqrt
      onePixelLineBias = onePixelLineBlurReduction*saturate(ONE_PIXEL_LINE_BIAS_BASE + ONE_PIXEL_LINE_BIAS_CONTRAST*(onePixelLineBias - ONE_PIXEL_LINE_BIAS_BASE));
  #if AA_QUALITY_MODE == 0
      unlikeSum -= 2*onePixelLineBias*0.4f*saturate( 3 - unlikeSum ); // The 'saturate' term avoids this affecting lone/pair pixels
  #else // AA_QUALITY_MODE == 0
      unlikeSum -= 2*onePixelLineBias*1.9f*saturate( 7 - unlikeSum );
  #endif // AA_QUALITY_MODE == 0
  #endif // AA_REDUCE_ONE_PIXEL_LINE_BLUR

      // Compute the lerp factor we use to blend between 'baseColor' and 'unlike'.
      // We want to lerp 'stairstep' pixels (which have 2 unlike neighbours)
      // 33% towards the 'unlike' colour, such that these hard, 1-pixel transitions
      // (0% -> 100%) become soft, 3-pixel transitions (0% -> 33% -> 66% -> 100%).
      float strengthMultiplier = tweakables.x;
  #if ( AA_QUALITY_MODE == 0 )
      lerpFactor = saturate( strengthMultiplier*DELTA_SCALE*( (unlikeSum - 1) / 3 ) );
      // Alternative that blends slightly across vertical/horizontal edges (better for 45-degree
      // edges, worse for 90-degree edges): use ( unlikeSum / 6 ) in place of ( (unlikeSum - 1) / 3 ).
  #else // AA_QUALITY_MODE != 0
      lerpFactor = saturate( strengthMultiplier*DELTA_SCALE*( (unlikeSum - 3) / 3 ) );
  #endif

      // Clamp the blend factor so that lone dot pixels aren't blurred into oblivion
      lerpFactor = min( lerpFactor, MAX_LERP_FACTOR );
      baseColor = lerp( baseColor, unlike, lerpFactor );
      return baseColor;
  }
  // Produces the AA debug visualisation selected by AA_DEBUG_MODE:
  //   1 - colour-codes the number of 'unlike' samples (red / green / blue for rising counts)
  //   2 - shows lerpFactor in the green channel
  //   3 - shows the 'unlike' colour, scaled by saturate(5*lerpFactor), run through FinalOutput()
  // For any other AA_DEBUG_MODE value 'outColor' is returned unchanged.
  float4 GenerateAADebugColor( float4 outColor, float3 unlike, float unlikeSum, float lerpFactor )
  {
  #if ( AA_DEBUG_MODE == 1 )
      // Thresholds at which the visualisation steps to red, green and blue respectively
  #if ( AA_QUALITY_MODE == 0 )
      const float3 stepAt = float3( 0.95f, 1.95f, 2.95f );
  #else
      const float3 stepAt = float3( 2.95f, 3.95f, 4.95f );
  #endif
      float3 countColor = 0;
      if ( unlikeSum >= stepAt.x ) countColor = float3(1,0,0);
      if ( unlikeSum >= stepAt.y ) countColor = float3(0,1,0);
      if ( unlikeSum >= stepAt.z ) countColor = float3(0,0,1);
      outColor.rgb = countColor;
      // Don't sRGB-write
  #elif ( AA_DEBUG_MODE == 2 )
      // Strength of lerpFactor, shown in green only
      outColor.rgb = 0;
      outColor.g = lerpFactor;
      // Don't sRGB-write
  #elif ( AA_DEBUG_MODE == 3 )
      // The 'unlike' colour that we blend towards
      outColor.rgb = lerp( 0, unlike, saturate(5*lerpFactor) );
      // Do sRGB-write (if it's enabled)
      outColor = FinalOutput( outColor, 0, PIXEL_FOG_TYPE_NONE, TONEMAP_SCALE_NONE );
  #endif

      return outColor;
  }
  // Maps 'bloomUVs' through uvTransform: scale by uvTransform.wz, then offset by uvTransform.xy.
  float2 PerformUVTransform( float2 bloomUVs )
  {
      // NOTE: the scale is read as 'wz' because 'zw' is not a valid swizzle for ps20 shaders
      float2 uvScale  = uvTransform.wz;
      float2 uvOffset = uvTransform.xy;
      return bloomUVs*uvScale + uvOffset;
  }
  // Interpolated inputs to the pixel shader.
  struct PS_INPUT
  {
      float2 baseTexCoord  : TEXCOORD0;
  #ifdef _X360
      // Declared on 360 only, to avoid a shader patch caused by the pixel shader
      // taking fewer inputs than the vertex shader outputs.
      float2 ZeroTexCoord  : TEXCOORD1;
      float2 bloomTexCoord : TEXCOORD2;
  #endif
  };
  // Pixel shader entry point.
  // Reads the framebuffer pixel, optionally anti-aliases it (AA_ENABLE), adds the bloom colour
  // scaled by BloomFactor, applies colour correction and FinalOutput(), and finally converts
  // to linear when LINEAR_OUTPUT is 1. With AA enabled and AA_DEBUG_MODE > 0 the AA debug
  // visualisation is returned instead.
  float4 main( PS_INPUT i ) : COLOR
  {
      float2 fbTexCoord = PerformUVTransform( i.baseTexCoord );

      float4 outColor;
      outColor.rgb = tex2D( FBTextureSampler, fbTexCoord ).rgb;
      outColor.a   = 1;

  #if ( LINEAR_INPUT == 1 )
      // This path is only used on OpenGL, where the tex2D is forced to apply an sRGB
      // conversion. We want sRGB data here, so that conversion has to be undone.
      outColor.rgb = LinearToGamma( outColor.rgb );
  #endif

  #if ( AA_ENABLE == 1 )
      float3 unlike;
      float unlikeSum;
      float lerpFactor;
      outColor.rgb = PerformAA( outColor.rgb, fbTexCoord, unlike, unlikeSum, lerpFactor );
  #if ( AA_DEBUG_MODE > 0 )
      return GenerateAADebugColor( outColor, unlike, unlikeSum, lerpFactor );
  #endif
  #endif

      // Bloom is a simple addition on top of the (possibly anti-aliased) colour
      float4 bloomColor = BloomFactor * GetBloomColor( i.baseTexCoord );
      outColor.rgb += bloomColor.rgb;

      outColor = PerformColorCorrection( outColor, fbTexCoord );
      outColor = FinalOutput( outColor, 0, PIXEL_FOG_TYPE_NONE, TONEMAP_SCALE_NONE );

  #if ( LINEAR_OUTPUT == 1 )
      // Go to linear since we're forced to do an sRGB write on OpenGL in ps2b
      outColor = GammaToLinear( outColor );
  #endif

      return outColor;
  }