#version 450

// AMD FidelityFX Super Resolution 1.0.2 - RCAS (Robust Contrast Adaptive Sharpening)
//
// Fragment shader that sharpens `tex` using a 5-tap cross neighbourhood
// (centre texel plus its up/left/right/down neighbours). The sharpening
// amount is limited per pixel so the result cannot leave the [0,1] range,
// and is attenuated where the centre texel looks like isolated noise.

#include "compiled.inc"

uniform sampler2D tex;

// Sharpness in "stops": 0.0 = maximum sharpness, higher = less sharp.
// Converted to a linear scale via exp2(-sharpness) in main().
#ifdef _FSR1_Ultra_Quality
const float SHARPNESS_STOPS = 0.0;
#elif defined(_FSR1_Balanced)
const float SHARPNESS_STOPS = 1.0;
#elif defined(_FSR1_Performance)
const float SHARPNESS_STOPS = 2.0;
#elif defined(_FSR1_Custom)
uniform vec4 PPComp15;
#define SHARPNESS_STOPS (PPComp15.x * 2.0)
#else
const float SHARPNESS_STOPS = 0.5; // Quality (default)
#endif

// FSR RCAS limit - prevents unnatural sharpening artifacts.
// This is the largest magnitude the (negative) sharpening lobe may reach.
#define FSR_RCAS_LIMIT (0.25 - (1.0 / 16.0))

in vec2 texCoord;
out vec4 fragColor;

// AMD helper functions from ffx_a.h

// Minimum of three scalars.
float AMin3F1(float x, float y, float z) {
    return min(x, min(y, z));
}

// Maximum of three scalars.
float AMax3F1(float x, float y, float z) {
    return max(x, max(y, z));
}

// High precision reciprocal (required for limiters per AMD docs).
//
// The magnitude of `a` is kept at least 1e-8 away from zero so that fully
// black or fully white neighbourhoods do not divide by zero. The guard must
// preserve the sign of `a`: the upper limiter in main() passes
// (4 * ringMin - 4), which is <= 0 for inputs in [0,1]. Clamping that to a
// positive epsilon would flip the limiter's sign, force the lobe to 0 and
// silently disable sharpening everywhere.
float ARcpF1(float a) {
    float guarded = (a < 0.0) ? min(a, -1e-8) : max(a, 1e-8);
    return 1.0 / guarded;
}

// Medium precision reciprocal approximation (from AMD ffx_a.h).
// Not called anywhere in this file: noise detection and the final resolve
// below use exact, guarded divisions instead.
float APrxMedRcpF1(float a) {
    return uintBitsToFloat(uint(0x7ef19fff) - floatBitsToUint(a));
}

void main() {
    // Get texture size and texel offset
    vec2 texSize = vec2(textureSize(tex, 0));
    vec2 texelSize = 1.0 / texSize;

    // Algorithm uses minimal 3x3 pixel neighborhood
    //    b
    //  d e f
    //    h
    // Clamp inputs to [0,1] - FSR expects sRGB normalized input
    vec3 b = clamp(texture(tex, texCoord + vec2(0.0, -texelSize.y)).rgb, 0.0, 1.0);
    vec3 d = clamp(texture(tex, texCoord + vec2(-texelSize.x, 0.0)).rgb, 0.0, 1.0);
    vec4 ee = texture(tex, texCoord);
    vec3 e = clamp(ee.rgb, 0.0, 1.0);
    vec3 f = clamp(texture(tex, texCoord + vec2(texelSize.x, 0.0)).rgb, 0.0, 1.0);
    vec3 h = clamp(texture(tex, texCoord + vec2(0.0, texelSize.y)).rgb, 0.0, 1.0);

    // Luma times 2 (AMD's luma calculation: B*0.5 + R*0.5 + G)
    float bL = b.b * 0.5 + (b.r * 0.5 + b.g);
    float dL = d.b * 0.5 + (d.r * 0.5 + d.g);
    float eL = e.b * 0.5 + (e.r * 0.5 + e.g);
    float fL = f.b * 0.5 + (f.r * 0.5 + f.g);
    float hL = h.b * 0.5 + (h.r * 0.5 + h.g);

    // Noise detection: how far the centre luma sits from the ring average,
    // relative to the local luma range. The result scales the lobe by a
    // factor in [0.5, 1.0].
    float nz = 0.25 * bL + 0.25 * dL + 0.25 * fL + 0.25 * hL - eL;
    float range = AMax3F1(AMax3F1(bL, dL, eL), fL, hL) - AMin3F1(AMin3F1(bL, dL, eL), fL, hL);
    // Use safe division instead of APrxMedRcpF1 for range to avoid NaN in flat areas
    nz = clamp(abs(nz) / max(range, 1e-5), 0.0, 1.0);
    nz = -0.5 * nz + 1.0;

    // Min and max of ring (per channel)
    float mn4R = min(AMin3F1(b.r, d.r, f.r), h.r);
    float mn4G = min(AMin3F1(b.g, d.g, f.g), h.g);
    float mn4B = min(AMin3F1(b.b, d.b, f.b), h.b);
    float mx4R = max(AMax3F1(b.r, d.r, f.r), h.r);
    float mx4G = max(AMax3F1(b.g, d.g, f.g), h.g);
    float mx4B = max(AMax3F1(b.b, d.b, f.b), h.b);

    // Immediate constants for peak range
    vec2 peakC = vec2(1.0, -4.0);

    // Limiters - these need HIGH PRECISION reciprocals (per AMD docs).
    // hitMin* is >= 0: headroom before the output would drop below 0.
    float hitMinR = min(mn4R, e.r) * ARcpF1(4.0 * mx4R);
    float hitMinG = min(mn4G, e.g) * ARcpF1(4.0 * mx4G);
    float hitMinB = min(mn4B, e.b) * ARcpF1(4.0 * mx4B);
    // hitMax* is <= 0: the denominator (4 * mn4 - 4) is never positive, so
    // ARcpF1 has to keep its sign for these to act as limits.
    float hitMaxR = (peakC.x - max(mx4R, e.r)) * ARcpF1(4.0 * mn4R + peakC.y);
    float hitMaxG = (peakC.x - max(mx4G, e.g)) * ARcpF1(4.0 * mn4G + peakC.y);
    float hitMaxB = (peakC.x - max(mx4B, e.b)) * ARcpF1(4.0 * mn4B + peakC.y);

    // Per channel, take the tighter (closer to zero) of the two limits.
    float lobeR = max(-hitMinR, hitMaxR);
    float lobeG = max(-hitMinG, hitMaxG);
    float lobeB = max(-hitMinB, hitMaxB);

    // Apply sharpness (convert from stops to linear).
    // The lobe is the most restrictive channel limit, clamped to
    // [-FSR_RCAS_LIMIT, 0] before the sharpness scale is applied.
    float sharpness = exp2(-SHARPNESS_STOPS);
    float lobe = max(-FSR_RCAS_LIMIT, min(AMax3F1(lobeR, lobeG, lobeB), 0.0)) * sharpness;

    // Apply noise removal
    lobe *= nz;

    // Resolve using a guarded reciprocal. With sharpness <= 1 the
    // denominator lies in [0.25, 1.0]; the max() also covers the custom
    // uniform path should it ever yield a sharpness above 1.
    float denom = 4.0 * lobe + 1.0;
    float rcpL = 1.0 / max(denom, 0.25);
    vec3 pix = (lobe * b + lobe * d + lobe * h + lobe * f + e) * rcpL;

    // Ensure output is clamped to valid range; alpha is passed through
    // from the unclamped centre sample.
    fragColor = vec4(clamp(pix, 0.0, 1.0), ee.a);
}