Files
LNXSDK/leenkx/Shaders/fsr1_rcas_pass/fsr1_rcas_pass.frag.glsl
2026-02-24 11:44:01 -08:00

117 lines
4.5 KiB
GLSL

#version 450
// AMD FidelityFX Super Resolution 1.0.2 - RCAS (Robust Contrast Adaptive Sharpening)
// Fragment-shader pass: reads `tex` at `texCoord` and writes the sharpened
// colour to `fragColor`.
// NOTE(review): the _FSR1_* preset macros tested below are not defined in this
// file; presumably they come from "compiled.inc" - confirm against the build.
#include "compiled.inc"
// Input image that main() samples (centre texel plus its four direct neighbours).
uniform sampler2D tex;
// Sharpness in "stops": 0.0 = maximum sharpness, higher = less sharp.
// main() converts it to a linear scale factor via exp2(-SHARPNESS_STOPS).
// Exactly one of the branches below defines SHARPNESS_STOPS.
#ifdef _FSR1_Ultra_Quality
const float SHARPNESS_STOPS = 0.0;
#elif defined(_FSR1_Balanced)
const float SHARPNESS_STOPS = 1.0;
#elif defined(_FSR1_Performance)
const float SHARPNESS_STOPS = 2.0;
#elif defined(_FSR1_Custom)
// Custom preset: sharpness is driven at run time by PPComp15.x, scaled by 2.
// Here SHARPNESS_STOPS is a macro expression rather than a constant.
// NOTE(review): the valid range of PPComp15.x is not visible here - confirm.
uniform vec4 PPComp15;
#define SHARPNESS_STOPS (PPComp15.x * 2.0)
#else
const float SHARPNESS_STOPS = 0.5; // Quality (default when no preset macro is defined)
#endif
// FSR RCAS limit - prevents unnatural sharpening artifacts.
// Evaluates to 0.1875; main() uses -FSR_RCAS_LIMIT as the most negative
// sharpening lobe it will accept before applying the sharpness scale.
#define FSR_RCAS_LIMIT (0.25 - (1.0 / 16.0))
// Coordinate used to sample `tex` for the centre texel.
in vec2 texCoord;
// Sharpened RGB plus the centre texel's original alpha.
out vec4 fragColor;
// AMD helper functions from ffx_a.h
// Smallest of three scalars.
float AMin3F1(float x, float y, float z) {
    float yz = min(y, z);
    return min(x, yz);
}
// Largest of three scalars.
float AMax3F1(float x, float y, float z) {
    float yz = max(y, z);
    return max(x, yz);
}
// High precision reciprocal (required for limiters per AMD docs).
// The divisor's magnitude is floored at 1e-8 so a zero divisor (e.g. in dark
// areas) cannot produce Inf/NaN. The divisor's sign is preserved: clamping the
// signed value itself would turn every negative divisor into +1e-8 and return
// a huge positive number instead of a negative reciprocal.
// Returns +1e8 for a == 0.0.
float ARcpF1(float a) {
    float magnitude = max(abs(a), 1e-8);
    return (a < 0.0) ? (-1.0 / magnitude) : (1.0 / magnitude);
}
// Medium precision reciprocal approximation (from AMD ffx_a.h).
// Works on the raw IEEE-754 bit pattern: subtracting the float's bits from a
// magic constant yields an approximate 1/a without a divide.
// main() in this shader uses exact divisions for noise detection and the final
// resolve, so this helper is not called from it.
float APrxMedRcpF1(float a) {
    uint bits = floatBitsToUint(a);
    return uintBitsToFloat(0x7ef19fffu - bits);
}
// RCAS entry point: sharpens the centre texel using its four direct
// neighbours and writes the result to fragColor. Alpha is passed through
// from the centre texel unchanged.
void main() {
    // Size of one texel of mip 0 in texture-coordinate units.
    vec2 texelSize = 1.0 / vec2(textureSize(tex, 0));

    // Algorithm uses a minimal cross-shaped neighbourhood:
    //     b
    //   d e f
    //     h
    // Inputs are clamped to [0,1]; the limiter math below assumes that range.
    vec3 b = clamp(texture(tex, texCoord + vec2(0.0, -texelSize.y)).rgb, 0.0, 1.0);
    vec3 d = clamp(texture(tex, texCoord + vec2(-texelSize.x, 0.0)).rgb, 0.0, 1.0);
    vec4 ee = texture(tex, texCoord);
    vec3 e = clamp(ee.rgb, 0.0, 1.0);
    vec3 f = clamp(texture(tex, texCoord + vec2(texelSize.x, 0.0)).rgb, 0.0, 1.0);
    vec3 h = clamp(texture(tex, texCoord + vec2(0.0, texelSize.y)).rgb, 0.0, 1.0);

    // Luma times 2 (AMD's luma calculation: B*0.5 + R*0.5 + G).
    const vec3 lumaWeights = vec3(0.5, 1.0, 0.5);
    float bL = dot(b, lumaWeights);
    float dL = dot(d, lumaWeights);
    float eL = dot(e, lumaWeights);
    float fL = dot(f, lumaWeights);
    float hL = dot(h, lumaWeights);

    // Noise detection: how far the centre luma deviates from the neighbour
    // average, relative to the local luma range. The range is floored at 1e-5
    // so flat areas do not divide by zero.
    float nz = 0.25 * (bL + dL + fL + hL) - eL;
    float lumaMax = max(AMax3F1(bL, dL, eL), max(fL, hL));
    float lumaMin = min(AMin3F1(bL, dL, eL), min(fL, hL));
    float range = lumaMax - lumaMin;
    nz = clamp(abs(nz) / max(range, 1e-5), 0.0, 1.0);
    // Maps [0,1] to an attenuation factor in [0.5,1]: noisier => less sharpening.
    nz = -0.5 * nz + 1.0;

    // Per-channel min and max of the ring (the four neighbours, without e).
    vec3 mn4 = min(min(b, d), min(f, h));
    vec3 mx4 = max(max(b, d), max(f, h));

    // Immediate constants for peak range.
    const vec2 peakC = vec2(1.0, -4.0);

    // Limiters - these need HIGH PRECISION reciprocals (per AMD docs).
    // hitMin = min(ring, e) / (4 * ringMax), always >= 0.
    vec3 lo = min(mn4, e);
    vec3 hitMin = vec3(lo.r * ARcpF1(4.0 * mx4.r),
                       lo.g * ARcpF1(4.0 * mx4.g),
                       lo.b * ARcpF1(4.0 * mx4.b));

    // hitMax = (1 - max(ring, e)) / (4 * ringMin - 4), always <= 0.
    // It is evaluated as (max - 1) / (4 - 4 * ringMin) so the divisor passed to
    // the reciprocal helper is never negative. A helper that clamps its
    // argument to a positive epsilon would otherwise flip the sign of this
    // term, make every lobe non-negative and disable sharpening entirely.
    vec3 hi = max(mx4, e);
    vec3 hitMax = vec3((hi.r - peakC.x) * ARcpF1(-peakC.y - 4.0 * mn4.r),
                       (hi.g - peakC.x) * ARcpF1(-peakC.y - 4.0 * mn4.g),
                       (hi.b - peakC.x) * ARcpF1(-peakC.y - 4.0 * mn4.b));

    // Per channel, keep the less aggressive (closer to zero) of the two limits.
    vec3 lobeRGB = max(-hitMin, hitMax);

    // Convert sharpness from stops to linear. Stops are clamped to >= 0 so the
    // scale never exceeds 1; a larger scale would push the lobe past
    // FSR_RCAS_LIMIT and the resolve divisor below its 0.25 floor.
    float sharpness = exp2(-max(SHARPNESS_STOPS, 0.0));

    // Single lobe for all channels, limited to [-FSR_RCAS_LIMIT, 0].
    float lobe = max(-FSR_RCAS_LIMIT, min(AMax3F1(lobeRGB.r, lobeRGB.g, lobeRGB.b), 0.0)) * sharpness;

    // Apply noise removal.
    lobe *= nz;

    // Resolve: weighted sum of the ring (weight = lobe each) and the centre
    // (weight = 1), normalised by the total weight. The divisor lies in
    // [0.25, 1.0]; the max() is a safety floor.
    float rcpL = 1.0 / max(4.0 * lobe + 1.0, 0.25);
    vec3 pix = (lobe * (b + d + f + h) + e) * rcpL;

    // Ensure output is clamped to valid range.
    fragColor = vec4(clamp(pix, 0.0, 1.0), ee.a);
}