Patch_2

2026-02-24 11:44:01 -08:00
parent c9839c9be6
commit 1c3c30e6ce
34 changed files with 1629 additions and 1271 deletions
--- a/leenkx/Shaders/fsr1_rcas_pass/fsr1_rcas_pass.frag.glsl
+++ b/leenkx/Shaders/fsr1_rcas_pass/fsr1_rcas_pass.frag.glsl
@ -0,0 +1,116 @@
+#version 450
+
+// AMD FidelityFX Super Resolution 1.0.2 - RCAS (Robust Contrast Adaptive Sharpening)
+
+#include "compiled.inc"
+
+uniform sampler2D tex; // Input image sampled by main()
+
+// Sharpness in "stops": 0.0 = maximum sharpness, higher = less sharp; each stop halves the sharpening strength.
+// main() converts it to a linear scale via exp2(-SHARPNESS_STOPS). The preset is selected at compile time by the _FSR1_* defines.
+#ifdef _FSR1_Ultra_Quality
+const float SHARPNESS_STOPS = 0.0;
+#elif defined(_FSR1_Balanced)
+const float SHARPNESS_STOPS = 1.0;
+#elif defined(_FSR1_Performance)
+const float SHARPNESS_STOPS = 2.0;
+#elif defined(_FSR1_Custom)
+uniform vec4 PPComp15; // Only .x is read here. NOTE(review): presumably a [0,1] slider mapping to 0..2 stops - confirm against the host code
+#define SHARPNESS_STOPS (PPComp15.x * 2.0)
+#else
+const float SHARPNESS_STOPS = 0.5; // Quality (default)
+#endif
+
+// FSR RCAS limit (evaluates to 0.1875) - prevents unnatural sharpening artifacts; main() uses it to bound the magnitude of the negative lobe.
+#define FSR_RCAS_LIMIT (0.25 - (1.0 / 16.0))
+
+in vec2 texCoord; // Texture coordinate at which main() samples tex
+out vec4 fragColor; // Result colour written by main()
+
+// AMD helper functions from ffx_a.h
+// Smallest of three scalars.
+float AMin3F1(float x, float y, float z) {
+    float yz = min(y, z);
+    return min(x, yz);
+}
+// Largest of three scalars.
+float AMax3F1(float x, float y, float z) {
+    float yz = max(y, z);
+    return max(x, yz);
+}
+
+// High precision reciprocal (required for limiters per AMD docs).
+// The magnitude of `a` is floored at 1e-8 so that a zero denominator yields a
+// large finite value instead of inf/NaN.
+// The floor keeps the sign of `a`: callers may pass non-positive denominators
+// (e.g. an upper-bound limiter of the form (1 - max) / (4*min - 4)), and
+// clamping those to +1e-8 would flip the sign of the result and defeat the
+// limiter.
+// Returns 1/a when |a| >= 1e-8, otherwise +/-1e8 with the sign of `a`
+// (zero is treated as positive).
+float ARcpF1(float a) {
+    float magnitude = max(abs(a), 1e-8);
+    return 1.0 / (a < 0.0 ? -magnitude : magnitude);
+}
+
+// Medium precision reciprocal approximation (from AMD ffx_a.h).
+// Approximates 1/a by subtracting the IEEE-754 bit pattern of `a` from a magic
+// constant and reinterpreting the result as a float.
+// AMD's reference uses it for noise detection and the final resolve; main() in
+// this file performs exact divisions for both of those steps instead.
+float APrxMedRcpF1(float a) {
+    uint inputBits = floatBitsToUint(a);
+    uint approxBits = 0x7ef19fffu - inputBits;
+    return uintBitsToFloat(approxBits);
+}
+
+// RCAS fragment entry point.
+// Samples the centre texel and its four direct neighbours from `tex`, derives a
+// negative sharpening lobe bounded by the RCAS limiters, attenuates it on
+// noise-like pixels, and writes the resolved colour to `fragColor`. Alpha is
+// passed through unchanged from the centre sample.
+void main() {
+    // One-texel step in UV space for mip level 0 of the input.
+    vec2 texelSize = 1.0 / vec2(textureSize(tex, 0));
+
+    // Algorithm uses a minimal cross-shaped neighbourhood:
+    //    b
+    //  d e f
+    //    h
+    // Inputs are clamped to [0,1]; the limiters below use 1.0 as the peak value.
+    vec3 b = clamp(texture(tex, texCoord + vec2(0.0, -texelSize.y)).rgb, 0.0, 1.0);
+    vec3 d = clamp(texture(tex, texCoord + vec2(-texelSize.x, 0.0)).rgb, 0.0, 1.0);
+    vec4 ee = texture(tex, texCoord);
+    vec3 e = clamp(ee.rgb, 0.0, 1.0);
+    vec3 f = clamp(texture(tex, texCoord + vec2(texelSize.x, 0.0)).rgb, 0.0, 1.0);
+    vec3 h = clamp(texture(tex, texCoord + vec2(0.0, texelSize.y)).rgb, 0.0, 1.0);
+
+    // Luma times 2 (AMD's luma calculation: B*0.5 + R*0.5 + G)
+    float bL = b.b * 0.5 + (b.r * 0.5 + b.g);
+    float dL = d.b * 0.5 + (d.r * 0.5 + d.g);
+    float eL = e.b * 0.5 + (e.r * 0.5 + e.g);
+    float fL = f.b * 0.5 + (f.r * 0.5 + f.g);
+    float hL = h.b * 0.5 + (h.r * 0.5 + h.g);
+
+    // Noise detection: deviation of the centre luma from the ring average,
+    // relative to the local luma range.
+    float nz = 0.25 * bL + 0.25 * dL + 0.25 * fL + 0.25 * hL - eL;
+    float range = AMax3F1(AMax3F1(bL, dL, eL), fL, hL) - AMin3F1(AMin3F1(bL, dL, eL), fL, hL);
+    // Exact division with a floor on the range keeps flat areas (range == 0) finite.
+    nz = clamp(abs(nz) / max(range, 1e-5), 0.0, 1.0);
+    // Map to an attenuation factor in [0.5, 1.0]: 1.0 = no noise, 0.5 = strongest noise.
+    nz = -0.5 * nz + 1.0;
+
+    // Per-channel min and max of the ring (b, d, f, h), and the same including the centre.
+    vec3 mn4 = min(min(b, d), min(f, h));
+    vec3 mx4 = max(max(b, d), max(f, h));
+    vec3 lo = min(mn4, e);
+    vec3 hi = max(mx4, e);
+
+    // Limiters - these need HIGH PRECISION reciprocals (per AMD docs).
+    // hitMin is >= 0 and hitMax is <= 0; the lobe for a channel is the less
+    // negative of -hitMin and hitMax.
+    float hitMinR = lo.r * ARcpF1(4.0 * mx4.r);
+    float hitMinG = lo.g * ARcpF1(4.0 * mx4.g);
+    float hitMinB = lo.b * ARcpF1(4.0 * mx4.b);
+    // hitMax is written as (max - 1) / (4 - 4*min) instead of
+    // (1 - max) / (4*min - 4). The value is identical, but the denominator is
+    // non-negative, so a positive epsilon floor inside ARcpF1 cannot flip its
+    // sign. With the negative-denominator form and such a floor, hitMax came
+    // out >= 0 for every pixel, which clamped the lobe to 0 and disabled
+    // sharpening entirely.
+    float hitMaxR = (hi.r - 1.0) * ARcpF1(4.0 - 4.0 * mn4.r);
+    float hitMaxG = (hi.g - 1.0) * ARcpF1(4.0 - 4.0 * mn4.g);
+    float hitMaxB = (hi.b - 1.0) * ARcpF1(4.0 - 4.0 * mn4.b);
+    float lobeR = max(-hitMinR, hitMaxR);
+    float lobeG = max(-hitMinG, hitMaxG);
+    float lobeB = max(-hitMinB, hitMaxB);
+
+    // Apply sharpness (convert from stops to linear) to the most restrictive
+    // channel lobe, bounded to [-FSR_RCAS_LIMIT, 0].
+    float sharpness = exp2(-SHARPNESS_STOPS);
+    float lobe = max(-FSR_RCAS_LIMIT, min(AMax3F1(lobeR, lobeG, lobeB), 0.0)) * sharpness;
+
+    // Apply noise removal
+    lobe *= nz;
+
+    // Resolve with an exact reciprocal. For SHARPNESS_STOPS >= 0 the
+    // denominator lies in [0.25, 1.0]; the floor guards anything outside that.
+    float denom = 4.0 * lobe + 1.0;
+    float rcpL = 1.0 / max(denom, 0.25);
+    vec3 pix = (lobe * b + lobe * d + lobe * h + lobe * f + e) * rcpL;
+
+    // Ensure output is clamped to valid range
+    fragColor = vec4(clamp(pix, 0.0, 1.0), ee.a);
+}