Skip to main content

proof_engine/render/postfx/
bloom.rs

1//! Bloom post-processing pass.
2//!
3//! Implements a multi-level Gaussian bloom using a luminance threshold extract
4//! followed by a separable ping-pong blur pyramid. The result is additively
5//! blended onto the scene framebuffer.
6//!
7//! ## Pipeline
8//! ```text
9//! scene_color + scene_emission
10//!   ── extract bright pixels ──▶ bright_fbo
11//!   ── H blur (radius 1) ──────▶ blur_h[0]
12//!   ── V blur (radius 1) ──────▶ blur_v[0]
13//!   ── H blur (radius 2) ──────▶ blur_h[1]   ← pyramid level 1
14//!   ── V blur (radius 2) ──────▶ blur_v[1]
15//!   ─── additive composite ────▶ output
16//! ```
17//!
//! Each pyramid level is half the resolution of the previous one (level 0 is half the base resolution), giving a wider, softer halo.
19
20use std::f32;
21
22// ── Bloom parameters ──────────────────────────────────────────────────────────
23
/// Configuration for the bloom pass.
///
/// Fields are public and unchecked; call [`BloomParams::validate`] after
/// mutating them to bring every value back into its supported range.
#[derive(Clone, Debug)]
pub struct BloomParams {
    /// Enable or disable bloom entirely.
    pub enabled: bool,
    /// Minimum luminance to include in bloom (0=all, 1=only bright pixels).
    pub threshold: f32,
    /// Additive blend weight of the bloom result.
    pub intensity: f32,
    /// Blur kernel radius in pixels (higher = softer/larger).
    pub radius: f32,
    /// Number of pyramid levels (1=single, 3=multi-scale wide bloom).
    pub levels: u8,
    /// Knee: soft threshold falloff width (higher = smoother cutoff).
    pub knee: f32,
    /// Whether to use emission texture as additional bloom input.
    pub use_emission: bool,
    /// Contribution weight of the emission texture in bloom.
    pub emission_weight: f32,
}

impl Default for BloomParams {
    /// Medium-strength, three-level bloom with emission enabled.
    fn default() -> Self {
        Self {
            enabled: true,
            threshold: 0.5,
            intensity: 1.0,
            radius: 4.0,
            levels: 3,
            knee: 0.1,
            use_emission: true,
            emission_weight: 1.5,
        }
    }
}

impl BloomParams {
    /// Default parameters with the pass switched off.
    pub fn disabled() -> Self {
        Self { enabled: false, ..Self::default() }
    }

    /// Preset: high threshold, low intensity, small radius, two levels.
    pub fn subtle() -> Self {
        Self { threshold: 0.7, intensity: 0.4, radius: 2.0, levels: 2, ..Self::default() }
    }

    /// Preset: low threshold, high intensity, large radius, four levels.
    pub fn intense() -> Self {
        Self { threshold: 0.3, intensity: 2.5, radius: 8.0, levels: 4, ..Self::default() }
    }

    /// Preset with a tight knee and a boosted emission weight.
    pub fn retro_crt() -> Self {
        Self {
            threshold: 0.6,
            intensity: 1.2,
            radius: 3.0,
            levels: 3,
            knee: 0.05,
            emission_weight: 2.0,
            ..Self::default()
        }
    }

    /// Validate and clamp parameters to safe ranges.
    ///
    /// Ranges: `threshold` 0..=1, `intensity` 0..=10, `radius` 0.5..=32,
    /// `levels` 1..=6, `knee` 0..=0.5, `emission_weight` 0..=5.
    ///
    /// `f32::clamp` returns NaN for a NaN input, so a NaN field is replaced by
    /// its [`Default`] value instead of being left to poison later arithmetic.
    pub fn validate(&mut self) {
        let defaults = Self::default();
        self.threshold = Self::clamp_or(self.threshold, defaults.threshold, 0.0, 1.0);
        self.intensity = Self::clamp_or(self.intensity, defaults.intensity, 0.0, 10.0);
        self.radius = Self::clamp_or(self.radius, defaults.radius, 0.5, 32.0);
        self.levels = self.levels.clamp(1, 6);
        self.knee = Self::clamp_or(self.knee, defaults.knee, 0.0, 0.5);
        self.emission_weight =
            Self::clamp_or(self.emission_weight, defaults.emission_weight, 0.0, 5.0);
    }

    /// Clamp `value` into `min..=max`, substituting `fallback` when it is NaN.
    fn clamp_or(value: f32, fallback: f32, min: f32, max: f32) -> f32 {
        if value.is_nan() {
            fallback
        } else {
            value.clamp(min, max)
        }
    }
}
95
96// ── Gaussian kernel ───────────────────────────────────────────────────────────
97
/// Compute a normalised 1D Gaussian kernel.
///
/// * `sigma` — standard deviation of the Gaussian, measured in taps.
/// * `size`  — kernel radius; the result holds `2 * size + 1` weights with
///   the centre tap at index `size`.
///
/// The returned weights sum to 1. When `sigma` is zero or NaN the Gaussian is
/// undefined (the exponent becomes 0/0), so a unit impulse is returned instead:
/// the centre weight is 1 and every other weight is 0.
pub fn gaussian_kernel(sigma: f32, size: usize) -> Vec<f32> {
    let len = 2 * size + 1;
    let two_sigma_sq = 2.0 * sigma * sigma;

    // Guard the division below: a zero or NaN denominator would yield NaN weights.
    if two_sigma_sq.is_nan() || two_sigma_sq <= 0.0 {
        let mut impulse = vec![0.0; len];
        impulse[size] = 1.0;
        return impulse;
    }

    let mut weights: Vec<f32> = (0..len)
        .map(|i| {
            let x = i as f32 - size as f32;
            (-x * x / two_sigma_sq).exp()
        })
        .collect();

    // The centre tap is always exp(0) = 1, so the sum is never zero.
    let sum: f32 = weights.iter().sum();
    weights.iter_mut().for_each(|w| *w /= sum);
    weights
}
111
112/// Separable Gaussian weights optimised for bilinear texture fetches.
113/// Returns `(offsets, weights)` for a half-kernel (center + positive taps).
114/// Linear sampling combines two adjacent texels, halving the tap count.
115pub fn linear_gaussian_kernel(sigma: f32, taps: usize) -> (Vec<f32>, Vec<f32>) {
116    let full = gaussian_kernel(sigma, taps);
117    let half = taps + 1; // center + positive side
118
119    let mut offsets = Vec::with_capacity(half);
120    let mut weights = Vec::with_capacity(half);
121
122    // Center tap
123    offsets.push(0.0);
124    weights.push(full[taps]);
125
126    // Bilinear taps: each combines tap[k] and tap[k+1]
127    let mut k = taps + 1;
128    while k < full.len() - 1 {
129        let w0 = full[k];
130        let w1 = full[k + 1];
131        let w  = w0 + w1;
132        let o  = (k as f32 - taps as f32) + w1 / w;
133        offsets.push(o);
134        weights.push(w);
135        k += 2;
136    }
137    if k < full.len() {
138        offsets.push((k - taps) as f32);
139        weights.push(full[k]);
140    }
141
142    (offsets, weights)
143}
144
145// ── Luminance utilities ───────────────────────────────────────────────────────
146
/// Red-channel weight of the ITU-R BT.709 luminance formula.
const LUM_R: f32 = 0.2126;
/// Green-channel weight of the ITU-R BT.709 luminance formula.
const LUM_G: f32 = 0.7152;
/// Blue-channel weight of the ITU-R BT.709 luminance formula.
const LUM_B: f32 = 0.0722;

/// Weighted sum of the three colour channels using the BT.709 coefficients.
///
/// The inputs are used as given; no clamping or gamma conversion is applied.
#[inline]
pub fn luminance(r: f32, g: f32, b: f32) -> f32 {
    let red_part = LUM_R * r;
    let green_part = LUM_G * g;
    let blue_part = LUM_B * b;
    red_part + green_part + blue_part
}
157
/// Map a luminance to a bloom weight in `0..=1` using a knee around `threshold`.
///
/// * At or below `threshold - knee` the weight is 0.
/// * At or above `threshold + knee` the weight is 1.
/// * In between, a smoothstep curve is used.
///
/// A `knee` smaller than `1e-5` selects a hard step: 1 when `lum > threshold`,
/// otherwise 0.
pub fn soft_threshold(lum: f32, threshold: f32, knee: f32) -> f32 {
    if knee < 1e-5 {
        // No usable transition band: behave as a plain comparison.
        if lum > threshold { 1.0 } else { 0.0 }
    } else {
        let lower = threshold - knee;
        let upper = threshold + knee;
        if lum <= lower {
            0.0
        } else if lum >= upper {
            1.0
        } else {
            // Position inside the band, 0 at `lower` and 1 at `upper`.
            let x = (lum - lower) / (2.0 * knee);
            x * x * (3.0 - 2.0 * x)
        }
    }
}
171
172/// Extract the bloom contribution from a pixel with a soft threshold.
173/// Returns `(r, g, b)` with the threshold applied.
174pub fn extract_bloom_pixel(r: f32, g: f32, b: f32, threshold: f32, knee: f32) -> (f32, f32, f32) {
175    let lum    = luminance(r, g, b);
176    let weight = soft_threshold(lum, threshold, knee);
177    (r * weight, g * weight, b * weight)
178}
179
180// ── Pyramid level descriptor ──────────────────────────────────────────────────
181
/// One entry of the bloom blur pyramid: its render-target size plus the blur
/// strength and blend weight used for that level.
#[derive(Debug, Clone)]
pub struct BloomPyramidLevel {
    /// Horizontal size of the level, in pixels.
    pub width: u32,
    /// Vertical size of the level, in pixels.
    pub height: u32,
    /// Standard deviation of the Gaussian blur applied at this level.
    pub sigma: f32,
    /// How strongly this level contributes when the levels are composited.
    pub weight: f32,
}
194
195/// Compute the pyramid levels for a given base resolution and params.
196pub fn compute_pyramid(
197    base_width:  u32,
198    base_height: u32,
199    params:      &BloomParams,
200) -> Vec<BloomPyramidLevel> {
201    let n = params.levels as usize;
202    let mut levels = Vec::with_capacity(n);
203
204    for i in 0..n {
205        let scale  = 1u32 << (i + 1); // level 0 = half-res, level 1 = quarter-res ...
206        let w      = (base_width  / scale).max(1);
207        let h      = (base_height / scale).max(1);
208        let sigma  = params.radius * (i as f32 * 0.5 + 1.0);
209        // Higher pyramid levels contribute less (exponential decay)
210        let weight = 1.0 / (i as f32 + 1.0);
211        levels.push(BloomPyramidLevel { width: w, height: h, sigma, weight });
212    }
213    levels
214}
215
216/// Normalise pyramid weights so they sum to 1.
217pub fn normalise_pyramid_weights(levels: &mut [BloomPyramidLevel]) {
218    let total: f32 = levels.iter().map(|l| l.weight).sum();
219    if total > 0.0 {
220        for l in levels.iter_mut() { l.weight /= total; }
221    }
222}
223
224// ── GLSL shader source fragments ──────────────────────────────────────────────
225
/// GLSL 3.30 fragment shader source for the bright-extract pass.
///
/// Per fragment, the shader adds the emission sample (scaled by
/// `u_emission_weight`) to the scene colour, takes the BT.709 luminance of the
/// sum, and multiplies the sum by a smoothstep soft-threshold weight. Output
/// alpha is always 1.0.
///
/// Inputs:
///   `v_uv`:        vec2 varying — texture coordinate used for both samplers
///
/// Uniforms:
///   `u_scene`:     sampler2D — full scene color
///   `u_emission`:  sampler2D — emission texture; it is sampled unconditionally,
///                  so set `u_emission_weight` to 0 to leave it out
///   `u_threshold`: float     — centre of the luminance threshold
///   `u_knee`:      float     — half-width of the soft transition band
///   `u_emission_weight`: float — multiplier applied to the emission sample
///
/// Unlike the CPU `soft_threshold`, the shader has no hard-step branch; it adds
/// `0.0001` to the divisor instead so a zero knee cannot divide by zero.
pub const EXTRACT_FRAG: &str = r#"
#version 330 core

in  vec2 v_uv;
out vec4 frag_color;

uniform sampler2D u_scene;
uniform sampler2D u_emission;
uniform float     u_threshold;
uniform float     u_knee;
uniform float     u_emission_weight;

const vec3 LUMA = vec3(0.2126, 0.7152, 0.0722);

float soft_threshold(float lum) {
    float lo = u_threshold - u_knee;
    float hi = u_threshold + u_knee;
    if (lum <= lo) return 0.0;
    if (lum >= hi) return 1.0;
    float t = (lum - lo) / (2.0 * u_knee + 0.0001);
    return t * t * (3.0 - 2.0 * t);
}

void main() {
    vec3 scene = texture(u_scene, v_uv).rgb;
    vec3 emiss = texture(u_emission, v_uv).rgb * u_emission_weight;
    vec3 combined = scene + emiss;

    float lum    = dot(combined, LUMA);
    float weight = soft_threshold(lum);

    frag_color = vec4(combined * weight, 1.0);
}
"#;
267
/// GLSL 3.30 fragment shader source for the separable Gaussian blur pass.
///
/// Run it once with `u_direction = (1,0)` and once with `(0,1)` to get a 2D
/// blur. The kernel is a fixed table: one centre fetch plus four bilinear
/// fetches on each side (nine texture reads per fragment).
///
/// Uniforms:
///   `u_texture`:    sampler2D — input texture
///   `u_texel_size`: vec2      — 1/resolution
///   `u_direction`:  vec2      — (1,0) for H, (0,1) for V
///   `u_sigma`:      float     — Gaussian sigma in pixels; the tap spacing is
///                   stretched by `u_sigma / 1.5` while the weights stay fixed
///
/// The weight table does not sum to exactly 1 (centre + 2 × side weights is
/// about 1.043), so the shader divides by the accumulated weight. Without that
/// every pass would brighten the image by roughly 4%.
///
/// The shader text is kept pure ASCII: GLSL 3.30 does not guarantee that
/// non-ASCII characters are accepted, even inside comments.
pub const BLUR_FRAG: &str = r#"
#version 330 core

in  vec2 v_uv;
out vec4 frag_color;

uniform sampler2D u_texture;
uniform vec2      u_texel_size;
uniform vec2      u_direction;
uniform float     u_sigma;

// Fixed kernel: centre tap + 4 bilinear taps per side (9 fetches), tuned for sigma ~ 1.5.
// For variable sigma, you'd compute these on the CPU and upload as uniforms.
const int  N_TAPS    = 5;
const float OFFSETS[5] = float[](0.0, 1.3846153846, 3.2307692308, 5.0769230769, 6.9230769231);
const float WEIGHTS[5] = float[](0.2270270270, 0.3162162162, 0.0702702703, 0.0162162162, 0.0054054054);

void main() {
    vec4  result = texture(u_texture, v_uv) * WEIGHTS[0];
    float total  = WEIGHTS[0];
    for (int i = 1; i < N_TAPS; ++i) {
        vec2 off = u_direction * u_texel_size * OFFSETS[i] * (u_sigma / 1.5);
        result += texture(u_texture, v_uv + off) * WEIGHTS[i];
        result += texture(u_texture, v_uv - off) * WEIGHTS[i];
        total  += 2.0 * WEIGHTS[i];
    }
    // Renormalise so the blur neither brightens nor darkens the image.
    frag_color = result / total;
}
"#;
301
/// GLSL 3.30 fragment shader source for the bloom composite pass.
///
/// Adds the blurred bloom colour, scaled by `u_intensity`, to the scene colour
/// and writes the sum with alpha 1.0. The result is not clamped or tone-mapped
/// by this shader.
///
/// Inputs:
///   `v_uv`:        vec2 varying — texture coordinate used for both samplers
///
/// Uniforms:
///   `u_scene`:     sampler2D — original scene
///   `u_bloom`:     sampler2D — blurred bloom
///   `u_intensity`: float     — additive blend weight
///
/// NOTE(review): earlier documentation also listed `u_dirt` (lens dirt mask)
/// and `u_dirt_intensity`, but the shader source declares neither uniform, so
/// no lens-dirt effect is applied here.
pub const COMPOSITE_FRAG: &str = r#"
#version 330 core

in  vec2 v_uv;
out vec4 frag_color;

uniform sampler2D u_scene;
uniform sampler2D u_bloom;
uniform float     u_intensity;

void main() {
    vec3 scene = texture(u_scene, v_uv).rgb;
    vec3 bloom = texture(u_bloom, v_uv).rgb;
    // Additive bloom blend
    vec3 result = scene + bloom * u_intensity;
    frag_color = vec4(result, 1.0);
}
"#;
327
328// ── CPU-side bloom simulation (for testing / software path) ───────────────────
329
330/// Simulate one horizontal Gaussian blur pass on a flat `width × height` RGBA buffer.
331/// `buffer` is `RGBA` interleaved (stride = width * 4).
332pub fn cpu_blur_h(src: &[f32], dst: &mut [f32], width: usize, height: usize, sigma: f32) {
333    let (offsets, weights) = linear_gaussian_kernel(sigma, (sigma * 3.0) as usize + 1);
334    for y in 0..height {
335        for x in 0..width {
336            let mut r = 0.0f32;
337            let mut g = 0.0f32;
338            let mut b = 0.0f32;
339            let mut a = 0.0f32;
340            for (i, &w) in weights.iter().enumerate() {
341                let offset = offsets[i];
342                let xi = (x as f32 + offset).round() as isize;
343                let xi = xi.clamp(0, width as isize - 1) as usize;
344                let idx = (y * width + xi) * 4;
345                r += src[idx    ] * w;
346                g += src[idx + 1] * w;
347                b += src[idx + 2] * w;
348                a += src[idx + 3] * w;
349                if i > 0 {
350                    let xim = (x as f32 - offset).round() as isize;
351                    let xim = xim.clamp(0, width as isize - 1) as usize;
352                    let idxm = (y * width + xim) * 4;
353                    r += src[idxm    ] * w;
354                    g += src[idxm + 1] * w;
355                    b += src[idxm + 2] * w;
356                    a += src[idxm + 3] * w;
357                }
358            }
359            let out = (y * width + x) * 4;
360            dst[out    ] = r;
361            dst[out + 1] = g;
362            dst[out + 2] = b;
363            dst[out + 3] = a;
364        }
365    }
366}
367
368/// Simulate one vertical Gaussian blur pass on a flat RGBA buffer.
369pub fn cpu_blur_v(src: &[f32], dst: &mut [f32], width: usize, height: usize, sigma: f32) {
370    let (offsets, weights) = linear_gaussian_kernel(sigma, (sigma * 3.0) as usize + 1);
371    for y in 0..height {
372        for x in 0..width {
373            let mut r = 0.0f32;
374            let mut g = 0.0f32;
375            let mut b = 0.0f32;
376            let mut a = 0.0f32;
377            for (i, &w) in weights.iter().enumerate() {
378                let offset = offsets[i];
379                let yi  = (y as f32 + offset).round() as isize;
380                let yi  = yi.clamp(0, height as isize - 1) as usize;
381                let idx = (yi * width + x) * 4;
382                r += src[idx    ] * w;
383                g += src[idx + 1] * w;
384                b += src[idx + 2] * w;
385                a += src[idx + 3] * w;
386                if i > 0 {
387                    let yim = (y as f32 - offset).round() as isize;
388                    let yim = yim.clamp(0, height as isize - 1) as usize;
389                    let idxm = (yim * width + x) * 4;
390                    r += src[idxm    ] * w;
391                    g += src[idxm + 1] * w;
392                    b += src[idxm + 2] * w;
393                    a += src[idxm + 3] * w;
394                }
395            }
396            let out = (y * width + x) * 4;
397            dst[out    ] = r;
398            dst[out + 1] = g;
399            dst[out + 2] = b;
400            dst[out + 3] = a;
401        }
402    }
403}
404
405/// Full CPU bloom simulation (extract → H blur → V blur → composite).
406/// Returns a new RGBA buffer with bloom composited onto the input.
407pub fn cpu_bloom(
408    input:  &[f32],
409    width:  usize,
410    height: usize,
411    params: &BloomParams,
412) -> Vec<f32> {
413    let n = width * height * 4;
414    let mut extracted = vec![0.0f32; n];
415    let mut blurred   = vec![0.0f32; n];
416
417    // Extract bright pixels
418    for i in 0..(width * height) {
419        let base  = i * 4;
420        let (r, g, b) = extract_bloom_pixel(input[base], input[base + 1], input[base + 2],
421                                            params.threshold, params.knee);
422        extracted[base    ] = r;
423        extracted[base + 1] = g;
424        extracted[base + 2] = b;
425        extracted[base + 3] = input[base + 3];
426    }
427
428    // H blur
429    let mut tmp = vec![0.0f32; n];
430    cpu_blur_h(&extracted, &mut tmp, width, height, params.radius);
431    // V blur
432    cpu_blur_v(&tmp, &mut blurred, width, height, params.radius);
433
434    // Composite: input + bloom * intensity
435    let mut output = input.to_vec();
436    for i in 0..(width * height) {
437        let base = i * 4;
438        output[base    ] = (output[base    ] + blurred[base    ] * params.intensity).min(1.0);
439        output[base + 1] = (output[base + 1] + blurred[base + 1] * params.intensity).min(1.0);
440        output[base + 2] = (output[base + 2] + blurred[base + 2] * params.intensity).min(1.0);
441    }
442    output
443}
444
445// ── Tests ─────────────────────────────────────────────────────────────────────
446
#[cfg(test)]
mod tests {
    use super::*;

    /// A normalised kernel must add up to one.
    #[test]
    fn gaussian_kernel_sums_to_one() {
        let sum: f32 = gaussian_kernel(2.0, 4).into_iter().sum();
        assert!((sum - 1.0).abs() < 1e-5, "sum={sum}");
    }

    /// With no knee the threshold is a hard step.
    #[test]
    fn soft_threshold_at_zero_knee() {
        let below = soft_threshold(0.4, 0.5, 0.0);
        let above = soft_threshold(0.6, 0.5, 0.0);
        assert_eq!(below, 0.0);
        assert_eq!(above, 1.0);
    }

    /// Exactly at the threshold, a non-zero knee gives a partial weight.
    #[test]
    fn soft_threshold_smooth_at_knee() {
        let t = soft_threshold(0.5, 0.5, 0.1);
        assert!(t > 0.0 && t < 1.0, "expected soft transition, got {t}");
    }

    /// Level count follows the params and each level halves the width.
    #[test]
    fn pyramid_has_correct_level_count() {
        let params = BloomParams { levels: 3, ..Default::default() };
        let pyramid = compute_pyramid(1280, 720, &params);
        assert_eq!(pyramid.len(), 3);
        assert_eq!(pyramid[0].width, 640);
        assert_eq!(pyramid[1].width, 320);
    }

    /// The output buffer has the same number of floats as the input.
    #[test]
    fn cpu_bloom_preserves_size() {
        let (w, h) = (4usize, 4usize);
        let input = vec![0.5f32; w * h * 4];
        let output = cpu_bloom(&input, w, h, &BloomParams::default());
        assert_eq!(output.len(), input.len());
    }
}
486}