facett-core 0.1.10

facett — visual kernel: render a node/edge Scene into egui (wgpu fast path to come)
Documentation
//! **Reusable separable Gaussian blur pass** (feature `wgpu`) — the GPU substrate
//! for the frosted-glass backdrop blur (`SurfaceSpec::Frosted`, roadmap T2.3) and
//! any post effect that wants a cheap, large-radius blur of an offscreen texture.
//!
//! A 2D Gaussian is **separable** into a horizontal then a vertical 1D pass, so the
//! kernel costs O(2·taps) instead of O(taps²). [`GaussianBlur`] owns the pipeline +
//! a half-(or full-)res **ping-pong** pair of textures (wgpu **panics on read==write
//! of one texture**, so the H pass writes A and the V pass reads A and writes B) +
//! the two per-axis uniforms. The 9-tap kernel weights are computed on the CPU from a σ
//! ([`gaussian_taps`]) so the caller picks the radius; [`blur`](GaussianBlur::blur)
//! records both passes into a caller-supplied encoder and returns the blurred view.
//!
//! Not yet wired into `overlay.rs` (Tier-1 owns that file); this lands the pipeline +
//! the CPU-side kernel math (unit-tested without a device) as the substrate.

use wgpu::TextureFormat;

/// The reusable blur shader (separable Gaussian; `blur_vs`/`blur_fs`). The fragment
/// reads the per-axis uniform for the texel step + the CPU-computed weights.
///
/// The WGSL text is embedded at compile time from `blur.wgsl`, resolved relative to
/// this source file, so the shader ships inside the binary.
pub const BLUR_WGSL: &str = include_str!("blur.wgsl");

/// Build the normalised **9-tap** symmetric Gaussian kernel for the standard
/// deviation `sigma` (in texels). Only one half is stored: the result is
/// `[w0, w1, w2, w3, w4]`, the weights at offsets `0, ±1, ±2, ±3, ±4`.
///
/// The weights are scaled so the mirrored kernel sums to 1
/// (`w0 + 2·(w1+w2+w3+w4) == 1`); a flat input therefore keeps its brightness.
/// `sigma` is floored at `1e-3`, so zero or negative values collapse the kernel
/// onto the centre tap. Pure math: unit-tested with no GPU device.
#[must_use]
pub fn gaussian_taps(sigma: f32) -> [f32; 5] {
    let sigma = sigma.max(1e-3);
    // Unnormalised half-kernel: exp(-x² / 2σ²) sampled at x = 0..=4.
    let raw: [f32; 5] = std::array::from_fn(|offset| {
        let x = offset as f32;
        (-(x * x) / (2.0 * sigma * sigma)).exp()
    });
    // Offset 0 occurs once in the full kernel; every other offset occurs twice (±x).
    let total = raw
        .iter()
        .enumerate()
        .fold(0.0f32, |acc, (offset, &g)| acc + if offset == 0 { g } else { 2.0 * g });
    let scale = 1.0 / total;
    raw.map(|g| g * scale)
}

/// Per-axis blur uniform — mirrors `BlurUniforms` in `blur.wgsl`.
///
/// `#[repr(C)]` + `bytemuck::Pod` so a value can be uploaded byte-for-byte with
/// `bytemuck::bytes_of`: three `[f32; 4]` fields (48 bytes) with no implicit padding.
/// Field order and sizes must stay in step with the WGSL declaration.
#[repr(C)]
#[derive(Clone, Copy, bytemuck::Pod, bytemuck::Zeroable)]
struct BlurUniforms {
    /// `texel.xy` = `1/w, 1/h` of the working size passed to `ensure`;
    /// `texel.zw` = blur axis in texels (`(1, 0)` for the H pass, `(0, 1)` for V).
    texel: [f32; 4],
    /// Gaussian weights for offsets 0..=3 (`w0..w3` from [`gaussian_taps`]).
    w0123: [f32; 4],
    /// Weight for offset 4 in `.x`; `.yzw` are zero padding.
    w4: [f32; 4],
}

/// A reusable separable-Gaussian blur over an offscreen colour texture of `format`.
/// Construct once; call [`ensure`](Self::ensure) for the working size, then
/// [`blur`](Self::blur) each frame.
pub struct GaussianBlur {
    /// Render pipeline running `blur_vs`/`blur_fs` into a single `format` target.
    pipeline: wgpu::RenderPipeline,
    /// Bind-group layout: 0 = uniform buffer, 1 = source texture, 2 = sampler.
    bgl: wgpu::BindGroupLayout,
    /// Linear-filtering, clamp-to-edge sampler shared by both passes.
    sampler: wgpu::Sampler,
    /// Uniform buffer for the horizontal pass (axis `(1, 0)`).
    u_h: wgpu::Buffer,
    /// Uniform buffer for the vertical pass (axis `(0, 1)`).
    u_v: wgpu::Buffer,
    /// Texture format of the pipeline target and of the ping-pong textures.
    format: TextureFormat,

    /// Ping-pong target A: written by the H pass, read by the V pass. `None` until `ensure`.
    tex_a: Option<wgpu::TextureView>,
    /// Ping-pong target B: written by the V pass; the view `blur` returns. `None` until `ensure`.
    tex_b: Option<wgpu::TextureView>,
    /// Size of the ping-pong targets; `(0, 0)` until `ensure` has run.
    size: (u32, u32),
    /// The sigma whose kernel was last uploaded by `ensure`.
    sigma: f32,
}

impl GaussianBlur {
    /// Create the blur pipeline, its bind-group layout, a linear clamp-to-edge sampler
    /// and the two per-axis uniform buffers for colour targets of `format` (e.g. the
    /// offscreen `OFFSCREEN_FORMAT`). The ping-pong targets stay unallocated (and
    /// [`size`](Self::size) reports `(0, 0)`) until [`ensure`](Self::ensure) is called.
    pub fn new(device: &wgpu::Device, format: TextureFormat) -> Self {
        let module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("l0_gaussian_blur"),
            source: wgpu::ShaderSource::Wgsl(BLUR_WGSL.into()),
        });

        // Every binding is consumed by the fragment stage only.
        let fragment_entry = |binding: u32, ty: wgpu::BindingType| wgpu::BindGroupLayoutEntry {
            binding,
            visibility: wgpu::ShaderStages::FRAGMENT,
            ty,
            count: None,
        };
        let layout_entries = [
            // 0: the per-axis `BlurUniforms`.
            fragment_entry(
                0,
                wgpu::BindingType::Buffer {
                    ty: wgpu::BufferBindingType::Uniform,
                    has_dynamic_offset: false,
                    min_binding_size: None,
                },
            ),
            // 1: the texture being blurred.
            fragment_entry(
                1,
                wgpu::BindingType::Texture {
                    sample_type: wgpu::TextureSampleType::Float { filterable: true },
                    view_dimension: wgpu::TextureViewDimension::D2,
                    multisampled: false,
                },
            ),
            // 2: the filtering sampler used to read it.
            fragment_entry(2, wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering)),
        ];
        let bind_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
            label: Some("l0_blur_bgl"),
            entries: &layout_entries,
        });

        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
            label: Some("l0_blur_pll"),
            bind_group_layouts: &[Some(&bind_layout)],
            immediate_size: 0,
        });
        // Single colour target, no blending: each pass overwrites its destination.
        let colour_targets = [Some(wgpu::ColorTargetState {
            format,
            blend: None,
            write_mask: wgpu::ColorWrites::ALL,
        })];
        let pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
            label: Some("l0_blur_pipeline"),
            layout: Some(&pipeline_layout),
            vertex: wgpu::VertexState {
                module: &module,
                entry_point: Some("blur_vs"),
                compilation_options: Default::default(),
                // No vertex buffers are bound; `pass` draws three vertices with none.
                buffers: &[],
            },
            primitive: wgpu::PrimitiveState {
                topology: wgpu::PrimitiveTopology::TriangleList,
                ..Default::default()
            },
            depth_stencil: None,
            multisample: wgpu::MultisampleState::default(),
            fragment: Some(wgpu::FragmentState {
                module: &module,
                entry_point: Some("blur_fs"),
                compilation_options: Default::default(),
                targets: &colour_targets,
            }),
            multiview_mask: None,
            cache: None,
        });

        let sampler = device.create_sampler(&wgpu::SamplerDescriptor {
            label: Some("l0_blur_sampler"),
            mag_filter: wgpu::FilterMode::Linear,
            min_filter: wgpu::FilterMode::Linear,
            address_mode_u: wgpu::AddressMode::ClampToEdge,
            address_mode_v: wgpu::AddressMode::ClampToEdge,
            ..Default::default()
        });

        // One uniform buffer per axis, each sized for a single `BlurUniforms`;
        // `COPY_DST` lets `ensure` fill them through `Queue::write_buffer`.
        let uniform_bytes = std::mem::size_of::<BlurUniforms>() as u64;
        let [u_h, u_v] = ["l0_blur_u_h", "l0_blur_u_v"].map(|label| {
            device.create_buffer(&wgpu::BufferDescriptor {
                label: Some(label),
                size: uniform_bytes,
                usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
                mapped_at_creation: false,
            })
        });

        Self {
            pipeline,
            bgl: bind_layout,
            sampler,
            u_h,
            u_v,
            format,
            tex_a: None,
            tex_b: None,
            size: (0, 0),
            sigma: 0.0,
        }
    }

    /// Make the ping-pong targets match `w×h` (each dimension clamped to at least 1)
    /// and upload the per-axis kernel for `sigma`.
    ///
    /// Does nothing when the targets already exist at that size and `sigma` is within
    /// `1e-6` of the value last uploaded. The textures are recreated only when the
    /// size changed (or they were never allocated); the uniforms are rewritten
    /// whenever the call is not a no-op.
    pub fn ensure(&mut self, device: &wgpu::Device, queue: &wgpu::Queue, w: u32, h: u32, sigma: f32) {
        let (width, height) = (w.max(1), h.max(1));
        let targets_current = self.tex_a.is_some() && self.size == (width, height);
        if targets_current && (self.sigma - sigma).abs() < 1e-6 {
            return;
        }

        if !targets_current {
            let format = self.format;
            let [ping, pong] = ["l0_blur_tex_a", "l0_blur_tex_b"].map(|label| {
                let texture = device.create_texture(&wgpu::TextureDescriptor {
                    label: Some(label),
                    size: wgpu::Extent3d { width, height, depth_or_array_layers: 1 },
                    mip_level_count: 1,
                    sample_count: 1,
                    dimension: wgpu::TextureDimension::D2,
                    format,
                    // Each target is rendered into by one pass and sampled afterwards.
                    usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::TEXTURE_BINDING,
                    view_formats: &[],
                });
                texture.create_view(&wgpu::TextureViewDescriptor::default())
            });
            self.tex_a = Some(ping);
            self.tex_b = Some(pong);
            self.size = (width, height);
        }

        // Both axes share the weights and the texel size; only the step direction differs.
        let [k0, k1, k2, k3, k4] = gaussian_taps(sigma);
        let inv_w = 1.0 / width as f32;
        let inv_h = 1.0 / height as f32;
        let upload = |buffer: &wgpu::Buffer, axis: [f32; 2]| {
            let uniforms = BlurUniforms {
                texel: [inv_w, inv_h, axis[0], axis[1]],
                w0123: [k0, k1, k2, k3],
                w4: [k4, 0.0, 0.0, 0.0],
            };
            queue.write_buffer(buffer, 0, bytemuck::bytes_of(&uniforms));
        };
        upload(&self.u_h, [1.0, 0.0]);
        upload(&self.u_v, [0.0, 1.0]);
        self.sigma = sigma;
    }

    /// Record the two blur passes for `src` (must be `format`-compatible) into
    /// `encoder` and return the view holding the result.
    ///
    /// The horizontal pass renders `src` into **A**, the vertical pass renders A into
    /// **B**, and B is what is returned. Returns `None` — recording nothing — when the
    /// ping-pong targets have not been allocated yet, i.e. before
    /// [`ensure`](Self::ensure).
    pub fn blur<'a>(
        &'a self,
        device: &wgpu::Device,
        encoder: &mut wgpu::CommandEncoder,
        src: &wgpu::TextureView,
    ) -> Option<&'a wgpu::TextureView> {
        let ping = self.tex_a.as_ref()?;
        let pong = self.tex_b.as_ref()?;
        // (label, uniform, source, destination) — H first, then V.
        let stages = [
            ("l0_blur_h", &self.u_h, src, ping),
            ("l0_blur_v", &self.u_v, ping, pong),
        ];
        for (label, uniform, from, into) in stages {
            self.pass(device, encoder, label, uniform, from, into);
        }
        Some(pong)
    }

    /// Record one 1D blur pass into `encoder`: sample `src` with the kernel in
    /// `uniform` and render the result over `dst`. `label` names both the bind group
    /// and the render pass. A fresh bind group is created on every call.
    fn pass(
        &self,
        device: &wgpu::Device,
        encoder: &mut wgpu::CommandEncoder,
        label: &str,
        uniform: &wgpu::Buffer,
        src: &wgpu::TextureView,
        dst: &wgpu::TextureView,
    ) {
        let entries = [
            wgpu::BindGroupEntry { binding: 0, resource: uniform.as_entire_binding() },
            wgpu::BindGroupEntry { binding: 1, resource: wgpu::BindingResource::TextureView(src) },
            wgpu::BindGroupEntry { binding: 2, resource: wgpu::BindingResource::Sampler(&self.sampler) },
        ];
        let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some(label),
            layout: &self.bgl,
            entries: &entries,
        });

        // The destination is cleared to transparent before the draw and stored after it.
        let target = wgpu::RenderPassColorAttachment {
            view: dst,
            resolve_target: None,
            depth_slice: None,
            ops: wgpu::Operations {
                load: wgpu::LoadOp::Clear(wgpu::Color::TRANSPARENT),
                store: wgpu::StoreOp::Store,
            },
        };
        let attachments = [Some(target)];
        let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
            label: Some(label),
            color_attachments: &attachments,
            depth_stencil_attachment: None,
            timestamp_writes: None,
            occlusion_query_set: None,
            multiview_mask: None,
        });
        render_pass.set_pipeline(&self.pipeline);
        render_pass.set_bind_group(0, &bind_group, &[]);
        // Three vertices, one instance, no vertex buffer bound.
        render_pass.draw(0..3, 0..1);
    }

    /// The current working size `(width, height)` in physical pixels — the size of the
    /// ping-pong targets last set by [`ensure`](Self::ensure), or `(0, 0)` before it.
    #[must_use]
    pub fn size(&self) -> (u32, u32) {
        let (width, height) = self.size;
        (width, height)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// INJECT-ASSERT: the 9-tap Gaussian kernel is **normalised** — the centre tap plus
    /// twice the four mirrored taps sums to 1 for a spread of sigmas, so a flat input
    /// is preserved (no brightness drift through the blur). The single load-bearing
    /// CPU invariant, checked without a GPU.
    #[test]
    fn gaussian_kernel_is_normalised() {
        for sigma in [0.5f32, 1.0, 2.0, 4.0, 8.0] {
            let [centre, r1, r2, r3, r4] = gaussian_taps(sigma);
            let sum = centre + 2.0 * (r1 + r2 + r3 + r4);
            assert!((sum - 1.0).abs() < 1e-5, "sigma={sigma}: kernel sums to {sum}, not 1");
        }
    }

    /// INJECT-ASSERT: each tap is no larger than the one before it (a real Gaussian
    /// falls off from the centre), and a wider sigma moves weight outwards: less in
    /// the centre tap, more in the outermost tap, than a tight one.
    #[test]
    fn gaussian_kernel_falls_off_and_widens_with_sigma() {
        let tight = gaussian_taps(0.8);
        let wide = gaussian_taps(4.0);
        // Walk adjacent tap pairs of both kernels together; `i` is the outer tap's index.
        let adjacent = tight.windows(2).zip(wide.windows(2));
        for (i, (t, w)) in (1..).zip(adjacent) {
            assert!(t[1] <= t[0], "tap {i} not falling off");
            assert!(w[1] <= w[0], "tap {i} not falling off");
        }
        let [tight_centre, .., tight_edge] = tight;
        let [wide_centre, .., wide_edge] = wide;
        assert!(wide_centre < tight_centre, "wider sigma keeps less weight in the centre");
        assert!(wide_edge > tight_edge, "wider sigma spreads more weight to the edge tap");
    }

    /// INJECT-ASSERT: the embedded blur shader text contains the two entry points the
    /// pipeline names and the `texel.zw` axis read (a cheap, GPU-free tripwire on the
    /// WGSL source — a substring check, not a shader compile).
    #[test]
    fn blur_shader_has_separable_entry_points() {
        let exposes = |needle: &str| BLUR_WGSL.contains(needle);
        assert!(exposes("fn blur_vs"));
        assert!(exposes("fn blur_fs"));
        assert!(exposes("texel.zw"), "the per-axis step rides texel.zw");
    }

    /// Request a surface-less wgpu adapter and a device with downlevel default limits.
    /// Returns `None` when either request fails (e.g. CI without a GPU) so the smoke
    /// test can skip itself.
    fn headless_device() -> Option<(wgpu::Device, wgpu::Queue)> {
        let adapter_options = wgpu::RequestAdapterOptions {
            power_preference: wgpu::PowerPreference::default(),
            force_fallback_adapter: false,
            compatible_surface: None,
        };
        let instance = wgpu::Instance::default();
        let adapter = pollster::block_on(instance.request_adapter(&adapter_options)).ok()?;

        let device_descriptor = wgpu::DeviceDescriptor {
            label: Some("l0-blur-smoke"),
            required_features: wgpu::Features::empty(),
            required_limits: wgpu::Limits::downlevel_defaults(),
            memory_hints: wgpu::MemoryHints::default(),
            experimental_features: wgpu::ExperimentalFeatures::disabled(),
            trace: wgpu::Trace::Off,
        };
        pollster::block_on(adapter.request_device(&device_descriptor)).ok()
    }

    /// GPU SMOKE (the "compile test" for the substrate): on a real device, building
    /// the pipeline VALIDATES `blur.wgsl` (a bad shader panics in `new`), and
    /// recording + submitting the H/V passes validates the bind groups + attachments.
    /// Skips itself when `headless_device` finds no adapter. Proves the separable blur
    /// substrate is drivable, not just that the Rust types compile.
    #[test]
    fn gaussian_blur_builds_pipeline_and_records_passes_on_device() {
        let (device, queue) = match headless_device() {
            Some(pair) => pair,
            None => {
                eprintln!("[blur] no GPU adapter — skipping device smoke test");
                return;
            }
        };
        let format = TextureFormat::Rgba16Float; // the HDR offscreen format it blurs
        let (width, height) = (64u32, 48u32);

        // A source texture to blur (zero-initialised by wgpu on first sample).
        let src_texture = device.create_texture(&wgpu::TextureDescriptor {
            label: Some("l0-blur-smoke-src"),
            size: wgpu::Extent3d { width, height, depth_or_array_layers: 1 },
            mip_level_count: 1,
            sample_count: 1,
            dimension: wgpu::TextureDimension::D2,
            format,
            usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::RENDER_ATTACHMENT,
            view_formats: &[],
        });
        let src_view = src_texture.create_view(&wgpu::TextureViewDescriptor::default());

        // Building the pipeline here is the WGSL validation (panics on a bad shader).
        let mut blur = GaussianBlur::new(&device, format);
        assert_eq!(blur.size(), (0, 0), "no targets before ensure");
        blur.ensure(&device, &queue, width, height, 3.0);
        assert_eq!(blur.size(), (width, height));

        let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
            label: Some("l0-blur-smoke-enc"),
        });
        let blurred = blur.blur(&device, &mut encoder, &src_view);
        assert!(blurred.is_some(), "blur returns the result view after ensure");
        // Submission validates the recorded passes; then wait for the GPU to finish.
        queue.submit(Some(encoder.finish()));
        device.poll(wgpu::PollType::wait_indefinitely()).ok();
    }
}