kael 0.2.0

GPU-accelerated native UI framework for Rust — build desktop apps with Metal, DirectX, and Vulkan rendering
Documentation
//! Headless off-screen rendering for benchmarks and golden-image tests.
//!
//! Drives the real rasterization pipeline without creating a window, so CI and
//! local tooling can measure and pixel-verify genuine rendering work instead of
//! simulated sleeps. On macOS builds without the `macos-blade` feature, when
//! Metal is available, the scene is rasterized on the GPU and read back; in
//! every other configuration the off-screen GPU path is not yet implemented and
//! the renderer still builds and batches the real scene on the CPU.

use anyhow::Result;

#[cfg(all(target_os = "macos", not(feature = "macos-blade")))]
use crate::DevicePixels;
use crate::{
    Background, Bounds, ContentMask, ScaledPixels, Scene, TransformationMatrix, hsla, point, size,
};

/// Which rendering backend a [`HeadlessRenderer`] is using.
///
/// The backend is chosen once in [`HeadlessRenderer::new`] and reported by
/// [`HeadlessRenderer::backend`]; it determines how
/// [`RenderedFrame::checksum`] is derived.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HeadlessBackend {
    /// Real GPU rasterization with read-back pixels (macOS / Metal today).
    Gpu,
    /// Real scene construction and batching on the CPU, without rasterization.
    ///
    /// Used on platforms whose off-screen GPU path is not yet implemented. The
    /// work performed is genuine (no simulated delays), but no pixels are read
    /// back, so the frame checksum is derived from scene structure instead.
    CpuOnly,
}

/// One frame produced by a [`HeadlessRenderer`].
///
/// Returned by [`HeadlessRenderer::render_frame`]. On the GPU path the
/// dimensions are those reported by the pixel read-back; on the CPU-only path
/// they are the renderer's configured dimensions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RenderedFrame {
    /// Frame width in device pixels.
    pub width: u32,
    /// Frame height in device pixels.
    pub height: u32,
    /// Stable content checksum: pixel-derived on a GPU backend, structure-derived on CPU-only.
    ///
    /// The two derivations hash different data, so checksums are only
    /// comparable between frames that share the same `gpu` value.
    pub checksum: u64,
    /// Whether the frame was rasterized on the GPU.
    pub gpu: bool,
}

/// One frame rendered into an `RGBA16Float` off-screen target (linear, ≥16-bit).
///
/// Returned by [`HeadlessRenderer::render_frame_rgba16f`], which is only
/// available on the GPU backend.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct HdrFrame {
    /// Frame width in device pixels.
    pub width: u32,
    /// Frame height in device pixels.
    pub height: u32,
    /// Peak channel value across the frame; may exceed `1.0` (HDR headroom).
    ///
    /// Accumulated with `f32::max` starting from `0.0`, so it is never
    /// negative.
    pub peak: f32,
    /// Stable checksum of the decoded float pixels.
    ///
    /// Folded over the bit pattern of every decoded channel value, in
    /// read-back order.
    pub checksum: u64,
}

/// A windowless renderer used by benchmarks and golden-image tests.
///
/// Construct once, then call [`HeadlessRenderer::render_frame`] repeatedly to
/// drive frames through the real draw path.
pub struct HeadlessRenderer {
    // Frame width in device pixels; validated as non-zero by `new`.
    width: u32,
    // Frame height in device pixels; validated as non-zero by `new`.
    height: u32,
    // Backend selected at construction time; never changes afterwards.
    backend: HeadlessBackend,
    // Metal renderer backing the GPU path. `Some` exactly when `new` selected
    // `HeadlessBackend::Gpu`; the field only exists on macOS builds without
    // the `macos-blade` feature.
    #[cfg(all(target_os = "macos", not(feature = "macos-blade")))]
    metal: Option<crate::metal_renderer::MetalRenderer>,
}

impl HeadlessRenderer {
    /// Create a headless renderer for the given device-pixel dimensions.
    ///
    /// On macOS builds without the `macos-blade` feature, a Metal renderer is
    /// created and [`HeadlessBackend::Gpu`] is selected when
    /// `metal_is_available()` reports true. In every other case the renderer
    /// falls back to [`HeadlessBackend::CpuOnly`].
    ///
    /// # Errors
    ///
    /// Returns an error if `width` or `height` is zero.
    pub fn new(width: u32, height: u32) -> Result<Self> {
        if width == 0 || height == 0 {
            anyhow::bail!("headless renderer requires non-zero dimensions");
        }

        #[cfg(all(target_os = "macos", not(feature = "macos-blade")))]
        {
            use crate::metal_renderer::{MetalRenderer, metal_is_available};
            use parking_lot::Mutex;
            use std::sync::Arc;

            if metal_is_available() {
                let renderer = MetalRenderer::new(Arc::new(Mutex::new(Default::default())));
                return Ok(Self {
                    width,
                    height,
                    backend: HeadlessBackend::Gpu,
                    metal: Some(renderer),
                });
            }
        }

        Ok(Self {
            width,
            height,
            backend: HeadlessBackend::CpuOnly,
            #[cfg(all(target_os = "macos", not(feature = "macos-blade")))]
            metal: None,
        })
    }

    /// The backend actually in use.
    pub fn backend(&self) -> HeadlessBackend {
        self.backend
    }

    /// The configured frame dimensions in device pixels, as `(width, height)`.
    pub fn dimensions(&self) -> (u32, u32) {
        (self.width, self.height)
    }

    /// Convert the `u32` frame dimensions to the `i32` values that
    /// `DevicePixels` wraps.
    ///
    /// A plain `as i32` cast would wrap dimensions above `i32::MAX` to negative
    /// values and hand the GPU a nonsensical viewport, so the conversion is
    /// checked and reported as an error instead.
    #[cfg(all(target_os = "macos", not(feature = "macos-blade")))]
    fn device_extent(width: u32, height: u32) -> Result<(i32, i32)> {
        let checked = |axis: &str, value: u32| -> Result<i32> {
            i32::try_from(value).map_err(|_| {
                anyhow::anyhow!(
                    "headless frame {} of {} exceeds the device-pixel range",
                    axis,
                    value
                )
            })
        };
        Ok((checked("width", width)?, checked("height", height)?))
    }

    /// Build a procedural scene of `complexity` quads and process one frame.
    ///
    /// On a GPU backend the scene is rasterized off-screen and the checksum is
    /// a hash of the read-back BGRA bytes; on a CPU-only backend the real scene
    /// is built and batched and the checksum is the scene's structural
    /// checksum.
    ///
    /// # Errors
    ///
    /// On the GPU backend, returns an error if the configured dimensions do not
    /// fit in device pixels or if the off-screen render / read-back fails. The
    /// CPU-only path does not fail.
    pub fn render_frame(&mut self, complexity: usize) -> Result<RenderedFrame> {
        // Built up front because both backends consume the same scene.
        let scene = build_benchmark_scene(self.width, self.height, complexity);

        #[cfg(all(target_os = "macos", not(feature = "macos-blade")))]
        if let Some(renderer) = self.metal.as_mut() {
            let (width, height) = Self::device_extent(self.width, self.height)?;
            let viewport = size(DevicePixels(width), DevicePixels(height));
            let readback = renderer.render_scene_to_bytes(&scene, viewport)?;
            return Ok(RenderedFrame {
                width: readback.width,
                height: readback.height,
                checksum: seahash::hash(&readback.bgra),
                gpu: true,
            });
        }

        Ok(RenderedFrame {
            width: self.width,
            height: self.height,
            checksum: scene.structural_checksum(),
            gpu: false,
        })
    }

    /// Render a procedural scene into an `RGBA16Float` off-screen target (the
    /// linear ≥16-bit working format), returning peak/checksum stats. Available
    /// only on the GPU backend.
    ///
    /// # Errors
    ///
    /// Returns an error on the CPU-only backend, if the configured dimensions
    /// do not fit in device pixels, or if the off-screen render / read-back
    /// fails.
    pub fn render_frame_rgba16f(&mut self, complexity: usize) -> Result<HdrFrame> {
        #[cfg(all(target_os = "macos", not(feature = "macos-blade")))]
        if let Some(renderer) = self.metal.as_mut() {
            // 64-bit FNV offset basis and prime; the fold below XORs in each
            // value's 32-bit pattern and then multiplies (FNV-1a order).
            const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
            const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

            // Validate the viewport before paying for scene construction.
            let (width, height) = Self::device_extent(self.width, self.height)?;
            let viewport = size(DevicePixels(width), DevicePixels(height));
            let scene = build_benchmark_scene(self.width, self.height, complexity);
            let readback = renderer.render_scene_to_f16(&scene, viewport)?;

            let mut peak = 0.0f32;
            let mut checksum = FNV_OFFSET_BASIS;
            for &value in &readback.rgba {
                peak = peak.max(value);
                checksum ^= value.to_bits() as u64;
                checksum = checksum.wrapping_mul(FNV_PRIME);
            }
            return Ok(HdrFrame {
                width: readback.width,
                height: readback.height,
                peak,
                checksum,
            });
        }

        // Keeps `complexity` "used" on builds where the GPU branch is compiled out.
        let _ = complexity;
        anyhow::bail!("RGBA16Float rendering is only available on the GPU backend")
    }

    /// Run a built-in GPU compute kernel that doubles each input value, proving
    /// the compute-pipeline path end-to-end. Available only on the GPU backend.
    ///
    /// The input slice is copied first; the kernel runs on the copy and the
    /// copy is returned, so `data` itself is never modified.
    ///
    /// # Errors
    ///
    /// Returns an error on the CPU-only backend or if running the kernel fails.
    pub fn run_compute_doubler(&self, data: &[f32]) -> Result<Vec<f32>> {
        #[cfg(all(target_os = "macos", not(feature = "macos-blade")))]
        if let Some(renderer) = self.metal.as_ref() {
            // Metal Shading Language source: one thread per element, doubled in place.
            const KERNEL: &str = concat!(
                "#include <metal_stdlib>\n",
                "using namespace metal;\n",
                "kernel void double_values(device float* data [[buffer(0)]],\n",
                "                          uint id [[thread_position_in_grid]]) {\n",
                "    data[id] = data[id] * 2.0;\n",
                "}\n",
            );
            let mut buffer = data.to_vec();
            renderer.run_compute_kernel(KERNEL, "double_values", &mut buffer)?;
            return Ok(buffer);
        }
        // Keeps `data` "used" on builds where the GPU branch is compiled out.
        let _ = data;
        anyhow::bail!("compute is only available on the GPU backend")
    }
}

/// Build the procedural benchmark scene: a near-square grid of solid quads.
///
/// `complexity` is the number of quads (a value of `0` is treated as `1`).
/// The quads are laid out row-major on a grid of `ceil(sqrt(count))` columns,
/// each cell sized to divide the `width` × `height` frame evenly but never
/// smaller than one pixel per side. Every quad uses the full frame as its
/// content mask and gets a hue proportional to its index. The scene is
/// finished before it is returned.
fn build_benchmark_scene(width: u32, height: u32, complexity: usize) -> Scene {
    let mut scene = Scene::default();
    let count = complexity.max(1);

    // Grid arithmetic stays in `usize`: narrowing `count` or the loop index to
    // `u32` would silently truncate on 64-bit targets and corrupt the layout
    // for very large counts. Results are unchanged for counts that fit in u32.
    let cols = ((count as f64).sqrt().ceil() as usize).max(1);
    let rows = count.div_ceil(cols).max(1);

    // Clamp cells to at least one pixel so dense grids still produce
    // non-degenerate quads (they may then extend past the frame edge).
    let cell_w = (width as f32 / cols as f32).max(1.0);
    let cell_h = (height as f32 / rows as f32).max(1.0);

    let viewport = Bounds {
        origin: point(ScaledPixels(0.0), ScaledPixels(0.0)),
        size: size(ScaledPixels(width as f32), ScaledPixels(height as f32)),
    };

    for i in 0..count {
        let col = (i % cols) as f32;
        let row = (i / cols) as f32;
        let bounds = Bounds {
            origin: point(ScaledPixels(col * cell_w), ScaledPixels(row * cell_h)),
            size: size(ScaledPixels(cell_w), ScaledPixels(cell_h)),
        };
        // Sweep the hue across the quads so each one is coloured by its index.
        let hue = (i as f32 / count as f32).fract();
        scene.insert_primitive(crate::Quad {
            bounds,
            content_mask: ContentMask { bounds: viewport },
            background: Background::from(hsla(hue, 0.7, 0.5, 1.0)),
            transform: TransformationMatrix::unit(),
            ..Default::default()
        });
    }

    scene.finish();
    scene
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Rendering the same scene twice yields the same checksum, a denser scene
    /// yields a different one, and the `gpu` flag agrees with the backend.
    #[test]
    fn render_frame_is_deterministic_and_does_real_work() {
        // Construction failure is treated as "nothing to test here".
        let Ok(mut renderer) = HeadlessRenderer::new(64, 64) else {
            return;
        };

        let first = renderer.render_frame(32).unwrap();
        let second = renderer.render_frame(32).unwrap();

        assert_eq!((first.width, first.height), (64, 64));
        assert_eq!(
            first.checksum, second.checksum,
            "identical scenes must produce identical frames"
        );

        let denser = renderer.render_frame(64).unwrap();
        assert_ne!(
            first.checksum, denser.checksum,
            "a different scene must produce a different checksum"
        );

        // Exhaustive on purpose: a new backend variant must be handled here.
        let expect_gpu = match renderer.backend() {
            HeadlessBackend::Gpu => true,
            HeadlessBackend::CpuOnly => false,
        };
        assert!(first.gpu == expect_gpu);
    }

    /// The half-float path is either deterministic, or fails only because the
    /// renderer is running on the CPU-only backend.
    #[test]
    fn rgba16f_frame_is_deterministic_or_unsupported() {
        let Ok(mut renderer) = HeadlessRenderer::new(32, 32) else {
            return;
        };

        let Ok(frame) = renderer.render_frame_rgba16f(16) else {
            assert_eq!(renderer.backend(), HeadlessBackend::CpuOnly);
            return;
        };

        assert_eq!((frame.width, frame.height), (32, 32));
        assert!(frame.peak > 0.0);
        let again = renderer.render_frame_rgba16f(16).unwrap();
        assert_eq!(frame.checksum, again.checksum);
    }

    /// The compute kernel doubles every element, or fails only because the
    /// renderer is running on the CPU-only backend.
    #[test]
    fn compute_doubler_runs_on_gpu_or_is_unsupported() {
        let Ok(renderer) = HeadlessRenderer::new(8, 8) else {
            return;
        };

        let Ok(output) = renderer.run_compute_doubler(&[1.0, 2.0, 3.0, 4.0]) else {
            assert_eq!(renderer.backend(), HeadlessBackend::CpuOnly);
            return;
        };

        assert_eq!(output, vec![2.0, 4.0, 6.0, 8.0]);
    }
}