shdrlib 0.1.0

A three-tiered Vulkan shader compilation and rendering framework built in pure Rust
Documentation
//! # Buffer Performance Benchmarks (Simplified)
//!
//! **Purpose**: Measure real-world buffer operation performance.
//!
//! This simplified suite focuses on operations we can accurately benchmark:
//! - Buffer allocation performance across different sizes
//! - Helper function overhead vs raw CORE operations
//! - Batch buffer creation (realistic frame workloads)
//!
//! ## Performance Budget
//! - Small buffer (<64KB): <100μs allocation
//! - Large buffer (>1MB): <1ms allocation
//! - Helper overhead: <20% vs raw CORE

use criterion::{
    black_box, criterion_group, criterion_main, 
    Criterion, BenchmarkId, Throughput, PlotConfiguration, AxisScale
};
use shdrlib::core::Buffer;
use ash::vk;

mod bench_utils;
use bench_utils::setup_device;

/// Benchmarks `Buffer::new` for eight buffer sizes, from 256 bytes to 4 MiB.
///
/// Every size is measured twice inside the `buffer_creation` group:
/// - `device_local`: `VERTEX_BUFFER` usage with `DEVICE_LOCAL` memory
/// - `host_visible`: `TRANSFER_SRC` usage with `HOST_VISIBLE | HOST_COHERENT` memory
///
/// Whatever `Buffer::new` returns is handed back to Criterion through
/// `black_box` without being inspected.
///
/// # Panics
/// Panics if `setup_device` returns an error.
fn bench_buffer_creation(c: &mut Criterion) {
    let device = setup_device().expect("Failed to setup device");

    let mut group = c.benchmark_group("buffer_creation");
    group.sample_size(100);
    // The sizes below span several orders of magnitude, hence the log-scaled summary.
    let plot = PlotConfiguration::default().summary_scale(AxisScale::Logarithmic);
    group.plot_config(plot);

    // (size in bytes, benchmark parameter label)
    let sizes = [
        (256, "uniform_buffer_small"),
        (1024, "uniform_buffer_typical"),
        (4096, "uniform_buffer_large"),
        (16384, "vertex_buffer_small"),
        (65536, "vertex_buffer_medium"),
        (262144, "vertex_buffer_large"),
        (1048576, "vertex_buffer_huge"),
        (4194304, "staging_buffer_4MB"),
    ];

    // (benchmark function label, buffer usage, memory properties)
    let memory_kinds = [
        (
            "device_local",
            vk::BufferUsageFlags::VERTEX_BUFFER,
            vk::MemoryPropertyFlags::DEVICE_LOCAL,
        ),
        (
            "host_visible",
            vk::BufferUsageFlags::TRANSFER_SRC,
            vk::MemoryPropertyFlags::HOST_VISIBLE | vk::MemoryPropertyFlags::HOST_COHERENT,
        ),
    ];

    for &(byte_len, label) in &sizes {
        // Report throughput in bytes so results are comparable across sizes.
        group.throughput(Throughput::Bytes(byte_len as u64));

        for &(kind, usage, memory) in &memory_kinds {
            group.bench_with_input(BenchmarkId::new(kind, label), &byte_len, |bencher, &len| {
                bencher.iter(|| {
                    black_box(Buffer::new(
                        black_box(&device),
                        black_box(len as u64),
                        black_box(usage),
                        black_box(memory),
                    ))
                })
            });
        }
    }

    group.finish();
}

/// Benchmarks the buffer helpers in `shdrlib::ex::helpers::buffer`.
///
/// Three benchmarks are registered in the `helper_functions` group:
/// - `vertex_buffer_helper`: `create_vertex_buffer` fed a slice of 1024 vertices
/// - `uniform_buffer_helper`: `create_uniform_buffer` typed as a 4x4 `f32` matrix
/// - `storage_buffer_helper_64k`: `create_storage_buffer` called with `65536`
///
/// Each helper's return value is passed through `black_box` and returned to
/// Criterion without being inspected.
///
/// # Panics
/// Panics if `setup_device` returns an error.
fn bench_helper_functions(c: &mut Criterion) {
    use shdrlib::ex::helpers::buffer as helpers;

    // Vertex record used as input for the vertex buffer helper.
    #[repr(C)]
    #[derive(Clone, Copy)]
    struct Vertex {
        pos: [f32; 3],
        normal: [f32; 3],
        uv: [f32; 2],
    }

    // Element type requested from the uniform buffer helper.
    type Mat4 = [[f32; 4]; 4];

    let device = setup_device().expect("Failed to setup device");

    let mut group = c.benchmark_group("helper_functions");
    group.sample_size(100);

    group.bench_function("vertex_buffer_helper", |bencher| {
        // Input data is prepared outside `iter`, so building it is not timed.
        let template = Vertex {
            pos: [0.0; 3],
            normal: [0.0, 1.0, 0.0],
            uv: [0.0; 2],
        };
        let vertices: Vec<Vertex> = (0..1024)
            .map(|x| Vertex {
                pos: [x as f32, 0.0, 0.0],
                ..template
            })
            .collect();

        bencher.iter(|| {
            black_box(helpers::create_vertex_buffer(
                black_box(&device),
                black_box(&vertices),
            ))
        })
    });

    // No data is passed to this helper; only the device and the element type.
    group.bench_function("uniform_buffer_helper", |bencher| {
        bencher.iter(|| black_box(helpers::create_uniform_buffer::<Mat4>(black_box(&device))))
    });

    group.bench_function("storage_buffer_helper_64k", |bencher| {
        bencher.iter(|| {
            black_box(helpers::create_storage_buffer(
                black_box(&device),
                black_box(65536),
            ))
        })
    });

    group.finish();
}

/// Benchmarks creating several buffers per iteration with `Buffer::new`.
///
/// Two benchmarks are registered in the `batch_operations` group:
/// - `mixed_buffer_creation_typical_frame`: five 256-byte uniform buffers,
///   three 16384-byte vertex buffers and one 65536-byte storage buffer
/// - `many_small_buffers_100x`: one hundred 256-byte uniform buffers
///
/// The values returned by `Buffer::new` are never inspected.
///
/// # Panics
/// Panics if `setup_device` returns an error.
fn bench_batch_operations(c: &mut Criterion) {
    let device = setup_device().expect("Failed to setup device");

    let host_mapped =
        vk::MemoryPropertyFlags::HOST_VISIBLE | vk::MemoryPropertyFlags::HOST_COHERENT;

    // One constructor per buffer flavour used by the workloads below.
    let small_uniform = || {
        Buffer::new(
            &device,
            256,
            vk::BufferUsageFlags::UNIFORM_BUFFER,
            host_mapped,
        )
    };
    let device_vertex = || {
        Buffer::new(
            &device,
            16384,
            vk::BufferUsageFlags::VERTEX_BUFFER,
            vk::MemoryPropertyFlags::DEVICE_LOCAL,
        )
    };
    let device_storage = || {
        Buffer::new(
            &device,
            65536,
            vk::BufferUsageFlags::STORAGE_BUFFER,
            vk::MemoryPropertyFlags::DEVICE_LOCAL,
        )
    };

    let mut group = c.benchmark_group("batch_operations");
    group.sample_size(50);

    group.bench_function("mixed_buffer_creation_typical_frame", |bencher| {
        bencher.iter(|| {
            // The bindings keep every buffer alive until the iteration ends.
            let _uniforms: Vec<_> = (0..5).map(|_| small_uniform()).collect();
            let _vertex_sets: Vec<_> = (0..3).map(|_| device_vertex()).collect();
            let _storage = device_storage();

            black_box(());
        })
    });

    group.bench_function("many_small_buffers_100x", |bencher| {
        bencher.iter(|| {
            let batch: Vec<_> = (0..100).map(|_| small_uniform()).collect();
            black_box(batch);
        })
    });

    group.finish();
}

// Collect the three benchmark functions into a Criterion group named
// `benches`, then hand that group to `criterion_main!` to produce the
// benchmark binary's entry point.
criterion_group!(
    benches,
    bench_buffer_creation,
    bench_helper_functions,
    bench_batch_operations
);
criterion_main!(benches);