nexus-rt 2.0.3

Single-threaded, event-driven runtime primitives with pre-resolved dispatch
Documentation
//! Template instantiation benchmark: `generate()` vs `into_handler()`.
//!
//! Measures the cost of stamping handlers from pre-resolved state
//! compared to full `into_handler` construction (HashMap lookups +
//! conflict detection each time).
//!
//! Run with:
//! ```bash
//! taskset -c 0 cargo run --release -p nexus-rt --example perf_template
//! ```

use std::hint::black_box;

use nexus_rt::{
    CallbackTemplate, HandlerTemplate, IntoCallback, IntoHandler, Res, ResMut, WorldBuilder,
    callback_blueprint, handler_blueprint, new_resource,
};

new_resource!(ResU64(u64));
new_resource!(ResU32(u32));
new_resource!(ResBool(bool));
new_resource!(ResF64(f64));
new_resource!(ResI64(i64));
new_resource!(ResI32(i32));
new_resource!(ResU8(u8));
new_resource!(ResU16(u16));

// =============================================================================
// Bench infrastructure (same as perf_construction.rs)
// =============================================================================

const ITERATIONS: usize = 100_000;
const WARMUP: usize = 10_000;
const BATCH: u64 = 100;

#[inline(always)]
#[cfg(target_arch = "x86_64")]
fn rdtsc_start() -> u64 {
    unsafe {
        core::arch::x86_64::_mm_lfence();
        core::arch::x86_64::_rdtsc()
    }
}

#[inline(always)]
#[cfg(target_arch = "x86_64")]
fn rdtsc_end() -> u64 {
    unsafe {
        let mut aux = 0u32;
        let tsc = core::arch::x86_64::__rdtscp(&raw mut aux);
        core::arch::x86_64::_mm_lfence();
        tsc
    }
}

fn percentile(sorted: &[u64], p: f64) -> u64 {
    let idx = ((sorted.len() as f64) * p / 100.0) as usize;
    sorted[idx.min(sorted.len() - 1)]
}

fn bench_batched<F: FnMut()>(name: &str, mut f: F) {
    for _ in 0..WARMUP {
        f();
    }
    let mut samples = Vec::with_capacity(ITERATIONS);
    for _ in 0..ITERATIONS {
        let start = rdtsc_start();
        for _ in 0..BATCH {
            f();
        }
        let end = rdtsc_end();
        samples.push(end.wrapping_sub(start) / BATCH);
    }
    samples.sort_unstable();
    let p50 = percentile(&samples, 50.0);
    let p99 = percentile(&samples, 99.0);
    let p999 = percentile(&samples, 99.9);
    println!("{:<50} {:>8} {:>8} {:>8}", name, p50, p99, p999);
}

fn print_header(title: &str) {
    println!("=== {} ===\n", title);
    println!(
        "{:<50} {:>8} {:>8} {:>8}",
        "Operation", "p50", "p99", "p999"
    );
    println!("{}", "-".repeat(78));
}

// =============================================================================
// Handler functions at various arities
// =============================================================================

fn sys_1p(_a: Res<ResU64>, _e: ()) {}
fn sys_2p(_a: Res<ResU64>, _b: ResMut<ResU32>, _e: ()) {}
fn sys_4p(_a: Res<ResU64>, _b: ResMut<ResU32>, _c: Res<ResBool>, _d: Res<ResF64>, _e: ()) {}

#[allow(clippy::too_many_arguments)]
fn sys_8p(
    _a: Res<ResU64>,
    _b: ResMut<ResU32>,
    _c: Res<ResBool>,
    _d: Res<ResF64>,
    _e2: Res<ResI64>,
    _f: Res<ResI32>,
    _g: Res<ResU8>,
    _h: ResMut<ResU16>,
    _e: (),
) {
}

// =============================================================================
// Callback functions
// =============================================================================

fn cb_2p(_ctx: &mut u64, _a: Res<ResU64>, _b: ResMut<ResU32>, _e: ()) {}
fn cb_4p(
    _ctx: &mut u64,
    _a: Res<ResU64>,
    _b: ResMut<ResU32>,
    _c: Res<ResBool>,
    _d: Res<ResF64>,
    _e: (),
) {
}

// =============================================================================
// Blueprint keys
// =============================================================================

handler_blueprint!(K1P, Event = (), Params = (Res<'static, ResU64>,));
handler_blueprint!(K2P, Event = (), Params = (Res<'static, ResU64>, ResMut<'static, ResU32>));
handler_blueprint!(K4P, Event = (), Params = (
    Res<'static, ResU64>, ResMut<'static, ResU32>,
    Res<'static, ResBool>, Res<'static, ResF64>,
));
handler_blueprint!(K8P, Event = (), Params = (
    Res<'static, ResU64>, ResMut<'static, ResU32>,
    Res<'static, ResBool>, Res<'static, ResF64>,
    Res<'static, ResI64>, Res<'static, ResI32>,
    Res<'static, ResU8>, ResMut<'static, ResU16>,
));

callback_blueprint!(KCb2P, Context = u64, Event = (), Params = (
    Res<'static, ResU64>, ResMut<'static, ResU32>,
));
callback_blueprint!(KCb4P, Context = u64, Event = (), Params = (
    Res<'static, ResU64>, ResMut<'static, ResU32>,
    Res<'static, ResBool>, Res<'static, ResF64>,
));

// =============================================================================
// Main
// =============================================================================

fn main() {
    let mut wb = WorldBuilder::new();
    wb.register(ResU64(0));
    wb.register(ResU32(0));
    wb.register(ResBool(false));
    wb.register(ResF64(0.0));
    wb.register(ResI64(0));
    wb.register(ResI32(0));
    wb.register(ResU8(0));
    wb.register(ResU16(0));
    let world = wb.build();
    let r = world.registry();

    // ── Baseline: into_handler (HashMap lookups each time) ──────────────

    print_header("into_handler Construction — baseline (cycles)");

    bench_batched("into_handler  1-param", || {
        black_box(sys_1p.into_handler(r));
    });
    bench_batched("into_handler  2-param", || {
        black_box(sys_2p.into_handler(r));
    });
    bench_batched("into_handler  4-param", || {
        black_box(sys_4p.into_handler(r));
    });
    bench_batched("into_handler  8-param", || {
        black_box(sys_8p.into_handler(r));
    });

    println!();

    // ── Template: generate (Copy only, no HashMap) ──────────────────────

    let tpl_1p = HandlerTemplate::<K1P>::new(sys_1p, r);
    let tpl_2p = HandlerTemplate::<K2P>::new(sys_2p, r);
    let tpl_4p = HandlerTemplate::<K4P>::new(sys_4p, r);
    let tpl_8p = HandlerTemplate::<K8P>::new(sys_8p, r);

    print_header("HandlerTemplate::generate — template (cycles)");

    bench_batched("generate      1-param", || {
        black_box(tpl_1p.generate());
    });
    bench_batched("generate      2-param", || {
        black_box(tpl_2p.generate());
    });
    bench_batched("generate      4-param", || {
        black_box(tpl_4p.generate());
    });
    bench_batched("generate      8-param", || {
        black_box(tpl_8p.generate());
    });

    println!();

    // ── Callbacks: into_callback vs generate ────────────────────────────

    print_header("into_callback Construction — baseline (cycles)");

    bench_batched("into_callback 2-param", || {
        black_box(cb_2p.into_callback(0u64, r));
    });
    bench_batched("into_callback 4-param", || {
        black_box(cb_4p.into_callback(0u64, r));
    });

    println!();

    let cb_tpl_2p = CallbackTemplate::<KCb2P>::new(cb_2p, r);
    let cb_tpl_4p = CallbackTemplate::<KCb4P>::new(cb_4p, r);

    print_header("CallbackTemplate::generate — template (cycles)");

    bench_batched("generate      cb 2-param", || {
        black_box(cb_tpl_2p.generate(0u64));
    });
    bench_batched("generate      cb 4-param", || {
        black_box(cb_tpl_4p.generate(0u64));
    });

    println!();
}