ripvec-core 3.1.1

//! Criterion micro-bench: trait-dispatch overhead for `RerankBackend`.
//!
//! Measures the latency of calling `score_batch` via `Box<dyn RerankBackend>`
//! (Variant T - trait path) vs a direct concrete call to a stub impl (Variant C
//! - concrete path) at batch sizes 1, 8, 32, and 128.
//!
//! The stub returns a constant-filled `Vec<f32>` (one allocation plus an
//! O(batch) fill, no model work), so the bench compares the two call paths
//! rather than a forward pass. Per @Box (1979) "All models are wrong but some
//! are useful": this bench will not perfectly isolate the call overhead from
//! allocation cost, cache effects, and inlining decisions - it is usefully
//! informative, not perfectly clean.
//!
//! # Spec reference
//!
//! `docs/surgery/backend_trait_microbench.md` Section 2a.
//!
//! # Decision rule
//!
//! If the `boxed_dyn` vs `concrete` delta at B=32 exceeds 5 ns per call, the
//! trait abstraction is dropped and `RerankBackend` collapses to a concrete
//! `CpuRerankBackend`. See spec Section 3.

use std::hint::black_box;

use criterion::{Criterion, criterion_group, criterion_main};
use ripvec_core::backend::{Encoding, RerankBackend};

/// Weight-free stand-in for a real reranker.
///
/// Implements `RerankBackend` without loading a model: every encoding in the
/// batch is scored `0.5`, regardless of its contents. The only work done is
/// building the output vector, which keeps the forward pass out of the
/// measurement. @Lampson (1983) "Hints for Computer System Design": measure
/// the exact path under test, not a superset of it.
struct StubRerank;

impl RerankBackend for StubRerank {
    /// Returns one constant `0.5` score per entry of `encodings`.
    ///
    /// Always yields `Ok`; the contents of the encodings are never read, only
    /// the slice length.
    fn score_batch(&self, encodings: &[Encoding]) -> ripvec_core::Result<Vec<f32>> {
        let batch_len = encodings.len();
        let scores = vec![0.5_f32; batch_len];
        Ok(scores)
    }

    /// Always `false`: the stub reports itself as not GPU-backed.
    fn is_gpu(&self) -> bool {
        false
    }
}

/// Produces `batch_size` identical synthetic `Encoding` values.
///
/// Each encoding holds three positions:
///
/// - `input_ids`: `[101 (CLS), 1000 (synthetic token), 102 (SEP)]`
/// - `attention_mask`: all 1s
/// - `token_type_ids`: all 0s
///
/// Follows the spec's "fill `input_ids` with token 1000" prescription.
fn make_encodings(batch_size: usize) -> Vec<Encoding> {
    // Each call builds a fresh, independently allocated encoding.
    let synthetic = || Encoding {
        input_ids: vec![101, 1000, 102],
        attention_mask: vec![1, 1, 1],
        token_type_ids: vec![0, 0, 0],
    };

    std::iter::repeat_with(synthetic).take(batch_size).collect()
}

/// Benchmarks `score_batch` on `StubRerank` through two call paths.
///
/// For each batch size in `[1, 8, 32, 128]` a Criterion group named
/// `rerank_dispatch_b{batch_size}` is created with two functions:
///
/// - `concrete` - direct call on a `StubRerank` value (Variant C).
/// - `boxed_dyn` - call through `Box<dyn RerankBackend + Send + Sync>`
///   (Variant T).
///
/// The boxed backend is passed through `black_box` once, after the unsizing
/// coercion, so the optimizer cannot see which vtable sits behind the trait
/// object. Without that, the call may be devirtualized and both variants can
/// compile to the same direct call, making the measured delta meaningless.
fn bench_dispatch(c: &mut Criterion) {
    let stub_concrete = StubRerank;

    // Coerce to the trait object first, then hide it from the optimizer. Doing
    // it in this order makes the vtable pointer itself opaque, not just the
    // (zero-sized) data behind it.
    let stub_box: Box<dyn RerankBackend + Send + Sync> = Box::new(StubRerank);
    let stub_box = black_box(stub_box);

    for batch_size in [1_usize, 8, 32, 128] {
        let encodings = make_encodings(batch_size);

        let mut group = c.benchmark_group(format!("rerank_dispatch_b{batch_size}"));

        // Variant C: direct concrete call - no vtable indirection.
        group.bench_function("concrete", |b| {
            b.iter(|| black_box(stub_concrete.score_batch(black_box(&encodings))));
        });

        // Variant T: call through Box<dyn RerankBackend> - vtable dispatch path
        // as used in `crates/ripvec-core/src/rerank.rs`.
        group.bench_function("boxed_dyn", |b| {
            b.iter(|| black_box(stub_box.score_batch(black_box(&encodings))));
        });

        group.finish();
    }
}

// Register `bench_dispatch` in the `benches` group and hand that group to
// Criterion's generated entry point.
criterion_group!(benches, bench_dispatch);
criterion_main!(benches);