pub struct BaselineComparison {
pub dataset: String,
pub model: String,
pub run_id_memory_on: String,
pub run_id_memory_off: String,
pub deltas: Vec<ScenarioDelta>,
pub aggregate_delta: f64,
}
Comparison between two benchmark runs (memory-on vs memory-off).
Use BaselineComparison::compute to join two BenchRuns by scenario ID
and compute per-scenario deltas and an aggregate mean delta.
§Examples
use zeph_bench::{BenchRun, RunStatus, ScenarioResult, Aggregate};
use zeph_bench::baseline::BaselineComparison;
fn make_run(run_id: &str, scores: &[(&str, f64)]) -> BenchRun {
BenchRun {
dataset: "test".into(),
model: "model".into(),
run_id: run_id.into(),
started_at: "2026-01-01T00:00:00Z".into(),
finished_at: "2026-01-01T00:01:00Z".into(),
status: RunStatus::Completed,
results: scores.iter().map(|(id, score)| ScenarioResult {
scenario_id: id.to_string(),
score: *score,
response_excerpt: String::new(),
error: None,
elapsed_ms: 0,
}).collect(),
aggregate: Aggregate::default(),
}
}
let on = make_run("r1", &[("s1", 1.0), ("s2", 0.5)]);
let off = make_run("r2", &[("s1", 0.5), ("s2", 0.0)]);
let cmp = BaselineComparison::compute(&on, &off);
assert_eq!(cmp.deltas.len(), 2);
assert!((cmp.aggregate_delta - 0.5).abs() < f64::EPSILON);
Fields§
dataset: String — Dataset name (from the memory-on run).
model: String — Model identifier (from the memory-on run).
run_id_memory_on: String — Run ID of the memory-on run.
run_id_memory_off: String — Run ID of the memory-off run.
deltas: Vec<ScenarioDelta> — Per-scenario deltas, sorted by scenario_id.
Only scenarios present in both runs are included (inner join).
aggregate_delta: f64 — Arithmetic mean of all delta values. 0.0 if no scenarios overlap.
Implementations§
Source§impl BaselineComparison
impl BaselineComparison
pub fn compute(memory_on: &BenchRun, memory_off: &BenchRun) -> Self
Compute deltas by joining memory_on and memory_off runs on scenario_id.
Only scenarios present in both runs are included. Non-overlapping
scenarios are silently dropped. aggregate_delta is the arithmetic mean
of all per-scenario deltas; 0.0 when there are no overlapping scenarios.
pub fn write_comparison_json(&self, output_dir: &Path) -> Result<(), BenchError>
Write this comparison as pretty-printed JSON to {output_dir}/comparison.json.
The file is written atomically via a .tmp sibling + rename, so a concurrent
SIGINT cannot leave a half-written file.
§Errors
Returns BenchError::InvalidFormat on serialization failure and
BenchError::Io on write failure.
pub fn write_delta_table(&self, summary_path: &Path) -> Result<(), BenchError>
Append a delta table section to the Markdown file at summary_path.
Creates the file if it does not exist. The section header is
## Baseline Comparison (Memory On vs Off) followed by a Markdown table
of per-scenario deltas and a final aggregate delta line.
§Errors
Returns BenchError::Io on read/write failure.
Trait Implementations§
Source§impl Clone for BaselineComparison
impl Clone for BaselineComparison
Source§fn clone(&self) -> BaselineComparison
fn clone(&self) -> BaselineComparison
1.0.0 (const: unstable) · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for BaselineComparison
impl Debug for BaselineComparison
Source§impl<'de> Deserialize<'de> for BaselineComparison
impl<'de> Deserialize<'de> for BaselineComparison
Source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where
__D: Deserializer<'de>,
Auto Trait Implementations§
impl Freeze for BaselineComparison
impl RefUnwindSafe for BaselineComparison
impl Send for BaselineComparison
impl Sync for BaselineComparison
impl Unpin for BaselineComparison
impl UnsafeUnpin for BaselineComparison
impl UnwindSafe for BaselineComparison
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T
where
T: Clone,
impl<T> CloneToUninit for T
where
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§impl<T> IntoRequest<T> for T
impl<T> IntoRequest<T> for T
Source§fn into_request(self) -> Request<T>
fn into_request(self) -> Request<T>
Wrap the input message T in a tonic::Request