Struct BenchRun

Source

pub struct BenchRun {
    pub dataset: String,
    pub model: String,
    pub run_id: String,
    pub started_at: String,
    pub finished_at: String,
    pub status: RunStatus,
    pub results: Vec<ScenarioResult>,
    pub aggregate: Aggregate,
}

Expand description

Top-level benchmark run record written to results.json.

The schema is a superset of the LongMemEval leaderboard submission format (NFR-008), making it directly usable for leaderboard submission after a longmemeval run.

Construct an instance with an empty results list and Aggregate::default() (as in the example below), then populate BenchRun::results incrementally and call BenchRun::recompute_aggregate before persisting with ResultWriter.

§Examples

use zeph_bench::{BenchRun, RunStatus, Aggregate};

let run = BenchRun {
    dataset: "gaia".into(),
    model: "openai/gpt-4o".into(),
    run_id: "a1b2c3".into(),
    started_at: "2026-04-09T10:00:00Z".into(),
    finished_at: String::new(),
    status: RunStatus::Running,
    results: vec![],
    aggregate: Aggregate::default(),
};
assert_eq!(run.dataset, "gaia");
assert!(run.results.is_empty());

Fields§

§dataset: String

Dataset name (e.g. "longmemeval").

§model: String

Provider/model identifier (e.g. "openai/gpt-4o").

§run_id: String

UUID v4 uniquely identifying this run.

§started_at: String

RFC 3339 timestamp when the run started.

§finished_at: String

RFC 3339 timestamp when the run ended (empty string if the run has not finished, e.g. it is still running or was interrupted).

§status: RunStatus

Run status.

§results: Vec<ScenarioResult>

Per-scenario results.

§aggregate: Aggregate

Aggregate statistics.

Implementations§

Source §

impl BenchRun

Source

pub fn recompute_aggregate(&mut self)

Recompute BenchRun::aggregate from the current BenchRun::results list.

Call this after appending one or more ScenarioResults to keep the aggregate statistics in sync before writing to disk.

§Examples

use zeph_bench::{BenchRun, RunStatus, ScenarioResult, Aggregate};

let mut run = BenchRun {
    dataset: "frames".into(),
    model: "openai/gpt-4o-mini".into(),
    run_id: "r1".into(),
    started_at: "2026-01-01T00:00:00Z".into(),
    finished_at: String::new(),
    status: RunStatus::Running,
    results: vec![
        ScenarioResult {
            scenario_id: "frames_0".into(),
            score: 1.0,
            response_excerpt: "Paris".into(),
            error: None,
            elapsed_ms: 500,
        },
    ],
    aggregate: Aggregate::default(),
};

run.recompute_aggregate();
assert_eq!(run.aggregate.total, 1);
assert!((run.aggregate.mean_score - 1.0).abs() < f64::EPSILON);
assert_eq!(run.aggregate.exact_match, 1);

Source

pub fn completed_ids(&self) -> HashSet<String>

Return the set of scenario IDs already present in BenchRun::results.

Used by the --resume logic to determine which scenarios can be skipped.

§Examples

use zeph_bench::{BenchRun, RunStatus, ScenarioResult, Aggregate};

let run = BenchRun {
    dataset: "gaia".into(),
    model: "openai/gpt-4o".into(),
    run_id: "r2".into(),
    started_at: "2026-01-01T00:00:00Z".into(),
    finished_at: String::new(),
    status: RunStatus::Interrupted,
    results: vec![
        ScenarioResult {
            scenario_id: "t1".into(),
            score: 1.0,
            response_excerpt: "1945".into(),
            error: None,
            elapsed_ms: 300,
        },
    ],
    aggregate: Aggregate::default(),
};

let done = run.completed_ids();
assert!(done.contains("t1"));
assert!(!done.contains("t2"));

Trait Implementations§

Source §

impl Clone for BenchRun

Source §

fn clone(&self) -> BenchRun

Returns a duplicate of the value. Read more

1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more

Source §

impl Debug for BenchRun

Source §

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Source §

impl<'de> Deserialize<'de> for BenchRun

Source §

fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: Deserializer<'de>,

Deserialize this value from the given Serde deserializer. Read more

Source §

impl Serialize for BenchRun

Source §

fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer,

Serialize this value into the given Serde serializer. Read more

Auto Trait Implementations§

§

impl UnwindSafe for BenchRun

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> CloneToUninit for T
where T: Clone,

Source §

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)

Performs copy-assignment from self to dest. Read more

Source §

impl<T> DynClone for T
where T: Clone,

Source §

fn __clone_box(&self, _: Private) -> *mut ()

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T> FromRef<T> for T
where T: Clone,

Source §

fn from_ref(input: &T) -> T

Converts to this type from a reference to the input type.

Source §

impl<T> Instrument for T

Source §

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more

Source §

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T> IntoEither for T

Source §

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more

Source §

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more

Source §