pub struct EvalReport {
pub mean_score: f64,
pub p50_latency_ms: u64,
pub p95_latency_ms: u64,
pub total_tokens: u64,
pub cases_scored: usize,
pub cases_total: usize,
pub is_partial: bool,
pub error_count: usize,
pub per_case: Vec<CaseScore>,
}
Expand description
Aggregate evaluation report returned by `Evaluator::evaluate`.
`mean_score` is NaN when no cases were successfully scored — callers must
check `cases_scored > 0` or `mean_score.is_finite()` before using it as an
acceptance threshold.
§Examples
use zeph_experiments::EvalReport;
// mean_score is NaN when no cases are scored
// This is a documentation-only example; construct via Evaluator::evaluate in practice.
let partial_report_has_nan_mean = f64::NAN;
assert!(partial_report_has_nan_mean.is_nan());
Fields§
mean_score: f64 — Mean score across all successfully scored cases (NaN if `cases_scored == 0`).
p50_latency_ms: u64 — Median (p50) latency in milliseconds across scored cases (0 if none).
p95_latency_ms: u64 — 95th-percentile latency in milliseconds across scored cases (0 if none).
total_tokens: u64 — Total tokens consumed by all judge calls in this evaluation.
cases_scored: usize — Number of cases that were successfully scored.
cases_total: usize — Total number of cases in the benchmark set (including failed ones).
is_partial: bool — `true` if any case was excluded due to budget exhaustion or judge errors.
error_count: usize — Number of cases that failed (LLM error, parse error, or budget exceeded).
per_case: Vec<CaseScore> — Per-case scores for successfully evaluated cases, sorted by `case_index`.
Trait Implementations§
Source§impl Clone for EvalReport
impl Clone for EvalReport
Source§fn clone(&self) -> EvalReport
fn clone(&self) -> EvalReport
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for EvalReport
impl Debug for EvalReport
Source§impl<'de> Deserialize<'de> for EvalReport
impl<'de> Deserialize<'de> for EvalReport
Source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where
__D: Deserializer<'de>,
Auto Trait Implementations§
impl Freeze for EvalReport
impl RefUnwindSafe for EvalReport
impl Send for EvalReport
impl Sync for EvalReport
impl Unpin for EvalReport
impl UnsafeUnpin for EvalReport
impl UnwindSafe for EvalReport
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T
where
T: Clone,
impl<T> CloneToUninit for T
where
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§impl<T> IntoRequest<T> for T
impl<T> IntoRequest<T> for T
Source§fn into_request(self) -> Request<T>
fn into_request(self) -> Request<T>
Wrap the input message T in a tonic::Request