pub struct BenchmarkResults {
pub benchmark_name: String,
pub total_problems: usize,
pub correct: usize,
pub accuracy: f32,
pub avg_confidence: f32,
pub avg_latency_ms: f64,
pub total_tokens: usize,
pub category_accuracy: HashMap<String, f32>,
pub difficulty_accuracy: HashMap<u8, f32>,
pub results: Vec<EvaluationResult>,
pub calibration: CalibrationMetrics,
}
Expand description
Aggregate benchmark results.
Fields§
§benchmark_name: String
§total_problems: usize
§correct: usize
§accuracy: f32
§avg_confidence: f32
§avg_latency_ms: f64
§total_tokens: usize
§category_accuracy: HashMap<String, f32> — Accuracy by category
§difficulty_accuracy: HashMap<u8, f32> — Accuracy by difficulty
§results: Vec<EvaluationResult> — Individual results
§calibration: CalibrationMetrics — Calibration metrics
Implementations§
Source§impl BenchmarkResults
impl BenchmarkResults
pub fn compute(benchmark_name: &str, results: Vec<EvaluationResult>) -> Self
Source§pub fn compare(&self, baseline: &BenchmarkResults) -> ComparisonReport
pub fn compare(&self, baseline: &BenchmarkResults) -> ComparisonReport
Generates a comparison report against the baseline.
Trait Implementations§
Source§impl Clone for BenchmarkResults
impl Clone for BenchmarkResults
Source§fn clone(&self) -> BenchmarkResults
fn clone(&self) -> BenchmarkResults
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for BenchmarkResults
impl Debug for BenchmarkResults
Source§impl<'de> Deserialize<'de> for BenchmarkResults
impl<'de> Deserialize<'de> for BenchmarkResults
Source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
Deserialize this value from the given Serde deserializer. Read more
Auto Trait Implementations§
impl Freeze for BenchmarkResults
impl RefUnwindSafe for BenchmarkResults
impl Send for BenchmarkResults
impl Sync for BenchmarkResults
impl Unpin for BenchmarkResults
impl UnwindSafe for BenchmarkResults
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more