Skip to main content

rs_adk/evaluation/
eval_result.rs

//! Evaluation result types — metric scores and per-invocation breakdowns.
2
3use serde::{Deserialize, Serialize};
4
5/// A single metric evaluation score for one invocation.
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct PerInvocationResult {
8    /// The invocation ID.
9    pub invocation_id: String,
10    /// Score for this invocation (0.0–1.0 typically).
11    pub score: f64,
12    /// Optional explanation of the score.
13    #[serde(default)]
14    pub explanation: Option<String>,
15}
16
17/// A named metric with its aggregated and per-invocation results.
18#[derive(Debug, Clone, Serialize, Deserialize)]
19pub struct EvalMetric {
20    /// Name of this metric (e.g., "response_match", "tool_use_quality").
21    pub name: String,
22    /// Aggregated score across all invocations.
23    pub score: f64,
24    /// Per-invocation breakdown.
25    pub per_invocation: Vec<PerInvocationResult>,
26}
27
28/// The result of evaluating an evaluation set.
29#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct EvalResult {
31    /// Overall aggregated score.
32    pub overall_score: f64,
33    /// Per-metric results.
34    pub metrics: Vec<EvalMetric>,
35}
36
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance for score comparisons. Looser than `f64::EPSILON` so
    /// that a last-bit difference, which JSON float parsing may introduce,
    /// cannot fail a test.
    const TOLERANCE: f64 = 1e-9;

    /// Asserts that two scores are equal within [`TOLERANCE`].
    fn assert_close(actual: f64, expected: f64) {
        assert!(
            (actual - expected).abs() < TOLERANCE,
            "expected {}, got {}",
            expected,
            actual
        );
    }

    /// Builds a result with one metric and one per-invocation entry so the
    /// tests exercise every nested type, not only the top-level struct.
    fn sample_result() -> EvalResult {
        EvalResult {
            overall_score: 0.85,
            metrics: vec![EvalMetric {
                name: "response_match".into(),
                score: 0.85,
                per_invocation: vec![PerInvocationResult {
                    invocation_id: "inv-1".into(),
                    score: 0.9,
                    explanation: Some("Good match".into()),
                }],
            }],
        }
    }

    #[test]
    fn eval_result_construction() {
        let result = sample_result();

        assert_close(result.overall_score, 0.85);
        assert_eq!(result.metrics.len(), 1);

        let metric = &result.metrics[0];
        assert_eq!(metric.name, "response_match");
        assert_close(metric.score, 0.85);
        assert_eq!(metric.per_invocation.len(), 1);

        let invocation = &metric.per_invocation[0];
        assert_eq!(invocation.invocation_id, "inv-1");
        assert_close(invocation.score, 0.9);
        assert_eq!(invocation.explanation.as_deref(), Some("Good match"));
    }

    #[test]
    fn eval_result_serde_roundtrip() {
        // Empty metrics: the minimal shape must survive a round trip.
        let empty = EvalResult {
            overall_score: 0.75,
            metrics: vec![],
        };
        let json = serde_json::to_string(&empty).expect("serializing EvalResult");
        let restored: EvalResult = serde_json::from_str(&json).expect("deserializing EvalResult");
        assert_close(restored.overall_score, 0.75);
        assert!(restored.metrics.is_empty());

        // Nested metrics: every level of the structure must survive as well.
        let original = sample_result();
        let json = serde_json::to_string(&original).expect("serializing EvalResult");
        let restored: EvalResult = serde_json::from_str(&json).expect("deserializing EvalResult");

        assert_close(restored.overall_score, original.overall_score);
        assert_eq!(restored.metrics.len(), original.metrics.len());

        let (got, want) = (&restored.metrics[0], &original.metrics[0]);
        assert_eq!(got.name, want.name);
        assert_close(got.score, want.score);
        assert_eq!(got.per_invocation.len(), want.per_invocation.len());

        let (got, want) = (&got.per_invocation[0], &want.per_invocation[0]);
        assert_eq!(got.invocation_id, want.invocation_id);
        assert_close(got.score, want.score);
        assert_eq!(got.explanation, want.explanation);
    }

    #[test]
    fn per_invocation_missing_explanation_defaults_to_none() {
        // `explanation` is marked `#[serde(default)]`, so input that omits it
        // must still deserialize.
        let json = r#"{"invocation_id":"inv-2","score":0.5}"#;
        let parsed: PerInvocationResult =
            serde_json::from_str(json).expect("deserializing without `explanation`");

        assert_eq!(parsed.invocation_id, "inv-2");
        assert_close(parsed.score, 0.5);
        assert!(parsed.explanation.is_none());
    }
}