rs_adk/evaluation/
eval_case.rs

1//! Evaluation case and set types — define test scenarios for agents.
2
3use serde::{Deserialize, Serialize};
4
5/// A single turn in a conversation for evaluation.
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct InvocationTurn {
8    /// The role of this turn (e.g., "user", "model").
9    pub role: String,
10    /// The text content of this turn.
11    pub content: String,
12    /// Tool calls made during this turn (if any).
13    #[serde(default)]
14    pub tool_calls: Vec<serde_json::Value>,
15    /// Tool results returned during this turn (if any).
16    #[serde(default)]
17    pub tool_results: Vec<serde_json::Value>,
18}
19
20/// A single invocation (conversation) for evaluation.
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct Invocation {
23    /// Unique identifier for this invocation.
24    #[serde(default)]
25    pub id: String,
26    /// The turns of conversation in this invocation.
27    pub turns: Vec<InvocationTurn>,
28    /// Optional metadata about this invocation.
29    #[serde(default)]
30    pub metadata: serde_json::Value,
31}
32
33/// A single evaluation case — pairs actual invocations with optional expected results.
34#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct EvalCase {
36    /// Name of the eval case.
37    pub name: String,
38    /// The actual agent invocations to evaluate.
39    pub actual: Vec<Invocation>,
40    /// The expected (golden) invocations for comparison.
41    #[serde(default)]
42    pub expected: Vec<Invocation>,
43    /// Optional conversation scenario description.
44    #[serde(default)]
45    pub scenario: Option<String>,
46}
47
48/// A collection of evaluation cases.
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct EvalSet {
51    /// Name of this evaluation set.
52    pub name: String,
53    /// The evaluation cases in this set.
54    pub cases: Vec<EvalCase>,
55    /// Optional description.
56    #[serde(default)]
57    pub description: Option<String>,
58}
59
60#[cfg(test)]
61mod tests {
62    use super::*;
63
64    #[test]
65    fn eval_case_serde_roundtrip() {
66        let case = EvalCase {
67            name: "test-case".into(),
68            actual: vec![Invocation {
69                id: "inv-1".into(),
70                turns: vec![InvocationTurn {
71                    role: "user".into(),
72                    content: "What is the weather?".into(),
73                    tool_calls: vec![],
74                    tool_results: vec![],
75                }],
76                metadata: serde_json::Value::Null,
77            }],
78            expected: vec![],
79            scenario: Some("Weather query".into()),
80        };
81
82        let json = serde_json::to_string(&case).unwrap();
83        let deserialized: EvalCase = serde_json::from_str(&json).unwrap();
84        assert_eq!(deserialized.name, "test-case");
85        assert_eq!(deserialized.actual.len(), 1);
86    }
87
88    #[test]
89    fn eval_set_construction() {
90        let set = EvalSet {
91            name: "suite-1".into(),
92            cases: vec![],
93            description: Some("Test suite".into()),
94        };
95        assert_eq!(set.name, "suite-1");
96        assert!(set.cases.is_empty());
97    }
98}
rs_adk/evaluation/eval_case.rs

rs_adk/evaluation/
eval_case.rs