Skip to main content

scouter_evaluate/
scenario.rs

1use crate::error::EvaluationError;
2use crate::evaluate::scenario_results::{EvalMetrics, ScenarioResult};
3use crate::evaluate::types::EvalResults;
4use crate::genai::EvalDataset;
5use potato_head::PyHelperFuncs;
6use pyo3::prelude::*;
7use scouter_types::genai::EvalScenario;
8use serde::{Deserialize, Serialize};
9use serde_json::Value;
10use std::collections::HashMap;
11
12/// Collection of evaluation scenarios with their associated data and results.
13///
14/// `EvalScenarios` is the data model that holds:
15/// - The scenario definitions (`Vec<EvalScenario>`)
16/// - Internal state populated by `EvalRunner::collect_scenario_data()` (not serialized)
17/// - Output populated by `EvalRunner::evaluate()` (serialized)
18#[pyclass]
19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct EvalScenarios {
21    #[pyo3(get)]
22    pub scenarios: Vec<EvalScenario>,
23
24    // Internal — not Python-visible, skipped in serde
25    #[serde(skip)]
26    pub(crate) scenario_datasets: HashMap<String, HashMap<String, EvalDataset>>,
27    #[serde(skip)]
28    pub(crate) scenario_contexts: HashMap<String, Value>,
29
30    // Output — populated after evaluate()
31    pub dataset_results: HashMap<String, EvalResults>,
32    pub scenario_results: Vec<ScenarioResult>,
33    #[pyo3(get)]
34    pub metrics: Option<EvalMetrics>,
35}
36
37#[pymethods]
38impl EvalScenarios {
39    #[new]
40    pub fn new(scenarios: Vec<EvalScenario>) -> Self {
41        Self {
42            scenarios,
43            scenario_datasets: HashMap::new(),
44            scenario_contexts: HashMap::new(),
45            dataset_results: HashMap::new(),
46            scenario_results: Vec::new(),
47            metrics: None,
48        }
49    }
50
51    #[getter]
52    pub fn dataset_results(&self) -> HashMap<String, EvalResults> {
53        self.dataset_results.clone()
54    }
55
56    #[getter]
57    pub fn scenario_results(&self) -> Vec<ScenarioResult> {
58        self.scenario_results.clone()
59    }
60
61    pub fn __len__(&self) -> usize {
62        self.scenarios.len()
63    }
64
65    pub fn __bool__(&self) -> bool {
66        !self.scenarios.is_empty()
67    }
68
69    pub fn is_evaluated(&self) -> bool {
70        self.metrics.is_some()
71    }
72
73    pub fn model_dump_json(&self) -> Result<String, EvaluationError> {
74        serde_json::to_string(self).map_err(Into::into)
75    }
76
77    #[staticmethod]
78    pub fn model_validate_json(json_string: String) -> Result<Self, EvaluationError> {
79        serde_json::from_str(&json_string).map_err(Into::into)
80    }
81
82    pub fn __str__(&self) -> String {
83        PyHelperFuncs::__str__(self)
84    }
85}
86
#[cfg(test)]
mod tests {
    use super::*;
    use scouter_types::genai::utils::AssertionTasks;

    /// Builds a minimal scenario with the given id and initial query; every
    /// optional field is `None` and every task list is empty.
    fn make_scenario(id: &str, query: &str) -> EvalScenario {
        EvalScenario {
            id: id.to_string(),
            initial_query: query.to_string(),
            predefined_turns: vec![],
            simulated_user_persona: None,
            termination_signal: None,
            max_turns: 10,
            expected_outcome: None,
            tasks: AssertionTasks {
                assertion: vec![],
                judge: vec![],
                trace: vec![],
                agent: vec![],
            },
            metadata: None,
        }
    }

    #[test]
    fn construction_and_len() {
        let scenarios = EvalScenarios::new(vec![
            make_scenario("s1", "Hello"),
            make_scenario("s2", "World"),
        ]);
        assert_eq!(scenarios.__len__(), 2);
        assert!(!scenarios.is_evaluated());
        // A freshly constructed collection carries no output yet.
        assert!(scenarios.dataset_results.is_empty());
        assert!(scenarios.scenario_results.is_empty());
    }

    #[test]
    fn is_evaluated_before_and_after() {
        let mut scenarios = EvalScenarios::new(vec![make_scenario("s1", "Hello")]);
        assert!(!scenarios.is_evaluated());

        scenarios.metrics = Some(EvalMetrics {
            overall_pass_rate: 1.0,
            dataset_pass_rates: HashMap::new(),
            scenario_pass_rate: 1.0,
            total_scenarios: 1,
            passed_scenarios: 1,
            scenario_task_pass_rates: HashMap::new(),
        });
        assert!(scenarios.is_evaluated());
    }

    #[test]
    fn is_empty_true_and_false() {
        let empty = EvalScenarios::new(vec![]);
        assert!(!empty.__bool__());
        assert_eq!(empty.__len__(), 0);

        let non_empty = EvalScenarios::new(vec![make_scenario("s1", "Hello")]);
        assert!(non_empty.__bool__());
    }

    #[test]
    fn model_dump_json_roundtrip() {
        let scenarios = EvalScenarios::new(vec![make_scenario("s1", "Hello")]);
        let json = scenarios.model_dump_json().unwrap();
        // Go through the public loader so both halves of the round trip are
        // exercised, not just the serde derive.
        let loaded = EvalScenarios::model_validate_json(json).unwrap();

        assert_eq!(loaded.scenarios.len(), 1);
        assert_eq!(loaded.scenarios[0].id, "s1");
        assert!(!loaded.is_evaluated());
        // serde-skipped fields should be empty
        assert!(loaded.scenario_datasets.is_empty());
        assert!(loaded.scenario_contexts.is_empty());
    }

    #[test]
    fn model_validate_json_rejects_malformed_input() {
        let result = EvalScenarios::model_validate_json("not valid json".to_string());
        assert!(result.is_err());
    }
}