scouter_evaluate/
scenario.rs1use crate::error::EvaluationError;
2use crate::evaluate::scenario_results::{EvalMetrics, ScenarioResult};
3use crate::evaluate::types::EvalResults;
4use crate::genai::EvalDataset;
5use potato_head::PyHelperFuncs;
6use pyo3::prelude::*;
7use scouter_types::genai::EvalScenario;
8use serde::{Deserialize, Serialize};
9use serde_json::Value;
10use std::collections::HashMap;
11
12#[pyclass]
19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct EvalScenarios {
21 #[pyo3(get)]
22 pub scenarios: Vec<EvalScenario>,
23
24 #[serde(skip)]
26 pub(crate) scenario_datasets: HashMap<String, HashMap<String, EvalDataset>>,
27 #[serde(skip)]
28 pub(crate) scenario_contexts: HashMap<String, Value>,
29
30 pub dataset_results: HashMap<String, EvalResults>,
32 pub scenario_results: Vec<ScenarioResult>,
33 #[pyo3(get)]
34 pub metrics: Option<EvalMetrics>,
35}
36
37#[pymethods]
38impl EvalScenarios {
39 #[new]
40 pub fn new(scenarios: Vec<EvalScenario>) -> Self {
41 Self {
42 scenarios,
43 scenario_datasets: HashMap::new(),
44 scenario_contexts: HashMap::new(),
45 dataset_results: HashMap::new(),
46 scenario_results: Vec::new(),
47 metrics: None,
48 }
49 }
50
51 #[getter]
52 pub fn dataset_results(&self) -> HashMap<String, EvalResults> {
53 self.dataset_results.clone()
54 }
55
56 #[getter]
57 pub fn scenario_results(&self) -> Vec<ScenarioResult> {
58 self.scenario_results.clone()
59 }
60
61 pub fn __len__(&self) -> usize {
62 self.scenarios.len()
63 }
64
65 pub fn __bool__(&self) -> bool {
66 !self.scenarios.is_empty()
67 }
68
69 pub fn is_evaluated(&self) -> bool {
70 self.metrics.is_some()
71 }
72
73 pub fn model_dump_json(&self) -> Result<String, EvaluationError> {
74 serde_json::to_string(self).map_err(Into::into)
75 }
76
77 #[staticmethod]
78 pub fn model_validate_json(json_string: String) -> Result<Self, EvaluationError> {
79 serde_json::from_str(&json_string).map_err(Into::into)
80 }
81
82 pub fn __str__(&self) -> String {
83 PyHelperFuncs::__str__(self)
84 }
85}
86
#[cfg(test)]
mod tests {
    use super::*;
    use scouter_types::genai::utils::AssertionTasks;

    /// Builds a minimal scenario with the given id and initial query, no
    /// predefined turns, no tasks, and a turn limit of 10.
    fn make_scenario(id: &str, query: &str) -> EvalScenario {
        EvalScenario {
            id: id.to_string(),
            initial_query: query.to_string(),
            predefined_turns: vec![],
            simulated_user_persona: None,
            termination_signal: None,
            max_turns: 10,
            expected_outcome: None,
            tasks: AssertionTasks {
                assertion: vec![],
                judge: vec![],
                trace: vec![],
                agent: vec![],
            },
            metadata: None,
        }
    }

    /// A freshly built collection reports its scenario count and is not yet
    /// evaluated.
    #[test]
    fn construction_and_len() {
        let scenarios = EvalScenarios::new(vec![
            make_scenario("s1", "Hello"),
            make_scenario("s2", "World"),
        ]);
        assert_eq!(scenarios.__len__(), 2);
        assert!(!scenarios.is_evaluated());
    }

    /// `is_evaluated` flips to `true` once `metrics` is set.
    #[test]
    fn is_evaluated_before_and_after() {
        let mut scenarios = EvalScenarios::new(vec![make_scenario("s1", "Hello")]);
        assert!(!scenarios.is_evaluated());

        scenarios.metrics = Some(EvalMetrics {
            overall_pass_rate: 1.0,
            dataset_pass_rates: HashMap::new(),
            scenario_pass_rate: 1.0,
            total_scenarios: 1,
            passed_scenarios: 1,
            scenario_task_pass_rates: HashMap::new(),
        });
        assert!(scenarios.is_evaluated());
    }

    /// `__bool__` is `false` for an empty collection and `true` otherwise.
    #[test]
    fn is_empty_true_and_false() {
        let empty = EvalScenarios::new(vec![]);
        assert!(!empty.__bool__());
        assert_eq!(empty.__len__(), 0);

        let non_empty = EvalScenarios::new(vec![make_scenario("s1", "Hello")]);
        assert!(non_empty.__bool__());
    }

    /// Dumping to JSON and loading back keeps the scenarios and drops the
    /// serde-skipped internal state.
    #[test]
    fn model_dump_json_roundtrip() {
        let mut scenarios = EvalScenarios::new(vec![make_scenario("s1", "Hello")]);
        // Populate a `#[serde(skip)]` field before dumping; otherwise the
        // emptiness check below would pass even if the field were serialized.
        scenarios
            .scenario_contexts
            .insert("s1".to_string(), Value::from("ctx"));

        let json = scenarios.model_dump_json().unwrap();
        // Load through the public entry point so it is covered as well.
        let loaded = EvalScenarios::model_validate_json(json).unwrap();

        assert_eq!(loaded.scenarios.len(), 1);
        assert_eq!(loaded.scenarios[0].id, "s1");
        // Skipped fields come back as their default (empty) value.
        assert!(loaded.scenario_datasets.is_empty());
        assert!(loaded.scenario_contexts.is_empty());
        // Serialized-but-empty state round-trips unchanged.
        assert!(loaded.dataset_results.is_empty());
        assert!(loaded.scenario_results.is_empty());
        assert!(!loaded.is_evaluated());
    }
}