skilltest_core/
testcase.rs1use std::path::{Path, PathBuf};
6
7use serde::{Deserialize, Serialize};
8
9use crate::error::{Error, Result};
10use crate::eval::Eval;
11
/// Settings for the simulated user that takes part in a multi-turn test case.
///
/// A [`TestCase`] whose `user` field holds one of these is reported as
/// multi-turn by [`TestCase::is_multi_turn`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SimulatedUser {
    /// Persona text for the simulated user. [`TestCase::validate`] rejects a
    /// value that is empty or whitespace-only.
    pub persona: String,
    /// Optional free-form text; may be omitted from the YAML.
    ///
    /// NOTE(review): presumably describes the condition under which the
    /// conversation counts as finished — it is not interpreted in this file,
    /// confirm against the consumer.
    #[serde(default)]
    pub done_when: Option<String>,
    /// Optional turn limit; may be omitted from the YAML. An explicit `0` is
    /// rejected by [`TestCase::validate`].
    #[serde(default)]
    pub max_turns: Option<u32>,
}
28
/// One test case, as deserialized from a YAML document.
///
/// Keys that do not correspond to a field are rejected during
/// deserialization (`deny_unknown_fields`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct TestCase {
    /// Name of the case. May be omitted from the YAML (defaults to an empty
    /// string); [`TestCase::load`] then substitutes the file stem.
    #[serde(default)]
    pub name: String,
    /// Path to the skill under test. [`TestCase::load`] resolves a relative
    /// path against the directory containing the case file.
    pub skill: PathBuf,
    /// Input text for the case. Must contain something other than whitespace
    /// to pass [`TestCase::validate`].
    pub input: String,
    /// Optional simulated-user block; when present the case is multi-turn.
    #[serde(default)]
    pub user: Option<SimulatedUser>,
    /// Evaluations attached to the case. [`TestCase::validate`] requires at
    /// least one and validates each of them.
    pub evals: Vec<Eval>,
}
46
47impl TestCase {
48 pub fn load(path: &Path) -> Result<Self> {
55 let text = std::fs::read_to_string(path).map_err(|source| Error::Io {
56 path: path.to_path_buf(),
57 source,
58 })?;
59 let mut case: TestCase = serde_yaml::from_str(&text).map_err(|source| Error::Yaml {
60 path: path.to_path_buf(),
61 source,
62 })?;
63 if case.name.is_empty() {
64 case.name = path
65 .file_stem()
66 .and_then(|s| s.to_str())
67 .unwrap_or("case")
68 .to_string();
69 }
70 if let Some(parent) = path.parent() {
71 if case.skill.is_relative() {
72 case.skill = parent.join(&case.skill);
73 }
74 }
75 case.validate()?;
76 Ok(case)
77 }
78
79 #[must_use]
81 pub fn is_multi_turn(&self) -> bool {
82 self.user.is_some()
83 }
84
85 pub fn validate(&self) -> Result<()> {
90 if self.input.trim().is_empty() {
91 return Err(Error::Invalid(format!(
92 "test case `{}` has an empty `input`",
93 self.name
94 )));
95 }
96 if self.evals.is_empty() {
97 return Err(Error::Invalid(format!(
98 "test case `{}` defines no `evals`",
99 self.name
100 )));
101 }
102 for eval in &self.evals {
103 eval.validate()?;
104 }
105 if let Some(user) = &self.user {
106 if user.persona.trim().is_empty() {
107 return Err(Error::Invalid(format!(
108 "test case `{}` has a `user` block with an empty `persona`",
109 self.name
110 )));
111 }
112 if user.max_turns == Some(0) {
113 return Err(Error::Invalid(format!(
114 "test case `{}` sets `user.max_turns` to 0",
115 self.name
116 )));
117 }
118 }
119 Ok(())
120 }
121}
122
123pub fn discover_cases(path: &Path) -> Result<Vec<PathBuf>> {
130 if path.is_file() {
131 return Ok(vec![path.to_path_buf()]);
132 }
133 if path.is_dir() {
134 let entries = std::fs::read_dir(path).map_err(|source| Error::Io {
135 path: path.to_path_buf(),
136 source,
137 })?;
138 let mut files: Vec<PathBuf> = entries
139 .filter_map(std::result::Result::ok)
140 .map(|e| e.path())
141 .filter(|p| {
142 p.is_file()
143 && matches!(p.extension().and_then(|s| s.to_str()), Some("yaml" | "yml"))
144 })
145 .collect();
146 files.sort();
147 if files.is_empty() {
148 return Err(Error::Invalid(format!(
149 "no .yaml test cases found in {}",
150 path.display()
151 )));
152 }
153 return Ok(files);
154 }
155 Err(Error::Invalid(format!(
156 "path does not exist: {}",
157 path.display()
158 )))
159}
160
// Unit tests covering YAML deserialization and validation of `TestCase`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::eval::Eval;

    // A document without a `user` block parses as a single-turn case and
    // keeps its one boolean eval.
    #[test]
    fn parses_single_turn_case() {
        let yaml = r#"
skill: ./greeter
input: "Greet Dr. Smith"
evals:
  - type: boolean
    criterion: "greets Dr. Smith by name"
"#;
        let case: TestCase = serde_yaml::from_str(yaml).unwrap();
        assert!(!case.is_multi_turn());
        assert_eq!(case.evals.len(), 1);
        assert!(matches!(case.evals[0], Eval::Boolean { .. }));
    }

    // A document with a `user` block parses as multi-turn, exposes the
    // configured turn limit, and passes validation.
    #[test]
    fn parses_multi_turn_case() {
        let yaml = r#"
name: booking
skill: ./booker
input: "I want to book an appointment"
user:
  persona: "You are a terse patient."
  done_when: "the assistant has confirmed a booking"
  max_turns: 5
evals:
  - type: numeric
    criterion: "how clearly was the appointment confirmed"
    min: 0
    max: 10
    threshold: 7
"#;
        let case: TestCase = serde_yaml::from_str(yaml).unwrap();
        assert!(case.is_multi_turn());
        assert_eq!(case.user.as_ref().unwrap().max_turns, Some(5));
        case.validate().unwrap();
    }

    // An empty `evals` list deserializes fine but is rejected by `validate`.
    #[test]
    fn empty_evals_is_invalid() {
        let yaml = "skill: ./x\ninput: hi\nevals: []\n";
        let case: TestCase = serde_yaml::from_str(yaml).unwrap();
        assert!(case.validate().is_err());
    }

    // `deny_unknown_fields` makes an unrecognized key a deserialization error.
    #[test]
    fn unknown_field_is_rejected() {
        let yaml = "skill: ./x\ninput: hi\nbogus: 1\nevals: []\n";
        assert!(serde_yaml::from_str::<TestCase>(yaml).is_err());
    }
}