use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
use crate::error::{Error, Result};
use crate::eval::Eval;
/// Configuration of the simulated user attached to a [`TestCase`] through its
/// optional `user` block.
///
/// NOTE(review): unlike [`TestCase`], this struct does not set
/// `#[serde(deny_unknown_fields)]`, so a misspelled key inside `user:` is
/// ignored rather than rejected — confirm that this is intended.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SimulatedUser {
    /// Persona text for the simulated user. Required; `TestCase::validate`
    /// rejects a value that is empty or whitespace-only.
    pub persona: String,
    /// Optional free-text field; `None` when the key is absent.
    /// Presumably the condition under which the conversation counts as
    /// finished — TODO confirm against the code that consumes it.
    #[serde(default)]
    pub done_when: Option<String>,
    /// Optional turn limit; `None` when the key is absent.
    /// `TestCase::validate` rejects an explicit `0`.
    #[serde(default)]
    pub max_turns: Option<u32>,
}
/// One test case as deserialized from a YAML document.
///
/// Keys that do not correspond to a field are rejected at deserialization
/// time because of `deny_unknown_fields`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct TestCase {
    /// Name of the case. Deserializes to an empty string when absent;
    /// `TestCase::load` then substitutes the file stem of the case file.
    #[serde(default)]
    pub name: String,
    /// Path to the skill under test. `TestCase::load` resolves a relative
    /// value against the directory that contains the case file.
    pub skill: PathBuf,
    /// Input text for the case. Required; `validate` rejects a value that is
    /// empty or whitespace-only.
    pub input: String,
    /// Optional simulated-user block. Its presence is what `is_multi_turn`
    /// reports.
    #[serde(default)]
    pub user: Option<SimulatedUser>,
    /// Evaluations attached to the case. Required; `validate` rejects an
    /// empty list and runs each entry's own `validate`.
    pub evals: Vec<Eval>,
}
impl TestCase {
pub fn load(path: &Path) -> Result<Self> {
let text = std::fs::read_to_string(path).map_err(|source| Error::Io {
path: path.to_path_buf(),
source,
})?;
let mut case: TestCase = serde_yaml::from_str(&text).map_err(|source| Error::Yaml {
path: path.to_path_buf(),
source,
})?;
if case.name.is_empty() {
case.name = path
.file_stem()
.and_then(|s| s.to_str())
.unwrap_or("case")
.to_string();
}
if let Some(parent) = path.parent() {
if case.skill.is_relative() {
case.skill = parent.join(&case.skill);
}
}
case.validate()?;
Ok(case)
}
#[must_use]
pub fn is_multi_turn(&self) -> bool {
self.user.is_some()
}
pub fn validate(&self) -> Result<()> {
if self.input.trim().is_empty() {
return Err(Error::Invalid(format!(
"test case `{}` has an empty `input`",
self.name
)));
}
if self.evals.is_empty() {
return Err(Error::Invalid(format!(
"test case `{}` defines no `evals`",
self.name
)));
}
for eval in &self.evals {
eval.validate()?;
}
if let Some(user) = &self.user {
if user.persona.trim().is_empty() {
return Err(Error::Invalid(format!(
"test case `{}` has a `user` block with an empty `persona`",
self.name
)));
}
if user.max_turns == Some(0) {
return Err(Error::Invalid(format!(
"test case `{}` sets `user.max_turns` to 0",
self.name
)));
}
}
Ok(())
}
}
pub fn discover_cases(path: &Path) -> Result<Vec<PathBuf>> {
if path.is_file() {
return Ok(vec![path.to_path_buf()]);
}
if path.is_dir() {
let entries = std::fs::read_dir(path).map_err(|source| Error::Io {
path: path.to_path_buf(),
source,
})?;
let mut files: Vec<PathBuf> = entries
.filter_map(std::result::Result::ok)
.map(|e| e.path())
.filter(|p| {
p.is_file()
&& matches!(p.extension().and_then(|s| s.to_str()), Some("yaml" | "yml"))
})
.collect();
files.sort();
if files.is_empty() {
return Err(Error::Invalid(format!(
"no .yaml test cases found in {}",
path.display()
)));
}
return Ok(files);
}
Err(Error::Invalid(format!(
"path does not exist: {}",
path.display()
)))
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::eval::Eval;

    // The YAML fixtures below depend on indentation: the keys of an eval must
    // sit under its `- ` list item and the keys of `user` must sit under
    // `user:`. Written flush with their parent they parse as top-level keys,
    // which `deny_unknown_fields` on `TestCase` rejects.

    /// A case without a `user` block is single-turn and keeps its one eval.
    #[test]
    fn parses_single_turn_case() {
        let yaml = r#"
            skill: ./greeter
            input: "Greet Dr. Smith"
            evals:
              - type: boolean
                criterion: "greets Dr. Smith by name"
        "#;
        let case: TestCase = serde_yaml::from_str(yaml).unwrap();
        assert!(!case.is_multi_turn());
        assert_eq!(case.evals.len(), 1);
        assert!(matches!(case.evals[0], Eval::Boolean { .. }));
    }

    /// A case with a `user` block is multi-turn, exposes the block's fields,
    /// and passes validation.
    #[test]
    fn parses_multi_turn_case() {
        let yaml = r#"
            name: booking
            skill: ./booker
            input: "I want to book an appointment"
            user:
              persona: "You are a terse patient."
              done_when: "the assistant has confirmed a booking"
              max_turns: 5
            evals:
              - type: numeric
                criterion: "how clearly was the appointment confirmed"
                min: 0
                max: 10
                threshold: 7
        "#;
        let case: TestCase = serde_yaml::from_str(yaml).unwrap();
        assert!(case.is_multi_turn());
        assert_eq!(case.user.as_ref().unwrap().max_turns, Some(5));
        case.validate().unwrap();
    }

    /// An empty `evals` list deserializes but fails validation.
    #[test]
    fn empty_evals_is_invalid() {
        let yaml = "skill: ./x\ninput: hi\nevals: []\n";
        let case: TestCase = serde_yaml::from_str(yaml).unwrap();
        assert!(case.validate().is_err());
    }

    /// A top-level key that is not a `TestCase` field is a deserialization
    /// error.
    #[test]
    fn unknown_field_is_rejected() {
        let yaml = "skill: ./x\ninput: hi\nbogus: 1\nevals: []\n";
        assert!(serde_yaml::from_str::<TestCase>(yaml).is_err());
    }
}