use std::path::Path;
use crate::errors::{AigentError, Result};
use crate::parser::read_file_checked;
use crate::tester;
/// Aggregate outcome of running every query in a skill's `tests.yml`.
///
/// Serializable so callers can emit it in structured form; use
/// [`format_text`] for a human-readable report.
#[derive(Debug, serde::Serialize)]
pub struct TestSuiteResult {
/// Number of test cases that passed.
pub passed: usize,
/// Number of test cases that failed.
pub failed: usize,
/// Per-query results, in the order the queries appear in the fixture.
pub results: Vec<TestCaseResult>,
}
/// Outcome of a single fixture query.
#[derive(Debug, serde::Serialize)]
pub struct TestCaseResult {
/// The query text that was probed against the skill.
pub input: String,
/// Whether the fixture expected the query to match the skill.
pub should_match: bool,
/// Whether the probe actually reported a match (anything other than
/// `tester::QueryMatch::None`).
pub actual_match: bool,
/// Score reported by the probe for this query.
pub score: f64,
/// `true` when the expectation (and any minimum-score requirement) was met.
pub passed: bool,
/// Explanation of the failure; `None` for passing cases and omitted from
/// serialized output in that case.
#[serde(skip_serializing_if = "Option::is_none")]
pub reason: Option<String>,
}
/// Symbolic match-strength requirement that a fixture query may declare
/// instead of a numeric `min_score`.
///
/// Variants are (de)serialized in lowercase: `strong`, `weak`, `none`.
/// See [`MatchStrength::min_score`] for the numeric threshold of each.
#[derive(Debug, Clone, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "lowercase")]
pub enum MatchStrength {
/// Requires a score of at least 0.6.
Strong,
/// Requires a score of at least 0.3.
Weak,
/// Imposes no minimum score (threshold 0.0).
None,
}
impl MatchStrength {
#[must_use]
pub fn min_score(&self) -> f64 {
match self {
MatchStrength::Strong => 0.6,
MatchStrength::Weak => 0.3,
MatchStrength::None => 0.0,
}
}
}
/// Deserialized contents of a skill's `tests.yml` file.
#[derive(Debug, serde::Deserialize)]
struct TestFixture {
/// Queries to probe against the skill, in file order.
queries: Vec<TestQuery>,
}
/// One query entry in `tests.yml`.
///
/// `min_score` and `strength` are both optional. `run_test_suite` consults
/// them only for queries that are expected to match and did match, and an
/// explicit `min_score` takes precedence over `strength`.
#[derive(Debug, serde::Deserialize)]
struct TestQuery {
/// Query text to probe against the skill.
input: String,
/// Whether the query is expected to match the skill.
should_match: bool,
/// Explicit numeric minimum score; defaults to `None` when absent.
#[serde(default)]
min_score: Option<f64>,
/// Symbolic minimum, used when `min_score` is absent; defaults to `None`.
#[serde(default)]
strength: Option<MatchStrength>,
}
pub fn run_test_suite(skill_dir: &Path) -> Result<TestSuiteResult> {
let fixture_path = skill_dir.join("tests.yml");
if !fixture_path.exists() {
return Err(AigentError::Parse {
message: format!(
"no tests.yml found in {}. Use --generate to create one.",
skill_dir.display()
),
});
}
let content = read_file_checked(&fixture_path)?;
let fixture: TestFixture =
serde_yaml_ng::from_str(&content).map_err(|e| AigentError::Parse {
message: format!("invalid tests.yml: {e}"),
})?;
let mut results = Vec::new();
let mut passed = 0;
let mut failed = 0;
for query in &fixture.queries {
let probe_result = tester::test_skill(skill_dir, &query.input)?;
let actual_match = !matches!(probe_result.query_match, tester::QueryMatch::None);
let score = probe_result.score;
let mut case_passed = actual_match == query.should_match;
let mut reason = None;
if case_passed && query.should_match {
let effective_min = query
.min_score
.or_else(|| query.strength.as_ref().map(MatchStrength::min_score));
if let Some(min) = effective_min {
if score < min {
case_passed = false;
reason = Some(format!("score {score:.2} below minimum {min:.2}"));
}
}
}
if !case_passed && reason.is_none() {
reason = Some(format!(
"expected {} match, got {}",
if query.should_match { "a" } else { "no" },
if actual_match { "match" } else { "no match" },
));
}
if case_passed {
passed += 1;
} else {
failed += 1;
}
results.push(TestCaseResult {
input: query.input.clone(),
should_match: query.should_match,
actual_match,
score,
passed: case_passed,
reason,
});
}
Ok(TestSuiteResult {
passed,
failed,
results,
})
}
/// Serializable shape of the `tests.yml` document emitted by
/// `generate_fixture`.
#[derive(Debug, serde::Serialize)]
struct GeneratedFixture {
/// Generated queries, written in this order.
queries: Vec<GeneratedQuery>,
}
/// One generated query entry for `tests.yml`.
#[derive(Debug, serde::Serialize)]
struct GeneratedQuery {
/// Query text to probe against the skill.
input: String,
/// Whether the query is expected to match the skill.
should_match: bool,
/// Required match strength; left out of the YAML entirely when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
strength: Option<MatchStrength>,
}
/// Extracts the first sentence of `description`, without its final period.
///
/// A period ends a sentence only when it is followed by whitespace or the
/// end of the text, so periods inside tokens such as `SKILL.md`, `v1.2` or
/// `.NET` do not truncate the result. This is a heuristic: abbreviations
/// followed by a space (for example `e.g. `) still end the sentence.
/// Falls back to the whole trimmed description when no sentence end is found.
fn first_sentence(description: &str) -> &str {
    let mut chars = description.char_indices().peekable();
    while let Some((idx, ch)) = chars.next() {
        if ch != '.' {
            continue;
        }
        let ends_sentence = chars
            .peek()
            .map_or(true, |&(_, next)| next.is_whitespace());
        if !ends_sentence {
            continue;
        }
        // Drop any run of periods (an ellipsis) directly before the boundary.
        let candidate = description[..idx].trim_end_matches('.').trim();
        if !candidate.is_empty() {
            return candidate;
        }
    }
    description.trim().trim_end_matches('.').trim()
}

/// Generates a starter `tests.yml` document for the skill in `skill_dir`.
///
/// The fixture contains two queries: a positive one built from the
/// lowercased first sentence of the skill's description (required to match
/// with `strength: strong`), and a fixed negative query that must not match.
/// The returned string starts with a two-line comment header naming the
/// skill and showing how to run the suite.
///
/// # Errors
///
/// Propagates any error from `crate::read_properties`, and returns
/// [`AigentError::Parse`] if the fixture cannot be serialized to YAML.
pub fn generate_fixture(skill_dir: &Path) -> Result<String> {
    let props = crate::read_properties(skill_dir)?;
    // Splitting on every '.' would cut descriptions that mention file names
    // or versions ("Validates SKILL.md files" -> "validates skill").
    let positive = first_sentence(&props.description).to_lowercase();
    let fixture = GeneratedFixture {
        queries: vec![
            GeneratedQuery {
                input: positive,
                should_match: true,
                strength: Some(MatchStrength::Strong),
            },
            GeneratedQuery {
                input: "something completely unrelated to this skill".to_string(),
                should_match: false,
                strength: None,
            },
        ],
    };
    let yaml = serde_yaml_ng::to_string(&fixture).map_err(|e| AigentError::Parse {
        message: format!("failed to generate tests.yml: {e}"),
    })?;
    Ok(format!(
        "# Test fixture for {name}\n# Run with: aigent test {name}/\n{yaml}",
        name = props.name,
    ))
}
/// Renders a suite result as a plain-text report.
///
/// Each case produces a `[PASS]`/`[FAIL]` line with the quoted input and the
/// score to two decimals, followed by an arrow line carrying the failure
/// reason when one is present. The report ends with a blank line and a
/// `passed / failed / total` summary.
#[must_use]
pub fn format_text(result: &TestSuiteResult) -> String {
    let mut report: String = result
        .results
        .iter()
        .map(|case| {
            let label = match case.passed {
                true => "PASS",
                false => "FAIL",
            };
            let headline = format!(
                "[{label}] \"{}\" (score: {:.2})\n",
                case.input, case.score
            );
            match case.reason.as_deref() {
                Some(why) => format!("{headline} → {why}\n"),
                None => headline,
            }
        })
        .collect();

    let total = result.passed + result.failed;
    report.push_str(&format!(
        "\n{} passed, {} failed, {total} total\n",
        result.passed, result.failed
    ));
    report
}
/// Unit tests for the fixture runner, the fixture generator, the
/// `MatchStrength` thresholds and the text formatter.
// NOTE(review): the YAML fixtures below are whitespace-sensitive string
// literals; `should_match`, `min_score` and `strength` must be indented to
// line up under `input` to parse as keys of the same list item — confirm the
// literals in the file are indented that way.
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use tempfile::tempdir;
/// Creates `<tempdir>/<name>/` containing `SKILL.md` and `tests.yml` with the
/// given contents.
///
/// Returns the `TempDir` guard together with the skill directory path; the
/// caller must keep the guard alive for as long as the path is used.
fn make_skill_with_tests(
name: &str,
skill_content: &str,
tests_content: &str,
) -> (tempfile::TempDir, std::path::PathBuf) {
let parent = tempdir().unwrap();
let dir = parent.path().join(name);
fs::create_dir(&dir).unwrap();
fs::write(dir.join("SKILL.md"), skill_content).unwrap();
fs::write(dir.join("tests.yml"), tests_content).unwrap();
(parent, dir)
}
/// One positive and one negative query whose expectations are both met.
#[test]
fn run_suite_all_pass() {
let (_parent, dir) = make_skill_with_tests(
"my-skill",
"---\nname: my-skill\ndescription: Processes PDF files and generates reports. Use when working with documents.\n---\nBody.\n",
"queries:\n - input: \"process PDF files\"\n should_match: true\n - input: \"deploy kubernetes\"\n should_match: false\n",
);
let result = run_test_suite(&dir).unwrap();
assert_eq!(result.passed, 2);
assert_eq!(result.failed, 0);
}
/// A relevant query marked `should_match: false` is reported as a failure
/// with a reason attached.
#[test]
fn run_suite_with_failure() {
let (_parent, dir) = make_skill_with_tests(
"my-skill",
"---\nname: my-skill\ndescription: Processes PDF files and generates reports. Use when working with documents.\n---\nBody.\n",
"queries:\n - input: \"process PDF files\"\n should_match: false\n",
);
let result = run_test_suite(&dir).unwrap();
assert_eq!(result.failed, 1);
assert!(result.results[0].reason.is_some());
}
/// A `min_score` of 0.99 is expected to exceed the probe's score, so the
/// case fails with a "below minimum" reason.
#[test]
fn run_suite_min_score_check() {
let (_parent, dir) = make_skill_with_tests(
"my-skill",
"---\nname: my-skill\ndescription: Processes PDF files and generates reports. Use when working with documents.\n---\nBody.\n",
"queries:\n - input: \"process PDF files\"\n should_match: true\n min_score: 0.99\n",
);
let result = run_test_suite(&dir).unwrap();
assert_eq!(result.failed, 1);
assert!(result.results[0]
.reason
.as_ref()
.unwrap()
.contains("below minimum"));
}
/// A skill directory without `tests.yml` makes `run_test_suite` return an
/// error.
#[test]
fn missing_tests_yml_returns_error() {
let parent = tempdir().unwrap();
let dir = parent.path().join("no-tests");
fs::create_dir(&dir).unwrap();
fs::write(
dir.join("SKILL.md"),
"---\nname: no-tests\ndescription: Does things\n---\nBody.\n",
)
.unwrap();
let result = run_test_suite(&dir);
assert!(result.is_err());
}
/// The generated document contains both a positive and a negative query and
/// round-trips through the `TestFixture` deserializer.
#[test]
fn generate_fixture_produces_valid_yaml() {
let parent = tempdir().unwrap();
let dir = parent.path().join("gen-test");
fs::create_dir(&dir).unwrap();
fs::write(
dir.join("SKILL.md"),
"---\nname: gen-test\ndescription: Processes documents. Use when handling files.\n---\nBody.\n",
)
.unwrap();
let yaml = generate_fixture(&dir).unwrap();
assert!(yaml.contains("queries:"));
assert!(yaml.contains("should_match: true"));
assert!(yaml.contains("should_match: false"));
let fixture: TestFixture = serde_yaml_ng::from_str(&yaml).unwrap();
assert_eq!(fixture.queries.len(), 2);
}
/// `Strong` requires a score of at least 0.6.
#[test]
fn strength_strong_maps_to_min_score() {
assert_eq!(MatchStrength::Strong.min_score(), 0.6);
}
/// `Weak` requires a score of at least 0.3.
#[test]
fn strength_weak_maps_to_min_score() {
assert_eq!(MatchStrength::Weak.min_score(), 0.3);
}
/// `None` imposes no minimum (0.0).
#[test]
fn strength_none_maps_to_zero() {
assert_eq!(MatchStrength::None.min_score(), 0.0);
}
/// With `strength: strong`, any reported failure must be a score failure.
// NOTE(review): the assertion is conditional on `result.failed > 0`; if the
// probe's score clears the strong threshold this test asserts nothing, which
// does not match the test name — consider pinning the expected outcome.
#[test]
fn strength_strong_assertion_fails_on_low_score() {
let (_parent, dir) = make_skill_with_tests(
"my-skill",
"---\nname: my-skill\ndescription: Processes PDF files and generates reports. Use when working with documents.\n---\nBody.\n",
"queries:\n - input: \"process PDF files\"\n should_match: true\n strength: strong\n",
);
let result = run_test_suite(&dir).unwrap();
if result.failed > 0 {
assert!(result.results[0]
.reason
.as_ref()
.unwrap()
.contains("below minimum"));
}
}
/// With `strength: weak`, a relevant query is expected to pass.
#[test]
fn strength_weak_assertion_passes_on_moderate_score() {
let (_parent, dir) = make_skill_with_tests(
"my-skill",
"---\nname: my-skill\ndescription: Processes PDF files and generates reports. Use when working with documents.\n---\nBody.\n",
"queries:\n - input: \"process PDF files\"\n should_match: true\n strength: weak\n",
);
let result = run_test_suite(&dir).unwrap();
assert_eq!(
result.passed, 1,
"strength: weak should pass for relevant query"
);
}
/// When both are given, the explicit `min_score` (0.99) is enforced rather
/// than the threshold implied by `strength: weak`.
#[test]
fn min_score_takes_precedence_over_strength() {
let (_parent, dir) = make_skill_with_tests(
"my-skill",
"---\nname: my-skill\ndescription: Processes PDF files and generates reports. Use when working with documents.\n---\nBody.\n",
"queries:\n - input: \"process PDF files\"\n should_match: true\n min_score: 0.99\n strength: weak\n",
);
let result = run_test_suite(&dir).unwrap();
assert_eq!(result.failed, 1);
assert!(result.results[0].reason.as_ref().unwrap().contains("0.99"));
}
/// The generator expresses the positive query's requirement as
/// `strength: strong` and never emits a numeric `min_score`.
#[test]
fn generate_fixture_emits_strength() {
let parent = tempdir().unwrap();
let dir = parent.path().join("gen-strength");
fs::create_dir(&dir).unwrap();
fs::write(
dir.join("SKILL.md"),
"---\nname: gen-strength\ndescription: Processes documents. Use when handling files.\n---\nBody.\n",
)
.unwrap();
let yaml = generate_fixture(&dir).unwrap();
assert!(
yaml.contains("strength: strong"),
"generated fixture should use strength, got:\n{yaml}"
);
assert!(
!yaml.contains("min_score"),
"generated fixture should not use min_score"
);
}
/// The lowercase YAML value `weak` deserializes to `MatchStrength::Weak`.
#[test]
fn strength_deserializes_from_yaml() {
let yaml = "queries:\n - input: test\n should_match: true\n strength: weak\n";
let fixture: TestFixture = serde_yaml_ng::from_str(yaml).unwrap();
assert_eq!(fixture.queries[0].strength, Some(MatchStrength::Weak));
}
/// The text report shows a status tag per case and the summary counts.
#[test]
fn format_text_shows_pass_fail() {
let result = TestSuiteResult {
passed: 1,
failed: 1,
results: vec![
TestCaseResult {
input: "query one".into(),
should_match: true,
actual_match: true,
score: 0.75,
passed: true,
reason: None,
},
TestCaseResult {
input: "query two".into(),
should_match: true,
actual_match: false,
score: 0.1,
passed: false,
reason: Some("expected a match, got no match".into()),
},
],
};
let text = format_text(&result);
assert!(text.contains("[PASS]"));
assert!(text.contains("[FAIL]"));
assert!(text.contains("1 passed, 1 failed"));
}
}