#[cfg(test)]
use crate::FileError;
use crate::{
CoreError, Diagnostic, LintConfig, ValidationOutcome, file_utils::safe_read_file, validate_file,
};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvalCase {
pub file: PathBuf,
pub expected: Vec<String>,
#[serde(default)]
pub description: Option<String>,
}
#[derive(Debug, Clone, Serialize)]
pub struct EvalResult {
pub case: EvalCase,
pub actual: Vec<String>,
pub true_positives: Vec<String>,
pub false_positives: Vec<String>,
pub false_negatives: Vec<String>,
}
impl EvalResult {
pub fn passed(&self) -> bool {
self.false_positives.is_empty() && self.false_negatives.is_empty()
}
}
#[derive(Debug, Clone, Default, Serialize)]
pub struct RuleMetrics {
pub rule_id: String,
pub tp: usize,
pub fp: usize,
pub fn_count: usize,
}
impl RuleMetrics {
pub fn new(rule_id: impl Into<String>) -> Self {
Self {
rule_id: rule_id.into(),
tp: 0,
fp: 0,
fn_count: 0,
}
}
pub fn precision(&self) -> f64 {
let denom = self.tp + self.fp;
if denom == 0 {
1.0
} else {
self.tp as f64 / denom as f64
}
}
pub fn recall(&self) -> f64 {
let denom = self.tp + self.fn_count;
if denom == 0 {
1.0
} else {
self.tp as f64 / denom as f64
}
}
pub fn f1(&self) -> f64 {
let p = self.precision();
let r = self.recall();
let denom = p + r;
if denom == 0.0 {
0.0
} else {
2.0 * p * r / denom
}
}
}
#[derive(Debug, Clone, Serialize)]
pub struct EvalSummary {
pub cases_run: usize,
pub cases_passed: usize,
pub cases_failed: usize,
pub rules: HashMap<String, RuleMetrics>,
pub overall_precision: f64,
pub overall_recall: f64,
pub overall_f1: f64,
}
impl EvalSummary {
pub fn from_results(results: &[EvalResult]) -> Self {
let mut rules: HashMap<String, RuleMetrics> = HashMap::new();
for result in results {
for rule_id in &result.true_positives {
rules
.entry(rule_id.clone())
.or_insert_with(|| RuleMetrics::new(rule_id))
.tp += 1;
}
for rule_id in &result.false_positives {
rules
.entry(rule_id.clone())
.or_insert_with(|| RuleMetrics::new(rule_id))
.fp += 1;
}
for rule_id in &result.false_negatives {
rules
.entry(rule_id.clone())
.or_insert_with(|| RuleMetrics::new(rule_id))
.fn_count += 1;
}
}
let total_tp: usize = rules.values().map(|m| m.tp).sum();
let total_fp: usize = rules.values().map(|m| m.fp).sum();
let total_fn: usize = rules.values().map(|m| m.fn_count).sum();
let overall_precision = if total_tp + total_fp == 0 {
1.0
} else {
total_tp as f64 / (total_tp + total_fp) as f64
};
let overall_recall = if total_tp + total_fn == 0 {
1.0
} else {
total_tp as f64 / (total_tp + total_fn) as f64
};
let overall_f1 = if overall_precision + overall_recall == 0.0 {
0.0
} else {
2.0 * overall_precision * overall_recall / (overall_precision + overall_recall)
};
let cases_passed = results.iter().filter(|r| r.passed()).count();
Self {
cases_run: results.len(),
cases_passed,
cases_failed: results.len() - cases_passed,
rules,
overall_precision,
overall_recall,
overall_f1,
}
}
pub fn to_json(&self) -> serde_json::Result<String> {
serde_json::to_string_pretty(self)
}
pub fn to_csv(&self) -> String {
let mut lines = vec!["rule_id,tp,fp,fn,precision,recall,f1".to_string()];
let mut sorted_rules: Vec<_> = self.rules.iter().collect();
sorted_rules.sort_by_key(|(k, _)| *k);
for (rule_id, metrics) in sorted_rules {
lines.push(format!(
"{},{},{},{},{:.4},{:.4},{:.4}",
rule_id,
metrics.tp,
metrics.fp,
metrics.fn_count,
metrics.precision(),
metrics.recall(),
metrics.f1()
));
}
let total_tp: usize = self.rules.values().map(|m| m.tp).sum();
let total_fp: usize = self.rules.values().map(|m| m.fp).sum();
let total_fn: usize = self.rules.values().map(|m| m.fn_count).sum();
lines.push(format!(
"OVERALL,{},{},{},{:.4},{:.4},{:.4}",
total_tp,
total_fp,
total_fn,
self.overall_precision,
self.overall_recall,
self.overall_f1
));
lines.join("\n")
}
pub fn to_markdown(&self) -> String {
let mut lines = vec![
format!("## Evaluation Summary"),
String::new(),
format!(
"**Cases**: {} run, {} passed, {} failed",
self.cases_run, self.cases_passed, self.cases_failed
),
format!(
"**Overall**: precision={:.2}%, recall={:.2}%, F1={:.2}%",
self.overall_precision * 100.0,
self.overall_recall * 100.0,
self.overall_f1 * 100.0
),
String::new(),
"### Per-Rule Metrics".to_string(),
String::new(),
"| Rule | TP | FP | FN | Precision | Recall | F1 |".to_string(),
"|------|----|----|----|-----------:|-------:|----:|".to_string(),
];
let mut sorted_rules: Vec<_> = self.rules.iter().collect();
sorted_rules.sort_by_key(|(k, _)| *k);
for (rule_id, metrics) in sorted_rules {
lines.push(format!(
"| {} | {} | {} | {} | {:.2}% | {:.2}% | {:.2}% |",
rule_id,
metrics.tp,
metrics.fp,
metrics.fn_count,
metrics.precision() * 100.0,
metrics.recall() * 100.0,
metrics.f1() * 100.0
));
}
lines.join("\n")
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvalManifest {
pub cases: Vec<EvalCase>,
}
impl EvalManifest {
pub fn load<P: AsRef<Path>>(path: P) -> Result<Self, EvalError> {
let content = safe_read_file(path.as_ref()).map_err(|e| EvalError::Read {
path: path.as_ref().to_path_buf(),
source: e,
})?;
serde_yaml::from_str(&content).map_err(|e| EvalError::Parse {
path: path.as_ref().to_path_buf(),
message: e.to_string(),
})
}
fn base_dir<P: AsRef<Path>>(manifest_path: P) -> PathBuf {
manifest_path
.as_ref()
.parent()
.map(|p| p.to_path_buf())
.unwrap_or_else(|| PathBuf::from("."))
}
}
#[derive(Debug, thiserror::Error)]
pub enum EvalError {
#[error("Failed to read manifest: {path}")]
Read {
path: PathBuf,
#[source]
source: CoreError,
},
#[error("Failed to read file: {path}")]
Io {
path: PathBuf,
#[source]
source: std::io::Error,
},
#[error("Failed to parse manifest {path}: {message}")]
Parse { path: PathBuf, message: String },
#[error("Validation error for {path}: {message}")]
Validation { path: PathBuf, message: String },
#[error("Path traversal attempt detected: {path} escapes base directory {base_dir}")]
PathTraversal { path: PathBuf, base_dir: PathBuf },
}
fn validate_path_within_base(file: &Path, base_dir: &Path) -> Result<PathBuf, EvalError> {
if file.is_absolute() {
return Err(EvalError::PathTraversal {
path: file.to_path_buf(),
base_dir: base_dir.to_path_buf(),
});
}
let joined = base_dir.join(file);
let canonical_file = joined.canonicalize().map_err(|e| EvalError::Io {
path: joined.clone(),
source: e,
})?;
let canonical_base = base_dir.canonicalize().map_err(|e| EvalError::Io {
path: base_dir.to_path_buf(),
source: e,
})?;
if !canonical_file.starts_with(&canonical_base) {
return Err(EvalError::PathTraversal {
path: file.to_path_buf(),
base_dir: base_dir.to_path_buf(),
});
}
Ok(canonical_file)
}
pub fn evaluate_case(case: &EvalCase, base_dir: &Path, config: &LintConfig) -> EvalResult {
let file_path = match validate_path_within_base(&case.file, base_dir) {
Ok(path) => path,
Err(_) => {
return EvalResult {
case: case.clone(),
actual: vec!["eval::path-traversal".to_string()],
true_positives: vec![],
false_positives: vec!["eval::path-traversal".to_string()],
false_negatives: case.expected.clone(),
};
}
};
let diagnostics = match validate_file(&file_path, config) {
Ok(ValidationOutcome::Success(diags)) => diags,
Ok(ValidationOutcome::IoError(file_error)) => {
vec![Diagnostic::error(
file_path.clone(),
0,
0,
"eval::io-error",
format!("File I/O error: {}", file_error),
)]
}
Ok(ValidationOutcome::Skipped) => vec![],
Err(e) => {
vec![Diagnostic::error(
file_path.clone(),
0,
0,
"eval::error",
format!("Validation failed: {}", e),
)]
}
};
let actual: Vec<String> = diagnostics
.iter()
.map(|d| d.rule.clone())
.collect::<HashSet<_>>()
.into_iter()
.collect();
let expected_set: HashSet<&str> = case.expected.iter().map(|s| s.as_str()).collect();
let actual_set: HashSet<&str> = actual.iter().map(|s| s.as_str()).collect();
let true_positives: Vec<String> = expected_set
.intersection(&actual_set)
.map(|s| s.to_string())
.collect();
let false_positives: Vec<String> = actual_set
.difference(&expected_set)
.map(|s| s.to_string())
.collect();
let false_negatives: Vec<String> = expected_set
.difference(&actual_set)
.map(|s| s.to_string())
.collect();
EvalResult {
case: case.clone(),
actual,
true_positives,
false_positives,
false_negatives,
}
}
pub fn evaluate_manifest(
manifest: &EvalManifest,
base_dir: &Path,
config: &LintConfig,
filter: Option<&str>,
) -> Vec<EvalResult> {
manifest
.cases
.iter()
.filter(|case| {
match filter {
Some(f) => {
case.expected.is_empty() || case.expected.iter().any(|rule| rule.starts_with(f))
}
None => true,
}
})
.map(|case| evaluate_case(case, base_dir, config))
.collect()
}
pub fn evaluate_manifest_file<P: AsRef<Path>>(
manifest_path: P,
config: &LintConfig,
filter: Option<&str>,
) -> Result<(Vec<EvalResult>, EvalSummary), EvalError> {
let manifest = EvalManifest::load(&manifest_path)?;
let base_dir = EvalManifest::base_dir(&manifest_path);
let results = evaluate_manifest(&manifest, &base_dir, config, filter);
let summary = EvalSummary::from_results(&results);
Ok((results, summary))
}
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum EvalFormat {
#[default]
Markdown,
Json,
Csv,
}
impl std::str::FromStr for EvalFormat {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"markdown" | "md" => Ok(EvalFormat::Markdown),
"json" => Ok(EvalFormat::Json),
"csv" => Ok(EvalFormat::Csv),
_ => Err(format!(
"Unknown format: {}. Use markdown, json, or csv.",
s
)),
}
}
}
impl std::fmt::Display for EvalFormat {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
EvalFormat::Markdown => write!(f, "markdown"),
EvalFormat::Json => write!(f, "json"),
EvalFormat::Csv => write!(f, "csv"),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_rule_metrics_precision() {
let mut m = RuleMetrics::new("TEST-001");
m.tp = 8;
m.fp = 2;
m.fn_count = 0;
assert!((m.precision() - 0.8).abs() < 0.001);
}
#[test]
fn test_rule_metrics_recall() {
let mut m = RuleMetrics::new("TEST-001");
m.tp = 8;
m.fp = 0;
m.fn_count = 2;
assert!((m.recall() - 0.8).abs() < 0.001);
}
#[test]
fn test_rule_metrics_f1() {
let mut m = RuleMetrics::new("TEST-001");
m.tp = 8;
m.fp = 2;
m.fn_count = 2;
assert!((m.f1() - 0.8).abs() < 0.001);
}
#[test]
fn test_rule_metrics_zero_division() {
let m = RuleMetrics::new("TEST-001");
assert!((m.precision() - 1.0).abs() < 0.001);
assert!((m.recall() - 1.0).abs() < 0.001);
}
#[test]
fn test_rule_metrics_f1_zero() {
let mut m = RuleMetrics::new("TEST-001");
m.tp = 0;
m.fp = 5;
m.fn_count = 5;
assert!((m.f1() - 0.0).abs() < 0.001);
}
#[test]
fn test_eval_result_passed() {
let result = EvalResult {
case: EvalCase {
file: PathBuf::from("test.md"),
expected: vec!["AS-001".to_string()],
description: None,
},
actual: vec!["AS-001".to_string()],
true_positives: vec!["AS-001".to_string()],
false_positives: vec![],
false_negatives: vec![],
};
assert!(result.passed());
}
#[test]
fn test_eval_result_failed_fp() {
let result = EvalResult {
case: EvalCase {
file: PathBuf::from("test.md"),
expected: vec!["AS-001".to_string()],
description: None,
},
actual: vec!["AS-001".to_string(), "AS-002".to_string()],
true_positives: vec!["AS-001".to_string()],
false_positives: vec!["AS-002".to_string()],
false_negatives: vec![],
};
assert!(!result.passed());
}
#[test]
fn test_eval_result_failed_fn() {
let result = EvalResult {
case: EvalCase {
file: PathBuf::from("test.md"),
expected: vec!["AS-001".to_string(), "AS-002".to_string()],
description: None,
},
actual: vec!["AS-001".to_string()],
true_positives: vec!["AS-001".to_string()],
false_positives: vec![],
false_negatives: vec!["AS-002".to_string()],
};
assert!(!result.passed());
}
#[test]
fn test_eval_summary_from_results() {
let results = vec![
EvalResult {
case: EvalCase {
file: PathBuf::from("test1.md"),
expected: vec!["AS-001".to_string()],
description: None,
},
actual: vec!["AS-001".to_string()],
true_positives: vec!["AS-001".to_string()],
false_positives: vec![],
false_negatives: vec![],
},
EvalResult {
case: EvalCase {
file: PathBuf::from("test2.md"),
expected: vec!["AS-001".to_string()],
description: None,
},
actual: vec!["AS-001".to_string(), "AS-002".to_string()],
true_positives: vec!["AS-001".to_string()],
false_positives: vec!["AS-002".to_string()],
false_negatives: vec![],
},
];
let summary = EvalSummary::from_results(&results);
assert_eq!(summary.cases_run, 2);
assert_eq!(summary.cases_passed, 1);
assert_eq!(summary.cases_failed, 1);
let as_001 = summary.rules.get("AS-001").unwrap();
assert_eq!(as_001.tp, 2);
assert_eq!(as_001.fp, 0);
assert_eq!(as_001.fn_count, 0);
let as_002 = summary.rules.get("AS-002").unwrap();
assert_eq!(as_002.tp, 0);
assert_eq!(as_002.fp, 1);
assert_eq!(as_002.fn_count, 0);
}
#[test]
fn test_eval_manifest_parse() {
let yaml = r#"
cases:
- file: test1.md
expected: [AS-001]
description: "Test case 1"
- file: test2.md
expected: [AS-002, AS-003]
"#;
let manifest: EvalManifest = serde_yaml::from_str(yaml).unwrap();
assert_eq!(manifest.cases.len(), 2);
assert_eq!(manifest.cases[0].expected, vec!["AS-001"]);
assert_eq!(manifest.cases[1].expected, vec!["AS-002", "AS-003"]);
}
#[test]
fn test_eval_summary_to_csv() {
let results = vec![EvalResult {
case: EvalCase {
file: PathBuf::from("test.md"),
expected: vec!["AS-001".to_string()],
description: None,
},
actual: vec!["AS-001".to_string()],
true_positives: vec!["AS-001".to_string()],
false_positives: vec![],
false_negatives: vec![],
}];
let summary = EvalSummary::from_results(&results);
let csv = summary.to_csv();
assert!(csv.contains("rule_id,tp,fp,fn,precision,recall,f1"));
assert!(csv.contains("AS-001,1,0,0"));
assert!(csv.contains("OVERALL"));
}
#[test]
fn test_eval_summary_to_markdown() {
let results = vec![EvalResult {
case: EvalCase {
file: PathBuf::from("test.md"),
expected: vec!["AS-001".to_string()],
description: None,
},
actual: vec!["AS-001".to_string()],
true_positives: vec!["AS-001".to_string()],
false_positives: vec![],
false_negatives: vec![],
}];
let summary = EvalSummary::from_results(&results);
let md = summary.to_markdown();
assert!(md.contains("## Evaluation Summary"));
assert!(md.contains("| Rule | TP | FP | FN |"));
assert!(md.contains("| AS-001 |"));
}
#[test]
fn test_eval_format_from_str() {
assert_eq!(
"markdown".parse::<EvalFormat>().unwrap(),
EvalFormat::Markdown
);
assert_eq!("md".parse::<EvalFormat>().unwrap(), EvalFormat::Markdown);
assert_eq!("json".parse::<EvalFormat>().unwrap(), EvalFormat::Json);
assert_eq!("csv".parse::<EvalFormat>().unwrap(), EvalFormat::Csv);
assert!("invalid".parse::<EvalFormat>().is_err());
}
#[test]
fn test_evaluate_case_with_fixture() {
let temp = tempfile::TempDir::new().unwrap();
let skill_path = temp.path().join("SKILL.md");
std::fs::write(
&skill_path,
"---\nname: deploy-prod\ndescription: Deploys to production\n---\nBody",
)
.unwrap();
let case = EvalCase {
file: PathBuf::from("SKILL.md"),
expected: vec!["CC-SK-006".to_string()],
description: Some("Dangerous skill name".to_string()),
};
let config = LintConfig::default();
let result = evaluate_case(&case, temp.path(), &config);
assert!(
result.true_positives.contains(&"CC-SK-006".to_string()),
"Expected CC-SK-006 in true_positives, got: {:?}",
result
);
}
#[test]
fn test_evaluate_case_empty_expected() {
let temp = tempfile::TempDir::new().unwrap();
let skill_path = temp.path().join("SKILL.md");
std::fs::write(
&skill_path,
"---\nname: code-review\ndescription: Use when reviewing code\n---\nBody",
)
.unwrap();
let case = EvalCase {
file: PathBuf::from("SKILL.md"),
expected: vec![],
description: Some("Valid skill, no rules expected".to_string()),
};
let config = LintConfig::default();
let result = evaluate_case(&case, temp.path(), &config);
assert!(
result.false_negatives.is_empty(),
"Should have no false negatives"
);
assert!(result.true_positives.is_empty());
}
#[test]
fn test_eval_manifest_load_file_not_found() {
let result = EvalManifest::load("nonexistent-manifest-file.yaml");
assert!(result.is_err());
match result {
Err(EvalError::Read { path, .. }) => {
assert!(path.to_string_lossy().contains("nonexistent"));
}
_ => panic!("Expected EvalError::Read"),
}
}
#[test]
fn test_eval_manifest_load_large_file_rejected() {
let temp = tempfile::TempDir::new().unwrap();
let manifest_path = temp.path().join("large.yaml");
let content = vec![b'x'; (crate::file_utils::DEFAULT_MAX_FILE_SIZE + 1) as usize];
std::fs::write(&manifest_path, &content).unwrap();
let result = EvalManifest::load(&manifest_path);
assert!(result.is_err());
match result {
Err(EvalError::Read { source, .. }) => {
assert!(matches!(source, CoreError::File(FileError::TooBig { .. })));
}
_ => panic!("Expected EvalError::Read"),
}
}
#[test]
fn test_eval_manifest_load_invalid_yaml() {
let temp = tempfile::TempDir::new().unwrap();
let manifest_path = temp.path().join("invalid.yaml");
std::fs::write(&manifest_path, "invalid: yaml: : syntax").unwrap();
let result = EvalManifest::load(&manifest_path);
assert!(result.is_err());
match result {
Err(EvalError::Parse { path, message }) => {
assert_eq!(path, manifest_path);
assert!(!message.is_empty());
}
_ => panic!("Expected EvalError::Parse"),
}
}
#[test]
fn test_evaluate_manifest_with_filter() {
let temp = tempfile::TempDir::new().unwrap();
let skill1_path = temp.path().join("skill1.md");
std::fs::write(
&skill1_path,
"---\nname: deploy-prod\ndescription: Deploy\n---\nBody",
)
.unwrap();
let skill2_path = temp.path().join("skill2.md");
std::fs::write(
&skill2_path,
"---\nname: run-tests\ndescription: Tests\n---\nBody",
)
.unwrap();
let manifest = EvalManifest {
cases: vec![
EvalCase {
file: PathBuf::from("skill1.md"),
expected: vec!["CC-SK-006".to_string()],
description: None,
},
EvalCase {
file: PathBuf::from("skill2.md"),
expected: vec!["AS-004".to_string()],
description: None,
},
],
};
let config = LintConfig::default();
let results = evaluate_manifest(&manifest, temp.path(), &config, Some("CC-SK"));
assert_eq!(results.len(), 1);
assert_eq!(results[0].case.file, PathBuf::from("skill1.md"));
let results = evaluate_manifest(&manifest, temp.path(), &config, Some("AS-"));
assert_eq!(results.len(), 1);
assert_eq!(results[0].case.file, PathBuf::from("skill2.md"));
let results = evaluate_manifest(&manifest, temp.path(), &config, None);
assert_eq!(results.len(), 2);
}
#[test]
fn test_evaluate_manifest_filter_no_matches() {
let temp = tempfile::TempDir::new().unwrap();
let skill_path = temp.path().join("skill.md");
std::fs::write(&skill_path, "---\nname: test\ndescription: Test\n---\nBody").unwrap();
let manifest = EvalManifest {
cases: vec![EvalCase {
file: PathBuf::from("skill.md"),
expected: vec!["AS-001".to_string()],
description: None,
}],
};
let config = LintConfig::default();
let results = evaluate_manifest(&manifest, temp.path(), &config, Some("NONEXISTENT"));
assert!(results.is_empty());
}
#[test]
fn test_evaluate_manifest_file_entry_point() {
let temp = tempfile::TempDir::new().unwrap();
let skill_path = temp.path().join("SKILL.md");
std::fs::write(
&skill_path,
"---\nname: deploy-prod\ndescription: Deploy\n---\nBody",
)
.unwrap();
let manifest_path = temp.path().join("eval.yaml");
std::fs::write(
&manifest_path,
r#"cases:
- file: SKILL.md
expected: [CC-SK-006]
description: "Dangerous skill name"
"#,
)
.unwrap();
let config = LintConfig::default();
let result = evaluate_manifest_file(&manifest_path, &config, None);
assert!(result.is_ok());
let (results, summary) = result.unwrap();
assert_eq!(results.len(), 1);
assert_eq!(summary.cases_run, 1);
}
#[test]
fn test_path_traversal_absolute_path() {
let temp = tempfile::TempDir::new().unwrap();
let case = EvalCase {
file: PathBuf::from("/etc/passwd"),
expected: vec!["SOME-RULE".to_string()],
description: Some("Absolute path attempt".to_string()),
};
let config = LintConfig::default();
let result = evaluate_case(&case, temp.path(), &config);
assert!(
result.actual.contains(&"eval::path-traversal".to_string()),
"Should reject absolute paths, got: {:?}",
result.actual
);
}
#[test]
fn test_relative_path_escaping_base_dir_blocked() {
let temp = tempfile::TempDir::new().unwrap();
let subdir = temp.path().join("subdir");
std::fs::create_dir(&subdir).unwrap();
let file_path = temp.path().join("file.md");
std::fs::write(&file_path, "---\nname: test\n---\nContent").unwrap();
let case = EvalCase {
file: PathBuf::from("../file.md"),
expected: vec![],
description: Some("Relative path going up one level".to_string()),
};
let config = LintConfig::default();
let result = evaluate_case(&case, &subdir, &config);
assert!(
result.actual.contains(&"eval::path-traversal".to_string()),
"Relative ../sibling paths that escape base_dir should be blocked, got: {:?}",
result.actual
);
}
#[test]
fn test_relative_path_within_base_dir_allowed() {
let temp = tempfile::TempDir::new().unwrap();
let file_path = temp.path().join("SKILL.md");
std::fs::write(
&file_path,
"---\nname: test-skill\ndescription: Test\n---\nBody",
)
.unwrap();
let case = EvalCase {
file: PathBuf::from("SKILL.md"),
expected: vec![],
description: Some("File in base directory".to_string()),
};
let config = LintConfig::default();
let result = evaluate_case(&case, temp.path(), &config);
assert!(
!result.actual.contains(&"eval::path-traversal".to_string()),
"Files within base_dir should be allowed, got: {:?}",
result.actual
);
}
#[test]
fn test_evaluate_case_validation_error() {
let temp = tempfile::TempDir::new().unwrap();
let file_path = temp.path().join("invalid.md");
std::fs::write(&file_path, "invalid content that won't parse").unwrap();
let case = EvalCase {
file: PathBuf::from("invalid.md"),
expected: vec!["SOME-RULE".to_string()],
description: Some("Invalid file content".to_string()),
};
let config = LintConfig::default();
let result = evaluate_case(&case, temp.path(), &config);
assert!(
result.false_negatives.contains(&"SOME-RULE".to_string()),
"Expected rule should be in false_negatives when it doesn't fire"
);
}
#[test]
fn test_eval_format_display() {
assert_eq!(format!("{}", EvalFormat::Markdown), "markdown");
assert_eq!(format!("{}", EvalFormat::Json), "json");
assert_eq!(format!("{}", EvalFormat::Csv), "csv");
}
#[cfg(unix)]
#[test]
fn test_evaluate_case_io_error_arm() {
use std::os::unix::fs::PermissionsExt;
let temp = tempfile::TempDir::new().unwrap();
let skill_path = temp.path().join("SKILL.md");
std::fs::write(&skill_path, "# Test skill\n").unwrap();
let original_mode = std::fs::metadata(&skill_path).unwrap().permissions().mode();
std::fs::set_permissions(&skill_path, std::fs::Permissions::from_mode(0o000)).unwrap();
let probe_readable = std::fs::read(&skill_path).is_ok();
if probe_readable {
std::fs::set_permissions(&skill_path, std::fs::Permissions::from_mode(original_mode))
.unwrap();
return;
}
let case = EvalCase {
file: PathBuf::from("SKILL.md"),
expected: vec![],
description: Some("Unreadable file triggers IoError arm".to_string()),
};
let config = LintConfig::default();
let result = evaluate_case(&case, temp.path(), &config);
std::fs::set_permissions(&skill_path, std::fs::Permissions::from_mode(0o644)).unwrap();
assert!(
result.actual.contains(&"eval::io-error".to_string()),
"Expected eval::io-error diagnostic for unreadable file, got: {:?}",
result.actual
);
}
#[test]
fn test_eval_summary_empty_results() {
let results: Vec<EvalResult> = vec![];
let summary = EvalSummary::from_results(&results);
assert_eq!(summary.cases_run, 0);
assert_eq!(summary.cases_passed, 0);
assert_eq!(summary.cases_failed, 0);
assert!(summary.rules.is_empty());
assert!((summary.overall_precision - 1.0).abs() < 0.001);
assert!((summary.overall_recall - 1.0).abs() < 0.001);
}
#[test]
fn test_eval_manifest_base_dir() {
let base = EvalManifest::base_dir("some/path/to/manifest.yaml");
assert_eq!(base, PathBuf::from("some/path/to"));
let base = EvalManifest::base_dir("manifest.yaml");
assert!(base == std::path::Path::new(".") || base == std::path::Path::new(""));
}
#[test]
fn test_eval_summary_zero_fixtures() {
let manifest = EvalManifest { cases: vec![] };
let config = LintConfig::default();
let temp = tempfile::TempDir::new().unwrap();
let results = evaluate_manifest(&manifest, temp.path(), &config, None);
assert!(results.is_empty());
let summary = EvalSummary::from_results(&results);
assert_eq!(summary.cases_run, 0);
assert_eq!(summary.cases_passed, 0);
assert_eq!(summary.cases_failed, 0);
assert!(summary.rules.is_empty());
assert!((summary.overall_precision - 1.0).abs() < 0.001);
assert!((summary.overall_recall - 1.0).abs() < 0.001);
assert!((summary.overall_f1 - 1.0).abs() < 0.001);
}
#[test]
fn test_f1_when_precision_is_zero_recall_is_nonzero() {
let mut m = RuleMetrics::new("TEST-001");
m.tp = 0;
m.fp = 0;
m.fn_count = 5;
assert!((m.precision() - 1.0).abs() < 0.001);
assert!((m.recall() - 0.0).abs() < 0.001);
assert!((m.f1() - 0.0).abs() < 0.001);
}
#[test]
fn test_f1_when_recall_is_zero_precision_is_nonzero() {
let mut m = RuleMetrics::new("TEST-001");
m.tp = 0;
m.fp = 5;
m.fn_count = 0;
assert!((m.precision() - 0.0).abs() < 0.001);
assert!((m.recall() - 1.0).abs() < 0.001);
assert!((m.f1() - 0.0).abs() < 0.001);
}
#[test]
fn test_f1_perfect_score() {
let mut m = RuleMetrics::new("TEST-001");
m.tp = 10;
m.fp = 0;
m.fn_count = 0;
assert!((m.precision() - 1.0).abs() < 0.001);
assert!((m.recall() - 1.0).abs() < 0.001);
assert!((m.f1() - 1.0).abs() < 0.001);
}
#[test]
fn test_eval_summary_to_json() {
let results = vec![EvalResult {
case: EvalCase {
file: PathBuf::from("test.md"),
expected: vec!["AS-001".to_string()],
description: None,
},
actual: vec!["AS-001".to_string()],
true_positives: vec!["AS-001".to_string()],
false_positives: vec![],
false_negatives: vec![],
}];
let summary = EvalSummary::from_results(&results);
let json = summary.to_json();
assert!(json.is_ok());
let json_str = json.unwrap();
assert!(json_str.contains("cases_run"));
assert!(json_str.contains("overall_f1"));
}
#[test]
fn test_evaluate_case_skipped_for_unknown_extension() {
let temp = tempfile::TempDir::new().unwrap();
let unknown_file = temp.path().join("test.xyz");
std::fs::write(&unknown_file, "arbitrary content").unwrap();
let case = EvalCase {
file: PathBuf::from("test.xyz"),
expected: vec![],
description: Some("Unknown file type should be skipped".to_string()),
};
let config = LintConfig::default();
let result = evaluate_case(&case, temp.path(), &config);
assert!(
!result.actual.contains(&"eval::io-error".to_string()),
"Skipped file should not produce eval::io-error, got: {:?}",
result.actual
);
assert!(
!result.actual.contains(&"eval::error".to_string()),
"Skipped file should not produce eval::error, got: {:?}",
result.actual
);
assert!(
result.passed(),
"Skipped file with empty expected should pass, got: {:?}",
result
);
}
}