skill_veil_core/benchmark/
gold.rs1use std::path::Path;
17use std::path::PathBuf;
18
19use serde::{Deserialize, Serialize};
20
21use super::evaluation::evaluate_manifest;
22use super::loader::load_yaml;
23use super::types::{BenchmarkError, CorpusEvaluation, CorpusManifest, LabeledSample, SampleLabel};
24use crate::ports::FileSystemProvider;
25use crate::scanner::Scanner;
26use crate::ThreatCategory;
27
/// Returns the schema version assumed when a manifest omits `schema_version`.
///
/// Referenced by name from the `#[serde(default = "...")]` attribute on
/// `GoldCorpusManifest::schema_version`.
fn default_schema_version() -> String {
    String::from("1")
}
31
32#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct GoldSample {
36 pub id: String,
37 pub path: PathBuf,
38 pub final_label: SampleLabel,
41 #[serde(default)]
43 pub vt_label: Option<SampleLabel>,
44 #[serde(default)]
46 pub llm_consensus: Option<SampleLabel>,
47 #[serde(default)]
49 pub human_review: Option<SampleLabel>,
50 #[serde(default)]
54 pub disputed: bool,
55 #[serde(default)]
56 pub focus_category: Option<ThreatCategory>,
57 #[serde(default)]
58 pub attack_family: Option<String>,
59}
60
61impl GoldSample {
62 #[must_use]
67 pub fn is_admitted(&self) -> bool {
68 !self.disputed || self.human_review.is_some()
69 }
70
71 #[must_use]
76 pub fn derive_disputed(&self) -> bool {
77 match (self.vt_label, self.llm_consensus) {
78 (Some(vt), Some(llm)) => vt != llm,
79 (Some(_), None) => true,
80 _ => false,
81 }
82 }
83}
84
85#[derive(Debug, Clone, Serialize, Deserialize)]
87pub struct GoldCorpusManifest {
88 #[serde(default = "default_schema_version")]
89 pub schema_version: String,
90 #[serde(default)]
91 pub samples: Vec<GoldSample>,
92}
93
94impl GoldCorpusManifest {
95 #[must_use]
99 pub fn to_corpus_manifest(&self) -> CorpusManifest {
100 CorpusManifest {
101 samples: self
102 .samples
103 .iter()
104 .filter(|s| s.is_admitted())
105 .map(|s| LabeledSample {
106 id: s.id.clone(),
107 path: s.path.clone(),
108 label: s.final_label,
109 focus_category: s.focus_category,
110 attack_family: s.attack_family.clone(),
111 })
112 .collect(),
113 }
114 }
115}
116
117pub fn evaluate_gold_corpus<F: FileSystemProvider>(
122 fs: &F,
123 scanner: &Scanner,
124 manifest_path: &Path,
125) -> Result<CorpusEvaluation, BenchmarkError> {
126 let gold: GoldCorpusManifest = load_yaml(fs, manifest_path)?;
127 let root = manifest_path.parent().unwrap_or_else(|| Path::new("."));
128 evaluate_manifest(fs, scanner, gold.to_corpus_manifest(), root)
129}
130
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a fixture labelled malicious whose `vt_label` (benign) and
    /// `llm_consensus` (malicious) disagree. The caller picks the stored
    /// `disputed` flag and the optional human-review label.
    fn sample(id: &str, disputed: bool, human: Option<SampleLabel>) -> GoldSample {
        GoldSample {
            id: id.to_string(),
            path: PathBuf::from(format!("{id}.md")),
            final_label: SampleLabel::Malicious,
            vt_label: Some(SampleLabel::Benign),
            llm_consensus: Some(SampleLabel::Malicious),
            human_review: human,
            disputed,
            focus_category: None,
            attack_family: None,
        }
    }

    /// A disputed sample with no human review must not reach the corpus.
    #[test]
    fn disputed_sample_excluded_until_reviewed() {
        let m = GoldCorpusManifest {
            schema_version: "1".into(),
            samples: vec![sample("a", true, None)],
        };
        assert!(m.to_corpus_manifest().samples.is_empty());
    }

    /// Once a human review label is present, a disputed sample is exported
    /// with its `final_label`.
    #[test]
    fn reviewed_dispute_is_admitted() {
        let m = GoldCorpusManifest {
            schema_version: "1".into(),
            samples: vec![sample("a", true, Some(SampleLabel::Malicious))],
        };
        let cm = m.to_corpus_manifest();
        assert_eq!(cm.samples.len(), 1);
        assert_eq!(cm.samples[0].label, SampleLabel::Malicious);
    }

    /// A sample that is not flagged disputed is exported without review.
    #[test]
    fn undisputed_sample_is_admitted() {
        let mut s = sample("a", false, None);
        // Align the VT label with the consensus so the fixture is coherent.
        s.vt_label = Some(SampleLabel::Malicious);
        let m = GoldCorpusManifest {
            schema_version: "1".into(),
            samples: vec![s],
        };
        assert_eq!(m.to_corpus_manifest().samples.len(), 1);
    }

    /// Walks `derive_disputed` through disagreement, agreement, a lone VT
    /// label, and no labels at all.
    #[test]
    fn derive_disputed_from_provenance() {
        let mut s = sample("a", false, None);
        s.vt_label = Some(SampleLabel::Benign);
        s.llm_consensus = Some(SampleLabel::Malicious);
        assert!(s.derive_disputed(), "VT≠LLM is a dispute");

        s.llm_consensus = Some(SampleLabel::Benign);
        assert!(!s.derive_disputed(), "VT==LLM agreement is not a dispute");

        s.llm_consensus = None;
        assert!(s.derive_disputed(), "VT present, no consensus is a dispute");

        s.vt_label = None;
        assert!(
            !s.derive_disputed(),
            "no VT label and no consensus is not a dispute"
        );
    }

    /// A manifest that only supplies the three mandatory sample keys must
    /// deserialise, with every optional field taking its default.
    #[test]
    fn minimal_manifest_deserialises_additively() {
        // `path` and `final_label` must be indented under `id` so that all
        // three keys belong to the same sequence entry; with the keys in the
        // column of the `-` the document is not a valid block mapping.
        let yaml = "samples:\n  - id: a\n    path: a.md\n    final_label: benign\n";
        let m: GoldCorpusManifest = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(m.samples.len(), 1);
        assert_eq!(m.samples[0].final_label, SampleLabel::Benign);
        assert!(!m.samples[0].disputed);
        assert_eq!(m.schema_version, "1");
    }
}