use std::path::Path;
use std::path::PathBuf;
use serde::{Deserialize, Serialize};
use super::evaluation::evaluate_manifest;
use super::loader::load_yaml;
use super::types::{BenchmarkError, CorpusEvaluation, CorpusManifest, LabeledSample, SampleLabel};
use crate::ports::FileSystemProvider;
use crate::scanner::Scanner;
use crate::ThreatCategory;
/// Schema version assumed for a manifest that does not state one.
///
/// Wired in through `#[serde(default = "default_schema_version")]`, so serde
/// calls it only when the `schema_version` key is absent from the input.
fn default_schema_version() -> String {
    String::from("1")
}
/// One entry of a gold corpus manifest: a sample plus the provenance of its label.
///
/// Only `id`, `path` and `final_label` are required when deserialising; every
/// other field carries `#[serde(default)]` and falls back to `None` / `false`
/// when the key is missing.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct GoldSample {
    /// Identifier of the sample; copied verbatim into the derived `LabeledSample`.
    pub id: String,
    /// Location of the sample file.
    // NOTE(review): presumably relative to the manifest's directory, since
    // `evaluate_gold_corpus` passes that directory as the root — confirm.
    pub path: PathBuf,
    /// The label that is carried into the evaluation manifest.
    pub final_label: SampleLabel,
    /// Label reported by the "VT" source, if any.
    // NOTE(review): "VT" presumably stands for VirusTotal — confirm.
    #[serde(default)]
    pub vt_label: Option<SampleLabel>,
    /// Consensus label from the LLM source, if one was reached.
    #[serde(default)]
    pub llm_consensus: Option<SampleLabel>,
    /// Label recorded by a human reviewer; its mere presence admits a disputed sample.
    #[serde(default)]
    pub human_review: Option<SampleLabel>,
    /// Stored dispute flag consulted by `is_admitted`; `false` when omitted.
    #[serde(default)]
    pub disputed: bool,
    /// Optional threat category, passed through to the derived `LabeledSample`.
    #[serde(default)]
    pub focus_category: Option<ThreatCategory>,
    /// Optional attack family name, passed through to the derived `LabeledSample`.
    #[serde(default)]
    pub attack_family: Option<String>,
}
impl GoldSample {
    /// Reports whether this sample may enter the evaluation corpus.
    ///
    /// A sample is held back only while it is flagged `disputed` *and* no
    /// `human_review` label has been recorded. Note that this reads the stored
    /// `disputed` flag; it does not call [`GoldSample::derive_disputed`].
    #[must_use]
    pub fn is_admitted(&self) -> bool {
        let awaiting_review = self.disputed && self.human_review.is_none();
        !awaiting_review
    }

    /// Computes a dispute status from the provenance labels alone.
    ///
    /// * both `vt_label` and `llm_consensus` present: disputed when they differ;
    /// * `vt_label` present but no `llm_consensus`: disputed;
    /// * no `vt_label` (with or without `llm_consensus`): not disputed.
    ///
    /// The stored `disputed` field is neither read nor updated.
    #[must_use]
    pub fn derive_disputed(&self) -> bool {
        match self.vt_label {
            // Without a VT label there is nothing to disagree with.
            None => false,
            Some(vt) => match self.llm_consensus {
                Some(llm) => vt != llm,
                // A VT label with no consensus to confirm it counts as a dispute.
                None => true,
            },
        }
    }
}
/// Top-level document of a gold corpus manifest.
///
/// Both fields are optional on input: a missing `schema_version` becomes the
/// value returned by `default_schema_version`, and a missing `samples` key
/// yields an empty list.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct GoldCorpusManifest {
    /// Version tag of the manifest schema, kept as a string.
    #[serde(default = "default_schema_version")]
    pub schema_version: String,
    /// All gold samples listed in the manifest, admitted or not.
    #[serde(default)]
    pub samples: Vec<GoldSample>,
}
impl GoldCorpusManifest {
    /// Projects the gold manifest onto a plain [`CorpusManifest`].
    ///
    /// Samples for which [`GoldSample::is_admitted`] is `false` are dropped.
    /// Each remaining sample becomes a [`LabeledSample`] labelled with its
    /// `final_label`; the provenance fields (`vt_label`, `llm_consensus`,
    /// `human_review`, `disputed`) are not carried over. Input order is kept.
    #[must_use]
    pub fn to_corpus_manifest(&self) -> CorpusManifest {
        let samples = self
            .samples
            .iter()
            .filter_map(|gold| {
                gold.is_admitted().then(|| LabeledSample {
                    id: gold.id.clone(),
                    path: gold.path.clone(),
                    label: gold.final_label,
                    focus_category: gold.focus_category,
                    attack_family: gold.attack_family.clone(),
                })
            })
            .collect();

        CorpusManifest { samples }
    }
}
/// Loads a gold corpus manifest and evaluates its admitted samples.
///
/// The manifest at `manifest_path` is read through `fs` with `load_yaml`,
/// reduced with [`GoldCorpusManifest::to_corpus_manifest`], and handed to
/// `evaluate_manifest` together with the manifest's parent directory as root.
/// When `manifest_path` has no parent, `"."` is used as the root instead.
///
/// # Errors
///
/// Propagates the error from `load_yaml` if the manifest cannot be loaded, and
/// returns whatever error `evaluate_manifest` reports.
pub fn evaluate_gold_corpus<F>(
    fs: &F,
    scanner: &Scanner,
    manifest_path: &Path,
) -> Result<CorpusEvaluation, BenchmarkError>
where
    F: FileSystemProvider,
{
    let gold: GoldCorpusManifest = load_yaml(fs, manifest_path)?;

    let root = match manifest_path.parent() {
        Some(dir) => dir,
        None => Path::new("."),
    };

    let corpus = gold.to_corpus_manifest();
    evaluate_manifest(fs, scanner, corpus, root)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Malicious` gold sample whose VT label (`Benign`) and LLM
    /// consensus (`Malicious`) disagree; the caller picks the stored
    /// `disputed` flag and the optional human review label.
    fn sample(id: &str, disputed: bool, human: Option<SampleLabel>) -> GoldSample {
        GoldSample {
            id: id.to_string(),
            path: PathBuf::from(format!("{id}.md")),
            final_label: SampleLabel::Malicious,
            vt_label: Some(SampleLabel::Benign),
            llm_consensus: Some(SampleLabel::Malicious),
            human_review: human,
            disputed,
            focus_category: None,
            attack_family: None,
        }
    }

    #[test]
    fn disputed_sample_excluded_until_reviewed() {
        let m = GoldCorpusManifest {
            schema_version: "1".into(),
            samples: vec![sample("a", true, None)],
        };
        assert!(m.to_corpus_manifest().samples.is_empty());
    }

    #[test]
    fn reviewed_dispute_is_admitted() {
        let m = GoldCorpusManifest {
            schema_version: "1".into(),
            samples: vec![sample("a", true, Some(SampleLabel::Malicious))],
        };
        let cm = m.to_corpus_manifest();
        assert_eq!(cm.samples.len(), 1);
        assert_eq!(cm.samples[0].label, SampleLabel::Malicious);
    }

    #[test]
    fn undisputed_sample_is_admitted() {
        let mut s = sample("a", false, None);
        // Make the provenance labels agree so the fixture is consistent with
        // the `disputed: false` flag.
        s.vt_label = Some(SampleLabel::Malicious);
        let m = GoldCorpusManifest {
            schema_version: "1".into(),
            samples: vec![s],
        };
        assert_eq!(m.to_corpus_manifest().samples.len(), 1);
    }

    #[test]
    fn derive_disputed_from_provenance() {
        let mut s = sample("a", false, None);
        s.vt_label = Some(SampleLabel::Benign);
        s.llm_consensus = Some(SampleLabel::Malicious);
        assert!(s.derive_disputed(), "VT≠LLM is a dispute");
        s.llm_consensus = Some(SampleLabel::Benign);
        assert!(!s.derive_disputed(), "VT==LLM agreement is not a dispute");
        s.llm_consensus = None;
        assert!(s.derive_disputed(), "VT present, no consensus is a dispute");
        s.vt_label = None;
        assert!(
            !s.derive_disputed(),
            "no VT label and no consensus is not a dispute"
        );
    }

    #[test]
    fn minimal_manifest_deserialises_additively() {
        // The sibling keys of the sequence item must line up with `id` (the
        // column after "- "); shallower indentation is not valid block YAML.
        let yaml = "samples:\n  - id: a\n    path: a.md\n    final_label: benign\n";
        let m: GoldCorpusManifest =
            serde_yaml::from_str(yaml).expect("minimal manifest should parse");
        assert_eq!(m.samples.len(), 1);
        assert_eq!(m.samples[0].final_label, SampleLabel::Benign);
        // Omitted optional fields fall back to their serde defaults.
        assert!(!m.samples[0].disputed);
        assert_eq!(m.schema_version, "1");
    }
}