mod features;
pub mod feedback;
pub mod model;
pub mod thresholds;
pub mod train;
pub use features::FeatureExtractor;
pub use feedback::{FeedbackCollector, LabeledFinding};
pub use model::{FpClassifier, HeuristicClassifier, Prediction};
pub use thresholds::{
CategoryAwarePrediction, CategoryThresholds, DetectorCategory, ThresholdConfig,
};
pub use train::{train, TrainConfig, TrainResult};
use crate::models::Finding;
/// Runs `classifier` over every finding and returns the resulting predictions.
///
/// Features are extracted from each finding with a single shared
/// [`FeatureExtractor`]. The output has one prediction per input finding, in
/// the same order as `findings`.
pub fn classify_findings(findings: &[Finding], classifier: &FpClassifier) -> Vec<Prediction> {
    let extractor = FeatureExtractor::new();
    let mut predictions = Vec::with_capacity(findings.len());
    for finding in findings {
        let feature_vector = extractor.extract(finding);
        predictions.push(classifier.predict(&feature_vector));
    }
    predictions
}
/// Classifies every finding and interprets each score against `thresholds`.
///
/// For each finding the classifier's `tp_probability` is combined with the
/// finding's `detector` via [`CategoryAwarePrediction::from_prediction`].
/// The output has one entry per input finding, in input order.
pub fn classify_findings_with_thresholds(
    findings: &[Finding],
    classifier: &FpClassifier,
    thresholds: &CategoryThresholds,
) -> Vec<CategoryAwarePrediction> {
    let extractor = FeatureExtractor::new();
    let mut results = Vec::with_capacity(findings.len());
    for finding in findings {
        let feature_vector = extractor.extract(finding);
        let raw = classifier.predict(&feature_vector);
        let category_aware = CategoryAwarePrediction::from_prediction(
            raw.tp_probability,
            &finding.detector,
            thresholds,
        );
        results.push(category_aware);
    }
    results
}
/// Keeps only the findings whose predicted `tp_probability` is at least
/// `threshold`.
///
/// Consumes `findings` and returns the survivors in their original relative
/// order. A finding scoring exactly `threshold` is kept.
pub fn filter_false_positives(
    findings: Vec<Finding>,
    classifier: &FpClassifier,
    threshold: f32,
) -> Vec<Finding> {
    let extractor = FeatureExtractor::new();
    let mut kept = findings;
    kept.retain(|finding| {
        let feature_vector = extractor.extract(finding);
        let prediction = classifier.predict(&feature_vector);
        prediction.tp_probability >= threshold
    });
    kept
}
/// Removes the findings that `thresholds` says should be filtered.
///
/// Each finding is scored by `classifier`; it is dropped when
/// `thresholds.should_filter` returns `true` for the finding's `detector` and
/// its `tp_probability`. Surviving findings keep their original relative order.
pub fn filter_with_category_thresholds(
    findings: Vec<Finding>,
    classifier: &FpClassifier,
    thresholds: &CategoryThresholds,
) -> Vec<Finding> {
    let extractor = FeatureExtractor::new();
    let mut kept = Vec::new();
    for finding in findings {
        let feature_vector = extractor.extract(&finding);
        let prediction = classifier.predict(&feature_vector);
        if thresholds.should_filter(&finding.detector, prediction.tp_probability) {
            continue;
        }
        kept.push(finding);
    }
    kept
}
/// Pairs every finding with its category-aware prediction.
///
/// Consumes `findings` and returns `(finding, prediction)` tuples in input
/// order. No finding is dropped; callers decide what to do with the
/// prediction attached to each one.
pub fn annotate_findings(
    findings: Vec<Finding>,
    classifier: &FpClassifier,
    thresholds: &CategoryThresholds,
) -> Vec<(Finding, CategoryAwarePrediction)> {
    let extractor = FeatureExtractor::new();
    let mut annotated = Vec::with_capacity(findings.len());
    for finding in findings {
        let feature_vector = extractor.extract(&finding);
        let raw = classifier.predict(&feature_vector);
        // Build the prediction before `finding` is moved into the tuple.
        let category_aware = CategoryAwarePrediction::from_prediction(
            raw.tp_probability,
            &finding.detector,
            thresholds,
        );
        annotated.push((finding, category_aware));
    }
    annotated
}
/// Aggregate counts over a batch of category-aware predictions.
///
/// Built by `ClassificationSummary::from_predictions`. Each prediction lands
/// in exactly one of `high_confidence_tp`, `likely_tp`, `likely_fp` or
/// `uncertain`; `would_filter` is counted independently of those buckets.
#[derive(Debug, Clone)]
pub struct ClassificationSummary {
/// Number of predictions summarized.
pub total: usize,
/// Predictions whose `high_confidence` flag is set.
pub high_confidence_tp: usize,
/// Predictions that are not high-confidence but have `is_true_positive` set.
pub likely_tp: usize,
/// Predictions with none of `high_confidence`, `is_true_positive` or `likely_fp` set.
pub uncertain: usize,
/// Predictions that are neither high-confidence nor true-positive but have `likely_fp` set.
pub likely_fp: usize,
/// Predictions whose `should_filter` flag is set.
pub would_filter: usize,
/// Per-category breakdown, keyed by each prediction's `category`.
pub by_category: std::collections::HashMap<DetectorCategory, CategoryStats>,
}
/// Counters for the predictions belonging to a single detector category.
///
/// `Default` yields all-zero counters, which lets the summary create entries
/// on demand through the map entry API.
#[derive(Debug, Clone, Default)]
pub struct CategoryStats {
/// Number of predictions seen for this category.
pub total: usize,
/// Predictions in this category whose `high_confidence` flag is set.
pub high_confidence: usize,
/// Predictions in this category whose `should_filter` flag is set.
pub would_filter: usize,
}
impl ClassificationSummary {
    /// Tallies a slice of predictions into a summary.
    ///
    /// Every prediction contributes to its category's [`CategoryStats`] and to
    /// exactly one overall bucket, chosen by the first flag that is set in
    /// this order: `high_confidence`, `is_true_positive`, `likely_fp`;
    /// otherwise it counts as uncertain. `would_filter` is tallied separately
    /// from `should_filter`. An empty slice yields all-zero counters and an
    /// empty category map.
    pub fn from_predictions(predictions: &[CategoryAwarePrediction]) -> Self {
        let mut summary = Self {
            total: predictions.len(),
            high_confidence_tp: 0,
            likely_tp: 0,
            uncertain: 0,
            likely_fp: 0,
            would_filter: 0,
            by_category: std::collections::HashMap::new(),
        };

        for prediction in predictions {
            // Per-category counters; the entry is created zeroed on first sight.
            let per_category = summary.by_category.entry(prediction.category).or_default();
            per_category.total += 1;
            per_category.high_confidence += usize::from(prediction.high_confidence);
            per_category.would_filter += usize::from(prediction.should_filter);

            // Overall filter count is independent of the confidence bucket.
            summary.would_filter += usize::from(prediction.should_filter);

            // Mutually exclusive buckets, first matching flag wins.
            match (
                prediction.high_confidence,
                prediction.is_true_positive,
                prediction.likely_fp,
            ) {
                (true, _, _) => summary.high_confidence_tp += 1,
                (false, true, _) => summary.likely_tp += 1,
                (false, false, true) => summary.likely_fp += 1,
                (false, false, false) => summary.uncertain += 1,
            }
        }

        summary
    }
}