use std::collections::{HashMap, VecDeque};
use crate::models::bundle::{SynthesisBundle, SynthesisPayload};
use crate::models::profile::BioProfile;
use crate::models::verdict::ThreatAnalysis;
#[derive(Debug, Clone)]
pub struct ThreatScorerConfig {
pub window_size: usize,
pub alert_threshold: f64,
pub weights: ThreatWeights,
pub boundary_band_fraction: f64,
}
#[derive(Debug, Clone)]
pub struct ThreatWeights {
pub boundary_clustering: f64,
pub authority_probing: f64,
pub replay_similarity: f64,
pub drift: f64,
pub anomaly: f64,
}
impl Default for ThreatScorerConfig {
fn default() -> Self {
Self {
window_size: 100,
alert_threshold: 0.7,
weights: ThreatWeights {
boundary_clustering: 0.2,
authority_probing: 0.25,
replay_similarity: 0.2,
drift: 0.2,
anomaly: 0.15,
},
boundary_band_fraction: 0.05,
}
}
}
#[derive(Debug, Clone)]
struct BundleFingerprint {
payload_len: usize,
kmers: Vec<u64>,
#[allow(dead_code)]
rejected: bool,
}
#[derive(Debug, Clone, Default)]
struct DriftTracker {
means: HashMap<String, f64>,
counts: HashMap<String, u64>,
}
impl DriftTracker {
fn update(&mut self, principal: &str, volume: f64) -> f64 {
let count = self.counts.entry(principal.to_string()).or_insert(0);
*count = count.saturating_add(1);
let n = *count as f64;
let old_mean = self.means.get(principal).copied().unwrap_or(volume);
let new_mean = old_mean + (volume - old_mean) / n;
self.means.insert(principal.to_string(), new_mean);
(new_mean - old_mean).abs()
}
}
pub struct ThreatScorer {
config: ThreatScorerConfig,
window: VecDeque<BundleFingerprint>,
rejected_window: VecDeque<BundleFingerprint>,
authority_rejections: HashMap<String, u64>,
authority_checks: u64,
drift: DriftTracker,
}
impl ThreatScorer {
pub fn new(config: ThreatScorerConfig) -> Self {
let window_size = config.window_size;
Self {
config,
window: VecDeque::with_capacity(window_size),
rejected_window: VecDeque::with_capacity(window_size),
authority_rejections: HashMap::new(),
authority_checks: 0,
drift: DriftTracker::default(),
}
}
pub fn with_defaults() -> Self {
Self::new(ThreatScorerConfig::default())
}
pub fn score(
&mut self,
bundle: &SynthesisBundle,
profile: &BioProfile,
authority_passed: bool,
principal: &str,
approved: bool,
) -> ThreatAnalysis {
let fp = fingerprint(bundle);
let boundary_score = self.score_boundary_clustering(&fp, profile);
let authority_score = self.score_authority_probing(authority_passed, principal);
let replay_score = self.score_replay_similarity(&fp);
let drift_score = self.score_drift(principal, fp.payload_len as f64);
let anomaly_score = self.score_anomaly(&fp);
self.window.push_back(fp.clone());
if self.window.len() > self.config.window_size {
self.window.pop_front();
}
if !approved {
self.rejected_window.push_back(fp);
if self.rejected_window.len() > self.config.window_size {
self.rejected_window.pop_front();
}
}
let w = &self.config.weights;
let composite = boundary_score * w.boundary_clustering
+ authority_score * w.authority_probing
+ replay_score * w.replay_similarity
+ drift_score * w.drift
+ anomaly_score * w.anomaly;
let composite = composite.clamp(0.0, 1.0);
ThreatAnalysis {
boundary_clustering_score: boundary_score,
authority_probing_score: authority_score,
replay_similarity_score: replay_score,
drift_score,
anomaly_score,
composite_threat_score: composite,
alert: composite > self.config.alert_threshold,
}
}
fn score_boundary_clustering(&self, fp: &BundleFingerprint, profile: &BioProfile) -> f64 {
let cap = profile.max_synthesis_volume_ml.max(1e-9);
let proxy_volume = fp.payload_len as f64 / 100_000.0;
let band = self.config.boundary_band_fraction;
let band_size = cap * band;
let dist = (cap - proxy_volume).abs();
if dist < band_size {
1.0
} else {
(1.0 - (dist / cap)).clamp(0.0, 1.0) * 0.5
}
}
fn score_authority_probing(&mut self, authority_passed: bool, principal: &str) -> f64 {
self.authority_checks = self.authority_checks.saturating_add(1);
if !authority_passed && !principal.is_empty() {
let entry = self
.authority_rejections
.entry(principal.to_string())
.or_insert(0);
*entry = entry.saturating_add(1);
}
if self.authority_checks < 5 {
return 0.0;
}
let max_rejection_rate = self
.authority_rejections
.values()
.map(|&count| count as f64 / self.authority_checks as f64)
.fold(0.0f64, f64::max);
(max_rejection_rate * 2.0).clamp(0.0, 1.0)
}
fn score_replay_similarity(&self, fp: &BundleFingerprint) -> f64 {
if self.rejected_window.is_empty() || fp.kmers.is_empty() {
return 0.0;
}
let mut max_overlap = 0.0f64;
for prev in &self.rejected_window {
let overlap = jaccard(&fp.kmers, &prev.kmers);
if overlap > max_overlap {
max_overlap = overlap;
}
}
max_overlap.clamp(0.0, 1.0)
}
fn score_drift(&mut self, principal: &str, volume: f64) -> f64 {
if principal.is_empty() {
return 0.0;
}
let shift = self.drift.update(principal, volume);
(shift / 1000.0).clamp(0.0, 1.0)
}
fn score_anomaly(&self, fp: &BundleFingerprint) -> f64 {
if self.window.len() < 10 {
return 0.0;
}
let mean: f64 = self
.window
.iter()
.map(|f| f.payload_len as f64)
.sum::<f64>()
/ self.window.len() as f64;
let var: f64 = self
.window
.iter()
.map(|f| {
let d = f.payload_len as f64 - mean;
d * d
})
.sum::<f64>()
/ self.window.len() as f64;
let std = var.sqrt();
let z = if std > 1e-9 {
((fp.payload_len as f64 - mean) / std).abs()
} else if (fp.payload_len as f64 - mean).abs() > 1e-9 {
10.0
} else {
0.0
};
let z_score = ((z - 1.0) / 3.0).clamp(0.0, 1.0);
let mut frag_hits = 0u32;
for prev in &self.window {
let j = jaccard(&fp.kmers, &prev.kmers);
if (0.05..0.5).contains(&j) {
frag_hits += 1;
}
}
let frag_score = (frag_hits as f64 / self.window.len() as f64).clamp(0.0, 1.0);
(z_score.max(frag_score)).clamp(0.0, 1.0)
}
}
fn fingerprint(bundle: &SynthesisBundle) -> BundleFingerprint {
let (payload_len, kmers) = match &bundle.payload {
SynthesisPayload::Dna { sequence } => (sequence.len(), kmers(sequence, 12)),
SynthesisPayload::Peptide { sequence } => (sequence.len(), kmers(sequence, 6)),
SynthesisPayload::Chemical { smiles } => (smiles.len(), kmers(smiles, 6)),
SynthesisPayload::Protocol { steps } => {
let len: usize = steps.iter().map(|s| s.len()).sum();
let joined = steps.join("\n");
(len, kmers(&joined, 8))
}
};
BundleFingerprint {
payload_len,
kmers,
rejected: false,
}
}
fn kmers(s: &str, k: usize) -> Vec<u64> {
if s.len() < k || k == 0 {
return Vec::new();
}
let bytes = s.as_bytes();
let mut out: Vec<u64> = (0..=bytes.len() - k)
.map(|i| {
let mut h: u64 = 0xcbf29ce484222325;
for &b in &bytes[i..i + k] {
h ^= b as u64;
h = h.wrapping_mul(0x100000001b3);
}
h
})
.collect();
out.sort_unstable();
out.dedup();
out
}
fn jaccard(a: &[u64], b: &[u64]) -> f64 {
if a.is_empty() && b.is_empty() {
return 0.0;
}
let mut i = 0;
let mut j = 0;
let mut inter = 0usize;
while i < a.len() && j < b.len() {
match a[i].cmp(&b[j]) {
std::cmp::Ordering::Equal => {
inter += 1;
i += 1;
j += 1;
}
std::cmp::Ordering::Less => i += 1,
std::cmp::Ordering::Greater => j += 1,
}
}
let union = a.len() + b.len() - inter;
if union == 0 {
0.0
} else {
inter as f64 / union as f64
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::models::bundle::{BundleAuthority, SynthesisPayload};
use chrono::Utc;
fn profile() -> BioProfile {
BioProfile {
name: "test".into(),
version: "0.1.0".into(),
bsl_level: 2,
allowed_substrates: vec!["dna".into()],
max_synthesis_volume_ml: 5.0,
export_controlled: false,
profile_signature: None,
profile_signer_kid: None,
codon_usage_organism: None,
codon_entropy_band: None,
protein_kmer_k: None,
protein_kmer_threshold: None,
allowed_protocol_steps: None,
allow_stale_screening: false,
stale_screening_max_days: None,
max_authority_chain_depth: 5,
max_dna_length_bp: None,
max_peptide_length_aa: None,
max_smiles_length_chars: None,
}
}
fn dna_bundle(seq: &str, sequence_no: u64) -> SynthesisBundle {
SynthesisBundle {
timestamp: Utc::now(),
source: "t".into(),
sequence: sequence_no,
payload: SynthesisPayload::Dna {
sequence: seq.into(),
},
delta_time: 0.0,
authority: BundleAuthority {
pca_chain: String::new(),
required_ops: vec![],
},
metadata: Default::default(),
}
}
#[test]
fn default_config_window_and_threshold() {
let cfg = ThreatScorerConfig::default();
assert_eq!(cfg.window_size, 100);
assert!((cfg.alert_threshold - 0.7).abs() < 1e-9);
}
#[test]
fn weights_sum_to_one() {
let w = ThreatScorerConfig::default().weights;
let s =
w.boundary_clustering + w.authority_probing + w.replay_similarity + w.drift + w.anomaly;
assert!((s - 1.0).abs() < 1e-9);
}
#[test]
fn first_bundle_produces_zero_or_low_score() {
let mut scorer = ThreatScorer::with_defaults();
let b = dna_bundle("ATGCGTATGCGT", 1);
let analysis = scorer.score(&b, &profile(), true, "alice", true);
assert!(analysis.composite_threat_score < 0.5);
assert!(!analysis.alert);
}
#[test]
fn replay_of_rejected_bundle_increases_score() {
let mut scorer = ThreatScorer::with_defaults();
let pf = profile();
let b = dna_bundle("ATGCGTACGTACGTACGTACGTACGT", 1);
let _ = scorer.score(&b, &pf, true, "alice", false);
let analysis = scorer.score(&b, &pf, true, "alice", true);
assert!(analysis.replay_similarity_score > 0.5);
}
#[test]
fn authority_probing_after_repeated_rejections() {
let mut scorer = ThreatScorer::with_defaults();
let pf = profile();
for i in 0..10 {
let b = dna_bundle("ATGCGTATGCGTACGTACGT", i);
let _ = scorer.score(&b, &pf, false, "mallory", false);
}
let b = dna_bundle("ATGCGTATGCGTACGTACGT", 99);
let a = scorer.score(&b, &pf, false, "mallory", false);
assert!(a.authority_probing_score > 0.5);
}
#[test]
fn jaccard_is_one_for_identical_sets() {
let a = vec![1u64, 2, 3];
let b = vec![1u64, 2, 3];
assert!((jaccard(&a, &b) - 1.0).abs() < 1e-9);
}
#[test]
fn jaccard_is_zero_for_non_overlapping_sets() {
let a = vec![1u64, 2, 3];
let b = vec![4u64, 5, 6];
assert!(jaccard(&a, &b) < 1e-9);
}
#[test]
fn alert_fires_above_threshold() {
let cfg = ThreatScorerConfig {
alert_threshold: 0.0, ..Default::default()
};
let mut scorer = ThreatScorer::new(cfg);
let pf = profile();
for i in 0..6 {
let b = dna_bundle("AAAAAAAAAAAAAA", i);
let _ = scorer.score(&b, &pf, false, "mallory", false);
}
let b = dna_bundle("AAAAAAAAAAAAAA", 99);
let a = scorer.score(&b, &pf, false, "mallory", false);
assert!(a.alert);
}
}