use std::collections::{HashMap, HashSet};
use datasynth_audit_fsm::event::AuditEvent;
use datasynth_audit_fsm::schema::AuditBlueprint;
use serde::Serialize;
/// Blueprint reconstructed from an observed audit event log.
///
/// The field descriptions below reflect how [`discover_blueprint`] populates
/// this type; all fields are public, so other code may construct it differently.
#[derive(Debug, Clone, Serialize)]
pub struct DiscoveredBlueprint {
/// One entry per distinct `procedure_id` in the log, sorted by the timestamp
/// of the first event recorded for each procedure.
pub procedures: Vec<DiscoveredProcedure>,
/// Distinct phase identifiers, in the order they first occur while walking
/// `procedures`.
pub phases: Vec<String>,
/// Length of the event slice that was analyzed.
pub total_events_analyzed: usize,
}
/// State machine observed for a single procedure.
///
/// The field descriptions below reflect how [`discover_blueprint`] populates
/// this type.
#[derive(Debug, Clone, Serialize)]
pub struct DiscoveredProcedure {
/// The `procedure_id` shared by the events this entry was built from.
pub id: String,
/// `phase_id` of the first event recorded for the procedure.
pub phase: String,
/// Distinct states, in the order they are first seen in transition events
/// (events that carry both a `from_state` and a `to_state`).
pub states: Vec<String>,
/// Distinct `(from_state, to_state)` pairs, in first-seen order.
pub transitions: Vec<(String, String)>,
/// `from_state` of the first transition event; empty when the procedure has
/// no transition events.
pub initial_state: String,
/// States that occur as a transition target but never as a source, sorted
/// lexicographically.
pub terminal_states: Vec<String>,
/// Number of events recorded for the procedure, including events that do
/// not describe a transition.
pub event_count: usize,
}
/// Result of comparing a discovered blueprint with a reference blueprint.
///
/// The field descriptions below reflect how [`compare_blueprints`] populates
/// this type.
#[derive(Debug, Clone, Serialize)]
pub struct BlueprintDiff {
/// Procedure ids present on both sides, sorted.
pub matching_procedures: Vec<String>,
/// Procedure ids present in the reference but not discovered, sorted.
pub missing_procedures: Vec<String>,
/// Procedure ids discovered but absent from the reference, sorted.
pub extra_procedures: Vec<String>,
/// Transition-level differences, reported for matching procedures only.
pub transition_diffs: Vec<TransitionDiff>,
/// `matching / (matching + missing + extra)` over procedure counts, or `1.0`
/// when all three lists are empty. Transition differences do not affect it.
pub conformance_score: f64,
}
/// A single transition that is present on only one side of a comparison.
#[derive(Debug, Clone, Serialize)]
pub struct TransitionDiff {
/// Procedure the transition belongs to.
pub procedure_id: String,
/// [`compare_blueprints`] emits `"missing"` for a transition that is in the
/// reference but was not observed, and `"extra"` for one that was observed
/// but is not in the reference.
pub diff_type: String,
/// Source state of the transition.
pub from_state: String,
/// Target state of the transition.
pub to_state: String,
}
/// Reconstructs a blueprint from an observed audit event log.
///
/// Events are grouped by `procedure_id`. Procedures are ordered by the
/// timestamp of the first event recorded for each of them; procedures whose
/// first timestamps are equal keep the order in which they first appear in
/// `events`, so the result is deterministic for a given input. Phases are
/// listed in the order they first occur among the ordered procedures.
///
/// An empty `events` slice yields a blueprint with no procedures and no phases.
pub fn discover_blueprint(events: &[AuditEvent]) -> DiscoveredBlueprint {
    // Bucket events by procedure. `proc_ids` records ids in first-appearance
    // order: seeding it from `HashMap::keys()` instead would hand the stable
    // sort below an arbitrary starting order and make ties nondeterministic.
    let mut by_procedure: HashMap<&str, Vec<&AuditEvent>> = HashMap::new();
    let mut proc_ids: Vec<&str> = Vec::new();
    for event in events {
        let id = event.procedure_id.as_str();
        let bucket = by_procedure.entry(id).or_default();
        if bucket.is_empty() {
            // A bucket is only empty at the moment it is created.
            proc_ids.push(id);
        }
        bucket.push(event);
    }

    // Stable sort: equal keys preserve first-appearance order.
    proc_ids.sort_by_key(|id| {
        by_procedure
            .get(*id)
            .and_then(|evts| evts.first())
            .map(|e| e.timestamp)
            .unwrap_or_default()
    });

    let procedures: Vec<DiscoveredProcedure> = proc_ids
        .iter()
        .map(|&id| discover_procedure(id, &by_procedure[id]))
        .collect();

    // Distinct phases in the order the sorted procedures introduce them.
    let phases = {
        let mut seen: HashSet<&str> = HashSet::new();
        let mut ordered: Vec<String> = Vec::new();
        for procedure in &procedures {
            if seen.insert(procedure.phase.as_str()) {
                ordered.push(procedure.phase.clone());
            }
        }
        ordered
    };

    DiscoveredBlueprint {
        procedures,
        phases,
        total_events_analyzed: events.len(),
    }
}

/// Derives the observed state machine of one procedure from its events.
///
/// Only events carrying both a `from_state` and a `to_state` contribute to the
/// states and transitions; every event counts towards `event_count`.
fn discover_procedure(id: &str, evts: &[&AuditEvent]) -> DiscoveredProcedure {
    let phase = evts
        .first()
        .map(|e| e.phase_id.as_str())
        .unwrap_or("")
        .to_string();

    let mut states: Vec<String> = Vec::new();
    let mut transitions: Vec<(String, String)> = Vec::new();
    // Borrowed lookup sets: an owned `String` is allocated only the first time
    // a state or transition is seen.
    let mut seen_states: HashSet<&str> = HashSet::new();
    let mut seen_transitions: HashSet<(&str, &str)> = HashSet::new();
    let mut sources: HashSet<&str> = HashSet::new();
    let mut targets: HashSet<&str> = HashSet::new();

    for &evt in evts {
        let (from, to) = match (evt.from_state.as_deref(), evt.to_state.as_deref()) {
            (Some(from), Some(to)) => (from, to),
            // Not a transition event.
            _ => continue,
        };
        if seen_states.insert(from) {
            states.push(from.to_owned());
        }
        if seen_states.insert(to) {
            states.push(to.to_owned());
        }
        if seen_transitions.insert((from, to)) {
            transitions.push((from.to_owned(), to.to_owned()));
        }
        sources.insert(from);
        targets.insert(to);
    }

    // The first state ever recorded is the source of the first transition
    // event; there is none when the procedure has no transition events.
    let initial_state = states.first().cloned().unwrap_or_default();

    // Terminal states are entered but never left. Sorted because set
    // iteration order is arbitrary.
    let mut terminal_states: Vec<String> = targets
        .difference(&sources)
        .map(|state| (*state).to_owned())
        .collect();
    terminal_states.sort();

    DiscoveredProcedure {
        id: id.to_owned(),
        phase,
        states,
        transitions,
        initial_state,
        terminal_states,
        event_count: evts.len(),
    }
}
pub fn compare_blueprints(
discovered: &DiscoveredBlueprint,
reference: &AuditBlueprint,
) -> BlueprintDiff {
let discovered_ids: HashSet<String> =
discovered.procedures.iter().map(|p| p.id.clone()).collect();
let reference_ids: HashSet<String> = reference
.phases
.iter()
.flat_map(|phase| phase.procedures.iter())
.map(|p| p.id.clone())
.collect();
let mut matching_procedures: Vec<String> = discovered_ids
.intersection(&reference_ids)
.cloned()
.collect();
matching_procedures.sort();
let mut missing_procedures: Vec<String> =
reference_ids.difference(&discovered_ids).cloned().collect();
missing_procedures.sort();
let mut extra_procedures: Vec<String> =
discovered_ids.difference(&reference_ids).cloned().collect();
extra_procedures.sort();
let discovered_map: HashMap<&str, &DiscoveredProcedure> = discovered
.procedures
.iter()
.map(|p| (p.id.as_str(), p))
.collect();
let reference_transitions: HashMap<&str, HashSet<(String, String)>> = reference
.phases
.iter()
.flat_map(|phase| phase.procedures.iter())
.map(|p| {
let set: HashSet<(String, String)> = p
.aggregate
.transitions
.iter()
.map(|t| (t.from_state.clone(), t.to_state.clone()))
.collect();
(p.id.as_str(), set)
})
.collect();
let mut transition_diffs: Vec<TransitionDiff> = Vec::new();
for proc_id in &matching_procedures {
let disc_proc = match discovered_map.get(proc_id.as_str()) {
Some(p) => p,
None => continue,
};
let ref_transitions = match reference_transitions.get(proc_id.as_str()) {
Some(t) => t,
None => continue,
};
let disc_set: HashSet<(String, String)> = disc_proc.transitions.iter().cloned().collect();
let mut missing_trans: Vec<&(String, String)> =
ref_transitions.difference(&disc_set).collect();
missing_trans.sort();
for (from, to) in missing_trans {
transition_diffs.push(TransitionDiff {
procedure_id: proc_id.clone(),
diff_type: "missing".to_string(),
from_state: from.clone(),
to_state: to.clone(),
});
}
let mut extra_trans: Vec<&(String, String)> =
disc_set.difference(ref_transitions).collect();
extra_trans.sort();
for (from, to) in extra_trans {
transition_diffs.push(TransitionDiff {
procedure_id: proc_id.clone(),
diff_type: "extra".to_string(),
from_state: from.clone(),
to_state: to.clone(),
});
}
}
let m = matching_procedures.len() as f64;
let mi = missing_procedures.len() as f64;
let ex = extra_procedures.len() as f64;
let denominator = m + mi + ex;
let conformance_score = if denominator > 0.0 {
m / denominator
} else {
1.0
};
BlueprintDiff {
matching_procedures,
missing_procedures,
extra_procedures,
transition_diffs,
conformance_score,
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use datasynth_audit_fsm::benchmark::{
        generate_benchmark, BenchmarkComplexity, BenchmarkConfig,
    };
    use datasynth_audit_fsm::loader::BlueprintWithPreconditions;

    /// Generates a benchmark event log at the given complexity, with no
    /// anomaly rate and a fixed seed of 42.
    fn benchmark_events(complexity: BenchmarkComplexity) -> Vec<AuditEvent> {
        let config = BenchmarkConfig {
            complexity,
            anomaly_rate: None,
            seed: 42,
        };
        generate_benchmark(&config).unwrap().events
    }

    /// Event log of the `Simple` benchmark, which these tests treat as the FSA log.
    fn fsa_events() -> Vec<AuditEvent> {
        benchmark_events(BenchmarkComplexity::Simple)
    }

    /// Event log of the `Complex` benchmark, which these tests treat as the IA log.
    fn ia_events() -> Vec<AuditEvent> {
        benchmark_events(BenchmarkComplexity::Complex)
    }

    /// Compares `discovered` with the built-in FSA reference blueprint.
    fn diff_against_builtin_fsa(discovered: &DiscoveredBlueprint) -> BlueprintDiff {
        let bwp = BlueprintWithPreconditions::load_builtin_fsa().unwrap();
        compare_blueprints(discovered, &bwp.blueprint)
    }

    #[test]
    fn test_discover_from_fsa_events() {
        let events = fsa_events();
        let discovered = discover_blueprint(&events);

        let procedure_count = discovered.procedures.len();
        assert_eq!(
            procedure_count, 9,
            "FSA blueprint has 9 procedures, got {}",
            procedure_count
        );
        assert_eq!(discovered.total_events_analyzed, events.len());

        for procedure in discovered.procedures.iter() {
            assert!(
                !procedure.states.is_empty(),
                "Procedure {} should have states",
                procedure.id
            );
            assert!(
                !procedure.transitions.is_empty(),
                "Procedure {} should have transitions",
                procedure.id
            );
        }
    }

    #[test]
    fn test_discover_from_ia_events() {
        let discovered = discover_blueprint(&ia_events());
        let procedure_count = discovered.procedures.len();
        assert!(
            procedure_count >= 30,
            "IA blueprint should yield >= 30 discovered procedures, got {}",
            procedure_count
        );
    }

    #[test]
    fn test_discovered_states_match_aggregate() {
        let discovered = discover_blueprint(&fsa_events());
        let accept = discovered
            .procedures
            .iter()
            .find(|p| p.id == "accept_engagement")
            .expect("accept_engagement should be discovered");

        let expected: HashSet<&str> =
            vec!["not_started", "in_progress", "under_review", "completed"]
                .into_iter()
                .collect();
        let found: HashSet<&str> = accept.states.iter().map(String::as_str).collect();
        assert_eq!(
            found, expected,
            "accept_engagement states should be {:?}, got {:?}",
            expected, found
        );
    }

    #[test]
    fn test_compare_discovered_vs_reference() {
        let discovered = discover_blueprint(&fsa_events());
        let diff = diff_against_builtin_fsa(&discovered);

        assert!(
            diff.conformance_score > 0.7,
            "Conformance score should be > 0.7, got {}",
            diff.conformance_score
        );
        assert!(
            !diff.matching_procedures.is_empty(),
            "Should have matching procedures"
        );
    }

    #[test]
    fn test_compare_reports_missing_procedures() {
        // Keep the prefix of the log that precedes the first event of the
        // fourth distinct procedure.
        let all_events = fsa_events();
        let mut kept_ids: Vec<String> = Vec::new();
        let mut partial: Vec<AuditEvent> = Vec::new();
        for evt in all_events.iter() {
            let already_kept = kept_ids.iter().any(|id| id == &evt.procedure_id);
            if !already_kept {
                if kept_ids.len() >= 3 {
                    break;
                }
                kept_ids.push(evt.procedure_id.clone());
            }
            partial.push(evt.clone());
        }

        let discovered = discover_blueprint(&partial);
        assert_eq!(discovered.procedures.len(), 3);

        let diff = diff_against_builtin_fsa(&discovered);
        assert!(
            !diff.missing_procedures.is_empty(),
            "Should report missing procedures when only 3 / 9 procedures are in the log"
        );
        let missing_count = diff.missing_procedures.len();
        assert_eq!(
            missing_count, 6,
            "Expected 6 missing procedures (9 - 3), got {}",
            missing_count
        );
    }

    #[test]
    fn test_blueprint_diff_serializes() {
        let discovered = discover_blueprint(&fsa_events());
        let diff = diff_against_builtin_fsa(&discovered);

        let json = serde_json::to_string(&diff).expect("BlueprintDiff should serialise to JSON");
        for key in ["conformance_score", "matching_procedures"].iter() {
            assert!(json.contains(key));
        }
    }
}