use std::collections::{HashMap, HashSet};
use crate::types::{Receipt, AdmittedReceipt};
use wasm4pm_compat::ocel::{OCEL, OCELEvent, OCELObject, OCELRelationship};
use wasm4pm_compat::petri::{Arc, Marking, PetriNet, Place, Transition};
use thiserror::Error;
use serde::{Serialize, Deserialize};
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct AlignmentReport {
pub fitness: f64,
pub activity_coverage: f64,
pub simplicity: f64,
pub interpretation: String,
pub unfit_events: u32,
pub model_moves: u32,
}
#[derive(Debug, Error)]
pub enum MiningError {
#[error("wasm4pm HIM failed: {0}")]
Him(String),
#[error("admission refused: {0}")]
Admission(String),
#[error("OCEL conversion failed: {0}")]
OcelConversion(String),
#[error("alignment fitness failed: {0}")]
Alignment(String),
}
pub fn interpret_score(fitness: f64) -> &'static str {
if fitness >= 0.95 {
"perfect-fit"
} else if fitness >= 0.80 {
"good-fit"
} else if fitness >= 0.60 {
"acceptable-fit"
} else if fitness >= 0.30 {
"poor-fit"
} else {
"no-fit"
}
}
pub fn alignment_fitness_score(
admitted: &AdmittedReceipt,
model: &PetriNet,
) -> Result<AlignmentReport, MiningError> {
let receipt = &admitted.value;
if receipt.events.is_empty() {
return Ok(AlignmentReport {
fitness: 0.0,
activity_coverage: 0.0,
simplicity: calculate_simplicity(model),
interpretation: "no-fit".to_string(),
unfit_events: 0,
model_moves: 0,
});
}
let trace: Vec<String> = receipt.events.iter().map(|e| e.event_type.clone()).collect();
let mut fitness = 1.0;
let mut unfit_events = 0;
let mut model_moves = 0;
let model_activities: HashSet<_> = model.transitions().iter().map(|t| t.label()).collect();
let log_activities: HashSet<_> = trace.iter().cloned().collect();
let intersection = log_activities.intersection(&model_activities).count();
let activity_coverage = if !model_activities.is_empty() {
intersection as f64 / model_activities.len() as f64
} else {
0.0
};
if activity_coverage < 1.0 {
fitness = activity_coverage;
unfit_events = (trace.len() as f64 * (1.0 - activity_coverage)) as u32;
}
Ok(AlignmentReport {
fitness,
activity_coverage,
simplicity: calculate_simplicity(model),
interpretation: interpret_score(fitness).to_string(),
unfit_events,
model_moves,
})
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ActivityPrediction {
pub activity: String,
pub confidence: f64,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PredictionReport {
pub predictions: Vec<ActivityPrediction>,
pub context_length: usize,
pub model_type: String,
}
pub fn predict_next(
admitted: &AdmittedReceipt,
top_k: usize,
) -> Result<PredictionReport, MiningError> {
if top_k == 0 {
return Err(MiningError::Him("top-k must be at least 1".to_string()));
}
let receipt = &admitted.value;
let context_length = receipt.events.len();
let mut transitions: HashMap<String, HashMap<String, usize>> = HashMap::new();
let mut trace = Vec::new();
for ev in &receipt.events {
trace.push(ev.event_type.clone());
}
for window in trace.windows(2) {
let counts = transitions.entry(window[0].clone()).or_default();
*counts.entry(window[1].clone()).or_insert(0) += 1;
}
let mut predictions = Vec::new();
if let Some(last_act) = trace.last() {
if let Some(next_counts) = transitions.get(last_act) {
let total: usize = next_counts.values().sum();
for (act, count) in next_counts {
predictions.push(ActivityPrediction {
activity: act.clone(),
confidence: *count as f64 / total as f64,
});
}
}
}
predictions.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
predictions.truncate(top_k);
Ok(PredictionReport {
predictions,
context_length,
model_type: "Heuristic transition model".to_string(),
})
}
fn calculate_simplicity(model: &PetriNet) -> f64 {
let arcs = model.arcs().len();
let transitions = model.transitions().len();
if transitions == 0 { return 0.0; }
1.0 / (1.0 + (arcs as f64 / transitions as f64))
}
pub fn receipt_to_ocel(receipt: &Receipt) -> Result<OCEL, MiningError> {
let mut objects_map = HashMap::new();
let mut ocel_events = Vec::new();
for ev in &receipt.events {
let mut ocel_ev = OCELEvent::new(ev.id.clone(), ev.event_type.clone());
for obj_ref in &ev.objects {
objects_map.entry(obj_ref.id.clone()).or_insert_with(|| {
OCELObject::new(obj_ref.id.clone(), obj_ref.obj_type.clone())
});
let qualifier = obj_ref.qualifier.clone().unwrap_or_else(|| "unspecified".to_string());
ocel_ev.relationships.push(OCELRelationship {
object_id: obj_ref.id.clone(),
qualifier,
});
}
ocel_events.push(ocel_ev);
}
let ocel_objects: Vec<_> = objects_map.into_values().collect();
Ok(OCEL::new(ocel_events, ocel_objects))
}
pub fn discover_him(admitted: &AdmittedReceipt) -> Result<PetriNet, MiningError> {
let ocel = receipt_to_ocel(&admitted.value)?;
let mut activities = HashSet::new();
let mut transitions = Vec::new();
let mut places = Vec::new();
let mut arcs = Vec::new();
for ev in ocel.event_set() {
if activities.insert(ev.event_type.clone()) {
transitions.push(Transition::new(ev.event_type.clone(), ev.event_type.clone()));
}
}
transitions.sort_by(|a, b| a.id().cmp(b.id()));
let mut object_traces: HashMap<String, Vec<String>> = HashMap::new();
let mut global_trace: Vec<String> = Vec::new();
for ev in ocel.event_set() {
global_trace.push(ev.event_type.clone());
for (obj_id, _) in ocel.e2o(&ev.id) {
object_traces.entry(obj_id.to_string()).or_default().push(ev.event_type.clone());
}
}
let mut causal_pairs = HashSet::new();
let mut starts = HashSet::new();
let mut ends = HashSet::new();
for trace in object_traces.values() {
if let Some(first) = trace.first() { starts.insert(first.clone()); }
if let Some(last) = trace.last() { ends.insert(last.clone()); }
for window in trace.windows(2) {
causal_pairs.insert((window[0].clone(), window[1].clone()));
}
}
if let Some(first) = global_trace.first() { starts.insert(first.clone()); }
if let Some(last) = global_trace.last() { ends.insert(last.clone()); }
for window in global_trace.windows(2) {
causal_pairs.insert((window[0].clone(), window[1].clone()));
}
places.push(Place::new("start"));
for start_act in sorted_set(starts) {
arcs.push(Arc::place_to_transition("start", start_act));
}
for (a, b) in sorted_pairs(causal_pairs) {
let place_id = format!("p_{}_{}", a, b);
places.push(Place::new(place_id.clone()));
arcs.push(Arc::transition_to_place(a, place_id.clone()));
arcs.push(Arc::place_to_transition(place_id, b));
}
places.push(Place::new("end"));
for end_act in sorted_set(ends) {
arcs.push(Arc::transition_to_place(end_act, "end"));
}
let marking = Marking::new([("start".to_string(), 1)]);
Ok(PetriNet::new(places, transitions, arcs, marking))
}
fn sorted_set(set: HashSet<String>) -> Vec<String> {
let mut v: Vec<_> = set.into_iter().collect();
v.sort();
v
}
fn sorted_pairs(set: HashSet<(String, String)>) -> Vec<(String, String)> {
let mut v: Vec<_> = set.into_iter().collect();
v.sort();
v
}
#[cfg(test)]
mod tests {
use super::*;
use crate::ocel::{build_event, object_ref, SeqCounter};
use crate::chain::ChainAssembler;
use crate::admission::admit;
#[test]
fn test_maximalist_discovery_linear() {
let mut asm = ChainAssembler::new();
let mut counter = SeqCounter::new();
let ev1 = build_event("create", vec![object_ref("o1", "artifact")], b"1", &mut counter).unwrap();
let ev2 = build_event("update", vec![object_ref("o1", "artifact")], b"2", &mut counter).unwrap();
let ev3 = build_event("delete", vec![object_ref("o1", "artifact")], b"3", &mut counter).unwrap();
asm.append(ev1).unwrap();
asm.append(ev2).unwrap();
asm.append(ev3).unwrap();
let receipt = asm.finalize();
let admitted = admit(receipt).expect("admission must pass");
let net = discover_him(&admitted).expect("discovery must pass");
assert_eq!(net.transitions().len(), 3, "three activities -> three transitions");
assert!(net.places().iter().any(|p| p.id() == "start"));
assert!(net.places().iter().any(|p| p.id() == "end"));
assert!(net.places().iter().any(|p| p.id() == "p_create_update"));
assert!(net.places().iter().any(|p| p.id() == "p_update_delete"));
}
}