// datasynth_eval/process_mining/event_sequence.rs

use crate::error::EvalResult;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// A single event from a process-mining event log, as consumed by
/// [`EventSequenceAnalyzer::analyze`].
#[derive(Debug, Clone)]
pub struct ProcessEventData {
    /// Unique identifier of this event.
    pub event_id: String,
    /// Identifier of the case (process instance) this event belongs to;
    /// events are grouped and analyzed per case.
    pub case_id: String,
    /// Name of the recorded activity (e.g. "Create PO").
    pub activity: String,
    /// Event time as an integer; the epoch/unit is not specified here —
    /// durations are reported in this same unit. TODO confirm with producers.
    pub timestamp: i64,
    /// Optional business-object id; when present, the event participates in
    /// object lifecycle-completeness tracking.
    pub object_id: Option<String>,
    /// True if this event ends the object's lifecycle.
    pub is_terminal: bool,
    /// True if this event creates the object.
    pub is_creation: bool,
}
28
/// Pass/fail thresholds applied by [`EventSequenceAnalyzer`] when deciding
/// whether an event log's sequence quality is acceptable.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EventSequenceThresholds {
    /// Minimum fraction of cases whose timestamps must be monotonic.
    pub min_monotonicity: f64,
    /// Minimum fraction of tracked objects that must have both a creation
    /// and a terminal event.
    pub min_lifecycle_completeness: f64,
    /// Maximum allowed fraction of consecutive event pairs whose timestamps
    /// go backwards.
    pub max_negative_duration_rate: f64,
}
39
40impl Default for EventSequenceThresholds {
41 fn default() -> Self {
42 Self {
43 min_monotonicity: 0.99,
44 min_lifecycle_completeness: 0.90,
45 max_negative_duration_rate: 0.01,
46 }
47 }
48}
49
/// Result of analyzing an event log's sequence quality.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EventSequenceAnalysis {
    /// Fraction of cases whose event timestamps are monotonic (in [0, 1]).
    pub timestamp_monotonicity: f64,
    /// Fraction of tracked objects with both creation and terminal events;
    /// 1.0 when no events carry an `object_id`.
    pub object_lifecycle_completeness: f64,
    /// Number of consecutive event pairs whose timestamps go backwards.
    pub negative_duration_count: usize,
    /// `negative_duration_count` divided by the total number of pairs.
    pub negative_duration_rate: f64,
    /// Mean case duration (last minus first timestamp) over cases with at
    /// least two events, in timestamp units.
    pub avg_case_duration: f64,
    /// Coefficient of variation (sample stddev / mean) of case durations;
    /// 0.0 when fewer than two durations or mean is non-positive.
    pub duration_cv: f64,
    /// Total number of events analyzed.
    pub total_events: usize,
    /// Total number of distinct cases.
    pub total_cases: usize,
    /// True when no threshold was violated (i.e. `issues` is empty).
    pub passes: bool,
    /// Human-readable description of each threshold violation.
    pub issues: Vec<String>,
}
74
/// Analyzer that scores event logs against [`EventSequenceThresholds`].
pub struct EventSequenceAnalyzer {
    // Thresholds applied in `analyze`; set via `new` (defaults) or
    // `with_thresholds`.
    thresholds: EventSequenceThresholds,
}
79
80impl EventSequenceAnalyzer {
81 pub fn new() -> Self {
83 Self {
84 thresholds: EventSequenceThresholds::default(),
85 }
86 }
87
88 pub fn with_thresholds(thresholds: EventSequenceThresholds) -> Self {
90 Self { thresholds }
91 }
92
93 pub fn analyze(&self, events: &[ProcessEventData]) -> EvalResult<EventSequenceAnalysis> {
95 let mut issues = Vec::new();
96
97 if events.is_empty() {
98 return Ok(EventSequenceAnalysis {
99 timestamp_monotonicity: 1.0,
100 object_lifecycle_completeness: 1.0,
101 negative_duration_count: 0,
102 negative_duration_rate: 0.0,
103 avg_case_duration: 0.0,
104 duration_cv: 0.0,
105 total_events: 0,
106 total_cases: 0,
107 passes: true,
108 issues: Vec::new(),
109 });
110 }
111
112 let mut by_case: HashMap<&str, Vec<&ProcessEventData>> = HashMap::new();
114 for event in events {
115 by_case
116 .entry(event.case_id.as_str())
117 .or_default()
118 .push(event);
119 }
120
121 for case_events in by_case.values_mut() {
123 case_events.sort_by_key(|e| e.timestamp);
124 }
125
126 let mut monotonic_cases = 0usize;
128 let mut total_negative = 0usize;
129 let mut total_pairs = 0usize;
130
131 for case_events in by_case.values() {
132 let mut is_monotonic = true;
133 for pair in case_events.windows(2) {
134 total_pairs += 1;
135 if pair[1].timestamp < pair[0].timestamp {
136 is_monotonic = false;
137 total_negative += 1;
138 }
139 }
140 if is_monotonic {
141 monotonic_cases += 1;
142 }
143 }
144
145 let total_cases = by_case.len();
146 let timestamp_monotonicity = if total_cases > 0 {
147 monotonic_cases as f64 / total_cases as f64
148 } else {
149 1.0
150 };
151 let negative_duration_rate = if total_pairs > 0 {
152 total_negative as f64 / total_pairs as f64
153 } else {
154 0.0
155 };
156
157 let mut objects: HashMap<&str, (bool, bool)> = HashMap::new(); for event in events {
160 if let Some(ref obj_id) = event.object_id {
161 let entry = objects.entry(obj_id.as_str()).or_insert((false, false));
162 if event.is_creation {
163 entry.0 = true;
164 }
165 if event.is_terminal {
166 entry.1 = true;
167 }
168 }
169 }
170 let complete_objects = objects.values().filter(|(c, t)| *c && *t).count();
171 let object_lifecycle_completeness = if objects.is_empty() {
172 1.0
173 } else {
174 complete_objects as f64 / objects.len() as f64
175 };
176
177 let case_durations: Vec<f64> = by_case
179 .values()
180 .filter_map(|case_events| {
181 if case_events.len() < 2 {
182 return None;
183 }
184 let first = case_events.first().map(|e| e.timestamp)?;
185 let last = case_events.last().map(|e| e.timestamp)?;
186 Some((last - first) as f64)
187 })
188 .collect();
189
190 let avg_case_duration = if case_durations.is_empty() {
191 0.0
192 } else {
193 case_durations.iter().sum::<f64>() / case_durations.len() as f64
194 };
195
196 let duration_cv = if case_durations.len() >= 2 && avg_case_duration > 0.0 {
197 let variance = case_durations
198 .iter()
199 .map(|d| (d - avg_case_duration).powi(2))
200 .sum::<f64>()
201 / (case_durations.len() - 1) as f64;
202 variance.sqrt() / avg_case_duration
203 } else {
204 0.0
205 };
206
207 if timestamp_monotonicity < self.thresholds.min_monotonicity {
209 issues.push(format!(
210 "Timestamp monotonicity {:.3} < {:.3}",
211 timestamp_monotonicity, self.thresholds.min_monotonicity
212 ));
213 }
214 if object_lifecycle_completeness < self.thresholds.min_lifecycle_completeness {
215 issues.push(format!(
216 "Object lifecycle completeness {:.3} < {:.3}",
217 object_lifecycle_completeness, self.thresholds.min_lifecycle_completeness
218 ));
219 }
220 if negative_duration_rate > self.thresholds.max_negative_duration_rate {
221 issues.push(format!(
222 "Negative duration rate {:.3} > {:.3}",
223 negative_duration_rate, self.thresholds.max_negative_duration_rate
224 ));
225 }
226
227 let passes = issues.is_empty();
228
229 Ok(EventSequenceAnalysis {
230 timestamp_monotonicity,
231 object_lifecycle_completeness,
232 negative_duration_count: total_negative,
233 negative_duration_rate,
234 avg_case_duration,
235 duration_cv,
236 total_events: events.len(),
237 total_cases,
238 passes,
239 issues,
240 })
241 }
242}
243
244impl Default for EventSequenceAnalyzer {
245 fn default() -> Self {
246 Self::new()
247 }
248}
249
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Builds a test event with the given identity, timestamp, and lifecycle flags.
    fn event(
        event_id: &str,
        activity: &str,
        timestamp: i64,
        object_id: Option<&str>,
        is_terminal: bool,
        is_creation: bool,
    ) -> ProcessEventData {
        ProcessEventData {
            event_id: event_id.to_string(),
            case_id: "C1".to_string(),
            activity: activity.to_string(),
            timestamp,
            object_id: object_id.map(str::to_string),
            is_terminal,
            is_creation,
        }
    }

    #[test]
    fn test_valid_sequence() {
        let analyzer = EventSequenceAnalyzer::new();
        let events = vec![
            event("E1", "Create PO", 1000, Some("OBJ1"), false, true),
            event("E2", "Approve PO", 2000, Some("OBJ1"), false, false),
            event("E3", "Close PO", 3000, Some("OBJ1"), true, false),
        ];

        let result = analyzer.analyze(&events).unwrap();
        assert!(result.passes);
        assert_eq!(result.timestamp_monotonicity, 1.0);
        assert_eq!(result.object_lifecycle_completeness, 1.0);
    }

    #[test]
    fn test_out_of_order() {
        let analyzer = EventSequenceAnalyzer::new();
        let events = vec![
            event("E1", "Step A", 2000, None, false, false),
            event("E2", "Step B", 1000, None, false, false),
        ];

        let result = analyzer.analyze(&events).unwrap();
        // Out-of-order recording must be detected: one backwards pair,
        // zero monotonic cases, overall failure.
        // (The previous assertion of 0 pinned a bug where events were
        // timestamp-sorted before the monotonicity check.)
        assert_eq!(result.negative_duration_count, 1);
        assert_eq!(result.timestamp_monotonicity, 0.0);
        assert!(!result.passes);
    }

    #[test]
    fn test_empty() {
        let analyzer = EventSequenceAnalyzer::new();
        let result = analyzer.analyze(&[]).unwrap();
        assert!(result.passes);
    }
}