1use std::collections::HashMap;
2use std::str::FromStr;
3use ahash::{AHashSet, AHashMap};
4use chrono::Duration;
5use petgraph::EdgeDirection::Outgoing;
6use itertools::Itertools;
7use petgraph::graph::NodeIndex;
8use polars::prelude::{Series, NamedFromOwned, DataFrame, NamedFrom};
9use serde_json::Value;
10use strum::{EnumString, Display, IntoStaticStr};
11use rayon::prelude::*;
12
13use crate::objects::ocel::Ocel;
14use crate::objects::ocdg::{Ocdg, Relations};
15use super::operator::Operator;
16
17#[derive(EnumString, IntoStaticStr, Display, Hash, Eq, PartialEq, Debug)]
18pub enum ObjectPoint {
19 UniqueNeighborCount,
20 ActivityExistence,
21 ActivityExistenceCount,
22 ActivityValueOperator,
23 ObjectTypeRelationsValueOperator,
24 ObjectLifetime,
25 ObjectUnitSetRatio,
26 ObjectEventInteractionOperator,
27 ObjectTypeInteraction,
28 ObjectEventsDirectlyFollows,
29 ObjectWaitTime,
30 ObjectStartEnd,
31 ObjectDirectRelationCount,
32 SubgraphExistenceCount
33}
34
35
36pub struct ObjectPointConfig<'a> {
37 pub ocel: &'a Ocel,
38 pub ocdg: &'a Ocdg,
39 pub params: &'a Vec<(ObjectPoint, Option<Value>)>
40
41}
42
/// Transposes a row-major matrix: element `j` of input row `i` becomes element `i` of
/// output row `j`.
///
/// The number of output rows is the length of the first input row. An empty input yields
/// an empty output, because the column count is unknown in that case. Elements are moved
/// out of the input rather than cloned; the `Clone` bound is kept only so the signature
/// stays unchanged.
///
/// # Panics
/// Panics if any row is shorter than the first row. Surplus elements of longer rows are
/// dropped.
fn transpose<T>(v: Vec<Vec<T>>) -> Vec<Vec<T>>
where
    T: Clone,
{
    let width = match v.first() {
        Some(first_row) => first_row.len(),
        None => return Vec::new(),
    };

    // One consuming iterator per row; each pass over them peels off one column.
    let mut rows: Vec<_> = v.into_iter().map(|row| row.into_iter()).collect();
    (0..width)
        .map(|_| {
            rows.iter_mut()
                .map(|row| {
                    row.next()
                        .expect("transpose: every row must be at least as long as the first row")
                })
                .collect::<Vec<T>>()
        })
        .collect()
}
52
53
54pub fn object_point_features(config: ObjectPointConfig) -> DataFrame {
55 let obj_str_vec: Vec<&str> = config.ocel.objects.keys().map(|oid| config.ocel.object_map.get_by_right(oid).unwrap().as_str()).collect();
56
57 let mut series_vec: Vec<Series> = vec![Series::new("oids", obj_str_vec.clone())];
58
59 for (feature, params) in config.params {
60 match feature {
61 ObjectPoint::UniqueNeighborCount => {
62 let mut feature_vector: Vec<u64> = vec![0;obj_str_vec.len()];
63 feature_vector.par_iter_mut()
64 .enumerate()
65 .for_each(|(i, v)| {
66 *v = unique_neighbor_count(config.ocdg, config.ocdg.object_map.get_by_left(obj_str_vec[i]).unwrap()) as u64
67 });
68
69 series_vec.push(Series::from_vec(feature.into(), feature_vector));
70
71 },
72 ObjectPoint::ActivityExistence => {
73 let mut feature_vector: Vec<Vec<u8>> = vec![vec![0;config.ocel.activities.len()];obj_str_vec.len()];
74 feature_vector.par_iter_mut()
75 .enumerate()
76 .for_each(|(i, v)| {
77 *v = activity_existence(config.ocel, config.ocel.object_map.get_by_left(obj_str_vec[i]).unwrap());
78 });
79
80 for (v, act) in transpose(feature_vector).iter().zip(&config.ocel.activities) {
81 series_vec.push(Series::new(format!("{:?}:{:?}:exists", feature, act).as_str(), v));
82 }
83 },
84 ObjectPoint::ActivityExistenceCount => {
85 let mut feature_vector: Vec<Vec<u64>> = vec![vec![0;config.ocel.activities.len()];obj_str_vec.len()];
86 feature_vector.par_iter_mut()
87 .enumerate()
88 .for_each(|(i, v)| {
89 *v = activity_existence_count(config.ocel, config.ocel.object_map.get_by_left(obj_str_vec[i]).unwrap()).iter().map(|c| *c as u64).collect();
90 });
91
92 for (v, act) in transpose(feature_vector).iter().zip(&config.ocel.activities) {
93 series_vec.push(Series::new(format!("{:?}:{:?}:count", feature, act).as_str(), v));
94 }
95 },
96 ObjectPoint::ObjectLifetime => {
97 let mut feature_vector: Vec<i64> = vec![0;obj_str_vec.len()];
98 feature_vector.par_iter_mut()
99 .enumerate()
100 .for_each(|(i, v)| {
101 *v = object_lifetime(config.ocel, config.ocel.object_map.get_by_left(obj_str_vec[i]).unwrap()).num_milliseconds();
102 });
103 series_vec.push(Series::from_vec(feature.into(), feature_vector));
104 },
105 ObjectPoint::ObjectUnitSetRatio => {
106 let mut feature_vector: Vec<f64> = vec![0.0;obj_str_vec.len()];
107 feature_vector.par_iter_mut()
108 .enumerate()
109 .for_each(|(i, v)| {
110 *v = object_unit_set_ratio(config.ocel, config.ocel.object_map.get_by_left(obj_str_vec[i]).unwrap());
111 });
112 series_vec.push(Series::from_vec(feature.into(), feature_vector));
113 },
114 ObjectPoint::ObjectEventInteractionOperator => {
115 let mut feature_vector: Vec<f64> = vec![0.0;obj_str_vec.len()];
116 feature_vector.par_iter_mut()
117 .enumerate()
118 .for_each(|(i, v)| {
119 *v = object_average_event_interaction(config.ocel, config.ocel.object_map.get_by_left(obj_str_vec[i]).unwrap());
120 });
121 series_vec.push(Series::from_vec(feature.into(), feature_vector));
122 },
123 ObjectPoint::ActivityValueOperator => {
124 if let Some(f_params) = params {
125 let attr: Option<&Value> = f_params.get("attribute");
126 let op: Option<&Value> = f_params.get("operator");
127 if let (Some(attr_valid), Some(op_valid)) = (attr, op) {
128 let attr_str = attr_valid.as_str().unwrap();
129 let op_enum = Operator::from_str(op_valid.as_str().unwrap()).unwrap();
130 let mut feature_vector: Vec<f64> = vec![0.0; obj_str_vec.len()];
131 feature_vector.par_iter_mut()
132 .enumerate()
133 .for_each(|(i, v)| {
134 *v = activity_value_operator(&config.ocel, config.ocel.object_map.get_by_left(obj_str_vec[i]).unwrap(), attr_str, &op_enum);
135 });
136 series_vec.push(Series::from_vec(format!("{:?}:{:?}:{:?}", feature, attr_str, op_enum).as_str(), feature_vector));
137 }
138 }
139 },
140 ObjectPoint::ObjectTypeInteraction => {
141 if let Some(f_params) = params {
142 if let Some(otype_valid) = f_params.get("object_type") {
143 let otype_str = otype_valid.as_str().unwrap();
144 let mut feature_vector: Vec<u64> = vec![0;obj_str_vec.len()];
145 feature_vector.par_iter_mut()
146 .enumerate()
147 .for_each(|(i, v)| {
148 *v = object_type_interaction(&config.ocdg, config.ocdg.object_map.get_by_left(obj_str_vec[i]).unwrap(), otype_str) as u64;
149 });
150
151 series_vec.push(Series::from_vec(format!("{:?}:{:?}", feature, otype_str).as_str(), feature_vector));
152 }
153 }
154 },
155 ObjectPoint::ObjectWaitTime => {
156 if let Some(f_params) = params {
157 let act1: Option<&Value> = f_params.get("activity_src");
158 let act2: Option<&Value> = f_params.get("activity_tar");
159 if let (Some(act1_valid), Some(act2_valid)) = (act1, act2) {
160 let act1_str = act1_valid.as_str().unwrap();
161 let act2_str = act2_valid.as_str().unwrap();
162 let mut feature_vector: Vec<i64> = vec![0;obj_str_vec.len()];
163 feature_vector.par_iter_mut()
164 .enumerate()
165 .for_each(|(i, v)| {
166 *v = object_wait_time(&config.ocel, config.ocel.object_map.get_by_left(obj_str_vec[i]).unwrap(), act1_str, act2_str).num_milliseconds();
167 });
168 series_vec.push(Series::from_vec(format!("{:?}:{:?}:{:?}", feature, act1_str, act2_str).as_str(), feature_vector));
169 }
170 }
171 },
172 ObjectPoint::ObjectDirectRelationCount => {
173 if let Some(f_params) = params {
174 if let Some(relations_valid) = f_params.get("relations") {
175 let rel_enum = Relations::from_str(relations_valid.as_str().unwrap()).unwrap();
176 let mut feature_vector: Vec<u64> = vec![0;obj_str_vec.len()];
177 feature_vector.par_iter_mut()
178 .enumerate()
179 .for_each(|(i, v)| {
180 *v = object_direct_rel_count(&config.ocdg, config.ocdg.object_map.get_by_left(obj_str_vec[i]).unwrap(), &rel_enum) as u64;
181 });
182 series_vec.push(Series::from_vec(format!("{:?}:{:?}", feature, rel_enum).as_str(), feature_vector));
183
184 }
185 }
186
187 },
188 ObjectPoint::ObjectEventsDirectlyFollows => {
189 let act_act_order: Vec<(&str, &str)> = config.ocel.activities.iter()
190 .cartesian_product(&config.ocel.activities)
191 .map(|(act1, act2)| {
192 (act1.as_str(), act2.as_str())
193 })
194 .collect();
195 let mut feature_vector: Vec<Vec<u64>> = vec![vec![0;act_act_order.len()];obj_str_vec.len()];
196 feature_vector.par_iter_mut()
197 .enumerate()
198 .for_each(|(i, v)| {
199 let oe_df = object_events_directly_follows(&config.ocel, config.ocel.object_map.get_by_left(obj_str_vec[i]).unwrap());
200 let oe_vec = (0..act_act_order.len()).into_iter()
201 .map(|_| {
202 let curr_pair = act_act_order[i];
203 if let Some(ac1) = oe_df.get(curr_pair.0) {
204 if let Some(ac2) = ac1.get(curr_pair.1) {
205 return *ac2 as u64;
206 }
207 }
208 0
209 })
210 .collect();
211 *v = oe_vec;
212 });
213
214 for (v, act_act) in transpose(feature_vector).iter().zip(act_act_order) {
215 series_vec.push(Series::new(format!("{:?}:{:?}:count", feature, act_act).as_str(), v));
216 }
217
218 },
219 _ => {}
220 }
221 }
222 DataFrame::new(series_vec).unwrap()
223}
224
225pub fn unique_neighbor_count(ocdg: &Ocdg, oid: &usize) -> usize {
226 let curr_oid: NodeIndex = ocdg.inodes[oid];
227 ocdg.net.neighbors_directed(curr_oid, Outgoing).into_iter()
228 .unique()
229 .count()
230}
231
232pub fn activity_existence(log: &Ocel, oid: &usize) -> Vec<u8> {
233 let oe_activities: AHashSet<&String> = AHashSet::from_iter(log.objects[&oid].events.iter()
234 .map(|oe| &log.events[&oe].activity));
235 log.activities.iter()
236 .map(|act| {if oe_activities.contains(act) {1} else {0}})
237 .collect_vec()
238}
239
240
241pub fn activity_existence_count(log: &Ocel, oid: &usize) -> Vec<usize> {
242 let oe_activities: HashMap<&String, usize> = log.objects[&oid].events.iter()
243 .map(|oe| &log.events[&oe].activity)
244 .counts();
245 log.activities.iter()
246 .map(|act| {match oe_activities.get(act) {
247 Some(v) => *v,
248 None => 0
249 }})
250 .collect_vec()
251}
252
253pub fn activity_value_operator(log: &Ocel, oid: &usize, attr: &str, op: &Operator) -> f64 {
254 op.execute(log.objects[&oid].events.iter()
255 .filter(|oe| log.events[&oe].vmap.contains_key(&attr.to_string()))
256 .map(|oe| match &log.events[&oe].vmap[&attr.to_string()] {
257 Value::Number(v) => v.as_f64().unwrap(),
258 _ => 0.0
259 })).unwrap()
260
261}
262
/// Placeholder for the object-type-relations value operator feature.
///
/// # Panics
/// Always panics: the feature is not implemented yet.
pub fn object_type_relations_value_operator() {
    todo!()
}
264
265pub fn object_lifetime(log: &Ocel, oid: &usize) -> Duration {
266 if let Some(node) = log.objects.get(oid) {
267 let initial = node.events.first().unwrap();
268 let end = node.events.last().unwrap();
269
270 if log.events.contains_key(&initial) && log.events.contains_key(&end) {
271 return log.events[&end].timestamp - log.events[&initial].timestamp;
272 }
273 }
274 Duration::zero()
275}
276
277pub fn object_unit_set_ratio(log: &Ocel, oid: &usize) -> f64 {
278 if let Some(node) = log.objects.get(oid) {
279 let unitset = node.events.iter()
280 .map(|ev| {
281 if log.events.contains_key(ev) {
282 for oid2 in &log.events[ev].omap {
283 if oid != oid2 && log.objects[oid].obj_type == log.objects[oid2].obj_type {
284 return 0;
285 }
286 }
287 } else {
288 return 0;
289 }
290 1
291 }).fold(0, |accum, item| accum + item);
292
293 return unitset as f64 / node.events.len() as f64
294 }
295 0.0
296}
297
298pub fn object_average_event_interaction(log: &Ocel, oid: &usize) -> f64 {
299 if let Some(node) = log.objects.get(oid) {
300 let interaction = node.events.iter()
301 .map(|ev| {
302 if log.events.contains_key(ev) {
303 return log.events[ev].omap.len() - 1;
304 }
305 0})
306 .fold(0, |accum, item| accum + item);
307
308 return interaction as f64 / node.events.len() as f64
309
310 }
311 0.0
312}
313
314pub fn object_type_interaction(ocdg: &Ocdg, oid: &usize, otype: &str) -> usize {
315 if let Some(node) = ocdg.inodes.get(oid) {
316 let neighs = ocdg.net.neighbors_directed(*node, Outgoing);
317 return neighs.map(|oid2| {if oid != &ocdg.net[oid2]
318 && otype == ocdg.node_attributes[&ocdg.net[oid2]].node_type {1} else {0}})
319 .fold(0, |accum, item| accum + item);
320
321 }
322 0
323
324}
325
326pub fn object_events_directly_follows(log: &Ocel, oid: &usize) -> AHashMap<String, AHashMap<String, usize>> {
327 let mut df: AHashMap<String, AHashMap<String, usize>> = AHashMap::default();
328 if let Some(obj) = log.objects.get(oid) {
329 (0..obj.events.len() - 1).into_iter()
330 .for_each(|i| {
331 let src = &log.events[&obj.events[i]].activity;
332 let tar = &log.events[&obj.events[i+1]].activity;
333 let df_srctar = df.entry(src.to_owned())
334 .or_insert(AHashMap::default())
335 .entry(tar.to_owned())
336 .or_insert(0);
337
338 *df_srctar += 1;
339 });
340 }
341 df
342}
343
344pub fn object_wait_time(log: &Ocel, oid: &usize, act1: &str, act2: &str) -> Duration {
345 let mut time_diff = Duration::zero();
346 if let Some(obj) = log.objects.get(oid) {
347 let mut ev1: usize = usize::MAX;
348 let mut ev2: usize = usize::MAX;
349 obj.events.iter().rev().for_each(|item|{
350 if let Some(curr) = log.events.get(item) {
351 if ev2 == usize::MAX {
352 if curr.activity == act2 {
353 ev2 = *item;
354 }
355 } else if ev1 == usize::MAX {
356 if curr.activity == act1 {
357 ev1 = *item;
358 time_diff = log.events[&ev2].timestamp - log.events[&ev1].timestamp;
359 if time_diff < Duration::zero() {
360 time_diff = Duration::zero();
361 }
362 }
363 }
364 }
365 });
366 }
367 time_diff
368}
369
370pub fn object_oe_root(log: &Ocel, oid: &usize) -> bool {
371 if let Some(obj) = log.objects.get(oid) {
372 if let Some(root_ev) = obj.events.first() {
373 let root1 = &log.events[root_ev];
374 for oid2 in root1.omap.iter() {
375 if let Some(other) = log.objects.get(&oid2) {
376 if let Some(root_ev2) = other.events.first() {
377 let root2 = &log.events[root_ev2];
378 if root1.timestamp > root2.timestamp {
379 return false;
380 }
381
382 }
383 }
384
385 }
386
387 }
388 true
389 } else {
390 false
391 }
392
393}
394
395
396pub fn object_oe_leaf(log: &Ocel, oid: &usize) -> bool {
397 if let Some(obj) = log.objects.get(oid) {
398 if let Some(leaf_ev) = obj.events.last() {
399 let leaf1 = &log.events[leaf_ev];
400 for oid2 in leaf1.omap.iter() {
401 if let Some(other) = log.objects.get(&oid2) {
402 if let Some(leaf_ev2) = other.events.last() {
403 let leaf2 = &log.events[leaf_ev2];
404 if leaf1.timestamp < leaf2.timestamp {
405 return false;
406 }
407
408 }
409 }
410
411 }
412
413 }
414 true
415 } else {
416 false
417 }
418
419}
420
421pub fn object_direct_rel_count(ocdg: &Ocdg, oid: &usize, rel: &Relations) -> usize {
422 if let Some(obj) = ocdg.inodes.get(oid) {
423 let neighs = ocdg.net.neighbors_directed(*obj, Outgoing);
424 return neighs.enumerate().map(|(_i, neigh)| {
425 let neigh_id = &ocdg.net[neigh];
426 let conn = ocdg.irels.get(oid).unwrap().get(neigh_id).unwrap();
427 if conn.contains_key(&(rel.relation_index())) {
428 1
429 } else {
430 0
431 }
432 }).fold(0 as usize, |accum, item| accum + item);
433
434 }
4350
436}
437
/// Placeholder for the subgraph existence count feature.
///
/// # Panics
/// Always panics: the feature is not implemented yet.
pub fn object_subgraph_count() {
    todo!()
}
439
440
#[cfg(test)]
mod tests {
    use std::collections::HashSet;

    use serde_json::json;

    use crate::objects::{ocel::importer::import_ocel, ocdg::generate_ocdg};

    use super::*;

    /// Absolute tolerance for floating-point feature values.
    const ERROR: f64 = 0.0001;

    lazy_static::lazy_static!{
        static ref OCEL: Ocel = import_ocel("logs/ocel-complex-test.jsonocel").expect("What did you do to the file?");
        // Built from the shared log fixture so the file is parsed only once.
        static ref OCDG: Ocdg = generate_ocdg(&*OCEL, &vec![Relations::INTERACTS]);
    }

    #[test]
    fn test_unique_neighbour_count() {
        let oid = OCDG.object_map.get_by_left("r1").expect("cannot fail");
        assert_eq!(unique_neighbor_count(&OCDG, oid), 4);
    }

    #[test]
    fn test_activity_existence() {
        let correct: HashSet<&str> = HashSet::from_iter(["place order", "check availability", "pick item", "receive payment", "send invoice"]);
        let oid = OCEL.object_map.get_by_left("o1").expect("cannot fail");
        assert_eq!(activity_existence(&OCEL, oid).iter().sum::<u8>(), 5);
        activity_existence(&OCEL, oid).iter().enumerate().for_each(|(i, val)| {
            match val {
                0 => {assert!(!correct.contains(&OCEL.activities[i].as_str()))},
                _ => {assert!(correct.contains(&OCEL.activities[i].as_str()))}
            }
        });
    }

    #[test]
    fn test_activity_existence_count() {
        let correct: HashMap<&str, usize> = HashMap::from_iter([("place order", 1), ("check availability", 3), ("pick item", 2), ("receive payment", 1), ("send invoice", 1)]);
        let oid = OCEL.object_map.get_by_left("o1").expect("cannot fail");
        assert_eq!(activity_existence_count(&OCEL, oid).iter().sum::<usize>(), 8);

        activity_existence_count(&OCEL, oid).iter().enumerate().for_each(|(i, val)| {
            match val {
                0 => {assert!(!correct.contains_key(&OCEL.activities[i].as_str()))},
                _ => {assert_eq!(*val, correct[OCEL.activities[i].as_str()])}
            }
        });
    }

    #[test]
    fn test_activity_value_operator() {
        let oid = OCEL.object_map.get_by_left("i1").expect("cannot fail");
        let attr = "prepaid-amount";
        assert_eq!(activity_value_operator(&OCEL, oid, attr, &Operator::Max), 1000.0);
    }

    #[test]
    fn test_object_lifetime() {
        let oid = OCEL.object_map.get_by_left("i1").expect("cannot fail");
        assert_eq!(object_lifetime(&OCEL, oid).num_milliseconds(), 1980000);
    }

    #[test]
    fn test_unit_set_ratio() {
        let oid = OCEL.object_map.get_by_left("i1").expect("cannot fail");
        assert!((object_unit_set_ratio(&OCEL, oid) - 0.222222).abs() < ERROR);
        let oid = OCEL.object_map.get_by_left("r1").expect("cannot fail");
        assert!((object_unit_set_ratio(&OCEL, oid) - 1.0).abs() < ERROR);
    }

    #[test]
    fn test_object_average_event_interaction() {
        let oid = OCEL.object_map.get_by_left("i1").expect("cannot fail");
        assert!((object_average_event_interaction(&OCEL, oid) - 2.44444).abs() < ERROR);
        let oid = OCEL.object_map.get_by_left("r1").expect("cannot fail");
        assert!((object_average_event_interaction(&OCEL, oid) - 1.6).abs() < ERROR);
    }

    #[test]
    fn test_object_type_interaction() {
        let oid = OCDG.object_map.get_by_left("i1").expect("cannot fail");
        assert_eq!(object_type_interaction(&OCDG, oid, "order"), 1);
        let oid = OCDG.object_map.get_by_left("r2").expect("cannot fail");
        assert_eq!(object_type_interaction(&OCDG, oid, "item"), 6);
    }

    #[test]
    fn test_events_directly_follows() {
        let oid = OCEL.object_map.get_by_left("o3").expect("cannot fail");
        let oid_df = object_events_directly_follows(&OCEL, oid);
        assert_eq!(oid_df["place order"]["check availability"], 1);
        assert_eq!(oid_df["pick item"]["send invoice"], 1);
        assert_eq!(oid_df["send invoice"]["receive payment"], 1);
        assert_eq!(oid_df["check availability"]["check availability"], 1);
        assert_eq!(oid_df["check availability"]["pick item"], 1);

        let oid = OCEL.object_map.get_by_left("o1").expect("cannot fail");
        let oid_df = object_events_directly_follows(&OCEL, oid);
        assert_eq!(oid_df["check availability"]["pick item"], 2);
    }

    #[test]
    fn test_object_wait_time() {
        let oid = OCEL.object_map.get_by_left("o1").expect("cannot fail");
        let from_activity = "place order";
        let to_activity = "receive payment";
        assert_eq!(object_wait_time(&OCEL, oid, from_activity, to_activity).num_milliseconds(), 1320000);
    }

    #[test]
    fn test_object_oe_leaf() {
        let oid = OCEL.object_map.get_by_left("o3").expect("cannot fail");
        assert!(object_oe_leaf(&OCEL, oid));

        let oid = OCEL.object_map.get_by_left("r1").expect("cannot fail");
        assert!(!object_oe_leaf(&OCEL, oid));
    }

    #[test]
    fn test_object_oe_root() {
        let oid = OCEL.object_map.get_by_left("o3").expect("cannot fail");
        assert!(object_oe_root(&OCEL, oid));

        let oid = OCEL.object_map.get_by_left("r1").expect("cannot fail");
        assert!(!object_oe_root(&OCEL, oid));
    }

    #[test]
    fn test_object_direct_rel_count() {
        let oid = OCDG.object_map.get_by_left("o1").expect("cannot fail");
        assert_eq!(object_direct_rel_count(&OCDG, oid, &Relations::INTERACTS), 2);

        let oid = OCDG.object_map.get_by_left("r2").expect("cannot fail");
        assert_eq!(object_direct_rel_count(&OCDG, oid, &Relations::INTERACTS), 8);
    }

    #[test]
    fn test_user_facing_suite() {
        let mut feature_vec: Vec<(ObjectPoint, Option<Value>)> = vec![];
        feature_vec.push((ObjectPoint::UniqueNeighborCount, None));
        feature_vec.push((ObjectPoint::ObjectWaitTime, Some(json!({"activity_src": "place order", "activity_tar": "receive payment"}))));
        let config = ObjectPointConfig {ocel: &OCEL, ocdg: &OCDG, params: &feature_vec};
        let res = object_point_features(config);
        println!("{}", res);

        // One row per object; the `oids` column plus one column per requested feature.
        assert_eq!(res.height(), OCEL.objects.len());
        assert_eq!(res.width(), 3);
    }
}