1use std::collections::{HashMap, HashSet};
12
13use chrono::NaiveDate;
14use serde::{Deserialize, Serialize};
15
16use crate::models::{EdgeId, Graph, NodeId};
17
18#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
20pub enum GraphMotif {
21 CircularFlow {
23 length: usize,
25 },
26 StarPattern {
28 min_spokes: usize,
30 },
31 BackAndForth,
33 Clique {
35 size: usize,
37 },
38 Chain {
40 length: usize,
42 },
43 FunnelPattern {
45 sources: usize,
47 },
48}
49
50impl GraphMotif {
51 pub fn name(&self) -> &str {
53 match self {
54 GraphMotif::CircularFlow { .. } => "circular_flow",
55 GraphMotif::StarPattern { .. } => "star_pattern",
56 GraphMotif::BackAndForth => "back_and_forth",
57 GraphMotif::Clique { .. } => "clique",
58 GraphMotif::Chain { .. } => "chain",
59 GraphMotif::FunnelPattern { .. } => "funnel_pattern",
60 }
61 }
62}
63
64#[derive(Debug, Clone)]
66pub struct MotifConfig {
67 pub max_cycle_length: usize,
69 pub min_star_spokes: usize,
71 pub detect_back_and_forth: bool,
73 pub max_clique_size: usize,
75 pub min_chain_length: usize,
77 pub time_window_days: Option<i64>,
79 pub min_edge_weight: f64,
81 pub max_results_per_type: usize,
83}
84
85impl Default for MotifConfig {
86 fn default() -> Self {
87 Self {
88 max_cycle_length: 5,
89 min_star_spokes: 5,
90 detect_back_and_forth: true,
91 max_clique_size: 4,
92 min_chain_length: 3,
93 time_window_days: None,
94 min_edge_weight: 0.0,
95 max_results_per_type: 1000,
96 }
97 }
98}
99
100#[derive(Debug, Clone, Serialize, Deserialize)]
102pub struct CircularFlow {
103 pub nodes: Vec<NodeId>,
105 pub edges: Vec<EdgeId>,
107 pub total_weight: f64,
109 pub time_span_days: Option<i64>,
111 pub confidence: f64,
113}
114
115#[derive(Debug, Clone, Serialize, Deserialize)]
117pub struct MotifInstance {
118 pub motif_type: GraphMotif,
120 pub nodes: Vec<NodeId>,
122 pub edges: Vec<EdgeId>,
124 pub total_weight: f64,
126 pub time_span_days: Option<i64>,
128 pub confidence: f64,
130 pub metadata: HashMap<String, String>,
132}
133
134impl MotifInstance {
135 pub fn new(
137 motif_type: GraphMotif,
138 nodes: Vec<NodeId>,
139 edges: Vec<EdgeId>,
140 total_weight: f64,
141 ) -> Self {
142 Self {
143 motif_type,
144 nodes,
145 edges,
146 total_weight,
147 time_span_days: None,
148 confidence: 1.0,
149 metadata: HashMap::new(),
150 }
151 }
152
153 pub fn with_time_span(mut self, days: i64) -> Self {
155 self.time_span_days = Some(days);
156 self
157 }
158
159 pub fn with_confidence(mut self, confidence: f64) -> Self {
161 self.confidence = confidence;
162 self
163 }
164
165 pub fn with_metadata(mut self, key: &str, value: &str) -> Self {
167 self.metadata.insert(key.to_string(), value.to_string());
168 self
169 }
170}
171
172#[derive(Debug, Clone, Default, Serialize, Deserialize)]
174pub struct MotifDetectionResult {
175 pub motifs: HashMap<String, Vec<MotifInstance>>,
177 pub node_motif_counts: HashMap<NodeId, HashMap<String, usize>>,
179 pub total_circular_flows: usize,
181 pub total_star_patterns: usize,
183 pub total_back_and_forth: usize,
185 pub total_cliques: usize,
187 pub total_chains: usize,
189 pub total_funnel_patterns: usize,
191}
192
193impl MotifDetectionResult {
194 pub fn node_features(&self, node_id: NodeId) -> Vec<f64> {
196 let counts = self.node_motif_counts.get(&node_id);
197
198 let get_count = |name: &str| counts.and_then(|c| c.get(name)).copied().unwrap_or(0) as f64;
199
200 vec![
201 get_count("circular_flow"),
202 get_count("star_pattern"),
203 get_count("back_and_forth"),
204 get_count("clique"),
205 get_count("funnel_pattern"),
206 ]
207 }
208
209 pub fn feature_dim() -> usize {
211 5 }
213}
214
215pub fn detect_motifs(graph: &Graph, config: &MotifConfig) -> MotifDetectionResult {
217 let mut result = MotifDetectionResult::default();
218
219 let circular_flows = find_circular_flows(graph, config);
221 result.total_circular_flows = circular_flows.len();
222 add_motifs_to_result(&mut result, circular_flows);
223
224 let star_patterns = find_star_patterns(graph, config);
226 result.total_star_patterns = star_patterns.len();
227 add_motifs_to_result(&mut result, star_patterns);
228
229 if config.detect_back_and_forth {
231 let back_and_forth = find_back_and_forth(graph, config);
232 result.total_back_and_forth = back_and_forth.len();
233 add_motifs_to_result(&mut result, back_and_forth);
234 }
235
236 let cliques = find_cliques(graph, config);
238 result.total_cliques = cliques.len();
239 add_motifs_to_result(&mut result, cliques);
240
241 let funnels = find_funnel_patterns(graph, config);
243 result.total_funnel_patterns = funnels.len();
244 add_motifs_to_result(&mut result, funnels);
245
246 result
247}
248
249fn add_motifs_to_result(result: &mut MotifDetectionResult, motifs: Vec<MotifInstance>) {
251 for motif in motifs {
252 let type_name = motif.motif_type.name().to_string();
253
254 for &node_id in &motif.nodes {
256 result
257 .node_motif_counts
258 .entry(node_id)
259 .or_default()
260 .entry(type_name.clone())
261 .and_modify(|c| *c += 1)
262 .or_insert(1);
263 }
264
265 result.motifs.entry(type_name).or_default().push(motif);
267 }
268}
269
270pub fn find_circular_flows(graph: &Graph, config: &MotifConfig) -> Vec<MotifInstance> {
272 let mut cycles = Vec::new();
273 let mut seen_cycles: HashSet<Vec<NodeId>> = HashSet::new();
274
275 for &start_node in graph.nodes.keys() {
277 find_cycles_from_node(
278 graph,
279 start_node,
280 config.max_cycle_length,
281 config.min_edge_weight,
282 &mut cycles,
283 &mut seen_cycles,
284 );
285
286 if cycles.len() >= config.max_results_per_type {
288 break;
289 }
290 }
291
292 cycles.truncate(config.max_results_per_type);
293 cycles
294}
295
296fn find_cycles_from_node(
298 graph: &Graph,
299 start: NodeId,
300 max_length: usize,
301 min_weight: f64,
302 cycles: &mut Vec<MotifInstance>,
303 seen_cycles: &mut HashSet<Vec<NodeId>>,
304) {
305 let mut path = vec![start];
306 let mut path_edges = Vec::new();
307 let mut visited = HashSet::new();
308 visited.insert(start);
309
310 dfs_find_cycles(
311 graph,
312 start,
313 start,
314 &mut path,
315 &mut path_edges,
316 &mut visited,
317 max_length,
318 min_weight,
319 cycles,
320 seen_cycles,
321 );
322}
323
324#[allow(clippy::too_many_arguments)]
326fn dfs_find_cycles(
327 graph: &Graph,
328 current: NodeId,
329 start: NodeId,
330 path: &mut Vec<NodeId>,
331 path_edges: &mut Vec<EdgeId>,
332 visited: &mut HashSet<NodeId>,
333 max_length: usize,
334 min_weight: f64,
335 cycles: &mut Vec<MotifInstance>,
336 seen_cycles: &mut HashSet<Vec<NodeId>>,
337) {
338 if path.len() > max_length {
340 return;
341 }
342
343 for edge in graph.outgoing_edges(current) {
345 if edge.weight < min_weight {
347 continue;
348 }
349
350 let target = edge.target;
351
352 if target == start && path.len() >= 3 {
354 let canonical = canonicalize_cycle(path);
356
357 if !seen_cycles.contains(&canonical) {
358 seen_cycles.insert(canonical);
359
360 let total_weight: f64 = path_edges
361 .iter()
362 .filter_map(|&id| graph.get_edge(id))
363 .map(|e| e.weight)
364 .sum::<f64>()
365 + edge.weight;
366
367 let mut edges = path_edges.clone();
368 edges.push(edge.id);
369
370 let motif = MotifInstance::new(
371 GraphMotif::CircularFlow { length: path.len() },
372 path.clone(),
373 edges,
374 total_weight,
375 );
376
377 let motif = if let Some(span) = calculate_time_span(&motif.edges, graph) {
379 motif.with_time_span(span)
380 } else {
381 motif
382 };
383
384 cycles.push(motif);
385 }
386 }
387 else if !visited.contains(&target) {
389 visited.insert(target);
390 path.push(target);
391 path_edges.push(edge.id);
392
393 dfs_find_cycles(
394 graph,
395 target,
396 start,
397 path,
398 path_edges,
399 visited,
400 max_length,
401 min_weight,
402 cycles,
403 seen_cycles,
404 );
405
406 path.pop();
407 path_edges.pop();
408 visited.remove(&target);
409 }
410 }
411}
412
413fn canonicalize_cycle(path: &[NodeId]) -> Vec<NodeId> {
415 if path.is_empty() {
416 return Vec::new();
417 }
418
419 let min_pos = path
421 .iter()
422 .enumerate()
423 .min_by_key(|(_, &node)| node)
424 .map(|(i, _)| i)
425 .unwrap_or(0);
426
427 let mut canonical: Vec<NodeId> = path[min_pos..].to_vec();
429 canonical.extend(&path[..min_pos]);
430
431 let mut reversed = canonical.clone();
433 reversed.reverse();
434
435 if reversed.len() > 1 {
437 let last = reversed.pop().unwrap();
438 reversed.insert(0, last);
439 }
440
441 if reversed < canonical {
442 reversed
443 } else {
444 canonical
445 }
446}
447
448pub fn find_star_patterns(graph: &Graph, config: &MotifConfig) -> Vec<MotifInstance> {
450 let mut stars = Vec::new();
451
452 for &node_id in graph.nodes.keys() {
453 let out_edges = graph.outgoing_edges(node_id);
454 let in_edges = graph.incoming_edges(node_id);
455
456 if out_edges.len() >= config.min_star_spokes {
458 let edges: Vec<EdgeId> = out_edges.iter().map(|e| e.id).collect();
459 let targets: Vec<NodeId> = out_edges.iter().map(|e| e.target).collect();
460 let total_weight: f64 = out_edges.iter().map(|e| e.weight).sum();
461
462 let mut nodes = vec![node_id];
463 nodes.extend(&targets);
464
465 let motif = MotifInstance::new(
466 GraphMotif::StarPattern {
467 min_spokes: out_edges.len(),
468 },
469 nodes,
470 edges,
471 total_weight,
472 )
473 .with_metadata("hub_node", &node_id.to_string())
474 .with_metadata("direction", "outgoing");
475
476 stars.push(motif);
477 }
478
479 if in_edges.len() >= config.min_star_spokes {
481 let edges: Vec<EdgeId> = in_edges.iter().map(|e| e.id).collect();
482 let sources: Vec<NodeId> = in_edges.iter().map(|e| e.source).collect();
483 let total_weight: f64 = in_edges.iter().map(|e| e.weight).sum();
484
485 let mut nodes = vec![node_id];
486 nodes.extend(&sources);
487
488 let motif = MotifInstance::new(
489 GraphMotif::StarPattern {
490 min_spokes: in_edges.len(),
491 },
492 nodes,
493 edges,
494 total_weight,
495 )
496 .with_metadata("hub_node", &node_id.to_string())
497 .with_metadata("direction", "incoming");
498
499 stars.push(motif);
500 }
501
502 if stars.len() >= config.max_results_per_type {
503 break;
504 }
505 }
506
507 stars.truncate(config.max_results_per_type);
508 stars
509}
510
511pub fn find_back_and_forth(graph: &Graph, config: &MotifConfig) -> Vec<MotifInstance> {
513 let mut patterns = Vec::new();
514 let mut seen_pairs: HashSet<(NodeId, NodeId)> = HashSet::new();
515
516 for (&edge_id, edge) in &graph.edges {
517 if edge.weight < config.min_edge_weight {
518 continue;
519 }
520
521 let (a, b) = (edge.source.min(edge.target), edge.source.max(edge.target));
522
523 if seen_pairs.contains(&(a, b)) {
524 continue;
525 }
526
527 let reverse_edges: Vec<&_> = graph
529 .outgoing_edges(edge.target)
530 .into_iter()
531 .filter(|e| e.target == edge.source && e.weight >= config.min_edge_weight)
532 .collect();
533
534 if !reverse_edges.is_empty() {
535 seen_pairs.insert((a, b));
536
537 let mut edges = vec![edge_id];
538 edges.extend(reverse_edges.iter().map(|e| e.id));
539
540 let total_weight = edge.weight + reverse_edges.iter().map(|e| e.weight).sum::<f64>();
541
542 let motif = MotifInstance::new(
543 GraphMotif::BackAndForth,
544 vec![edge.source, edge.target],
545 edges,
546 total_weight,
547 )
548 .with_metadata("forward_count", "1")
549 .with_metadata("reverse_count", &reverse_edges.len().to_string());
550
551 patterns.push(motif);
552
553 if patterns.len() >= config.max_results_per_type {
554 break;
555 }
556 }
557 }
558
559 patterns
560}
561
562pub fn find_cliques(graph: &Graph, config: &MotifConfig) -> Vec<MotifInstance> {
564 let mut cliques = Vec::new();
565
566 if config.max_clique_size < 3 {
567 return cliques;
568 }
569
570 let mut adjacency: HashMap<NodeId, HashSet<NodeId>> = HashMap::new();
572
573 for edge in graph.edges.values() {
574 adjacency
575 .entry(edge.source)
576 .or_default()
577 .insert(edge.target);
578 adjacency
579 .entry(edge.target)
580 .or_default()
581 .insert(edge.source);
582 }
583
584 let mut seen_cliques: HashSet<Vec<NodeId>> = HashSet::new();
586 let nodes: Vec<NodeId> = graph.nodes.keys().copied().collect();
587
588 for &a in &nodes {
589 if cliques.len() >= config.max_results_per_type {
590 break;
591 }
592 let neighbors_a = match adjacency.get(&a) {
593 Some(n) => n,
594 None => continue,
595 };
596
597 for &b in neighbors_a {
598 if b <= a {
599 continue;
600 }
601
602 let neighbors_b = match adjacency.get(&b) {
603 Some(n) => n,
604 None => continue,
605 };
606
607 for &c in neighbors_a {
609 if c <= b {
610 continue;
611 }
612
613 if neighbors_b.contains(&c) {
614 let mut clique_nodes = vec![a, b, c];
615 clique_nodes.sort();
616
617 if !seen_cliques.contains(&clique_nodes) {
618 seen_cliques.insert(clique_nodes.clone());
619
620 let edges = find_edges_between_nodes(graph, &clique_nodes);
622 let total_weight: f64 = edges
623 .iter()
624 .filter_map(|&id| graph.get_edge(id))
625 .map(|e| e.weight)
626 .sum();
627
628 let motif = MotifInstance::new(
629 GraphMotif::Clique { size: 3 },
630 clique_nodes,
631 edges,
632 total_weight,
633 );
634
635 cliques.push(motif);
636 }
637 }
638 }
639 }
640 }
641
642 cliques.truncate(config.max_results_per_type);
643 cliques
644}
645
646fn find_edges_between_nodes(graph: &Graph, nodes: &[NodeId]) -> Vec<EdgeId> {
648 let node_set: HashSet<NodeId> = nodes.iter().copied().collect();
649 let mut edges = Vec::new();
650
651 for &node in nodes {
652 for edge in graph.outgoing_edges(node) {
653 if node_set.contains(&edge.target) {
654 edges.push(edge.id);
655 }
656 }
657 }
658
659 edges
660}
661
662pub fn find_funnel_patterns(graph: &Graph, config: &MotifConfig) -> Vec<MotifInstance> {
664 let mut funnels = Vec::new();
665
666 for &node_id in graph.nodes.keys() {
667 let in_edges = graph.incoming_edges(node_id);
668
669 if in_edges.len() >= config.min_star_spokes {
671 let sources: Vec<NodeId> = in_edges.iter().map(|e| e.source).collect();
672
673 let leaf_sources: Vec<NodeId> = sources
675 .iter()
676 .filter(|&&s| graph.out_degree(s) <= 2)
677 .copied()
678 .collect();
679
680 if leaf_sources.len() >= config.min_star_spokes / 2 {
682 let edges: Vec<EdgeId> = in_edges.iter().map(|e| e.id).collect();
683 let total_weight: f64 = in_edges.iter().map(|e| e.weight).sum();
684
685 let mut nodes = vec![node_id];
686 nodes.extend(&sources);
687
688 let motif = MotifInstance::new(
689 GraphMotif::FunnelPattern {
690 sources: sources.len(),
691 },
692 nodes,
693 edges,
694 total_weight,
695 )
696 .with_metadata("target_node", &node_id.to_string())
697 .with_metadata("leaf_source_count", &leaf_sources.len().to_string());
698
699 funnels.push(motif);
700
701 if funnels.len() >= config.max_results_per_type {
702 break;
703 }
704 }
705 }
706 }
707
708 funnels
709}
710
711fn calculate_time_span(edge_ids: &[EdgeId], graph: &Graph) -> Option<i64> {
713 let dates: Vec<NaiveDate> = edge_ids
714 .iter()
715 .filter_map(|&id| graph.get_edge(id))
716 .filter_map(|e| e.timestamp)
717 .collect();
718
719 if dates.len() < 2 {
720 return None;
721 }
722
723 let min_date = dates.iter().min()?;
724 let max_date = dates.iter().max()?;
725
726 Some((*max_date - *min_date).num_days())
727}
728
729pub fn compute_motif_features(graph: &Graph, config: &MotifConfig) -> HashMap<NodeId, Vec<f64>> {
731 let result = detect_motifs(graph, config);
732 let mut features = HashMap::new();
733
734 for &node_id in graph.nodes.keys() {
735 features.insert(node_id, result.node_features(node_id));
736 }
737
738 features
739}
740
741#[cfg(test)]
742mod tests {
743 use super::*;
744 use crate::models::{GraphEdge, GraphNode, GraphType, NodeType};
745 use crate::EdgeType;
746
747 fn create_cycle_graph() -> Graph {
748 let mut graph = Graph::new("test", GraphType::Transaction);
749
750 let n1 = graph.add_node(GraphNode::new(
752 0,
753 NodeType::Account,
754 "A".to_string(),
755 "A".to_string(),
756 ));
757 let n2 = graph.add_node(GraphNode::new(
758 0,
759 NodeType::Account,
760 "B".to_string(),
761 "B".to_string(),
762 ));
763 let n3 = graph.add_node(GraphNode::new(
764 0,
765 NodeType::Account,
766 "C".to_string(),
767 "C".to_string(),
768 ));
769
770 graph.add_edge(GraphEdge::new(0, n1, n2, EdgeType::Transaction).with_weight(100.0));
771 graph.add_edge(GraphEdge::new(0, n2, n3, EdgeType::Transaction).with_weight(100.0));
772 graph.add_edge(GraphEdge::new(0, n3, n1, EdgeType::Transaction).with_weight(100.0));
773
774 graph
775 }
776
777 fn create_star_graph() -> Graph {
778 let mut graph = Graph::new("test", GraphType::Transaction);
779
780 let hub = graph.add_node(GraphNode::new(
782 0,
783 NodeType::Account,
784 "Hub".to_string(),
785 "Hub".to_string(),
786 ));
787
788 for i in 1..=6 {
789 let spoke = graph.add_node(GraphNode::new(
790 0,
791 NodeType::Account,
792 format!("Spoke{}", i),
793 format!("Spoke{}", i),
794 ));
795 graph.add_edge(GraphEdge::new(0, hub, spoke, EdgeType::Transaction).with_weight(100.0));
796 }
797
798 graph
799 }
800
801 fn create_back_and_forth_graph() -> Graph {
802 let mut graph = Graph::new("test", GraphType::Transaction);
803
804 let n1 = graph.add_node(GraphNode::new(
805 0,
806 NodeType::Account,
807 "A".to_string(),
808 "A".to_string(),
809 ));
810 let n2 = graph.add_node(GraphNode::new(
811 0,
812 NodeType::Account,
813 "B".to_string(),
814 "B".to_string(),
815 ));
816
817 graph.add_edge(GraphEdge::new(0, n1, n2, EdgeType::Transaction).with_weight(100.0));
819 graph.add_edge(GraphEdge::new(0, n2, n1, EdgeType::Transaction).with_weight(100.0));
820
821 graph
822 }
823
824 #[test]
825 fn test_find_circular_flows() {
826 let graph = create_cycle_graph();
827 let config = MotifConfig {
828 max_cycle_length: 5,
829 ..Default::default()
830 };
831
832 let cycles = find_circular_flows(&graph, &config);
833
834 assert!(!cycles.is_empty());
835 let cycle = &cycles[0];
836 assert_eq!(cycle.nodes.len(), 3);
837 assert!(cycle.total_weight > 0.0);
838 }
839
840 #[test]
841 fn test_find_star_patterns() {
842 let graph = create_star_graph();
843 let config = MotifConfig {
844 min_star_spokes: 5,
845 ..Default::default()
846 };
847
848 let stars = find_star_patterns(&graph, &config);
849
850 assert!(!stars.is_empty());
851 let star = &stars[0];
852 assert!(star.nodes.len() >= 6); }
854
855 #[test]
856 fn test_find_back_and_forth() {
857 let graph = create_back_and_forth_graph();
858 let config = MotifConfig::default();
859
860 let patterns = find_back_and_forth(&graph, &config);
861
862 assert!(!patterns.is_empty());
863 let pattern = &patterns[0];
864 assert_eq!(pattern.nodes.len(), 2);
865 }
866
867 #[test]
868 fn test_detect_motifs() {
869 let graph = create_cycle_graph();
870 let config = MotifConfig::default();
871
872 let result = detect_motifs(&graph, &config);
873
874 assert!(result.total_circular_flows > 0);
875 }
876
877 #[test]
878 fn test_canonicalize_cycle() {
879 let cycle1 = vec![3, 1, 2];
880 let cycle2 = vec![1, 2, 3];
881 let cycle3 = vec![2, 3, 1];
882
883 let canonical1 = canonicalize_cycle(&cycle1);
884 let canonical2 = canonicalize_cycle(&cycle2);
885 let canonical3 = canonicalize_cycle(&cycle3);
886
887 assert_eq!(canonical1, canonical2);
889 assert_eq!(canonical2, canonical3);
890 }
891
892 #[test]
893 fn test_node_features() {
894 let graph = create_cycle_graph();
895 let config = MotifConfig::default();
896
897 let result = detect_motifs(&graph, &config);
898 let features = result.node_features(1);
899
900 assert_eq!(features.len(), MotifDetectionResult::feature_dim());
901 }
902
903 #[test]
904 fn test_motif_instance_builder() {
905 let motif = MotifInstance::new(
906 GraphMotif::CircularFlow { length: 3 },
907 vec![1, 2, 3],
908 vec![1, 2, 3],
909 300.0,
910 )
911 .with_time_span(5)
912 .with_confidence(0.95)
913 .with_metadata("key", "value");
914
915 assert_eq!(motif.time_span_days, Some(5));
916 assert_eq!(motif.confidence, 0.95);
917 assert_eq!(motif.metadata.get("key"), Some(&"value".to_string()));
918 }
919}