1use std::collections::{HashMap, HashSet};
14
15use nulid::Nulid;
16
17use crate::graph::IndexedSubgraph;
18use crate::patterns::{
19 ConflictPattern, CyclePattern, HUB_INFLUENCE_TYPES, HUB_THRESHOLD, PathPattern, Severity,
20 cycle_patterns, path_patterns,
21};
22
23#[derive(Debug, Clone)]
25pub struct DetectOpts {
26 pub max_depth: usize,
28 pub pattern_ids: Option<HashSet<String>>,
30}
31
32impl Default for DetectOpts {
33 fn default() -> Self {
34 Self {
35 max_depth: 6,
36 pattern_ids: None,
37 }
38 }
39}
40
41#[must_use]
43pub fn detect_conflicts(sg: &IndexedSubgraph, opts: &DetectOpts) -> Vec<ConflictPattern> {
44 let mut results = Vec::new();
45
46 results.extend(detect_cycles(sg, opts));
47 results.extend(detect_paths(sg, opts));
48 results.extend(detect_hubs(sg, opts));
49
50 results
51}
52fn detect_cycles(sg: &IndexedSubgraph, opts: &DetectOpts) -> Vec<ConflictPattern> {
63 let patterns = cycle_patterns();
64 let active: Vec<&CyclePattern> = patterns
65 .iter()
66 .filter(|p| pattern_enabled(p.id, opts))
67 .collect();
68
69 if active.is_empty() {
70 return Vec::new();
71 }
72
73 let relevant_types: HashSet<&str> = active
75 .iter()
76 .flat_map(|p| p.edge_sequence.iter().copied())
77 .collect();
78
79 let mut results = Vec::new();
80 let mut seen_cycles: HashSet<Vec<Nulid>> = HashSet::new();
81
82 for &start_id in sg.nodes.keys() {
83 let mut path_nodes: Vec<Nulid> = vec![start_id];
84 let mut path_edges: Vec<Nulid> = Vec::new();
85 let mut edge_types: Vec<String> = Vec::new();
86 let mut visited: HashSet<Nulid> = HashSet::new();
87 visited.insert(start_id);
88
89 cycle_dfs(
90 sg,
91 opts,
92 start_id,
93 start_id,
94 &active,
95 &relevant_types,
96 &mut path_nodes,
97 &mut path_edges,
98 &mut edge_types,
99 &mut visited,
100 &mut results,
101 &mut seen_cycles,
102 );
103 }
104
105 results
106}
107
108#[allow(clippy::too_many_arguments)]
109fn cycle_dfs(
110 sg: &IndexedSubgraph,
111 opts: &DetectOpts,
112 start: Nulid,
113 current: Nulid,
114 patterns: &[&CyclePattern],
115 relevant_types: &HashSet<&str>,
116 path_nodes: &mut Vec<Nulid>,
117 path_edges: &mut Vec<Nulid>,
118 edge_types: &mut Vec<String>,
119 visited: &mut HashSet<Nulid>,
120 results: &mut Vec<ConflictPattern>,
121 seen: &mut HashSet<Vec<Nulid>>,
122) {
123 if path_edges.len() >= opts.max_depth {
124 return;
125 }
126
127 for &edge_idx in sg.outgoing_edges(current) {
128 let edge = &sg.edge_list[edge_idx];
129
130 if !relevant_types.contains(edge.rel_type.as_str()) {
132 continue;
133 }
134
135 let next = edge.target_id;
136
137 if next == start && !path_edges.is_empty() {
139 edge_types.push(edge.rel_type.clone());
140 path_edges.push(edge.id);
141
142 for pat in patterns {
143 if matches_cycle_pattern(edge_types, pat.edge_sequence) {
144 let mut key = path_edges.clone();
146 key.sort();
147 if seen.insert(key) {
148 results.push(build_cycle_result(pat, path_nodes, path_edges));
149 }
150 }
151 }
152
153 path_edges.pop();
154 edge_types.pop();
155 continue;
156 }
157
158 if !visited.contains(&next) && sg.nodes.contains_key(&next) {
160 visited.insert(next);
161 path_nodes.push(next);
162 path_edges.push(edge.id);
163 edge_types.push(edge.rel_type.clone());
164
165 cycle_dfs(
166 sg,
167 opts,
168 start,
169 next,
170 patterns,
171 relevant_types,
172 path_nodes,
173 path_edges,
174 edge_types,
175 visited,
176 results,
177 seen,
178 );
179
180 edge_types.pop();
181 path_edges.pop();
182 path_nodes.pop();
183 visited.remove(&next);
184 }
185 }
186}
187
188fn matches_cycle_pattern(trail: &[String], sequence: &[&str]) -> bool {
193 let seq_len = sequence.len();
194 if trail.is_empty() || !trail.len().is_multiple_of(seq_len) {
195 return false;
196 }
197
198 trail
199 .iter()
200 .enumerate()
201 .all(|(i, t)| t == sequence[i % seq_len])
202}
203
204fn build_cycle_result(
205 pat: &CyclePattern,
206 path_nodes: &[Nulid],
207 path_edges: &[Nulid],
208) -> ConflictPattern {
209 let mut path = Vec::with_capacity(path_nodes.len() + path_edges.len());
211 for i in 0..path_edges.len() {
212 path.push(path_nodes[i]);
213 path.push(path_edges[i]);
214 }
215 if let Some(&start) = path_nodes.first() {
217 path.push(start);
218 }
219
220 let node_set: Vec<Nulid> = path_nodes.to_vec();
221
222 ConflictPattern {
223 pattern_id: pat.id.to_string(),
224 pattern_name: pat.name.to_string(),
225 severity: pat.severity,
226 nodes: node_set,
227 edges: path_edges.to_vec(),
228 path,
229 description: format!("{}: cycle of {} edges detected", pat.name, path_edges.len()),
230 }
231}
232fn detect_paths(sg: &IndexedSubgraph, opts: &DetectOpts) -> Vec<ConflictPattern> {
234 let patterns = path_patterns();
235 let active: Vec<&PathPattern> = patterns
236 .iter()
237 .filter(|p| pattern_enabled(p.id, opts))
238 .collect();
239
240 if active.is_empty() {
241 return Vec::new();
242 }
243
244 let mut results = Vec::new();
245 let mut seen_paths: HashSet<Vec<Nulid>> = HashSet::new();
246
247 for (&node_id, node) in &sg.nodes {
248 for pat in &active {
249 if let Some(start_label) = pat.start_label
251 && node.label != start_label
252 {
253 continue;
254 }
255
256 if pat.steps.len() > opts.max_depth {
257 continue;
258 }
259
260 let mut path_nodes = vec![node_id];
261 let mut path_edges = Vec::new();
262
263 path_dfs(
264 sg,
265 pat,
266 node_id,
267 0,
268 &mut path_nodes,
269 &mut path_edges,
270 &mut results,
271 &mut seen_paths,
272 );
273 }
274 }
275
276 results
277}
278
279#[allow(clippy::too_many_arguments)]
280fn path_dfs(
281 sg: &IndexedSubgraph,
282 pat: &PathPattern,
283 current: Nulid,
284 step_idx: usize,
285 path_nodes: &mut Vec<Nulid>,
286 path_edges: &mut Vec<Nulid>,
287 results: &mut Vec<ConflictPattern>,
288 seen: &mut HashSet<Vec<Nulid>>,
289) {
290 if step_idx >= pat.steps.len() {
291 let mut key = path_edges.clone();
293 key.sort();
294 if seen.insert(key) {
295 results.push(build_path_result(pat, path_nodes, path_edges));
296 }
297 return;
298 }
299
300 let step = &pat.steps[step_idx];
301
302 for &edge_idx in sg.outgoing_edges(current) {
303 let edge = &sg.edge_list[edge_idx];
304
305 if edge.rel_type != step.edge_type {
307 continue;
308 }
309
310 let next = edge.target_id;
311
312 if let Some(target_label) = step.target_label {
314 match sg.node_label(next) {
315 Some(label) if label == target_label => {}
316 _ => continue,
317 }
318 }
319
320 if path_nodes.contains(&next) {
322 continue;
323 }
324
325 path_nodes.push(next);
326 path_edges.push(edge.id);
327
328 path_dfs(
329 sg,
330 pat,
331 next,
332 step_idx + 1,
333 path_nodes,
334 path_edges,
335 results,
336 seen,
337 );
338
339 path_edges.pop();
340 path_nodes.pop();
341 }
342}
343
344fn build_path_result(
345 pat: &PathPattern,
346 path_nodes: &[Nulid],
347 path_edges: &[Nulid],
348) -> ConflictPattern {
349 let mut path = Vec::with_capacity(path_nodes.len() + path_edges.len());
350 for i in 0..path_edges.len() {
351 path.push(path_nodes[i]);
352 path.push(path_edges[i]);
353 }
354 if let Some(&last) = path_nodes.last() {
355 path.push(last);
356 }
357
358 ConflictPattern {
359 pattern_id: pat.id.to_string(),
360 pattern_name: pat.name.to_string(),
361 severity: pat.severity,
362 nodes: path_nodes.to_vec(),
363 edges: path_edges.to_vec(),
364 path,
365 description: format!("{}: path of {} steps detected", pat.name, path_edges.len()),
366 }
367}
368fn detect_hubs(sg: &IndexedSubgraph, opts: &DetectOpts) -> Vec<ConflictPattern> {
370 if !pattern_enabled("COI-006", opts) {
371 return Vec::new();
372 }
373
374 let influence_set: HashSet<&str> = HUB_INFLUENCE_TYPES.iter().copied().collect();
375
376 let mut person_targets: HashMap<Nulid, HashMap<Nulid, HashSet<String>>> = HashMap::new();
379 let mut person_target_edges: HashMap<(Nulid, Nulid), Vec<Nulid>> = HashMap::new();
381
382 for (&node_id, node) in &sg.nodes {
383 if node.label != "Person" {
384 continue;
385 }
386
387 for &edge_idx in sg.outgoing_edges(node_id) {
388 let edge = &sg.edge_list[edge_idx];
389 if !influence_set.contains(edge.rel_type.as_str()) {
390 continue;
391 }
392
393 match sg.node_label(edge.target_id) {
395 Some("Organization") => {}
396 _ => continue,
397 }
398
399 person_targets
400 .entry(node_id)
401 .or_default()
402 .entry(edge.target_id)
403 .or_default()
404 .insert(edge.rel_type.clone());
405
406 person_target_edges
407 .entry((node_id, edge.target_id))
408 .or_default()
409 .push(edge.id);
410 }
411 }
412
413 let mut results = Vec::new();
414
415 for (person_id, targets) in &person_targets {
416 for (target_id, edge_type_set) in targets {
417 if edge_type_set.len() >= HUB_THRESHOLD {
418 let edge_ids = person_target_edges
419 .get(&(*person_id, *target_id))
420 .cloned()
421 .unwrap_or_default();
422
423 let types_str: Vec<&str> = edge_type_set.iter().map(String::as_str).collect();
424
425 results.push(ConflictPattern {
426 pattern_id: "COI-006".to_string(),
427 pattern_name: "Hub Concentration".to_string(),
428 severity: Severity::Low,
429 nodes: vec![*person_id, *target_id],
430 edges: edge_ids,
431 path: vec![*person_id, *target_id],
432 description: format!(
433 "Hub Concentration: person has {} distinct influence ties ({}) to organization",
434 edge_type_set.len(),
435 types_str.join(", ")
436 ),
437 });
438 }
439 }
440 }
441
442 results
443}
444fn pattern_enabled(id: &str, opts: &DetectOpts) -> bool {
445 opts.pattern_ids.as_ref().is_none_or(|ids| ids.contains(id))
446}
447#[cfg(test)]
448mod tests {
449 use super::*;
450 use crate::graph::Subgraph;
451 use crate::{Edge, Node};
452
453 fn nulid(n: u8) -> Nulid {
454 Nulid::from_bytes([n; 16])
455 }
456
457 fn node(id: Nulid, label: &str, name: &str) -> Node {
458 Node {
459 id,
460 label: label.to_string(),
461 name: name.to_string(),
462 }
463 }
464
465 fn edge(id: Nulid, src: Nulid, tgt: Nulid, rel: &str) -> Edge {
466 Edge {
467 id,
468 source_id: src,
469 target_id: tgt,
470 rel_type: rel.to_string(),
471 source_urls: vec!["https://example.com".to_string()],
472 }
473 }
474
475 fn default_opts() -> DetectOpts {
476 DetectOpts::default()
477 }
478
479 #[test]
482 fn detects_donation_appointment_cycle() {
483 let a = nulid(1);
485 let b = nulid(2);
486 let e1 = nulid(10);
487 let e2 = nulid(11);
488
489 let sg = Subgraph {
490 nodes: vec![node(a, "Person", "Alice"), node(b, "Organization", "Gov")],
491 edges: vec![edge(e1, a, b, "PAID_TO"), edge(e2, b, a, "APPOINTED_BY")],
492 };
493
494 let idx = IndexedSubgraph::from_subgraph(&sg);
495 let results = detect_cycles(&idx, &default_opts());
496
497 assert_eq!(results.len(), 1);
498 assert_eq!(results[0].pattern_id, "COI-001");
499 assert_eq!(results[0].severity, Severity::High);
500 assert_eq!(results[0].edges.len(), 2);
501 }
502
503 #[test]
504 fn detects_contract_kickback_cycle() {
505 let a = nulid(1);
507 let b = nulid(2);
508 let e1 = nulid(10);
509 let e2 = nulid(11);
510
511 let sg = Subgraph {
512 nodes: vec![node(a, "Organization", "Corp"), node(b, "Person", "Bob")],
513 edges: vec![
514 edge(e1, a, b, "AWARDED_CONTRACT"),
515 edge(e2, b, a, "PAID_TO"),
516 ],
517 };
518
519 let idx = IndexedSubgraph::from_subgraph(&sg);
520 let results = detect_cycles(&idx, &default_opts());
521
522 assert_eq!(results.len(), 1);
523 assert_eq!(results[0].pattern_id, "COI-002");
524 }
525
526 #[test]
527 fn detects_revolving_door_cycle() {
528 let a = nulid(1);
530 let b = nulid(2);
531 let e1 = nulid(10);
532 let e2 = nulid(11);
533
534 let sg = Subgraph {
535 nodes: vec![node(a, "Person", "Alice"), node(b, "Organization", "Corp")],
536 edges: vec![edge(e1, a, b, "EMPLOYED_BY"), edge(e2, b, a, "LOBBIED")],
537 };
538
539 let idx = IndexedSubgraph::from_subgraph(&sg);
540 let results = detect_cycles(&idx, &default_opts());
541
542 assert_eq!(results.len(), 1);
543 assert_eq!(results[0].pattern_id, "COI-003");
544 }
545
546 #[test]
547 fn no_cycle_detected_for_unmatched_types() {
548 let a = nulid(1);
549 let b = nulid(2);
550 let e1 = nulid(10);
551 let e2 = nulid(11);
552
553 let sg = Subgraph {
554 nodes: vec![node(a, "Person", "Alice"), node(b, "Person", "Bob")],
555 edges: vec![edge(e1, a, b, "FAMILY_OF"), edge(e2, b, a, "ASSOCIATE_OF")],
556 };
557
558 let idx = IndexedSubgraph::from_subgraph(&sg);
559 let results = detect_cycles(&idx, &default_opts());
560
561 assert!(results.is_empty());
562 }
563
564 #[test]
565 fn cycle_respects_max_depth() {
566 let a = nulid(1);
568 let b = nulid(2);
569 let c = nulid(3);
570 let d = nulid(4);
571
572 let sg = Subgraph {
573 nodes: vec![
574 node(a, "Person", "A"),
575 node(b, "Organization", "B"),
576 node(c, "Person", "C"),
577 node(d, "Organization", "D"),
578 ],
579 edges: vec![
580 edge(nulid(10), a, b, "PAID_TO"),
581 edge(nulid(11), b, c, "APPOINTED_BY"),
582 edge(nulid(12), c, d, "PAID_TO"),
583 edge(nulid(13), d, a, "APPOINTED_BY"),
584 ],
585 };
586
587 let idx = IndexedSubgraph::from_subgraph(&sg);
588
589 let opts = DetectOpts {
591 max_depth: 3,
592 pattern_ids: None,
593 };
594 let results = detect_cycles(&idx, &opts);
595 assert!(results.is_empty());
596
597 let opts = DetectOpts {
599 max_depth: 4,
600 pattern_ids: None,
601 };
602 let results = detect_cycles(&idx, &opts);
603 assert_eq!(results.len(), 1);
604 assert_eq!(results[0].pattern_id, "COI-001");
605 }
606
607 #[test]
610 fn detects_family_appointment_path() {
611 let a = nulid(1);
613 let b = nulid(2);
614 let c = nulid(3);
615
616 let sg = Subgraph {
617 nodes: vec![
618 node(a, "Person", "Alice"),
619 node(b, "Person", "Bob"),
620 node(c, "Organization", "Gov"),
621 ],
622 edges: vec![
623 edge(nulid(10), a, b, "FAMILY_OF"),
624 edge(nulid(11), b, c, "APPOINTED_BY"),
625 ],
626 };
627
628 let idx = IndexedSubgraph::from_subgraph(&sg);
629 let results = detect_paths(&idx, &default_opts());
630
631 assert_eq!(results.len(), 1);
632 assert_eq!(results[0].pattern_id, "COI-004");
633 assert_eq!(results[0].severity, Severity::Medium);
634 }
635
636 #[test]
637 fn detects_donor_influence_chain() {
638 let a = nulid(1);
640 let b = nulid(2);
641 let c = nulid(3);
642
643 let sg = Subgraph {
644 nodes: vec![
645 node(a, "Person", "Donor"),
646 node(b, "Organization", "Party"),
647 node(c, "Organization", "Corp"),
648 ],
649 edges: vec![
650 edge(nulid(10), a, b, "PAID_TO"),
651 edge(nulid(11), b, c, "AWARDED_CONTRACT"),
652 ],
653 };
654
655 let idx = IndexedSubgraph::from_subgraph(&sg);
656 let results = detect_paths(&idx, &default_opts());
657
658 assert_eq!(results.len(), 1);
659 assert_eq!(results[0].pattern_id, "COI-005");
660 }
661
662 #[test]
663 fn path_requires_correct_labels() {
664 let a = nulid(1);
666 let b = nulid(2);
667 let c = nulid(3);
668
669 let sg = Subgraph {
670 nodes: vec![
671 node(a, "Organization", "OrgA"),
672 node(b, "Organization", "OrgB"),
673 node(c, "Organization", "OrgC"),
674 ],
675 edges: vec![
676 edge(nulid(10), a, b, "FAMILY_OF"),
677 edge(nulid(11), b, c, "APPOINTED_BY"),
678 ],
679 };
680
681 let idx = IndexedSubgraph::from_subgraph(&sg);
682 let results = detect_paths(&idx, &default_opts());
683
684 assert!(results.is_empty());
685 }
686
687 #[test]
690 fn detects_hub_concentration() {
691 let actor = nulid(1);
692 let inst = nulid(2);
693
694 let sg = Subgraph {
695 nodes: vec![
696 node(actor, "Person", "Big Player"),
697 node(inst, "Organization", "Gov"),
698 ],
699 edges: vec![
700 edge(nulid(10), actor, inst, "PAID_TO"),
701 edge(nulid(11), actor, inst, "AWARDED_CONTRACT"),
702 edge(nulid(12), actor, inst, "APPOINTED_BY"),
703 ],
704 };
705
706 let idx = IndexedSubgraph::from_subgraph(&sg);
707 let results = detect_hubs(&idx, &default_opts());
708
709 assert_eq!(results.len(), 1);
710 assert_eq!(results[0].pattern_id, "COI-006");
711 assert_eq!(results[0].severity, Severity::Low);
712 assert_eq!(results[0].edges.len(), 3);
713 }
714
715 #[test]
716 fn hub_below_threshold_not_flagged() {
717 let actor = nulid(1);
718 let inst = nulid(2);
719
720 let sg = Subgraph {
721 nodes: vec![
722 node(actor, "Person", "Small Player"),
723 node(inst, "Organization", "Gov"),
724 ],
725 edges: vec![
726 edge(nulid(10), actor, inst, "PAID_TO"),
727 edge(nulid(11), actor, inst, "AWARDED_CONTRACT"),
728 ],
729 };
730
731 let idx = IndexedSubgraph::from_subgraph(&sg);
732 let results = detect_hubs(&idx, &default_opts());
733
734 assert!(results.is_empty());
735 }
736
737 #[test]
738 fn hub_requires_person_to_organization() {
739 let a = nulid(1);
741 let b = nulid(2);
742
743 let sg = Subgraph {
744 nodes: vec![
745 node(a, "Organization", "OrgA"),
746 node(b, "Organization", "OrgB"),
747 ],
748 edges: vec![
749 edge(nulid(10), a, b, "PAID_TO"),
750 edge(nulid(11), a, b, "AWARDED_CONTRACT"),
751 edge(nulid(12), a, b, "APPOINTED_BY"),
752 ],
753 };
754
755 let idx = IndexedSubgraph::from_subgraph(&sg);
756 let results = detect_hubs(&idx, &default_opts());
757
758 assert!(results.is_empty());
759 }
760
761 #[test]
764 fn detect_conflicts_runs_all_algorithms() {
765 let a = nulid(1);
767 let b = nulid(2);
768 let c = nulid(3);
769
770 let sg = Subgraph {
771 nodes: vec![
772 node(a, "Person", "Alice"),
773 node(b, "Organization", "Gov"),
774 node(c, "Organization", "Corp"),
775 ],
776 edges: vec![
777 edge(nulid(10), a, b, "PAID_TO"),
779 edge(nulid(11), b, a, "APPOINTED_BY"),
780 edge(nulid(12), a, b, "AWARDED_CONTRACT"),
782 edge(nulid(13), a, b, "FUNDED_BY"),
783 ],
784 };
785
786 let idx = IndexedSubgraph::from_subgraph(&sg);
787 let results = detect_conflicts(&idx, &default_opts());
788
789 let cycle_count = results
790 .iter()
791 .filter(|r| r.pattern_id.starts_with("COI-00"))
792 .count();
793 assert!(cycle_count >= 1);
795
796 let hub_count = results.iter().filter(|r| r.pattern_id == "COI-006").count();
797 assert!(hub_count >= 1);
798 }
799
800 #[test]
801 fn pattern_filter_limits_detection() {
802 let a = nulid(1);
803 let b = nulid(2);
804
805 let sg = Subgraph {
806 nodes: vec![node(a, "Person", "Alice"), node(b, "Organization", "Gov")],
807 edges: vec![
808 edge(nulid(10), a, b, "PAID_TO"),
809 edge(nulid(11), b, a, "APPOINTED_BY"),
810 ],
811 };
812
813 let idx = IndexedSubgraph::from_subgraph(&sg);
814
815 let opts = DetectOpts {
817 max_depth: 6,
818 pattern_ids: Some(["COI-002".to_string()].into_iter().collect()),
819 };
820 let results = detect_conflicts(&idx, &opts);
821
822 assert!(results.is_empty());
823 }
824
825 #[test]
826 fn empty_subgraph_returns_no_conflicts() {
827 let sg = Subgraph {
828 nodes: vec![],
829 edges: vec![],
830 };
831
832 let idx = IndexedSubgraph::from_subgraph(&sg);
833 let results = detect_conflicts(&idx, &default_opts());
834
835 assert!(results.is_empty());
836 }
837
838 fn nulid_from_u32(n: u32) -> Nulid {
842 let bytes = n.to_le_bytes();
843 Nulid::from_bytes([
844 bytes[0], bytes[1], bytes[2], bytes[3], 0, 0, 0, 0, bytes[0], bytes[1], bytes[2],
845 bytes[3], 0, 0, 0, 0,
846 ])
847 }
848
849 const LABELS: &[&str] = &["Person", "Organization", "Event"];
850
851 const BG_REL_TYPES: &[&str] = &["ASSOCIATE_OF", "MEMBER_OF", "FAMILY_OF"];
854
855 fn build_benchmark_subgraph(n: usize, e: usize) -> Subgraph {
867 let mut nodes = Vec::with_capacity(n);
868 let mut edges = Vec::with_capacity(e);
869 let mut node_counter: u32 = 1;
870 let mut edge_counter: u32 = 100_000;
871
872 let mut chain_ids = Vec::with_capacity(11);
874 for i in 0..11 {
875 let id = nulid_from_u32(node_counter);
876 let label = if i % 2 == 0 { "Person" } else { "Organization" };
877 nodes.push(node(id, label, &format!("chain-{i}")));
878 chain_ids.push(id);
879 node_counter += 1;
880 }
881 for i in 0..10 {
882 let rel = if i % 2 == 0 {
883 "PAID_TO"
884 } else {
885 "APPOINTED_BY"
886 };
887 let eid = nulid_from_u32(edge_counter);
888 edges.push(edge(eid, chain_ids[i], chain_ids[i + 1], rel));
889 edge_counter += 1;
890 }
891 let eid = nulid_from_u32(edge_counter);
893 edges.push(edge(eid, chain_ids[10], chain_ids[0], "APPOINTED_BY"));
894 edge_counter += 1;
895
896 let hub_actor = nulid_from_u32(node_counter);
898 nodes.push(node(hub_actor, "Person", "hub-actor"));
899 node_counter += 1;
900 let hub_inst = nulid_from_u32(node_counter);
901 nodes.push(node(hub_inst, "Organization", "hub-inst"));
902 node_counter += 1;
903
904 for rel in &["PAID_TO", "AWARDED_CONTRACT", "APPOINTED_BY"] {
905 let eid = nulid_from_u32(edge_counter);
906 edges.push(edge(eid, hub_actor, hub_inst, rel));
907 edge_counter += 1;
908 }
909
910 let planted_nodes = nodes.len();
912 let mut all_node_ids: Vec<Nulid> = nodes.iter().map(|n| n.id).collect();
913 for _ in planted_nodes..n {
914 let id = nulid_from_u32(node_counter);
915 let label = LABELS[node_counter as usize % LABELS.len()];
916 nodes.push(node(id, label, &format!("bg-{node_counter}")));
917 all_node_ids.push(id);
918 node_counter += 1;
919 }
920
921 let planted_edges = edges.len();
923 let total_nodes = all_node_ids.len();
924 if total_nodes > 1 {
925 let remaining_edges = e.saturating_sub(planted_edges);
926 for i in 0..remaining_edges {
927 let src_idx = (i * 7 + 3) % total_nodes;
928 let tgt_idx = (i * 13 + 11) % total_nodes;
929 if src_idx == tgt_idx {
930 continue;
931 }
932 let rel = BG_REL_TYPES[i % BG_REL_TYPES.len()];
933 let eid = nulid_from_u32(edge_counter);
934 edges.push(edge(eid, all_node_ids[src_idx], all_node_ids[tgt_idx], rel));
935 edge_counter += 1;
936 }
937 }
938
939 Subgraph { nodes, edges }
940 }
941
942 #[test]
950 fn bench_detect_100_nodes() {
951 let sg = build_benchmark_subgraph(100, 500);
952 let idx = IndexedSubgraph::from_subgraph(&sg);
953 let opts = default_opts();
954
955 let start = std::time::Instant::now();
956 let results = detect_conflicts(&idx, &opts);
957 let elapsed = start.elapsed();
958
959 eprintln!(
960 "100 nodes / 500 edges: {:?} ({} patterns found)",
961 elapsed,
962 results.len()
963 );
964 assert!(
965 elapsed.as_millis() < 50,
966 "detection took {}ms, must be < 50ms",
967 elapsed.as_millis()
968 );
969 }
970
971 #[test]
972 fn bench_detect_1000_nodes() {
973 let sg = build_benchmark_subgraph(1_000, 5_000);
974 let idx = IndexedSubgraph::from_subgraph(&sg);
975 let opts = default_opts();
976
977 let start = std::time::Instant::now();
978 let results = detect_conflicts(&idx, &opts);
979 let elapsed = start.elapsed();
980
981 eprintln!(
982 "1000 nodes / 5000 edges: {:?} ({} patterns found)",
983 elapsed,
984 results.len()
985 );
986 assert!(
987 elapsed.as_millis() < 50,
988 "detection took {}ms, must be < 50ms",
989 elapsed.as_millis()
990 );
991 }
992
993 #[test]
994 fn bench_detect_10000_nodes() {
995 let sg = build_benchmark_subgraph(10_000, 50_000);
996 let idx = IndexedSubgraph::from_subgraph(&sg);
997 let opts = default_opts();
998
999 let start = std::time::Instant::now();
1000 let results = detect_conflicts(&idx, &opts);
1001 let elapsed = start.elapsed();
1002
1003 eprintln!(
1004 "10000 nodes / 50000 edges: {:?} ({} patterns found)",
1005 elapsed,
1006 results.len()
1007 );
1008 assert!(
1009 elapsed.as_millis() < 200,
1010 "detection took {}ms, must be < 200ms",
1011 elapsed.as_millis()
1012 );
1013 }
1014
1015 #[test]
1016 fn bench_indexing_10000_nodes() {
1017 let sg = build_benchmark_subgraph(10_000, 50_000);
1018
1019 let start = std::time::Instant::now();
1020 let _idx = IndexedSubgraph::from_subgraph(&sg);
1021 let elapsed = start.elapsed();
1022
1023 eprintln!("indexing 10000 nodes / 50000 edges: {elapsed:?}");
1024 assert!(
1025 elapsed.as_millis() < 200,
1026 "indexing took {}ms, must be < 200ms",
1027 elapsed.as_millis()
1028 );
1029 }
1030
1031 #[test]
1032 fn bench_deep_chain_traversal() {
1033 let mut nodes = Vec::with_capacity(11);
1038 let mut edges_vec = Vec::with_capacity(12);
1039
1040 let mut chain_ids = Vec::with_capacity(11);
1041 for i in 0u32..11 {
1042 let id = nulid_from_u32(i + 1);
1043 let label = if i % 2 == 0 { "Person" } else { "Organization" };
1044 nodes.push(node(id, label, &format!("deep-{i}")));
1045 chain_ids.push(id);
1046 }
1047 for i in 0..10 {
1048 let rel = if i % 2 == 0 {
1049 "PAID_TO"
1050 } else {
1051 "APPOINTED_BY"
1052 };
1053 let eid = nulid_from_u32(200 + u32::try_from(i).unwrap());
1054 edges_vec.push(edge(eid, chain_ids[i], chain_ids[i + 1], rel));
1055 }
1056 let eid = nulid_from_u32(210);
1060 edges_vec.push(edge(eid, chain_ids[1], chain_ids[0], "APPOINTED_BY"));
1061
1062 let sg = Subgraph {
1063 nodes,
1064 edges: edges_vec,
1065 };
1066 let idx = IndexedSubgraph::from_subgraph(&sg);
1067 let opts = default_opts();
1068
1069 let start = std::time::Instant::now();
1070 let results = detect_conflicts(&idx, &opts);
1071 let elapsed = start.elapsed();
1072
1073 eprintln!(
1074 "deep 10-degree chain: {:?} ({} patterns found)",
1075 elapsed,
1076 results.len()
1077 );
1078 assert!(
1079 elapsed.as_millis() < 50,
1080 "detection took {}ms, must be < 50ms",
1081 elapsed.as_millis()
1082 );
1083 assert!(
1085 !results.is_empty(),
1086 "expected at least one pattern on the planted cycle chain"
1087 );
1088 }
1089}
1090#[cfg(test)]
1091mod proptests {
1092 use super::*;
1093 use crate::graph::Subgraph;
1094 use crate::{Edge, Node};
1095
1096 use proptest::prelude::*;
1097 use std::collections::HashSet;
1098
1099 fn arb_nulid() -> impl Strategy<Value = Nulid> {
1102 any::<[u8; 16]>().prop_map(Nulid::from_bytes)
1103 }
1104
1105 const LABELS: &[&str] = &["Person", "Organization", "Event"];
1106 const ALL_REL_TYPES: &[&str] = &[
1107 "PAID_TO",
1108 "APPOINTED_BY",
1109 "AWARDED_CONTRACT",
1110 "EMPLOYED_BY",
1111 "LOBBIED",
1112 "FAMILY_OF",
1113 "ASSOCIATE_OF",
1114 "MEMBER_OF",
1115 "FUNDED_BY",
1116 "OWNS",
1117 ];
1118
1119 fn arb_node() -> impl Strategy<Value = Node> {
1120 (arb_nulid(), 0..LABELS.len()).prop_map(|(id, label_idx)| Node {
1121 id,
1122 label: LABELS[label_idx].to_string(),
1123 name: format!("node-{id}"),
1124 })
1125 }
1126
1127 fn arb_edge(node_ids: Vec<Nulid>) -> impl Strategy<Value = Edge> {
1128 let n = node_ids.len();
1129 (arb_nulid(), 0..n, 0..n, 0..ALL_REL_TYPES.len()).prop_map(
1130 move |(id, src_idx, tgt_idx, rel_idx)| Edge {
1131 id,
1132 source_id: node_ids[src_idx],
1133 target_id: node_ids[tgt_idx],
1134 rel_type: ALL_REL_TYPES[rel_idx].to_string(),
1135 source_urls: vec!["https://example.com".to_string()],
1136 },
1137 )
1138 }
1139
1140 fn arb_subgraph() -> impl Strategy<Value = Subgraph> {
1143 prop::collection::vec(arb_node(), 2..=30).prop_flat_map(|nodes| {
1144 let mut seen = HashSet::new();
1146 let deduped: Vec<Node> = nodes.into_iter().filter(|n| seen.insert(n.id)).collect();
1147 let node_ids: Vec<Nulid> = deduped.iter().map(|n| n.id).collect();
1148 let edge_count = deduped.len() * 2;
1149 prop::collection::vec(arb_edge(node_ids), 0..=edge_count).prop_map(move |edges| {
1150 Subgraph {
1151 nodes: deduped.clone(),
1152 edges,
1153 }
1154 })
1155 })
1156 }
1157
1158 fn default_opts() -> DetectOpts {
1159 DetectOpts::default()
1160 }
1161
1162 proptest! {
1165 #[test]
1167 fn never_panics(sg in arb_subgraph()) {
1168 let idx = IndexedSubgraph::from_subgraph(&sg);
1169 let _ = detect_conflicts(&idx, &default_opts());
1170 }
1171
1172 #[test]
1174 fn deterministic(sg in arb_subgraph()) {
1175 let idx = IndexedSubgraph::from_subgraph(&sg);
1176 let opts = default_opts();
1177
1178 let mut a = detect_conflicts(&idx, &opts);
1179 let mut b = detect_conflicts(&idx, &opts);
1180
1181 a.sort_by(|x, y| x.pattern_id.cmp(&y.pattern_id).then(x.description.cmp(&y.description)));
1183 b.sort_by(|x, y| x.pattern_id.cmp(&y.pattern_id).then(x.description.cmp(&y.description)));
1184
1185 prop_assert_eq!(a.len(), b.len(), "different result count");
1186 for (pa, pb) in a.iter().zip(b.iter()) {
1187 prop_assert_eq!(&pa.pattern_id, &pb.pattern_id);
1188 prop_assert_eq!(&pa.nodes, &pb.nodes);
1189 prop_assert_eq!(&pa.edges, &pb.edges);
1190 }
1191 }
1192
1193 #[test]
1196 fn results_reference_valid_ids(sg in arb_subgraph()) {
1197 let node_ids: HashSet<Nulid> = sg.nodes.iter().map(|n| n.id).collect();
1198 let edge_ids: HashSet<Nulid> = sg.edges.iter().map(|e| e.id).collect();
1199
1200 let idx = IndexedSubgraph::from_subgraph(&sg);
1201 let results = detect_conflicts(&idx, &default_opts());
1202
1203 for pat in &results {
1204 for nid in &pat.nodes {
1205 prop_assert!(
1206 node_ids.contains(nid),
1207 "pattern {} references unknown node {}",
1208 pat.pattern_id,
1209 nid
1210 );
1211 }
1212 for eid in &pat.edges {
1213 prop_assert!(
1214 edge_ids.contains(eid),
1215 "pattern {} references unknown edge {}",
1216 pat.pattern_id,
1217 eid
1218 );
1219 }
1220 }
1221 }
1222
1223 #[test]
1225 fn severity_matches_pattern_id(sg in arb_subgraph()) {
1226 let idx = IndexedSubgraph::from_subgraph(&sg);
1227 let results = detect_conflicts(&idx, &default_opts());
1228
1229 for pat in &results {
1230 let expected = match pat.pattern_id.as_str() {
1231 "COI-001" | "COI-002" | "COI-003" => Severity::High,
1232 "COI-004" | "COI-005" => Severity::Medium,
1233 "COI-006" => Severity::Low,
1234 _ => continue,
1235 };
1236 prop_assert_eq!(
1237 pat.severity, expected,
1238 "pattern {} has wrong severity",
1239 pat.pattern_id
1240 );
1241 }
1242 }
1243
1244 #[test]
1247 fn results_are_well_formed(sg in arb_subgraph()) {
1248 let idx = IndexedSubgraph::from_subgraph(&sg);
1249 let results = detect_conflicts(&idx, &default_opts());
1250
1251 for pat in &results {
1252 prop_assert!(!pat.description.is_empty(), "empty description");
1253 prop_assert!(!pat.nodes.is_empty(), "no nodes in pattern {}", pat.pattern_id);
1254 prop_assert!(!pat.edges.is_empty(), "no edges in pattern {}", pat.pattern_id);
1255 prop_assert!(!pat.path.is_empty(), "no path in pattern {}", pat.pattern_id);
1256 prop_assert!(!pat.pattern_name.is_empty(), "empty pattern name");
1257 }
1258 }
1259
1260 #[test]
1262 fn nonexistent_pattern_filter_returns_empty(sg in arb_subgraph()) {
1263 let idx = IndexedSubgraph::from_subgraph(&sg);
1264 let opts = DetectOpts {
1265 max_depth: 6,
1266 pattern_ids: Some(["DOES-NOT-EXIST".to_string()].into_iter().collect()),
1267 };
1268 let results = detect_conflicts(&idx, &opts);
1269 prop_assert!(results.is_empty());
1270 }
1271
1272 #[test]
1274 fn zero_depth_finds_only_hubs(sg in arb_subgraph()) {
1275 let idx = IndexedSubgraph::from_subgraph(&sg);
1276 let opts = DetectOpts {
1277 max_depth: 0,
1278 pattern_ids: None,
1279 };
1280 let results = detect_conflicts(&idx, &opts);
1281
1282 for pat in &results {
1285 prop_assert_eq!(
1286 &pat.pattern_id, "COI-006",
1287 "non-hub pattern found with max_depth 0"
1288 );
1289 }
1290 }
1291 }
1292
1293 fn make_nulid(n: u8) -> Nulid {
1296 Nulid::from_bytes([n; 16])
1297 }
1298
1299 proptest! {
1302 #[test]
1303 fn planted_coi001_always_detected(
1304 extra_node_count in 0usize..20,
1305 extra_edge_count in 0usize..40,
1306 ) {
1307 let a = make_nulid(1);
1308 let b = make_nulid(2);
1309 let e1 = make_nulid(10);
1310 let e2 = make_nulid(11);
1311
1312 let mut nodes = vec![
1313 Node { id: a, label: "Person".into(), name: "Alice".into() },
1314 Node { id: b, label: "Organization".into(), name: "Gov".into() },
1315 ];
1316 let mut edges = vec![
1317 Edge { id: e1, source_id: a, target_id: b, rel_type: "PAID_TO".into(), source_urls: vec!["https://example.com".into()] },
1318 Edge { id: e2, source_id: b, target_id: a, rel_type: "APPOINTED_BY".into(), source_urls: vec!["https://example.com".into()] },
1319 ];
1320
1321 for i in 0..extra_node_count {
1323 let id = make_nulid(100 + u8::try_from(i).unwrap());
1324 let label = LABELS[i % LABELS.len()];
1325 nodes.push(Node { id, label: label.into(), name: format!("extra-{i}") });
1326 }
1327
1328 let all_ids: Vec<Nulid> = nodes.iter().map(|n| n.id).collect();
1330 for i in 0..extra_edge_count {
1331 let eid = make_nulid(200 + u8::try_from(i).unwrap());
1332 let src = all_ids[(i * 3 + 1) % all_ids.len()];
1333 let tgt = all_ids[(i * 7 + 5) % all_ids.len()];
1334 edges.push(Edge { id: eid, source_id: src, target_id: tgt, rel_type: "ASSOCIATE_OF".into(), source_urls: vec!["https://example.com".into()] });
1335 }
1336
1337 let sg = Subgraph { nodes, edges };
1338 let idx = IndexedSubgraph::from_subgraph(&sg);
1339 let results = detect_conflicts(&idx, &default_opts());
1340
1341 let has_coi001 = results.iter().any(|r| r.pattern_id == "COI-001");
1342 prop_assert!(has_coi001, "COI-001 not detected despite planted cycle");
1343 }
1344
1345 #[test]
1348 fn planted_coi006_always_detected(
1349 extra_node_count in 0usize..20,
1350 ) {
1351 let actor = make_nulid(1);
1352 let inst = make_nulid(2);
1353
1354 let mut nodes = vec![
1355 Node { id: actor, label: "Person".into(), name: "Hub".into() },
1356 Node { id: inst, label: "Organization".into(), name: "Gov".into() },
1357 ];
1358 let edges = vec![
1359 Edge { id: make_nulid(10), source_id: actor, target_id: inst, rel_type: "PAID_TO".into(), source_urls: vec!["https://example.com".into()] },
1360 Edge { id: make_nulid(11), source_id: actor, target_id: inst, rel_type: "AWARDED_CONTRACT".into(), source_urls: vec!["https://example.com".into()] },
1361 Edge { id: make_nulid(12), source_id: actor, target_id: inst, rel_type: "APPOINTED_BY".into(), source_urls: vec!["https://example.com".into()] },
1362 ];
1363
1364 for i in 0..extra_node_count {
1365 let id = make_nulid(100 + u8::try_from(i).unwrap());
1366 nodes.push(Node { id, label: "Event".into(), name: format!("extra-{i}") });
1367 }
1368
1369 let sg = Subgraph { nodes, edges };
1370 let idx = IndexedSubgraph::from_subgraph(&sg);
1371 let results = detect_conflicts(&idx, &default_opts());
1372
1373 let has_coi006 = results.iter().any(|r| r.pattern_id == "COI-006");
1374 prop_assert!(has_coi006, "COI-006 not detected despite planted hub");
1375 }
1376 }
1377}