Skip to main content

weave_graph/
detect.rs

1//! Conflict-of-interest detection algorithms.
2//!
3//! Three detection strategies:
4//!
5//! 1. **Cycle detection** -- enumerate directed simple cycles and match
6//!    edge-type sequences against [`CyclePattern`]s.
7//! 2. **Path detection** -- constrained DFS from every node, matching
8//!    edge types and node labels against [`PathPattern`]s.
9//! 3. **Hub detection** -- for each Person→Organization pair, count
10//!    distinct "influence" edge types and flag when the count exceeds
11//!    [`HUB_THRESHOLD`].
12
13use std::collections::{HashMap, HashSet};
14
15use nulid::Nulid;
16
17use crate::graph::IndexedSubgraph;
18use crate::patterns::{
19    ConflictPattern, CyclePattern, HUB_INFLUENCE_TYPES, HUB_THRESHOLD, PathPattern, Severity,
20    cycle_patterns, path_patterns,
21};
22
23/// Options controlling detection behaviour.
24#[derive(Debug, Clone)]
25pub struct DetectOpts {
26    /// Maximum cycle/path length in edges (default: 6).
27    pub max_depth: usize,
28    /// Which pattern IDs to check.  `None` means all.
29    pub pattern_ids: Option<HashSet<String>>,
30}
31
32impl Default for DetectOpts {
33    fn default() -> Self {
34        Self {
35            max_depth: 6,
36            pattern_ids: None,
37        }
38    }
39}
40
41/// Run all detection algorithms on the given subgraph.
42#[must_use]
43pub fn detect_conflicts(sg: &IndexedSubgraph, opts: &DetectOpts) -> Vec<ConflictPattern> {
44    let mut results = Vec::new();
45
46    results.extend(detect_cycles(sg, opts));
47    results.extend(detect_paths(sg, opts));
48    results.extend(detect_hubs(sg, opts));
49
50    results
51}
52/// Detect directed cycles whose edge-type sequence matches a pattern.
53///
54/// Algorithm: for every node in the subgraph, run a DFS up to
55/// `max_depth` edges.  When the DFS returns to the start node and the
56/// edge-type trail matches a pattern's `edge_sequence` (possibly
57/// repeated), record the conflict.
58///
59/// **Optimisation:** only follows edges whose `rel_type` appears in at
60/// least one active pattern's `edge_sequence`.  This prunes the search
61/// space dramatically in graphs dominated by non-pattern edge types.
62fn detect_cycles(sg: &IndexedSubgraph, opts: &DetectOpts) -> Vec<ConflictPattern> {
63    let patterns = cycle_patterns();
64    let active: Vec<&CyclePattern> = patterns
65        .iter()
66        .filter(|p| pattern_enabled(p.id, opts))
67        .collect();
68
69    if active.is_empty() {
70        return Vec::new();
71    }
72
73    // Collect all edge types that appear in any active pattern.
74    let relevant_types: HashSet<&str> = active
75        .iter()
76        .flat_map(|p| p.edge_sequence.iter().copied())
77        .collect();
78
79    let mut results = Vec::new();
80    let mut seen_cycles: HashSet<Vec<Nulid>> = HashSet::new();
81
82    for &start_id in sg.nodes.keys() {
83        let mut path_nodes: Vec<Nulid> = vec![start_id];
84        let mut path_edges: Vec<Nulid> = Vec::new();
85        let mut edge_types: Vec<String> = Vec::new();
86        let mut visited: HashSet<Nulid> = HashSet::new();
87        visited.insert(start_id);
88
89        cycle_dfs(
90            sg,
91            opts,
92            start_id,
93            start_id,
94            &active,
95            &relevant_types,
96            &mut path_nodes,
97            &mut path_edges,
98            &mut edge_types,
99            &mut visited,
100            &mut results,
101            &mut seen_cycles,
102        );
103    }
104
105    results
106}
107
108#[allow(clippy::too_many_arguments)]
109fn cycle_dfs(
110    sg: &IndexedSubgraph,
111    opts: &DetectOpts,
112    start: Nulid,
113    current: Nulid,
114    patterns: &[&CyclePattern],
115    relevant_types: &HashSet<&str>,
116    path_nodes: &mut Vec<Nulid>,
117    path_edges: &mut Vec<Nulid>,
118    edge_types: &mut Vec<String>,
119    visited: &mut HashSet<Nulid>,
120    results: &mut Vec<ConflictPattern>,
121    seen: &mut HashSet<Vec<Nulid>>,
122) {
123    if path_edges.len() >= opts.max_depth {
124        return;
125    }
126
127    for &edge_idx in sg.outgoing_edges(current) {
128        let edge = &sg.edge_list[edge_idx];
129
130        // Prune: skip edges whose type is not in any active pattern.
131        if !relevant_types.contains(edge.rel_type.as_str()) {
132            continue;
133        }
134
135        let next = edge.target_id;
136
137        // Found a cycle back to start
138        if next == start && !path_edges.is_empty() {
139            edge_types.push(edge.rel_type.clone());
140            path_edges.push(edge.id);
141
142            for pat in patterns {
143                if matches_cycle_pattern(edge_types, pat.edge_sequence) {
144                    // Canonical key: sorted edge NULIDs to deduplicate
145                    let mut key = path_edges.clone();
146                    key.sort();
147                    if seen.insert(key) {
148                        results.push(build_cycle_result(pat, path_nodes, path_edges));
149                    }
150                }
151            }
152
153            path_edges.pop();
154            edge_types.pop();
155            continue;
156        }
157
158        // Continue DFS if not visited and node exists in subgraph
159        if !visited.contains(&next) && sg.nodes.contains_key(&next) {
160            visited.insert(next);
161            path_nodes.push(next);
162            path_edges.push(edge.id);
163            edge_types.push(edge.rel_type.clone());
164
165            cycle_dfs(
166                sg,
167                opts,
168                start,
169                next,
170                patterns,
171                relevant_types,
172                path_nodes,
173                path_edges,
174                edge_types,
175                visited,
176                results,
177                seen,
178            );
179
180            edge_types.pop();
181            path_edges.pop();
182            path_nodes.pop();
183            visited.remove(&next);
184        }
185    }
186}
187
188/// Check if a trail of edge types matches a pattern's edge sequence.
189///
190/// The pattern sequence can repeat: e.g., `["PAID_TO", "APPOINTED_BY"]`
191/// matches trails of length 2, 4, 6, ... where the types repeat.
192fn matches_cycle_pattern(trail: &[String], sequence: &[&str]) -> bool {
193    let seq_len = sequence.len();
194    if trail.is_empty() || !trail.len().is_multiple_of(seq_len) {
195        return false;
196    }
197
198    trail
199        .iter()
200        .enumerate()
201        .all(|(i, t)| t == sequence[i % seq_len])
202}
203
204fn build_cycle_result(
205    pat: &CyclePattern,
206    path_nodes: &[Nulid],
207    path_edges: &[Nulid],
208) -> ConflictPattern {
209    // Build alternating path: node, edge, node, edge, ...
210    let mut path = Vec::with_capacity(path_nodes.len() + path_edges.len());
211    for i in 0..path_edges.len() {
212        path.push(path_nodes[i]);
213        path.push(path_edges[i]);
214    }
215    // Close the cycle with the start node
216    if let Some(&start) = path_nodes.first() {
217        path.push(start);
218    }
219
220    let node_set: Vec<Nulid> = path_nodes.to_vec();
221
222    ConflictPattern {
223        pattern_id: pat.id.to_string(),
224        pattern_name: pat.name.to_string(),
225        severity: pat.severity,
226        nodes: node_set,
227        edges: path_edges.to_vec(),
228        path,
229        description: format!("{}: cycle of {} edges detected", pat.name, path_edges.len()),
230    }
231}
232/// Detect directed paths matching pattern templates.
233fn detect_paths(sg: &IndexedSubgraph, opts: &DetectOpts) -> Vec<ConflictPattern> {
234    let patterns = path_patterns();
235    let active: Vec<&PathPattern> = patterns
236        .iter()
237        .filter(|p| pattern_enabled(p.id, opts))
238        .collect();
239
240    if active.is_empty() {
241        return Vec::new();
242    }
243
244    let mut results = Vec::new();
245    let mut seen_paths: HashSet<Vec<Nulid>> = HashSet::new();
246
247    for (&node_id, node) in &sg.nodes {
248        for pat in &active {
249            // Check start label constraint
250            if let Some(start_label) = pat.start_label
251                && node.label != start_label
252            {
253                continue;
254            }
255
256            if pat.steps.len() > opts.max_depth {
257                continue;
258            }
259
260            let mut path_nodes = vec![node_id];
261            let mut path_edges = Vec::new();
262
263            path_dfs(
264                sg,
265                pat,
266                node_id,
267                0,
268                &mut path_nodes,
269                &mut path_edges,
270                &mut results,
271                &mut seen_paths,
272            );
273        }
274    }
275
276    results
277}
278
279#[allow(clippy::too_many_arguments)]
280fn path_dfs(
281    sg: &IndexedSubgraph,
282    pat: &PathPattern,
283    current: Nulid,
284    step_idx: usize,
285    path_nodes: &mut Vec<Nulid>,
286    path_edges: &mut Vec<Nulid>,
287    results: &mut Vec<ConflictPattern>,
288    seen: &mut HashSet<Vec<Nulid>>,
289) {
290    if step_idx >= pat.steps.len() {
291        // Full pattern matched
292        let mut key = path_edges.clone();
293        key.sort();
294        if seen.insert(key) {
295            results.push(build_path_result(pat, path_nodes, path_edges));
296        }
297        return;
298    }
299
300    let step = &pat.steps[step_idx];
301
302    for &edge_idx in sg.outgoing_edges(current) {
303        let edge = &sg.edge_list[edge_idx];
304
305        // Check edge type
306        if edge.rel_type != step.edge_type {
307            continue;
308        }
309
310        let next = edge.target_id;
311
312        // Check target label constraint
313        if let Some(target_label) = step.target_label {
314            match sg.node_label(next) {
315                Some(label) if label == target_label => {}
316                _ => continue,
317            }
318        }
319
320        // Avoid revisiting nodes in this path
321        if path_nodes.contains(&next) {
322            continue;
323        }
324
325        path_nodes.push(next);
326        path_edges.push(edge.id);
327
328        path_dfs(
329            sg,
330            pat,
331            next,
332            step_idx + 1,
333            path_nodes,
334            path_edges,
335            results,
336            seen,
337        );
338
339        path_edges.pop();
340        path_nodes.pop();
341    }
342}
343
344fn build_path_result(
345    pat: &PathPattern,
346    path_nodes: &[Nulid],
347    path_edges: &[Nulid],
348) -> ConflictPattern {
349    let mut path = Vec::with_capacity(path_nodes.len() + path_edges.len());
350    for i in 0..path_edges.len() {
351        path.push(path_nodes[i]);
352        path.push(path_edges[i]);
353    }
354    if let Some(&last) = path_nodes.last() {
355        path.push(last);
356    }
357
358    ConflictPattern {
359        pattern_id: pat.id.to_string(),
360        pattern_name: pat.name.to_string(),
361        severity: pat.severity,
362        nodes: path_nodes.to_vec(),
363        edges: path_edges.to_vec(),
364        path,
365        description: format!("{}: path of {} steps detected", pat.name, path_edges.len()),
366    }
367}
368/// Detect Person→Organization pairs with many distinct influence edge types.
369fn detect_hubs(sg: &IndexedSubgraph, opts: &DetectOpts) -> Vec<ConflictPattern> {
370    if !pattern_enabled("COI-006", opts) {
371        return Vec::new();
372    }
373
374    let influence_set: HashSet<&str> = HUB_INFLUENCE_TYPES.iter().copied().collect();
375
376    // For each Person, group outgoing influence edges by target Organization.
377    // person_id -> target_id -> set of edge types
378    let mut person_targets: HashMap<Nulid, HashMap<Nulid, HashSet<String>>> = HashMap::new();
379    // Also track which edges are involved
380    let mut person_target_edges: HashMap<(Nulid, Nulid), Vec<Nulid>> = HashMap::new();
381
382    for (&node_id, node) in &sg.nodes {
383        if node.label != "Person" {
384            continue;
385        }
386
387        for &edge_idx in sg.outgoing_edges(node_id) {
388            let edge = &sg.edge_list[edge_idx];
389            if !influence_set.contains(edge.rel_type.as_str()) {
390                continue;
391            }
392
393            // Target must be an Organization
394            match sg.node_label(edge.target_id) {
395                Some("Organization") => {}
396                _ => continue,
397            }
398
399            person_targets
400                .entry(node_id)
401                .or_default()
402                .entry(edge.target_id)
403                .or_default()
404                .insert(edge.rel_type.clone());
405
406            person_target_edges
407                .entry((node_id, edge.target_id))
408                .or_default()
409                .push(edge.id);
410        }
411    }
412
413    let mut results = Vec::new();
414
415    for (person_id, targets) in &person_targets {
416        for (target_id, edge_type_set) in targets {
417            if edge_type_set.len() >= HUB_THRESHOLD {
418                let edge_ids = person_target_edges
419                    .get(&(*person_id, *target_id))
420                    .cloned()
421                    .unwrap_or_default();
422
423                let types_str: Vec<&str> = edge_type_set.iter().map(String::as_str).collect();
424
425                results.push(ConflictPattern {
426                    pattern_id: "COI-006".to_string(),
427                    pattern_name: "Hub Concentration".to_string(),
428                    severity: Severity::Low,
429                    nodes: vec![*person_id, *target_id],
430                    edges: edge_ids,
431                    path: vec![*person_id, *target_id],
432                    description: format!(
433                        "Hub Concentration: person has {} distinct influence ties ({}) to organization",
434                        edge_type_set.len(),
435                        types_str.join(", ")
436                    ),
437                });
438            }
439        }
440    }
441
442    results
443}
444fn pattern_enabled(id: &str, opts: &DetectOpts) -> bool {
445    opts.pattern_ids.as_ref().is_none_or(|ids| ids.contains(id))
446}
447#[cfg(test)]
448mod tests {
449    use super::*;
450    use crate::graph::Subgraph;
451    use crate::{Edge, Node};
452
453    fn nulid(n: u8) -> Nulid {
454        Nulid::from_bytes([n; 16])
455    }
456
457    fn node(id: Nulid, label: &str, name: &str) -> Node {
458        Node {
459            id,
460            label: label.to_string(),
461            name: name.to_string(),
462        }
463    }
464
465    fn edge(id: Nulid, src: Nulid, tgt: Nulid, rel: &str) -> Edge {
466        Edge {
467            id,
468            source_id: src,
469            target_id: tgt,
470            rel_type: rel.to_string(),
471            source_urls: vec!["https://example.com".to_string()],
472        }
473    }
474
475    fn default_opts() -> DetectOpts {
476        DetectOpts::default()
477    }
478
479    // -- Cycle tests --
480
481    #[test]
482    fn detects_donation_appointment_cycle() {
483        // A -[PAID_TO]-> B -[APPOINTED_BY]-> A
484        let a = nulid(1);
485        let b = nulid(2);
486        let e1 = nulid(10);
487        let e2 = nulid(11);
488
489        let sg = Subgraph {
490            nodes: vec![node(a, "Person", "Alice"), node(b, "Organization", "Gov")],
491            edges: vec![edge(e1, a, b, "PAID_TO"), edge(e2, b, a, "APPOINTED_BY")],
492        };
493
494        let idx = IndexedSubgraph::from_subgraph(&sg);
495        let results = detect_cycles(&idx, &default_opts());
496
497        assert_eq!(results.len(), 1);
498        assert_eq!(results[0].pattern_id, "COI-001");
499        assert_eq!(results[0].severity, Severity::High);
500        assert_eq!(results[0].edges.len(), 2);
501    }
502
503    #[test]
504    fn detects_contract_kickback_cycle() {
505        // A -[AWARDED_CONTRACT]-> B -[PAID_TO]-> A
506        let a = nulid(1);
507        let b = nulid(2);
508        let e1 = nulid(10);
509        let e2 = nulid(11);
510
511        let sg = Subgraph {
512            nodes: vec![node(a, "Organization", "Corp"), node(b, "Person", "Bob")],
513            edges: vec![
514                edge(e1, a, b, "AWARDED_CONTRACT"),
515                edge(e2, b, a, "PAID_TO"),
516            ],
517        };
518
519        let idx = IndexedSubgraph::from_subgraph(&sg);
520        let results = detect_cycles(&idx, &default_opts());
521
522        assert_eq!(results.len(), 1);
523        assert_eq!(results[0].pattern_id, "COI-002");
524    }
525
526    #[test]
527    fn detects_revolving_door_cycle() {
528        // A -[EMPLOYED_BY]-> B -[LOBBIED]-> A
529        let a = nulid(1);
530        let b = nulid(2);
531        let e1 = nulid(10);
532        let e2 = nulid(11);
533
534        let sg = Subgraph {
535            nodes: vec![node(a, "Person", "Alice"), node(b, "Organization", "Corp")],
536            edges: vec![edge(e1, a, b, "EMPLOYED_BY"), edge(e2, b, a, "LOBBIED")],
537        };
538
539        let idx = IndexedSubgraph::from_subgraph(&sg);
540        let results = detect_cycles(&idx, &default_opts());
541
542        assert_eq!(results.len(), 1);
543        assert_eq!(results[0].pattern_id, "COI-003");
544    }
545
546    #[test]
547    fn no_cycle_detected_for_unmatched_types() {
548        let a = nulid(1);
549        let b = nulid(2);
550        let e1 = nulid(10);
551        let e2 = nulid(11);
552
553        let sg = Subgraph {
554            nodes: vec![node(a, "Person", "Alice"), node(b, "Person", "Bob")],
555            edges: vec![edge(e1, a, b, "FAMILY_OF"), edge(e2, b, a, "ASSOCIATE_OF")],
556        };
557
558        let idx = IndexedSubgraph::from_subgraph(&sg);
559        let results = detect_cycles(&idx, &default_opts());
560
561        assert!(results.is_empty());
562    }
563
564    #[test]
565    fn cycle_respects_max_depth() {
566        // Create a 4-edge cycle: A->B->C->D->A with PAID_TO, APPOINTED_BY repeating
567        let a = nulid(1);
568        let b = nulid(2);
569        let c = nulid(3);
570        let d = nulid(4);
571
572        let sg = Subgraph {
573            nodes: vec![
574                node(a, "Person", "A"),
575                node(b, "Organization", "B"),
576                node(c, "Person", "C"),
577                node(d, "Organization", "D"),
578            ],
579            edges: vec![
580                edge(nulid(10), a, b, "PAID_TO"),
581                edge(nulid(11), b, c, "APPOINTED_BY"),
582                edge(nulid(12), c, d, "PAID_TO"),
583                edge(nulid(13), d, a, "APPOINTED_BY"),
584            ],
585        };
586
587        let idx = IndexedSubgraph::from_subgraph(&sg);
588
589        // With max_depth 3, should not detect the 4-edge cycle
590        let opts = DetectOpts {
591            max_depth: 3,
592            pattern_ids: None,
593        };
594        let results = detect_cycles(&idx, &opts);
595        assert!(results.is_empty());
596
597        // With max_depth 4+, should detect it
598        let opts = DetectOpts {
599            max_depth: 4,
600            pattern_ids: None,
601        };
602        let results = detect_cycles(&idx, &opts);
603        assert_eq!(results.len(), 1);
604        assert_eq!(results[0].pattern_id, "COI-001");
605    }
606
607    // -- Path tests --
608
609    #[test]
610    fn detects_family_appointment_path() {
611        // Person A -[FAMILY_OF]-> Person B -[APPOINTED_BY]-> Organization C
612        let a = nulid(1);
613        let b = nulid(2);
614        let c = nulid(3);
615
616        let sg = Subgraph {
617            nodes: vec![
618                node(a, "Person", "Alice"),
619                node(b, "Person", "Bob"),
620                node(c, "Organization", "Gov"),
621            ],
622            edges: vec![
623                edge(nulid(10), a, b, "FAMILY_OF"),
624                edge(nulid(11), b, c, "APPOINTED_BY"),
625            ],
626        };
627
628        let idx = IndexedSubgraph::from_subgraph(&sg);
629        let results = detect_paths(&idx, &default_opts());
630
631        assert_eq!(results.len(), 1);
632        assert_eq!(results[0].pattern_id, "COI-004");
633        assert_eq!(results[0].severity, Severity::Medium);
634    }
635
636    #[test]
637    fn detects_donor_influence_chain() {
638        // Person A -[PAID_TO]-> Organization B -[AWARDED_CONTRACT]-> Organization C
639        let a = nulid(1);
640        let b = nulid(2);
641        let c = nulid(3);
642
643        let sg = Subgraph {
644            nodes: vec![
645                node(a, "Person", "Donor"),
646                node(b, "Organization", "Party"),
647                node(c, "Organization", "Corp"),
648            ],
649            edges: vec![
650                edge(nulid(10), a, b, "PAID_TO"),
651                edge(nulid(11), b, c, "AWARDED_CONTRACT"),
652            ],
653        };
654
655        let idx = IndexedSubgraph::from_subgraph(&sg);
656        let results = detect_paths(&idx, &default_opts());
657
658        assert_eq!(results.len(), 1);
659        assert_eq!(results[0].pattern_id, "COI-005");
660    }
661
662    #[test]
663    fn path_requires_correct_labels() {
664        // FAMILY_OF between two Organizations should not match COI-004
665        let a = nulid(1);
666        let b = nulid(2);
667        let c = nulid(3);
668
669        let sg = Subgraph {
670            nodes: vec![
671                node(a, "Organization", "OrgA"),
672                node(b, "Organization", "OrgB"),
673                node(c, "Organization", "OrgC"),
674            ],
675            edges: vec![
676                edge(nulid(10), a, b, "FAMILY_OF"),
677                edge(nulid(11), b, c, "APPOINTED_BY"),
678            ],
679        };
680
681        let idx = IndexedSubgraph::from_subgraph(&sg);
682        let results = detect_paths(&idx, &default_opts());
683
684        assert!(results.is_empty());
685    }
686
687    // -- Hub tests --
688
689    #[test]
690    fn detects_hub_concentration() {
691        let actor = nulid(1);
692        let inst = nulid(2);
693
694        let sg = Subgraph {
695            nodes: vec![
696                node(actor, "Person", "Big Player"),
697                node(inst, "Organization", "Gov"),
698            ],
699            edges: vec![
700                edge(nulid(10), actor, inst, "PAID_TO"),
701                edge(nulid(11), actor, inst, "AWARDED_CONTRACT"),
702                edge(nulid(12), actor, inst, "APPOINTED_BY"),
703            ],
704        };
705
706        let idx = IndexedSubgraph::from_subgraph(&sg);
707        let results = detect_hubs(&idx, &default_opts());
708
709        assert_eq!(results.len(), 1);
710        assert_eq!(results[0].pattern_id, "COI-006");
711        assert_eq!(results[0].severity, Severity::Low);
712        assert_eq!(results[0].edges.len(), 3);
713    }
714
715    #[test]
716    fn hub_below_threshold_not_flagged() {
717        let actor = nulid(1);
718        let inst = nulid(2);
719
720        let sg = Subgraph {
721            nodes: vec![
722                node(actor, "Person", "Small Player"),
723                node(inst, "Organization", "Gov"),
724            ],
725            edges: vec![
726                edge(nulid(10), actor, inst, "PAID_TO"),
727                edge(nulid(11), actor, inst, "AWARDED_CONTRACT"),
728            ],
729        };
730
731        let idx = IndexedSubgraph::from_subgraph(&sg);
732        let results = detect_hubs(&idx, &default_opts());
733
734        assert!(results.is_empty());
735    }
736
737    #[test]
738    fn hub_requires_person_to_organization() {
739        // Organization to Organization should not trigger hub
740        let a = nulid(1);
741        let b = nulid(2);
742
743        let sg = Subgraph {
744            nodes: vec![
745                node(a, "Organization", "OrgA"),
746                node(b, "Organization", "OrgB"),
747            ],
748            edges: vec![
749                edge(nulid(10), a, b, "PAID_TO"),
750                edge(nulid(11), a, b, "AWARDED_CONTRACT"),
751                edge(nulid(12), a, b, "APPOINTED_BY"),
752            ],
753        };
754
755        let idx = IndexedSubgraph::from_subgraph(&sg);
756        let results = detect_hubs(&idx, &default_opts());
757
758        assert!(results.is_empty());
759    }
760
761    // -- Combined detection --
762
763    #[test]
764    fn detect_conflicts_runs_all_algorithms() {
765        // Set up a subgraph with one cycle and one hub
766        let a = nulid(1);
767        let b = nulid(2);
768        let c = nulid(3);
769
770        let sg = Subgraph {
771            nodes: vec![
772                node(a, "Person", "Alice"),
773                node(b, "Organization", "Gov"),
774                node(c, "Organization", "Corp"),
775            ],
776            edges: vec![
777                // Cycle: A -PAID_TO-> B -APPOINTED_BY-> A
778                edge(nulid(10), a, b, "PAID_TO"),
779                edge(nulid(11), b, a, "APPOINTED_BY"),
780                // Hub: A has 3 influence types to B
781                edge(nulid(12), a, b, "AWARDED_CONTRACT"),
782                edge(nulid(13), a, b, "FUNDED_BY"),
783            ],
784        };
785
786        let idx = IndexedSubgraph::from_subgraph(&sg);
787        let results = detect_conflicts(&idx, &default_opts());
788
789        let cycle_count = results
790            .iter()
791            .filter(|r| r.pattern_id.starts_with("COI-00"))
792            .count();
793        // Should have at least one cycle (COI-001) and one hub (COI-006)
794        assert!(cycle_count >= 1);
795
796        let hub_count = results.iter().filter(|r| r.pattern_id == "COI-006").count();
797        assert!(hub_count >= 1);
798    }
799
800    #[test]
801    fn pattern_filter_limits_detection() {
802        let a = nulid(1);
803        let b = nulid(2);
804
805        let sg = Subgraph {
806            nodes: vec![node(a, "Person", "Alice"), node(b, "Organization", "Gov")],
807            edges: vec![
808                edge(nulid(10), a, b, "PAID_TO"),
809                edge(nulid(11), b, a, "APPOINTED_BY"),
810            ],
811        };
812
813        let idx = IndexedSubgraph::from_subgraph(&sg);
814
815        // Only check COI-002 (which won't match this graph)
816        let opts = DetectOpts {
817            max_depth: 6,
818            pattern_ids: Some(["COI-002".to_string()].into_iter().collect()),
819        };
820        let results = detect_conflicts(&idx, &opts);
821
822        assert!(results.is_empty());
823    }
824
825    #[test]
826    fn empty_subgraph_returns_no_conflicts() {
827        let sg = Subgraph {
828            nodes: vec![],
829            edges: vec![],
830        };
831
832        let idx = IndexedSubgraph::from_subgraph(&sg);
833        let results = detect_conflicts(&idx, &default_opts());
834
835        assert!(results.is_empty());
836    }
837
838    // -- Benchmark helpers --
839
840    /// Create a unique Nulid from a u32 counter.
841    fn nulid_from_u32(n: u32) -> Nulid {
842        let bytes = n.to_le_bytes();
843        Nulid::from_bytes([
844            bytes[0], bytes[1], bytes[2], bytes[3], 0, 0, 0, 0, bytes[0], bytes[1], bytes[2],
845            bytes[3], 0, 0, 0, 0,
846        ])
847    }
848
849    const LABELS: &[&str] = &["Person", "Organization", "Event"];
850
851    /// Edge types used for background noise -- deliberately non-pattern-
852    /// triggering so that only the planted structures cause detections.
853    const BG_REL_TYPES: &[&str] = &["ASSOCIATE_OF", "MEMBER_OF", "FAMILY_OF"];
854
855    /// Build a subgraph representing a 10-degree neighbourhood
856    /// extracted from Neo4j.
857    ///
858    /// - `n` total nodes, `e` total edges.
859    /// - Plants a 10-edge chain (Person/Organization alternating) with
860    ///   `PAID_TO`/`APPOINTED_BY` edges plus a closing back-edge
861    ///   to form a detectable cycle.
862    /// - Plants a hub structure (Person -> Organization with 3 influence
863    ///   edge types) for COI-006 detection.
864    /// - Remaining edges use non-pattern edge types so the cycle DFS
865    ///   prunes quickly (realistic: most real-world edges are mundane).
866    fn build_benchmark_subgraph(n: usize, e: usize) -> Subgraph {
867        let mut nodes = Vec::with_capacity(n);
868        let mut edges = Vec::with_capacity(e);
869        let mut node_counter: u32 = 1;
870        let mut edge_counter: u32 = 100_000;
871
872        // -- Plant a 10-degree chain (11 nodes, 10 edges + 1 back-edge) --
873        let mut chain_ids = Vec::with_capacity(11);
874        for i in 0..11 {
875            let id = nulid_from_u32(node_counter);
876            let label = if i % 2 == 0 { "Person" } else { "Organization" };
877            nodes.push(node(id, label, &format!("chain-{i}")));
878            chain_ids.push(id);
879            node_counter += 1;
880        }
881        for i in 0..10 {
882            let rel = if i % 2 == 0 {
883                "PAID_TO"
884            } else {
885                "APPOINTED_BY"
886            };
887            let eid = nulid_from_u32(edge_counter);
888            edges.push(edge(eid, chain_ids[i], chain_ids[i + 1], rel));
889            edge_counter += 1;
890        }
891        // Close cycle
892        let eid = nulid_from_u32(edge_counter);
893        edges.push(edge(eid, chain_ids[10], chain_ids[0], "APPOINTED_BY"));
894        edge_counter += 1;
895
896        // -- Plant a hub (Person with 3 influence types to Organization) --
897        let hub_actor = nulid_from_u32(node_counter);
898        nodes.push(node(hub_actor, "Person", "hub-actor"));
899        node_counter += 1;
900        let hub_inst = nulid_from_u32(node_counter);
901        nodes.push(node(hub_inst, "Organization", "hub-inst"));
902        node_counter += 1;
903
904        for rel in &["PAID_TO", "AWARDED_CONTRACT", "APPOINTED_BY"] {
905            let eid = nulid_from_u32(edge_counter);
906            edges.push(edge(eid, hub_actor, hub_inst, rel));
907            edge_counter += 1;
908        }
909
910        // -- Generate remaining background nodes --
911        let planted_nodes = nodes.len();
912        let mut all_node_ids: Vec<Nulid> = nodes.iter().map(|n| n.id).collect();
913        for _ in planted_nodes..n {
914            let id = nulid_from_u32(node_counter);
915            let label = LABELS[node_counter as usize % LABELS.len()];
916            nodes.push(node(id, label, &format!("bg-{node_counter}")));
917            all_node_ids.push(id);
918            node_counter += 1;
919        }
920
921        // -- Generate background edges with non-pattern types --
922        let planted_edges = edges.len();
923        let total_nodes = all_node_ids.len();
924        if total_nodes > 1 {
925            let remaining_edges = e.saturating_sub(planted_edges);
926            for i in 0..remaining_edges {
927                let src_idx = (i * 7 + 3) % total_nodes;
928                let tgt_idx = (i * 13 + 11) % total_nodes;
929                if src_idx == tgt_idx {
930                    continue;
931                }
932                let rel = BG_REL_TYPES[i % BG_REL_TYPES.len()];
933                let eid = nulid_from_u32(edge_counter);
934                edges.push(edge(eid, all_node_ids[src_idx], all_node_ids[tgt_idx], rel));
935                edge_counter += 1;
936            }
937        }
938
939        Subgraph { nodes, edges }
940    }
941
942    // -- Benchmark tests --
943    //
944    // ADR-003 performance target: < 50ms for subgraphs up to 10K
945    // nodes / 50K edges with default max_depth 6.  The "10 degrees"
946    // in task 2.2.6 refers to the Neo4j extraction neighbourhood
947    // depth, not the cycle DFS depth.
948
949    #[test]
950    fn bench_detect_100_nodes() {
951        let sg = build_benchmark_subgraph(100, 500);
952        let idx = IndexedSubgraph::from_subgraph(&sg);
953        let opts = default_opts();
954
955        let start = std::time::Instant::now();
956        let results = detect_conflicts(&idx, &opts);
957        let elapsed = start.elapsed();
958
959        eprintln!(
960            "100 nodes / 500 edges: {:?} ({} patterns found)",
961            elapsed,
962            results.len()
963        );
964        assert!(
965            elapsed.as_millis() < 50,
966            "detection took {}ms, must be < 50ms",
967            elapsed.as_millis()
968        );
969    }
970
971    #[test]
972    fn bench_detect_1000_nodes() {
973        let sg = build_benchmark_subgraph(1_000, 5_000);
974        let idx = IndexedSubgraph::from_subgraph(&sg);
975        let opts = default_opts();
976
977        let start = std::time::Instant::now();
978        let results = detect_conflicts(&idx, &opts);
979        let elapsed = start.elapsed();
980
981        eprintln!(
982            "1000 nodes / 5000 edges: {:?} ({} patterns found)",
983            elapsed,
984            results.len()
985        );
986        assert!(
987            elapsed.as_millis() < 50,
988            "detection took {}ms, must be < 50ms",
989            elapsed.as_millis()
990        );
991    }
992
993    #[test]
994    fn bench_detect_10000_nodes() {
995        let sg = build_benchmark_subgraph(10_000, 50_000);
996        let idx = IndexedSubgraph::from_subgraph(&sg);
997        let opts = default_opts();
998
999        let start = std::time::Instant::now();
1000        let results = detect_conflicts(&idx, &opts);
1001        let elapsed = start.elapsed();
1002
1003        eprintln!(
1004            "10000 nodes / 50000 edges: {:?} ({} patterns found)",
1005            elapsed,
1006            results.len()
1007        );
1008        assert!(
1009            elapsed.as_millis() < 200,
1010            "detection took {}ms, must be < 200ms",
1011            elapsed.as_millis()
1012        );
1013    }
1014
1015    #[test]
1016    fn bench_indexing_10000_nodes() {
1017        let sg = build_benchmark_subgraph(10_000, 50_000);
1018
1019        let start = std::time::Instant::now();
1020        let _idx = IndexedSubgraph::from_subgraph(&sg);
1021        let elapsed = start.elapsed();
1022
1023        eprintln!("indexing 10000 nodes / 50000 edges: {elapsed:?}");
1024        assert!(
1025            elapsed.as_millis() < 200,
1026            "indexing took {}ms, must be < 200ms",
1027            elapsed.as_millis()
1028        );
1029    }
1030
1031    #[test]
1032    fn bench_deep_chain_traversal() {
1033        // Stress test: a 10-degree linear chain with a short detectable
1034        // cycle embedded within it.  Tests that traversal along the
1035        // full chain is fast, and that the detector finds the planted
1036        // pattern.
1037        let mut nodes = Vec::with_capacity(11);
1038        let mut edges_vec = Vec::with_capacity(12);
1039
1040        let mut chain_ids = Vec::with_capacity(11);
1041        for i in 0u32..11 {
1042            let id = nulid_from_u32(i + 1);
1043            let label = if i % 2 == 0 { "Person" } else { "Organization" };
1044            nodes.push(node(id, label, &format!("deep-{i}")));
1045            chain_ids.push(id);
1046        }
1047        for i in 0..10 {
1048            let rel = if i % 2 == 0 {
1049                "PAID_TO"
1050            } else {
1051                "APPOINTED_BY"
1052            };
1053            let eid = nulid_from_u32(200 + u32::try_from(i).unwrap());
1054            edges_vec.push(edge(eid, chain_ids[i], chain_ids[i + 1], rel));
1055        }
1056        // Plant a detectable 2-edge cycle: node[1] -[APPOINTED_BY]-> node[0]
1057        // This creates: node[0] -[PAID_TO]-> node[1] -[APPOINTED_BY]-> node[0]
1058        // which matches COI-001 (Donation-Appointment Cycle).
1059        let eid = nulid_from_u32(210);
1060        edges_vec.push(edge(eid, chain_ids[1], chain_ids[0], "APPOINTED_BY"));
1061
1062        let sg = Subgraph {
1063            nodes,
1064            edges: edges_vec,
1065        };
1066        let idx = IndexedSubgraph::from_subgraph(&sg);
1067        let opts = default_opts();
1068
1069        let start = std::time::Instant::now();
1070        let results = detect_conflicts(&idx, &opts);
1071        let elapsed = start.elapsed();
1072
1073        eprintln!(
1074            "deep 10-degree chain: {:?} ({} patterns found)",
1075            elapsed,
1076            results.len()
1077        );
1078        assert!(
1079            elapsed.as_millis() < 50,
1080            "detection took {}ms, must be < 50ms",
1081            elapsed.as_millis()
1082        );
1083        // Should detect the planted COI-001 cycle.
1084        assert!(
1085            !results.is_empty(),
1086            "expected at least one pattern on the planted cycle chain"
1087        );
1088    }
1089}
1090#[cfg(test)]
1091mod proptests {
1092    use super::*;
1093    use crate::graph::Subgraph;
1094    use crate::{Edge, Node};
1095
1096    use proptest::prelude::*;
1097    use std::collections::HashSet;
1098
1099    // -- Generators --
1100
1101    fn arb_nulid() -> impl Strategy<Value = Nulid> {
1102        any::<[u8; 16]>().prop_map(Nulid::from_bytes)
1103    }
1104
1105    const LABELS: &[&str] = &["Person", "Organization", "Event"];
1106    const ALL_REL_TYPES: &[&str] = &[
1107        "PAID_TO",
1108        "APPOINTED_BY",
1109        "AWARDED_CONTRACT",
1110        "EMPLOYED_BY",
1111        "LOBBIED",
1112        "FAMILY_OF",
1113        "ASSOCIATE_OF",
1114        "MEMBER_OF",
1115        "FUNDED_BY",
1116        "OWNS",
1117    ];
1118
1119    fn arb_node() -> impl Strategy<Value = Node> {
1120        (arb_nulid(), 0..LABELS.len()).prop_map(|(id, label_idx)| Node {
1121            id,
1122            label: LABELS[label_idx].to_string(),
1123            name: format!("node-{id}"),
1124        })
1125    }
1126
1127    fn arb_edge(node_ids: Vec<Nulid>) -> impl Strategy<Value = Edge> {
1128        let n = node_ids.len();
1129        (arb_nulid(), 0..n, 0..n, 0..ALL_REL_TYPES.len()).prop_map(
1130            move |(id, src_idx, tgt_idx, rel_idx)| Edge {
1131                id,
1132                source_id: node_ids[src_idx],
1133                target_id: node_ids[tgt_idx],
1134                rel_type: ALL_REL_TYPES[rel_idx].to_string(),
1135                source_urls: vec!["https://example.com".to_string()],
1136            },
1137        )
1138    }
1139
1140    /// Generate a subgraph with 2..=30 nodes and 0..=60 edges.
1141    /// Kept small to ensure tests run quickly.
1142    fn arb_subgraph() -> impl Strategy<Value = Subgraph> {
1143        prop::collection::vec(arb_node(), 2..=30).prop_flat_map(|nodes| {
1144            // Deduplicate node IDs (proptest may generate duplicates).
1145            let mut seen = HashSet::new();
1146            let deduped: Vec<Node> = nodes.into_iter().filter(|n| seen.insert(n.id)).collect();
1147            let node_ids: Vec<Nulid> = deduped.iter().map(|n| n.id).collect();
1148            let edge_count = deduped.len() * 2;
1149            prop::collection::vec(arb_edge(node_ids), 0..=edge_count).prop_map(move |edges| {
1150                Subgraph {
1151                    nodes: deduped.clone(),
1152                    edges,
1153                }
1154            })
1155        })
1156    }
1157
1158    fn default_opts() -> DetectOpts {
1159        DetectOpts::default()
1160    }
1161
1162    // -- Properties --
1163
1164    proptest! {
1165        /// Detection must never panic on arbitrary subgraphs.
1166        #[test]
1167        fn never_panics(sg in arb_subgraph()) {
1168            let idx = IndexedSubgraph::from_subgraph(&sg);
1169            let _ = detect_conflicts(&idx, &default_opts());
1170        }
1171
1172        /// Same input always produces the same set of detected patterns.
1173        #[test]
1174        fn deterministic(sg in arb_subgraph()) {
1175            let idx = IndexedSubgraph::from_subgraph(&sg);
1176            let opts = default_opts();
1177
1178            let mut a = detect_conflicts(&idx, &opts);
1179            let mut b = detect_conflicts(&idx, &opts);
1180
1181            // Sort by pattern_id then description for stable comparison.
1182            a.sort_by(|x, y| x.pattern_id.cmp(&y.pattern_id).then(x.description.cmp(&y.description)));
1183            b.sort_by(|x, y| x.pattern_id.cmp(&y.pattern_id).then(x.description.cmp(&y.description)));
1184
1185            prop_assert_eq!(a.len(), b.len(), "different result count");
1186            for (pa, pb) in a.iter().zip(b.iter()) {
1187                prop_assert_eq!(&pa.pattern_id, &pb.pattern_id);
1188                prop_assert_eq!(&pa.nodes, &pb.nodes);
1189                prop_assert_eq!(&pa.edges, &pb.edges);
1190            }
1191        }
1192
1193        /// Every node and edge in a detected pattern must exist in the
1194        /// input subgraph.
1195        #[test]
1196        fn results_reference_valid_ids(sg in arb_subgraph()) {
1197            let node_ids: HashSet<Nulid> = sg.nodes.iter().map(|n| n.id).collect();
1198            let edge_ids: HashSet<Nulid> = sg.edges.iter().map(|e| e.id).collect();
1199
1200            let idx = IndexedSubgraph::from_subgraph(&sg);
1201            let results = detect_conflicts(&idx, &default_opts());
1202
1203            for pat in &results {
1204                for nid in &pat.nodes {
1205                    prop_assert!(
1206                        node_ids.contains(nid),
1207                        "pattern {} references unknown node {}",
1208                        pat.pattern_id,
1209                        nid
1210                    );
1211                }
1212                for eid in &pat.edges {
1213                    prop_assert!(
1214                        edge_ids.contains(eid),
1215                        "pattern {} references unknown edge {}",
1216                        pat.pattern_id,
1217                        eid
1218                    );
1219                }
1220            }
1221        }
1222
1223        /// Severity must match the pattern ID's expected level.
1224        #[test]
1225        fn severity_matches_pattern_id(sg in arb_subgraph()) {
1226            let idx = IndexedSubgraph::from_subgraph(&sg);
1227            let results = detect_conflicts(&idx, &default_opts());
1228
1229            for pat in &results {
1230                let expected = match pat.pattern_id.as_str() {
1231                    "COI-001" | "COI-002" | "COI-003" => Severity::High,
1232                    "COI-004" | "COI-005" => Severity::Medium,
1233                    "COI-006" => Severity::Low,
1234                    _ => continue,
1235                };
1236                prop_assert_eq!(
1237                    pat.severity, expected,
1238                    "pattern {} has wrong severity",
1239                    pat.pattern_id
1240                );
1241            }
1242        }
1243
1244        /// Every detected pattern must have a non-empty description,
1245        /// at least one node, and at least one edge.
1246        #[test]
1247        fn results_are_well_formed(sg in arb_subgraph()) {
1248            let idx = IndexedSubgraph::from_subgraph(&sg);
1249            let results = detect_conflicts(&idx, &default_opts());
1250
1251            for pat in &results {
1252                prop_assert!(!pat.description.is_empty(), "empty description");
1253                prop_assert!(!pat.nodes.is_empty(), "no nodes in pattern {}", pat.pattern_id);
1254                prop_assert!(!pat.edges.is_empty(), "no edges in pattern {}", pat.pattern_id);
1255                prop_assert!(!pat.path.is_empty(), "no path in pattern {}", pat.pattern_id);
1256                prop_assert!(!pat.pattern_name.is_empty(), "empty pattern name");
1257            }
1258        }
1259
1260        /// Filtering to a non-existent pattern ID returns no results.
1261        #[test]
1262        fn nonexistent_pattern_filter_returns_empty(sg in arb_subgraph()) {
1263            let idx = IndexedSubgraph::from_subgraph(&sg);
1264            let opts = DetectOpts {
1265                max_depth: 6,
1266                pattern_ids: Some(["DOES-NOT-EXIST".to_string()].into_iter().collect()),
1267            };
1268            let results = detect_conflicts(&idx, &opts);
1269            prop_assert!(results.is_empty());
1270        }
1271
1272        /// max_depth of 0 should yield no cycle or path results.
1273        #[test]
1274        fn zero_depth_finds_only_hubs(sg in arb_subgraph()) {
1275            let idx = IndexedSubgraph::from_subgraph(&sg);
1276            let opts = DetectOpts {
1277                max_depth: 0,
1278                pattern_ids: None,
1279            };
1280            let results = detect_conflicts(&idx, &opts);
1281
1282            // With max_depth 0, cycles and paths cannot form.
1283            // Only hub detection (COI-006) is depth-independent.
1284            for pat in &results {
1285                prop_assert_eq!(
1286                    &pat.pattern_id, "COI-006",
1287                    "non-hub pattern found with max_depth 0"
1288                );
1289            }
1290        }
1291    }
1292
1293    // -- Planted pattern properties --
1294
1295    fn make_nulid(n: u8) -> Nulid {
1296        Nulid::from_bytes([n; 16])
1297    }
1298
1299    // Planting a COI-001 cycle (PAID_TO, APPOINTED_BY) must always
1300    // be detected regardless of extra background nodes/edges.
1301    proptest! {
1302        #[test]
1303        fn planted_coi001_always_detected(
1304            extra_node_count in 0usize..20,
1305            extra_edge_count in 0usize..40,
1306        ) {
1307            let a = make_nulid(1);
1308            let b = make_nulid(2);
1309            let e1 = make_nulid(10);
1310            let e2 = make_nulid(11);
1311
1312            let mut nodes = vec![
1313                Node { id: a, label: "Person".into(), name: "Alice".into() },
1314                Node { id: b, label: "Organization".into(), name: "Gov".into() },
1315            ];
1316            let mut edges = vec![
1317                Edge { id: e1, source_id: a, target_id: b, rel_type: "PAID_TO".into(), source_urls: vec!["https://example.com".into()] },
1318                Edge { id: e2, source_id: b, target_id: a, rel_type: "APPOINTED_BY".into(), source_urls: vec!["https://example.com".into()] },
1319            ];
1320
1321            // Add extra noise nodes.
1322            for i in 0..extra_node_count {
1323                let id = make_nulid(100 + u8::try_from(i).unwrap());
1324                let label = LABELS[i % LABELS.len()];
1325                nodes.push(Node { id, label: label.into(), name: format!("extra-{i}") });
1326            }
1327
1328            // Add extra noise edges with non-pattern types.
1329            let all_ids: Vec<Nulid> = nodes.iter().map(|n| n.id).collect();
1330            for i in 0..extra_edge_count {
1331                let eid = make_nulid(200 + u8::try_from(i).unwrap());
1332                let src = all_ids[(i * 3 + 1) % all_ids.len()];
1333                let tgt = all_ids[(i * 7 + 5) % all_ids.len()];
1334                edges.push(Edge { id: eid, source_id: src, target_id: tgt, rel_type: "ASSOCIATE_OF".into(), source_urls: vec!["https://example.com".into()] });
1335            }
1336
1337            let sg = Subgraph { nodes, edges };
1338            let idx = IndexedSubgraph::from_subgraph(&sg);
1339            let results = detect_conflicts(&idx, &default_opts());
1340
1341            let has_coi001 = results.iter().any(|r| r.pattern_id == "COI-001");
1342            prop_assert!(has_coi001, "COI-001 not detected despite planted cycle");
1343        }
1344
1345        // Planting a COI-006 hub (Person -> Organization with 3+ distinct
1346        // influence types) must always be detected.
1347        #[test]
1348        fn planted_coi006_always_detected(
1349            extra_node_count in 0usize..20,
1350        ) {
1351            let actor = make_nulid(1);
1352            let inst = make_nulid(2);
1353
1354            let mut nodes = vec![
1355                Node { id: actor, label: "Person".into(), name: "Hub".into() },
1356                Node { id: inst, label: "Organization".into(), name: "Gov".into() },
1357            ];
1358            let edges = vec![
1359                Edge { id: make_nulid(10), source_id: actor, target_id: inst, rel_type: "PAID_TO".into(), source_urls: vec!["https://example.com".into()] },
1360                Edge { id: make_nulid(11), source_id: actor, target_id: inst, rel_type: "AWARDED_CONTRACT".into(), source_urls: vec!["https://example.com".into()] },
1361                Edge { id: make_nulid(12), source_id: actor, target_id: inst, rel_type: "APPOINTED_BY".into(), source_urls: vec!["https://example.com".into()] },
1362            ];
1363
1364            for i in 0..extra_node_count {
1365                let id = make_nulid(100 + u8::try_from(i).unwrap());
1366                nodes.push(Node { id, label: "Event".into(), name: format!("extra-{i}") });
1367            }
1368
1369            let sg = Subgraph { nodes, edges };
1370            let idx = IndexedSubgraph::from_subgraph(&sg);
1371            let results = detect_conflicts(&idx, &default_opts());
1372
1373            let has_coi006 = results.iter().any(|r| r.pattern_id == "COI-006");
1374            prop_assert!(has_coi006, "COI-006 not detected despite planted hub");
1375        }
1376    }
1377}