use std::collections::{HashMap, HashSet};
use nulid::Nulid;
use crate::graph::IndexedSubgraph;
use crate::patterns::{
ConflictPattern, CyclePattern, HUB_INFLUENCE_TYPES, HUB_THRESHOLD, PathPattern, Severity,
cycle_patterns, path_patterns,
};
#[derive(Debug, Clone)]
pub struct DetectOpts {
pub max_depth: usize,
pub pattern_ids: Option<HashSet<String>>,
}
impl Default for DetectOpts {
fn default() -> Self {
Self {
max_depth: 6,
pattern_ids: None,
}
}
}
#[must_use]
pub fn detect_conflicts(sg: &IndexedSubgraph, opts: &DetectOpts) -> Vec<ConflictPattern> {
let mut results = Vec::new();
results.extend(detect_cycles(sg, opts));
results.extend(detect_paths(sg, opts));
results.extend(detect_hubs(sg, opts));
results
}
fn detect_cycles(sg: &IndexedSubgraph, opts: &DetectOpts) -> Vec<ConflictPattern> {
let patterns = cycle_patterns();
let active: Vec<&CyclePattern> = patterns
.iter()
.filter(|p| pattern_enabled(p.id, opts))
.collect();
if active.is_empty() {
return Vec::new();
}
let relevant_types: HashSet<&str> = active
.iter()
.flat_map(|p| p.edge_sequence.iter().copied())
.collect();
let mut results = Vec::new();
let mut seen_cycles: HashSet<Vec<Nulid>> = HashSet::new();
for &start_id in sg.nodes.keys() {
let mut path_nodes: Vec<Nulid> = vec![start_id];
let mut path_edges: Vec<Nulid> = Vec::new();
let mut edge_types: Vec<String> = Vec::new();
let mut visited: HashSet<Nulid> = HashSet::new();
visited.insert(start_id);
cycle_dfs(
sg,
opts,
start_id,
start_id,
&active,
&relevant_types,
&mut path_nodes,
&mut path_edges,
&mut edge_types,
&mut visited,
&mut results,
&mut seen_cycles,
);
}
results
}
#[allow(clippy::too_many_arguments)]
fn cycle_dfs(
sg: &IndexedSubgraph,
opts: &DetectOpts,
start: Nulid,
current: Nulid,
patterns: &[&CyclePattern],
relevant_types: &HashSet<&str>,
path_nodes: &mut Vec<Nulid>,
path_edges: &mut Vec<Nulid>,
edge_types: &mut Vec<String>,
visited: &mut HashSet<Nulid>,
results: &mut Vec<ConflictPattern>,
seen: &mut HashSet<Vec<Nulid>>,
) {
if path_edges.len() >= opts.max_depth {
return;
}
for &edge_idx in sg.outgoing_edges(current) {
let edge = &sg.edge_list[edge_idx];
if !relevant_types.contains(edge.rel_type.as_str()) {
continue;
}
let next = edge.target_id;
if next == start && !path_edges.is_empty() {
edge_types.push(edge.rel_type.clone());
path_edges.push(edge.id);
for pat in patterns {
if matches_cycle_pattern(edge_types, pat.edge_sequence) {
let mut key = path_edges.clone();
key.sort();
if seen.insert(key) {
results.push(build_cycle_result(pat, path_nodes, path_edges));
}
}
}
path_edges.pop();
edge_types.pop();
continue;
}
if !visited.contains(&next) && sg.nodes.contains_key(&next) {
visited.insert(next);
path_nodes.push(next);
path_edges.push(edge.id);
edge_types.push(edge.rel_type.clone());
cycle_dfs(
sg,
opts,
start,
next,
patterns,
relevant_types,
path_nodes,
path_edges,
edge_types,
visited,
results,
seen,
);
edge_types.pop();
path_edges.pop();
path_nodes.pop();
visited.remove(&next);
}
}
}
fn matches_cycle_pattern(trail: &[String], sequence: &[&str]) -> bool {
let seq_len = sequence.len();
if trail.is_empty() || !trail.len().is_multiple_of(seq_len) {
return false;
}
trail
.iter()
.enumerate()
.all(|(i, t)| t == sequence[i % seq_len])
}
fn build_cycle_result(
pat: &CyclePattern,
path_nodes: &[Nulid],
path_edges: &[Nulid],
) -> ConflictPattern {
let mut path = Vec::with_capacity(path_nodes.len() + path_edges.len());
for i in 0..path_edges.len() {
path.push(path_nodes[i]);
path.push(path_edges[i]);
}
if let Some(&start) = path_nodes.first() {
path.push(start);
}
let node_set: Vec<Nulid> = path_nodes.to_vec();
ConflictPattern {
pattern_id: pat.id.to_string(),
pattern_name: pat.name.to_string(),
severity: pat.severity,
nodes: node_set,
edges: path_edges.to_vec(),
path,
description: format!("{}: cycle of {} edges detected", pat.name, path_edges.len()),
}
}
fn detect_paths(sg: &IndexedSubgraph, opts: &DetectOpts) -> Vec<ConflictPattern> {
let patterns = path_patterns();
let active: Vec<&PathPattern> = patterns
.iter()
.filter(|p| pattern_enabled(p.id, opts))
.collect();
if active.is_empty() {
return Vec::new();
}
let mut results = Vec::new();
let mut seen_paths: HashSet<Vec<Nulid>> = HashSet::new();
for (&node_id, node) in &sg.nodes {
for pat in &active {
if let Some(start_label) = pat.start_label
&& node.label != start_label
{
continue;
}
if pat.steps.len() > opts.max_depth {
continue;
}
let mut path_nodes = vec![node_id];
let mut path_edges = Vec::new();
path_dfs(
sg,
pat,
node_id,
0,
&mut path_nodes,
&mut path_edges,
&mut results,
&mut seen_paths,
);
}
}
results
}
#[allow(clippy::too_many_arguments)]
fn path_dfs(
sg: &IndexedSubgraph,
pat: &PathPattern,
current: Nulid,
step_idx: usize,
path_nodes: &mut Vec<Nulid>,
path_edges: &mut Vec<Nulid>,
results: &mut Vec<ConflictPattern>,
seen: &mut HashSet<Vec<Nulid>>,
) {
if step_idx >= pat.steps.len() {
let mut key = path_edges.clone();
key.sort();
if seen.insert(key) {
results.push(build_path_result(pat, path_nodes, path_edges));
}
return;
}
let step = &pat.steps[step_idx];
for &edge_idx in sg.outgoing_edges(current) {
let edge = &sg.edge_list[edge_idx];
if edge.rel_type != step.edge_type {
continue;
}
let next = edge.target_id;
if let Some(target_label) = step.target_label {
match sg.node_label(next) {
Some(label) if label == target_label => {}
_ => continue,
}
}
if path_nodes.contains(&next) {
continue;
}
path_nodes.push(next);
path_edges.push(edge.id);
path_dfs(
sg,
pat,
next,
step_idx + 1,
path_nodes,
path_edges,
results,
seen,
);
path_edges.pop();
path_nodes.pop();
}
}
fn build_path_result(
pat: &PathPattern,
path_nodes: &[Nulid],
path_edges: &[Nulid],
) -> ConflictPattern {
let mut path = Vec::with_capacity(path_nodes.len() + path_edges.len());
for i in 0..path_edges.len() {
path.push(path_nodes[i]);
path.push(path_edges[i]);
}
if let Some(&last) = path_nodes.last() {
path.push(last);
}
ConflictPattern {
pattern_id: pat.id.to_string(),
pattern_name: pat.name.to_string(),
severity: pat.severity,
nodes: path_nodes.to_vec(),
edges: path_edges.to_vec(),
path,
description: format!("{}: path of {} steps detected", pat.name, path_edges.len()),
}
}
fn detect_hubs(sg: &IndexedSubgraph, opts: &DetectOpts) -> Vec<ConflictPattern> {
if !pattern_enabled("COI-006", opts) {
return Vec::new();
}
let influence_set: HashSet<&str> = HUB_INFLUENCE_TYPES.iter().copied().collect();
let mut actor_targets: HashMap<Nulid, HashMap<Nulid, HashSet<String>>> = HashMap::new();
let mut actor_target_edges: HashMap<(Nulid, Nulid), Vec<Nulid>> = HashMap::new();
for (&node_id, node) in &sg.nodes {
if node.label != "Actor" {
continue;
}
for &edge_idx in sg.outgoing_edges(node_id) {
let edge = &sg.edge_list[edge_idx];
if !influence_set.contains(edge.rel_type.as_str()) {
continue;
}
match sg.node_label(edge.target_id) {
Some("Institution") => {}
_ => continue,
}
actor_targets
.entry(node_id)
.or_default()
.entry(edge.target_id)
.or_default()
.insert(edge.rel_type.clone());
actor_target_edges
.entry((node_id, edge.target_id))
.or_default()
.push(edge.id);
}
}
let mut results = Vec::new();
for (actor_id, targets) in &actor_targets {
for (target_id, edge_type_set) in targets {
if edge_type_set.len() >= HUB_THRESHOLD {
let edge_ids = actor_target_edges
.get(&(*actor_id, *target_id))
.cloned()
.unwrap_or_default();
let types_str: Vec<&str> = edge_type_set.iter().map(String::as_str).collect();
results.push(ConflictPattern {
pattern_id: "COI-006".to_string(),
pattern_name: "Hub Concentration".to_string(),
severity: Severity::Low,
nodes: vec![*actor_id, *target_id],
edges: edge_ids,
path: vec![*actor_id, *target_id],
description: format!(
"Hub Concentration: actor has {} distinct influence ties ({}) to institution",
edge_type_set.len(),
types_str.join(", ")
),
});
}
}
}
results
}
fn pattern_enabled(id: &str, opts: &DetectOpts) -> bool {
opts.pattern_ids.as_ref().is_none_or(|ids| ids.contains(id))
}
#[cfg(test)]
mod tests {
use super::*;
use crate::graph::Subgraph;
use crate::{Edge, Node};
fn nulid(n: u8) -> Nulid {
Nulid::from_bytes([n; 16])
}
fn node(id: Nulid, label: &str, name: &str) -> Node {
Node {
id,
label: label.to_string(),
name: name.to_string(),
}
}
fn edge(id: Nulid, src: Nulid, tgt: Nulid, rel: &str) -> Edge {
Edge {
id,
source_id: src,
target_id: tgt,
rel_type: rel.to_string(),
source_urls: vec!["https://example.com".to_string()],
}
}
fn default_opts() -> DetectOpts {
DetectOpts::default()
}
#[test]
fn detects_donation_appointment_cycle() {
let a = nulid(1);
let b = nulid(2);
let e1 = nulid(10);
let e2 = nulid(11);
let sg = Subgraph {
nodes: vec![node(a, "Actor", "Alice"), node(b, "Institution", "Gov")],
edges: vec![edge(e1, a, b, "DONATED_TO"), edge(e2, b, a, "APPOINTED_BY")],
};
let idx = IndexedSubgraph::from_subgraph(&sg);
let results = detect_cycles(&idx, &default_opts());
assert_eq!(results.len(), 1);
assert_eq!(results[0].pattern_id, "COI-001");
assert_eq!(results[0].severity, Severity::High);
assert_eq!(results[0].edges.len(), 2);
}
#[test]
fn detects_contract_kickback_cycle() {
let a = nulid(1);
let b = nulid(2);
let e1 = nulid(10);
let e2 = nulid(11);
let sg = Subgraph {
nodes: vec![node(a, "Institution", "Corp"), node(b, "Actor", "Bob")],
edges: vec![
edge(e1, a, b, "CONTRACTED_WITH"),
edge(e2, b, a, "DONATED_TO"),
],
};
let idx = IndexedSubgraph::from_subgraph(&sg);
let results = detect_cycles(&idx, &default_opts());
assert_eq!(results.len(), 1);
assert_eq!(results[0].pattern_id, "COI-002");
}
#[test]
fn detects_revolving_door_cycle() {
let a = nulid(1);
let b = nulid(2);
let e1 = nulid(10);
let e2 = nulid(11);
let sg = Subgraph {
nodes: vec![node(a, "Actor", "Alice"), node(b, "Institution", "Corp")],
edges: vec![edge(e1, a, b, "EMPLOYED_BY"), edge(e2, b, a, "LOBBIED_FOR")],
};
let idx = IndexedSubgraph::from_subgraph(&sg);
let results = detect_cycles(&idx, &default_opts());
assert_eq!(results.len(), 1);
assert_eq!(results[0].pattern_id, "COI-003");
}
#[test]
fn no_cycle_detected_for_unmatched_types() {
let a = nulid(1);
let b = nulid(2);
let e1 = nulid(10);
let e2 = nulid(11);
let sg = Subgraph {
nodes: vec![node(a, "Actor", "Alice"), node(b, "Actor", "Bob")],
edges: vec![edge(e1, a, b, "FAMILY_OF"), edge(e2, b, a, "RELATED_TO")],
};
let idx = IndexedSubgraph::from_subgraph(&sg);
let results = detect_cycles(&idx, &default_opts());
assert!(results.is_empty());
}
#[test]
fn cycle_respects_max_depth() {
let a = nulid(1);
let b = nulid(2);
let c = nulid(3);
let d = nulid(4);
let sg = Subgraph {
nodes: vec![
node(a, "Actor", "A"),
node(b, "Institution", "B"),
node(c, "Actor", "C"),
node(d, "Institution", "D"),
],
edges: vec![
edge(nulid(10), a, b, "DONATED_TO"),
edge(nulid(11), b, c, "APPOINTED_BY"),
edge(nulid(12), c, d, "DONATED_TO"),
edge(nulid(13), d, a, "APPOINTED_BY"),
],
};
let idx = IndexedSubgraph::from_subgraph(&sg);
let opts = DetectOpts {
max_depth: 3,
pattern_ids: None,
};
let results = detect_cycles(&idx, &opts);
assert!(results.is_empty());
let opts = DetectOpts {
max_depth: 4,
pattern_ids: None,
};
let results = detect_cycles(&idx, &opts);
assert_eq!(results.len(), 1);
assert_eq!(results[0].pattern_id, "COI-001");
}
#[test]
fn detects_family_appointment_path() {
let a = nulid(1);
let b = nulid(2);
let c = nulid(3);
let sg = Subgraph {
nodes: vec![
node(a, "Actor", "Alice"),
node(b, "Actor", "Bob"),
node(c, "Institution", "Gov"),
],
edges: vec![
edge(nulid(10), a, b, "FAMILY_OF"),
edge(nulid(11), b, c, "APPOINTED_BY"),
],
};
let idx = IndexedSubgraph::from_subgraph(&sg);
let results = detect_paths(&idx, &default_opts());
assert_eq!(results.len(), 1);
assert_eq!(results[0].pattern_id, "COI-004");
assert_eq!(results[0].severity, Severity::Medium);
}
#[test]
fn detects_donor_influence_chain() {
let a = nulid(1);
let b = nulid(2);
let c = nulid(3);
let sg = Subgraph {
nodes: vec![
node(a, "Actor", "Donor"),
node(b, "Institution", "Party"),
node(c, "Institution", "Corp"),
],
edges: vec![
edge(nulid(10), a, b, "DONATED_TO"),
edge(nulid(11), b, c, "CONTRACTED_WITH"),
],
};
let idx = IndexedSubgraph::from_subgraph(&sg);
let results = detect_paths(&idx, &default_opts());
assert_eq!(results.len(), 1);
assert_eq!(results[0].pattern_id, "COI-005");
}
#[test]
fn path_requires_correct_labels() {
let a = nulid(1);
let b = nulid(2);
let c = nulid(3);
let sg = Subgraph {
nodes: vec![
node(a, "Institution", "OrgA"),
node(b, "Institution", "OrgB"),
node(c, "Institution", "OrgC"),
],
edges: vec![
edge(nulid(10), a, b, "FAMILY_OF"),
edge(nulid(11), b, c, "APPOINTED_BY"),
],
};
let idx = IndexedSubgraph::from_subgraph(&sg);
let results = detect_paths(&idx, &default_opts());
assert!(results.is_empty());
}
#[test]
fn detects_hub_concentration() {
let actor = nulid(1);
let inst = nulid(2);
let sg = Subgraph {
nodes: vec![
node(actor, "Actor", "Big Player"),
node(inst, "Institution", "Gov"),
],
edges: vec![
edge(nulid(10), actor, inst, "DONATED_TO"),
edge(nulid(11), actor, inst, "CONTRACTED_WITH"),
edge(nulid(12), actor, inst, "APPOINTED_BY"),
],
};
let idx = IndexedSubgraph::from_subgraph(&sg);
let results = detect_hubs(&idx, &default_opts());
assert_eq!(results.len(), 1);
assert_eq!(results[0].pattern_id, "COI-006");
assert_eq!(results[0].severity, Severity::Low);
assert_eq!(results[0].edges.len(), 3);
}
#[test]
fn hub_below_threshold_not_flagged() {
let actor = nulid(1);
let inst = nulid(2);
let sg = Subgraph {
nodes: vec![
node(actor, "Actor", "Small Player"),
node(inst, "Institution", "Gov"),
],
edges: vec![
edge(nulid(10), actor, inst, "DONATED_TO"),
edge(nulid(11), actor, inst, "CONTRACTED_WITH"),
],
};
let idx = IndexedSubgraph::from_subgraph(&sg);
let results = detect_hubs(&idx, &default_opts());
assert!(results.is_empty());
}
#[test]
fn hub_requires_actor_to_institution() {
let a = nulid(1);
let b = nulid(2);
let sg = Subgraph {
nodes: vec![
node(a, "Institution", "OrgA"),
node(b, "Institution", "OrgB"),
],
edges: vec![
edge(nulid(10), a, b, "DONATED_TO"),
edge(nulid(11), a, b, "CONTRACTED_WITH"),
edge(nulid(12), a, b, "APPOINTED_BY"),
],
};
let idx = IndexedSubgraph::from_subgraph(&sg);
let results = detect_hubs(&idx, &default_opts());
assert!(results.is_empty());
}
#[test]
fn detect_conflicts_runs_all_algorithms() {
let a = nulid(1);
let b = nulid(2);
let c = nulid(3);
let sg = Subgraph {
nodes: vec![
node(a, "Actor", "Alice"),
node(b, "Institution", "Gov"),
node(c, "Institution", "Corp"),
],
edges: vec![
edge(nulid(10), a, b, "DONATED_TO"),
edge(nulid(11), b, a, "APPOINTED_BY"),
edge(nulid(12), a, b, "CONTRACTED_WITH"),
edge(nulid(13), a, b, "FUNDED_BY"),
],
};
let idx = IndexedSubgraph::from_subgraph(&sg);
let results = detect_conflicts(&idx, &default_opts());
let cycle_count = results
.iter()
.filter(|r| r.pattern_id.starts_with("COI-00"))
.count();
assert!(cycle_count >= 1);
let hub_count = results.iter().filter(|r| r.pattern_id == "COI-006").count();
assert!(hub_count >= 1);
}
#[test]
fn pattern_filter_limits_detection() {
let a = nulid(1);
let b = nulid(2);
let sg = Subgraph {
nodes: vec![node(a, "Actor", "Alice"), node(b, "Institution", "Gov")],
edges: vec![
edge(nulid(10), a, b, "DONATED_TO"),
edge(nulid(11), b, a, "APPOINTED_BY"),
],
};
let idx = IndexedSubgraph::from_subgraph(&sg);
let opts = DetectOpts {
max_depth: 6,
pattern_ids: Some(["COI-002".to_string()].into_iter().collect()),
};
let results = detect_conflicts(&idx, &opts);
assert!(results.is_empty());
}
#[test]
fn empty_subgraph_returns_no_conflicts() {
let sg = Subgraph {
nodes: vec![],
edges: vec![],
};
let idx = IndexedSubgraph::from_subgraph(&sg);
let results = detect_conflicts(&idx, &default_opts());
assert!(results.is_empty());
}
fn nulid_from_u32(n: u32) -> Nulid {
let bytes = n.to_le_bytes();
Nulid::from_bytes([
bytes[0], bytes[1], bytes[2], bytes[3], 0, 0, 0, 0, bytes[0], bytes[1], bytes[2],
bytes[3], 0, 0, 0, 0,
])
}
const LABELS: &[&str] = &["Actor", "Institution", "PublicRecord"];
const BG_REL_TYPES: &[&str] = &["RELATED_TO", "MEMBER_OF", "ASSOCIATED_WITH"];
fn build_benchmark_subgraph(n: usize, e: usize) -> Subgraph {
let mut nodes = Vec::with_capacity(n);
let mut edges = Vec::with_capacity(e);
let mut node_counter: u32 = 1;
let mut edge_counter: u32 = 100_000;
let mut chain_ids = Vec::with_capacity(11);
for i in 0..11 {
let id = nulid_from_u32(node_counter);
let label = if i % 2 == 0 { "Actor" } else { "Institution" };
nodes.push(node(id, label, &format!("chain-{i}")));
chain_ids.push(id);
node_counter += 1;
}
for i in 0..10 {
let rel = if i % 2 == 0 {
"DONATED_TO"
} else {
"APPOINTED_BY"
};
let eid = nulid_from_u32(edge_counter);
edges.push(edge(eid, chain_ids[i], chain_ids[i + 1], rel));
edge_counter += 1;
}
let eid = nulid_from_u32(edge_counter);
edges.push(edge(eid, chain_ids[10], chain_ids[0], "APPOINTED_BY"));
edge_counter += 1;
let hub_actor = nulid_from_u32(node_counter);
nodes.push(node(hub_actor, "Actor", "hub-actor"));
node_counter += 1;
let hub_inst = nulid_from_u32(node_counter);
nodes.push(node(hub_inst, "Institution", "hub-inst"));
node_counter += 1;
for rel in &["DONATED_TO", "CONTRACTED_WITH", "APPOINTED_BY"] {
let eid = nulid_from_u32(edge_counter);
edges.push(edge(eid, hub_actor, hub_inst, rel));
edge_counter += 1;
}
let planted_nodes = nodes.len();
let mut all_node_ids: Vec<Nulid> = nodes.iter().map(|n| n.id).collect();
for _ in planted_nodes..n {
let id = nulid_from_u32(node_counter);
let label = LABELS[node_counter as usize % LABELS.len()];
nodes.push(node(id, label, &format!("bg-{node_counter}")));
all_node_ids.push(id);
node_counter += 1;
}
let planted_edges = edges.len();
let total_nodes = all_node_ids.len();
if total_nodes > 1 {
let remaining_edges = e.saturating_sub(planted_edges);
for i in 0..remaining_edges {
let src_idx = (i * 7 + 3) % total_nodes;
let tgt_idx = (i * 13 + 11) % total_nodes;
if src_idx == tgt_idx {
continue;
}
let rel = BG_REL_TYPES[i % BG_REL_TYPES.len()];
let eid = nulid_from_u32(edge_counter);
edges.push(edge(eid, all_node_ids[src_idx], all_node_ids[tgt_idx], rel));
edge_counter += 1;
}
}
Subgraph { nodes, edges }
}
#[test]
fn bench_detect_100_nodes() {
let sg = build_benchmark_subgraph(100, 500);
let idx = IndexedSubgraph::from_subgraph(&sg);
let opts = default_opts();
let start = std::time::Instant::now();
let results = detect_conflicts(&idx, &opts);
let elapsed = start.elapsed();
eprintln!(
"100 nodes / 500 edges: {:?} ({} patterns found)",
elapsed,
results.len()
);
assert!(
elapsed.as_millis() < 50,
"detection took {}ms, must be < 50ms",
elapsed.as_millis()
);
}
#[test]
fn bench_detect_1000_nodes() {
let sg = build_benchmark_subgraph(1_000, 5_000);
let idx = IndexedSubgraph::from_subgraph(&sg);
let opts = default_opts();
let start = std::time::Instant::now();
let results = detect_conflicts(&idx, &opts);
let elapsed = start.elapsed();
eprintln!(
"1000 nodes / 5000 edges: {:?} ({} patterns found)",
elapsed,
results.len()
);
assert!(
elapsed.as_millis() < 50,
"detection took {}ms, must be < 50ms",
elapsed.as_millis()
);
}
#[test]
fn bench_detect_10000_nodes() {
let sg = build_benchmark_subgraph(10_000, 50_000);
let idx = IndexedSubgraph::from_subgraph(&sg);
let opts = default_opts();
let start = std::time::Instant::now();
let results = detect_conflicts(&idx, &opts);
let elapsed = start.elapsed();
eprintln!(
"10000 nodes / 50000 edges: {:?} ({} patterns found)",
elapsed,
results.len()
);
assert!(
elapsed.as_millis() < 200,
"detection took {}ms, must be < 200ms",
elapsed.as_millis()
);
}
#[test]
fn bench_indexing_10000_nodes() {
let sg = build_benchmark_subgraph(10_000, 50_000);
let start = std::time::Instant::now();
let _idx = IndexedSubgraph::from_subgraph(&sg);
let elapsed = start.elapsed();
eprintln!("indexing 10000 nodes / 50000 edges: {elapsed:?}");
assert!(
elapsed.as_millis() < 200,
"indexing took {}ms, must be < 200ms",
elapsed.as_millis()
);
}
#[test]
fn bench_deep_chain_traversal() {
let mut nodes = Vec::with_capacity(11);
let mut edges_vec = Vec::with_capacity(12);
let mut chain_ids = Vec::with_capacity(11);
for i in 0u32..11 {
let id = nulid_from_u32(i + 1);
let label = if i % 2 == 0 { "Actor" } else { "Institution" };
nodes.push(node(id, label, &format!("deep-{i}")));
chain_ids.push(id);
}
for i in 0..10 {
let rel = if i % 2 == 0 {
"DONATED_TO"
} else {
"APPOINTED_BY"
};
let eid = nulid_from_u32(200 + u32::try_from(i).unwrap());
edges_vec.push(edge(eid, chain_ids[i], chain_ids[i + 1], rel));
}
let eid = nulid_from_u32(210);
edges_vec.push(edge(eid, chain_ids[1], chain_ids[0], "APPOINTED_BY"));
let sg = Subgraph {
nodes,
edges: edges_vec,
};
let idx = IndexedSubgraph::from_subgraph(&sg);
let opts = default_opts();
let start = std::time::Instant::now();
let results = detect_conflicts(&idx, &opts);
let elapsed = start.elapsed();
eprintln!(
"deep 10-degree chain: {:?} ({} patterns found)",
elapsed,
results.len()
);
assert!(
elapsed.as_millis() < 50,
"detection took {}ms, must be < 50ms",
elapsed.as_millis()
);
assert!(
!results.is_empty(),
"expected at least one pattern on the planted cycle chain"
);
}
}
#[cfg(test)]
mod proptests {
use super::*;
use crate::graph::Subgraph;
use crate::{Edge, Node};
use proptest::prelude::*;
use std::collections::HashSet;
fn arb_nulid() -> impl Strategy<Value = Nulid> {
any::<[u8; 16]>().prop_map(Nulid::from_bytes)
}
const LABELS: &[&str] = &["Actor", "Institution", "PublicRecord"];
const ALL_REL_TYPES: &[&str] = &[
"DONATED_TO",
"APPOINTED_BY",
"CONTRACTED_WITH",
"EMPLOYED_BY",
"LOBBIED_FOR",
"FAMILY_OF",
"RELATED_TO",
"MEMBER_OF",
"FUNDED_BY",
"OWNS",
];
fn arb_node() -> impl Strategy<Value = Node> {
(arb_nulid(), 0..LABELS.len()).prop_map(|(id, label_idx)| Node {
id,
label: LABELS[label_idx].to_string(),
name: format!("node-{id}"),
})
}
fn arb_edge(node_ids: Vec<Nulid>) -> impl Strategy<Value = Edge> {
let n = node_ids.len();
(arb_nulid(), 0..n, 0..n, 0..ALL_REL_TYPES.len()).prop_map(
move |(id, src_idx, tgt_idx, rel_idx)| Edge {
id,
source_id: node_ids[src_idx],
target_id: node_ids[tgt_idx],
rel_type: ALL_REL_TYPES[rel_idx].to_string(),
source_urls: vec!["https://example.com".to_string()],
},
)
}
fn arb_subgraph() -> impl Strategy<Value = Subgraph> {
prop::collection::vec(arb_node(), 2..=30).prop_flat_map(|nodes| {
let mut seen = HashSet::new();
let deduped: Vec<Node> = nodes.into_iter().filter(|n| seen.insert(n.id)).collect();
let node_ids: Vec<Nulid> = deduped.iter().map(|n| n.id).collect();
let edge_count = deduped.len() * 2;
prop::collection::vec(arb_edge(node_ids), 0..=edge_count).prop_map(move |edges| {
Subgraph {
nodes: deduped.clone(),
edges,
}
})
})
}
fn default_opts() -> DetectOpts {
DetectOpts::default()
}
proptest! {
#[test]
fn never_panics(sg in arb_subgraph()) {
let idx = IndexedSubgraph::from_subgraph(&sg);
let _ = detect_conflicts(&idx, &default_opts());
}
#[test]
fn deterministic(sg in arb_subgraph()) {
let idx = IndexedSubgraph::from_subgraph(&sg);
let opts = default_opts();
let mut a = detect_conflicts(&idx, &opts);
let mut b = detect_conflicts(&idx, &opts);
a.sort_by(|x, y| x.pattern_id.cmp(&y.pattern_id).then(x.description.cmp(&y.description)));
b.sort_by(|x, y| x.pattern_id.cmp(&y.pattern_id).then(x.description.cmp(&y.description)));
prop_assert_eq!(a.len(), b.len(), "different result count");
for (pa, pb) in a.iter().zip(b.iter()) {
prop_assert_eq!(&pa.pattern_id, &pb.pattern_id);
prop_assert_eq!(&pa.nodes, &pb.nodes);
prop_assert_eq!(&pa.edges, &pb.edges);
}
}
#[test]
fn results_reference_valid_ids(sg in arb_subgraph()) {
let node_ids: HashSet<Nulid> = sg.nodes.iter().map(|n| n.id).collect();
let edge_ids: HashSet<Nulid> = sg.edges.iter().map(|e| e.id).collect();
let idx = IndexedSubgraph::from_subgraph(&sg);
let results = detect_conflicts(&idx, &default_opts());
for pat in &results {
for nid in &pat.nodes {
prop_assert!(
node_ids.contains(nid),
"pattern {} references unknown node {}",
pat.pattern_id,
nid
);
}
for eid in &pat.edges {
prop_assert!(
edge_ids.contains(eid),
"pattern {} references unknown edge {}",
pat.pattern_id,
eid
);
}
}
}
#[test]
fn severity_matches_pattern_id(sg in arb_subgraph()) {
let idx = IndexedSubgraph::from_subgraph(&sg);
let results = detect_conflicts(&idx, &default_opts());
for pat in &results {
let expected = match pat.pattern_id.as_str() {
"COI-001" | "COI-002" | "COI-003" => Severity::High,
"COI-004" | "COI-005" => Severity::Medium,
"COI-006" => Severity::Low,
_ => continue,
};
prop_assert_eq!(
pat.severity, expected,
"pattern {} has wrong severity",
pat.pattern_id
);
}
}
#[test]
fn results_are_well_formed(sg in arb_subgraph()) {
let idx = IndexedSubgraph::from_subgraph(&sg);
let results = detect_conflicts(&idx, &default_opts());
for pat in &results {
prop_assert!(!pat.description.is_empty(), "empty description");
prop_assert!(!pat.nodes.is_empty(), "no nodes in pattern {}", pat.pattern_id);
prop_assert!(!pat.edges.is_empty(), "no edges in pattern {}", pat.pattern_id);
prop_assert!(!pat.path.is_empty(), "no path in pattern {}", pat.pattern_id);
prop_assert!(!pat.pattern_name.is_empty(), "empty pattern name");
}
}
#[test]
fn nonexistent_pattern_filter_returns_empty(sg in arb_subgraph()) {
let idx = IndexedSubgraph::from_subgraph(&sg);
let opts = DetectOpts {
max_depth: 6,
pattern_ids: Some(["DOES-NOT-EXIST".to_string()].into_iter().collect()),
};
let results = detect_conflicts(&idx, &opts);
prop_assert!(results.is_empty());
}
#[test]
fn zero_depth_finds_only_hubs(sg in arb_subgraph()) {
let idx = IndexedSubgraph::from_subgraph(&sg);
let opts = DetectOpts {
max_depth: 0,
pattern_ids: None,
};
let results = detect_conflicts(&idx, &opts);
for pat in &results {
prop_assert_eq!(
&pat.pattern_id, "COI-006",
"non-hub pattern found with max_depth 0"
);
}
}
}
fn make_nulid(n: u8) -> Nulid {
Nulid::from_bytes([n; 16])
}
proptest! {
#[test]
fn planted_coi001_always_detected(
extra_node_count in 0usize..20,
extra_edge_count in 0usize..40,
) {
let a = make_nulid(1);
let b = make_nulid(2);
let e1 = make_nulid(10);
let e2 = make_nulid(11);
let mut nodes = vec![
Node { id: a, label: "Actor".into(), name: "Alice".into() },
Node { id: b, label: "Institution".into(), name: "Gov".into() },
];
let mut edges = vec![
Edge { id: e1, source_id: a, target_id: b, rel_type: "DONATED_TO".into(), source_urls: vec!["https://example.com".into()] },
Edge { id: e2, source_id: b, target_id: a, rel_type: "APPOINTED_BY".into(), source_urls: vec!["https://example.com".into()] },
];
for i in 0..extra_node_count {
let id = make_nulid(100 + u8::try_from(i).unwrap());
let label = LABELS[i % LABELS.len()];
nodes.push(Node { id, label: label.into(), name: format!("extra-{i}") });
}
let all_ids: Vec<Nulid> = nodes.iter().map(|n| n.id).collect();
for i in 0..extra_edge_count {
let eid = make_nulid(200 + u8::try_from(i).unwrap());
let src = all_ids[(i * 3 + 1) % all_ids.len()];
let tgt = all_ids[(i * 7 + 5) % all_ids.len()];
edges.push(Edge { id: eid, source_id: src, target_id: tgt, rel_type: "RELATED_TO".into(), source_urls: vec!["https://example.com".into()] });
}
let sg = Subgraph { nodes, edges };
let idx = IndexedSubgraph::from_subgraph(&sg);
let results = detect_conflicts(&idx, &default_opts());
let has_coi001 = results.iter().any(|r| r.pattern_id == "COI-001");
prop_assert!(has_coi001, "COI-001 not detected despite planted cycle");
}
#[test]
fn planted_coi006_always_detected(
extra_node_count in 0usize..20,
) {
let actor = make_nulid(1);
let inst = make_nulid(2);
let mut nodes = vec![
Node { id: actor, label: "Actor".into(), name: "Hub".into() },
Node { id: inst, label: "Institution".into(), name: "Gov".into() },
];
let edges = vec![
Edge { id: make_nulid(10), source_id: actor, target_id: inst, rel_type: "DONATED_TO".into(), source_urls: vec!["https://example.com".into()] },
Edge { id: make_nulid(11), source_id: actor, target_id: inst, rel_type: "CONTRACTED_WITH".into(), source_urls: vec!["https://example.com".into()] },
Edge { id: make_nulid(12), source_id: actor, target_id: inst, rel_type: "APPOINTED_BY".into(), source_urls: vec!["https://example.com".into()] },
];
for i in 0..extra_node_count {
let id = make_nulid(100 + u8::try_from(i).unwrap());
nodes.push(Node { id, label: "PublicRecord".into(), name: format!("extra-{i}") });
}
let sg = Subgraph { nodes, edges };
let idx = IndexedSubgraph::from_subgraph(&sg);
let results = detect_conflicts(&idx, &default_opts());
let has_coi006 = results.iter().any(|r| r.pattern_id == "COI-006");
prop_assert!(has_coi006, "COI-006 not detected despite planted hub");
}
}
}