weave-graph 0.2.16

Graph conflict detection and pattern matching for OSINT knowledge graphs
Documentation
//! In-memory subgraph representation optimised for traversal.
//!
//! The [`Subgraph`] struct builds an adjacency list on construction so
//! that neighbor lookups are O(1).  It is the input to all detection
//! algorithms in [`crate::detect`].

use std::collections::HashMap;

use nulid::Nulid;
use serde::{Deserialize, Serialize};

use crate::{Edge, Node};

/// A self-contained subgraph extracted from Neo4j.
///
/// Nodes and edges are stored by value and indexed by their [`Nulid`]
/// IDs.  An adjacency list maps each node ID to its outgoing edges so
/// traversal does not require linear scans.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Subgraph {
    pub nodes: Vec<Node>,
    pub edges: Vec<Edge>,
}

/// Indexed form of a [`Subgraph`] for efficient traversal.
///
/// Built once via [`IndexedSubgraph::from_subgraph`] and then passed
/// to the detection functions.
#[derive(Debug)]
pub struct IndexedSubgraph {
    /// All nodes keyed by their NULID.
    pub nodes: HashMap<Nulid, Node>,
    /// All edges keyed by their NULID.
    pub edges: HashMap<Nulid, Edge>,
    /// Outgoing adjacency list: source node ID -> list of edge indices
    /// (referencing [`Self::edge_list`]).
    pub outgoing: HashMap<Nulid, Vec<usize>>,
    /// Flat edge list for index-based access.
    pub edge_list: Vec<Edge>,
}

impl IndexedSubgraph {
    /// Build an indexed subgraph from the raw deserialized input.
    #[must_use]
    pub fn from_subgraph(sg: &Subgraph) -> Self {
        let nodes: HashMap<Nulid, Node> = sg.nodes.iter().map(|n| (n.id, n.clone())).collect();

        let edges: HashMap<Nulid, Edge> = sg.edges.iter().map(|e| (e.id, e.clone())).collect();

        let mut outgoing: HashMap<Nulid, Vec<usize>> = HashMap::new();
        let edge_list: Vec<Edge> = sg.edges.clone();

        for (idx, edge) in edge_list.iter().enumerate() {
            outgoing.entry(edge.source_id).or_default().push(idx);
        }

        Self {
            nodes,
            edges,
            outgoing,
            edge_list,
        }
    }

    /// Return outgoing edges for a given node ID.
    #[inline]
    pub fn outgoing_edges(&self, node_id: Nulid) -> &[usize] {
        self.outgoing.get(&node_id).map_or(&[], Vec::as_slice)
    }

    /// Return the label of a node, or `None` if not in the subgraph.
    #[inline]
    #[must_use]
    pub fn node_label(&self, node_id: Nulid) -> Option<&str> {
        self.nodes.get(&node_id).map(|n| n.label.as_str())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    fn make_nulid(n: u8) -> Nulid {
        Nulid::from_bytes([n; 16])
    }

    fn make_node(id: Nulid, label: &str, name: &str) -> Node {
        Node {
            id,
            label: label.to_string(),
            name: name.to_string(),
        }
    }

    fn make_edge(id: Nulid, src: Nulid, tgt: Nulid, rel: &str) -> Edge {
        Edge {
            id,
            source_id: src,
            target_id: tgt,
            rel_type: rel.to_string(),
            source_urls: vec!["https://example.com".to_string()],
        }
    }

    #[test]
    fn indexed_subgraph_builds_adjacency_list() {
        let a = make_nulid(1);
        let b = make_nulid(2);
        let e1 = make_nulid(10);
        let e2 = make_nulid(11);

        let sg = Subgraph {
            nodes: vec![
                make_node(a, "Person", "Alice"),
                make_node(b, "Person", "Bob"),
            ],
            edges: vec![
                make_edge(e1, a, b, "PAID_TO"),
                make_edge(e2, b, a, "APPOINTED_BY"),
            ],
        };

        let idx = IndexedSubgraph::from_subgraph(&sg);

        assert_eq!(idx.nodes.len(), 2);
        assert_eq!(idx.edges.len(), 2);
        assert_eq!(idx.outgoing_edges(a).len(), 1);
        assert_eq!(idx.outgoing_edges(b).len(), 1);
        assert_eq!(idx.edge_list[idx.outgoing_edges(a)[0]].rel_type, "PAID_TO");
        assert_eq!(
            idx.edge_list[idx.outgoing_edges(b)[0]].rel_type,
            "APPOINTED_BY"
        );
    }

    #[test]
    fn node_label_lookup() {
        let a = make_nulid(1);
        let sg = Subgraph {
            nodes: vec![make_node(a, "Person", "Alice")],
            edges: vec![],
        };
        let idx = IndexedSubgraph::from_subgraph(&sg);

        assert_eq!(idx.node_label(a), Some("Person"));
        assert_eq!(idx.node_label(make_nulid(99)), None);
    }
}