Skip to main content

oxirs_arq/analytics/
stats.rs

1//! Graph statistics summary
2
3use super::adapter::{NodeId, RdfGraphAdapter};
4use super::components::ConnectedComponents;
5
6/// Summary statistics for an RDF graph.
7#[derive(Debug, Clone)]
8pub struct GraphStatsSummary {
9    /// Number of unique IRI nodes.
10    pub node_count: usize,
11    /// Number of unique directed edges.
12    pub edge_count: usize,
13    /// Average total degree (in + out) across all nodes.
14    pub avg_degree: f64,
15    /// Maximum total degree of any single node.
16    pub max_degree: usize,
17    /// Graph density: `edges / (nodes * (nodes - 1))` for a directed graph.
18    pub density: f64,
19    /// `true` when the graph is a directed acyclic graph (no strongly connected
20    /// components of size > 1).
21    pub is_dag: bool,
22    /// Number of weakly connected components.
23    pub component_count: usize,
24}
25
26/// Compute summary statistics for an RDF graph.
27pub struct GraphStats;
28
29impl GraphStats {
30    /// Compute all summary statistics in one pass.
31    pub fn compute(graph: &RdfGraphAdapter) -> GraphStatsSummary {
32        let n = graph.node_count();
33        let e = graph.edge_count();
34
35        // Degree statistics
36        let degrees: Vec<usize> = (0..n)
37            .map(|u| graph.adjacency[u].len() + graph.reverse_adjacency[u].len())
38            .collect();
39        let max_degree = degrees.iter().copied().max().unwrap_or(0);
40        let avg_degree = if n == 0 {
41            0.0
42        } else {
43            degrees.iter().sum::<usize>() as f64 / n as f64
44        };
45
46        // Density
47        let density = if n <= 1 {
48            0.0
49        } else {
50            e as f64 / (n as f64 * (n - 1) as f64)
51        };
52
53        // is_dag: true iff every SCC has exactly one node
54        let sccs = ConnectedComponents::strongly_connected(graph);
55        let is_dag = sccs.iter().all(|c| c.len() == 1);
56
57        // Component count (weakly connected)
58        let components = ConnectedComponents::weakly_connected(graph);
59        let component_count = components.len();
60
61        GraphStatsSummary {
62            node_count: n,
63            edge_count: e,
64            avg_degree,
65            max_degree,
66            density,
67            is_dag,
68            component_count,
69        }
70    }
71}
72
73/// Check whether a graph contains a cycle (quick wrapper).
74pub fn is_dag(graph: &RdfGraphAdapter) -> bool {
75    GraphStats::compute(graph).is_dag
76}
77
78/// Check whether a node is reachable from another using BFS.
79pub fn is_reachable(graph: &RdfGraphAdapter, from: NodeId, to: NodeId) -> bool {
80    use std::collections::VecDeque;
81    let n = graph.node_count();
82    if from >= n || to >= n {
83        return false;
84    }
85    if from == to {
86        return true;
87    }
88    let mut visited = vec![false; n];
89    visited[from] = true;
90    let mut queue: VecDeque<NodeId> = VecDeque::new();
91    queue.push_back(from);
92    while let Some(u) = queue.pop_front() {
93        for &(v, _) in &graph.adjacency[u] {
94            if v == to {
95                return true;
96            }
97            if !visited[v] {
98                visited[v] = true;
99                queue.push_back(v);
100            }
101        }
102    }
103    false
104}
105
106#[cfg(test)]
107mod tests {
108    use super::*;
109
110    fn build_graph(edges: &[(&str, &str)]) -> RdfGraphAdapter {
111        let triples: Vec<(String, String, String)> = edges
112            .iter()
113            .map(|(s, o)| (s.to_string(), "ex:rel".to_string(), o.to_string()))
114            .collect();
115        RdfGraphAdapter::from_triples(&triples)
116    }
117
118    #[test]
119    fn test_stats_empty() {
120        let g = RdfGraphAdapter::from_triples(&[]);
121        let s = GraphStats::compute(&g);
122        assert_eq!(s.node_count, 0);
123        assert_eq!(s.edge_count, 0);
124        assert_eq!(s.avg_degree, 0.0);
125        assert_eq!(s.max_degree, 0);
126        assert_eq!(s.density, 0.0);
127        assert!(s.is_dag);
128        assert_eq!(s.component_count, 0);
129    }
130
131    #[test]
132    fn test_stats_single_edge() {
133        let g = build_graph(&[("ex:A", "ex:B")]);
134        let s = GraphStats::compute(&g);
135        assert_eq!(s.node_count, 2);
136        assert_eq!(s.edge_count, 1);
137        assert!(s.is_dag);
138        assert_eq!(s.component_count, 1);
139    }
140
141    #[test]
142    fn test_stats_cycle_not_dag() {
143        let g = build_graph(&[("ex:A", "ex:B"), ("ex:B", "ex:A")]);
144        let s = GraphStats::compute(&g);
145        assert!(!s.is_dag, "graph with cycle should not be a DAG");
146    }
147
148    #[test]
149    fn test_stats_dag() {
150        let g = build_graph(&[("ex:A", "ex:B"), ("ex:B", "ex:C"), ("ex:A", "ex:C")]);
151        let s = GraphStats::compute(&g);
152        assert!(s.is_dag);
153    }
154
155    #[test]
156    fn test_stats_density() {
157        // 3 nodes, 2 edges → density = 2 / (3*2) = 0.333...
158        let g = build_graph(&[("ex:A", "ex:B"), ("ex:B", "ex:C")]);
159        let s = GraphStats::compute(&g);
160        assert!((s.density - 2.0 / 6.0).abs() < 1e-9);
161    }
162
163    #[test]
164    fn test_stats_component_count() {
165        let g = build_graph(&[("ex:A", "ex:B"), ("ex:C", "ex:D"), ("ex:E", "ex:F")]);
166        let s = GraphStats::compute(&g);
167        assert_eq!(s.component_count, 3);
168    }
169
170    #[test]
171    fn test_stats_avg_degree() {
172        // A→B: A has out-deg 1, in-deg 0 → total 1
173        //       B has out-deg 0, in-deg 1 → total 1
174        // avg = 1.0
175        let g = build_graph(&[("ex:A", "ex:B")]);
176        let s = GraphStats::compute(&g);
177        assert!((s.avg_degree - 1.0).abs() < 1e-9);
178    }
179
180    #[test]
181    fn test_stats_max_degree() {
182        // Hub: A→B, A→C, A→D, B→A   → A has out 3 + in 1 = 4
183        let g = build_graph(&[
184            ("ex:A", "ex:B"),
185            ("ex:A", "ex:C"),
186            ("ex:A", "ex:D"),
187            ("ex:B", "ex:A"),
188        ]);
189        let s = GraphStats::compute(&g);
190        assert!(s.max_degree >= 4);
191    }
192
193    #[test]
194    fn test_is_dag_helper() {
195        let g_dag = build_graph(&[("ex:A", "ex:B"), ("ex:B", "ex:C")]);
196        let g_cycle = build_graph(&[("ex:A", "ex:B"), ("ex:B", "ex:A")]);
197        assert!(is_dag(&g_dag));
198        assert!(!is_dag(&g_cycle));
199    }
200
201    #[test]
202    fn test_is_reachable() {
203        let g = build_graph(&[("ex:A", "ex:B"), ("ex:B", "ex:C")]);
204        let a = g.get_node_id("ex:A").unwrap();
205        let b = g.get_node_id("ex:B").unwrap();
206        let c = g.get_node_id("ex:C").unwrap();
207        assert!(is_reachable(&g, a, c));
208        assert!(!is_reachable(&g, c, a)); // directed: no path back
209        assert!(is_reachable(&g, a, b));
210        assert!(is_reachable(&g, a, a)); // self
211    }
212
213    #[test]
214    fn test_is_reachable_out_of_range() {
215        let g = build_graph(&[("ex:A", "ex:B")]);
216        let a = g.get_node_id("ex:A").unwrap();
217        assert!(!is_reachable(&g, a, 999));
218        assert!(!is_reachable(&g, 999, a));
219    }
220}