Skip to main content

fabryk_graph/
stats.rs

1//! Graph statistics and analysis.
2//!
3//! Provides functions for analysing graph structure and composition,
4//! including degree distribution, category breakdowns, and top-node
5//! rankings.
6
7use crate::GraphData;
8use petgraph::Direction;
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11
12// ============================================================================
13// Types
14// ============================================================================
15
16/// Comprehensive statistics about a graph.
17#[derive(Clone, Debug, Serialize, Deserialize)]
18pub struct GraphStats {
19    /// Total number of nodes.
20    pub node_count: usize,
21    /// Total number of edges.
22    pub edge_count: usize,
23    /// Number of canonical nodes.
24    pub canonical_count: usize,
25    /// Number of variant nodes.
26    pub variant_count: usize,
27    /// Nodes per category.
28    pub category_distribution: HashMap<String, usize>,
29    /// Edges per relationship type.
30    pub relationship_distribution: HashMap<String, usize>,
31    /// Nodes without any edges (orphans).
32    pub orphan_count: usize,
33    /// Average edges per node (in + out).
34    pub avg_degree: f32,
35    /// Maximum in-degree (most dependencies).
36    pub max_in_degree: usize,
37    /// Maximum out-degree (most dependents).
38    pub max_out_degree: usize,
39    /// Node with highest in-degree.
40    pub most_depended_on: Option<String>,
41    /// Node with highest out-degree.
42    pub most_dependencies: Option<String>,
43}
44
45/// Direction for degree calculation.
46#[derive(Clone, Copy, Debug)]
47pub enum DegreeDirection {
48    /// Incoming edges only.
49    In,
50    /// Outgoing edges only.
51    Out,
52    /// Both directions.
53    Both,
54}
55
56// ============================================================================
57// Functions
58// ============================================================================
59
60/// Compute comprehensive statistics for a graph.
61pub fn compute_stats(graph: &GraphData) -> GraphStats {
62    let node_count = graph.node_count();
63    let edge_count = graph.edge_count();
64
65    // Count canonical vs variant
66    let mut canonical_count = 0;
67    let mut variant_count = 0;
68    for node in graph.iter_nodes() {
69        if node.is_canonical {
70            canonical_count += 1;
71        } else {
72            variant_count += 1;
73        }
74    }
75
76    // Category distribution
77    let mut category_distribution: HashMap<String, usize> = HashMap::new();
78    for node in graph.iter_nodes() {
79        let cat = node
80            .category
81            .clone()
82            .unwrap_or_else(|| "uncategorized".to_string());
83        *category_distribution.entry(cat).or_insert(0) += 1;
84    }
85
86    // Relationship distribution
87    let mut relationship_distribution: HashMap<String, usize> = HashMap::new();
88    for edge in graph.iter_edges() {
89        let rel = edge.relationship.name().to_string();
90        *relationship_distribution.entry(rel).or_insert(0) += 1;
91    }
92
93    // Degree analysis
94    let mut in_degrees: HashMap<String, usize> = HashMap::new();
95    let mut out_degrees: HashMap<String, usize> = HashMap::new();
96
97    for edge in graph.iter_edges() {
98        *in_degrees.entry(edge.to.clone()).or_insert(0) += 1;
99        *out_degrees.entry(edge.from.clone()).or_insert(0) += 1;
100    }
101
102    // Initialise all nodes with 0 degree
103    for node_id in graph.node_ids() {
104        in_degrees.entry(node_id.to_string()).or_insert(0);
105        out_degrees.entry(node_id.to_string()).or_insert(0);
106    }
107
108    let orphan_count = graph
109        .node_ids()
110        .filter(|id| {
111            in_degrees.get(*id).copied().unwrap_or(0) == 0
112                && out_degrees.get(*id).copied().unwrap_or(0) == 0
113        })
114        .count();
115
116    let total_degree: usize =
117        in_degrees.values().sum::<usize>() + out_degrees.values().sum::<usize>();
118    let avg_degree = if node_count > 0 {
119        total_degree as f32 / node_count as f32
120    } else {
121        0.0
122    };
123
124    let (most_depended_on, max_in_degree) = in_degrees
125        .iter()
126        .max_by_key(|(_, v)| *v)
127        .map(|(k, v)| (Some(k.clone()), *v))
128        .unwrap_or((None, 0));
129
130    let (most_dependencies, max_out_degree) = out_degrees
131        .iter()
132        .max_by_key(|(_, v)| *v)
133        .map(|(k, v)| (Some(k.clone()), *v))
134        .unwrap_or((None, 0));
135
136    GraphStats {
137        node_count,
138        edge_count,
139        canonical_count,
140        variant_count,
141        category_distribution,
142        relationship_distribution,
143        orphan_count,
144        avg_degree,
145        max_in_degree,
146        max_out_degree,
147        most_depended_on,
148        most_dependencies,
149    }
150}
151
152/// Get a quick summary of graph size.
153pub fn quick_summary(graph: &GraphData) -> String {
154    format!("{} nodes, {} edges", graph.node_count(), graph.edge_count())
155}
156
157/// Get top N nodes by degree.
158pub fn top_nodes_by_degree(
159    graph: &GraphData,
160    limit: usize,
161    direction: DegreeDirection,
162) -> Vec<(String, usize)> {
163    let mut scores: Vec<(String, usize)> = graph
164        .iter_nodes()
165        .map(|node| {
166            let idx = graph.get_index(&node.id).unwrap();
167            let degree = match direction {
168                DegreeDirection::In => graph.graph.edges_directed(idx, Direction::Incoming).count(),
169                DegreeDirection::Out => {
170                    graph.graph.edges_directed(idx, Direction::Outgoing).count()
171                }
172                DegreeDirection::Both => {
173                    graph.graph.edges_directed(idx, Direction::Incoming).count()
174                        + graph.graph.edges_directed(idx, Direction::Outgoing).count()
175                }
176            };
177            (node.id.clone(), degree)
178        })
179        .collect();
180
181    scores.sort_by(|a, b| b.1.cmp(&a.1));
182    scores.truncate(limit);
183    scores
184}
185
186// ============================================================================
187// Tests
188// ============================================================================
189
190#[cfg(test)]
191mod tests {
192    use super::*;
193    use crate::types::*;
194
195    fn create_test_graph() -> GraphData {
196        let mut graph = GraphData::new();
197
198        graph.add_node(Node::new("a", "A").with_category("basics"));
199        graph.add_node(Node::new("b", "B").with_category("basics"));
200        graph.add_node(Node::new("c", "C").with_category("advanced"));
201        graph.add_node(Node::new("d", "D")); // no category
202        graph.add_node(Node::new("orphan", "Orphan").with_category("basics"));
203
204        graph
205            .add_edge(Edge::new("a", "b", Relationship::Prerequisite))
206            .unwrap();
207        graph
208            .add_edge(Edge::new("b", "c", Relationship::Prerequisite))
209            .unwrap();
210        graph
211            .add_edge(Edge::new("a", "c", Relationship::RelatesTo))
212            .unwrap();
213        graph
214            .add_edge(Edge::new("c", "d", Relationship::LeadsTo))
215            .unwrap();
216
217        graph
218    }
219
220    #[test]
221    fn test_compute_stats_basic_counts() {
222        let graph = create_test_graph();
223        let stats = compute_stats(&graph);
224
225        assert_eq!(stats.node_count, 5);
226        assert_eq!(stats.edge_count, 4);
227    }
228
229    #[test]
230    fn test_compute_stats_canonical_vs_variant() {
231        let mut graph = GraphData::new();
232        graph.add_node(Node::new("a", "A")); // canonical
233        graph.add_node(Node::new("b", "B").as_variant_of("a")); // variant
234
235        let stats = compute_stats(&graph);
236
237        assert_eq!(stats.canonical_count, 1);
238        assert_eq!(stats.variant_count, 1);
239    }
240
241    #[test]
242    fn test_compute_stats_category_distribution() {
243        let graph = create_test_graph();
244        let stats = compute_stats(&graph);
245
246        assert_eq!(stats.category_distribution["basics"], 3); // a, b, orphan
247        assert_eq!(stats.category_distribution["advanced"], 1); // c
248        assert_eq!(stats.category_distribution["uncategorized"], 1); // d
249    }
250
251    #[test]
252    fn test_compute_stats_relationship_distribution() {
253        let graph = create_test_graph();
254        let stats = compute_stats(&graph);
255
256        assert_eq!(stats.relationship_distribution["prerequisite"], 2);
257        assert_eq!(stats.relationship_distribution["relates_to"], 1);
258        assert_eq!(stats.relationship_distribution["leads_to"], 1);
259    }
260
261    #[test]
262    fn test_compute_stats_orphans() {
263        let graph = create_test_graph();
264        let stats = compute_stats(&graph);
265
266        assert_eq!(stats.orphan_count, 1); // "orphan" node
267    }
268
269    #[test]
270    fn test_compute_stats_avg_degree() {
271        let graph = create_test_graph();
272        let stats = compute_stats(&graph);
273
274        // 4 edges: each contributes 1 in-degree and 1 out-degree = 8 total degree
275        // 5 nodes: avg = 8/5 = 1.6
276        assert!((stats.avg_degree - 1.6).abs() < 0.01);
277    }
278
279    #[test]
280    fn test_compute_stats_max_degrees() {
281        let graph = create_test_graph();
282        let stats = compute_stats(&graph);
283
284        // a has out-degree 2 (a->b, a->c)
285        assert_eq!(stats.max_out_degree, 2);
286        // c has in-degree 2 (b->c, a->c)
287        assert_eq!(stats.max_in_degree, 2);
288    }
289
290    #[test]
291    fn test_compute_stats_empty_graph() {
292        let graph = GraphData::new();
293        let stats = compute_stats(&graph);
294
295        assert_eq!(stats.node_count, 0);
296        assert_eq!(stats.edge_count, 0);
297        assert_eq!(stats.orphan_count, 0);
298        assert_eq!(stats.avg_degree, 0.0);
299        assert!(stats.most_depended_on.is_none());
300        assert!(stats.most_dependencies.is_none());
301    }
302
303    #[test]
304    fn test_quick_summary() {
305        let graph = create_test_graph();
306        let summary = quick_summary(&graph);
307
308        assert_eq!(summary, "5 nodes, 4 edges");
309    }
310
311    #[test]
312    fn test_quick_summary_empty() {
313        let graph = GraphData::new();
314        let summary = quick_summary(&graph);
315
316        assert_eq!(summary, "0 nodes, 0 edges");
317    }
318
319    #[test]
320    fn test_top_nodes_by_degree_in() {
321        let graph = create_test_graph();
322        let top = top_nodes_by_degree(&graph, 2, DegreeDirection::In);
323
324        assert_eq!(top.len(), 2);
325        // c has in-degree 2 (highest)
326        assert_eq!(top[0].0, "c");
327        assert_eq!(top[0].1, 2);
328    }
329
330    #[test]
331    fn test_top_nodes_by_degree_out() {
332        let graph = create_test_graph();
333        let top = top_nodes_by_degree(&graph, 2, DegreeDirection::Out);
334
335        assert_eq!(top.len(), 2);
336        // a has out-degree 2 (highest)
337        assert_eq!(top[0].0, "a");
338        assert_eq!(top[0].1, 2);
339    }
340
341    #[test]
342    fn test_top_nodes_by_degree_both() {
343        let graph = create_test_graph();
344        let top = top_nodes_by_degree(&graph, 3, DegreeDirection::Both);
345
346        assert_eq!(top.len(), 3);
347        // All non-orphan nodes should have degree > 0
348        assert!(top[0].1 > 0);
349    }
350
351    #[test]
352    fn test_top_nodes_by_degree_empty_graph() {
353        let graph = GraphData::new();
354        let top = top_nodes_by_degree(&graph, 5, DegreeDirection::Both);
355
356        assert!(top.is_empty());
357    }
358
359    #[test]
360    fn test_top_nodes_limit() {
361        let graph = create_test_graph();
362        let top = top_nodes_by_degree(&graph, 1, DegreeDirection::Both);
363
364        assert_eq!(top.len(), 1);
365    }
366
367    #[test]
368    fn test_graph_stats_serialization() {
369        let graph = create_test_graph();
370        let stats = compute_stats(&graph);
371
372        let json = serde_json::to_string(&stats).unwrap();
373        let parsed: GraphStats = serde_json::from_str(&json).unwrap();
374
375        assert_eq!(parsed.node_count, stats.node_count);
376        assert_eq!(parsed.edge_count, stats.edge_count);
377        assert_eq!(parsed.orphan_count, stats.orphan_count);
378    }
379}