Skip to main content

lore_engine/engine/
graph.rs

1//! In-memory directed graph of wiki pages and their links.
2//!
3//! Backed by `petgraph::DiGraph`. Nodes are pages (real or placeholder),
4//! edges are wiki links. Used for graph visualization, backlink queries,
5//! and orphan cleanup.
6
7use petgraph::graph::{DiGraph, NodeIndex};
8use petgraph::Direction;
9use serde::Serialize;
10use std::collections::{HashMap, HashSet};
11
12/// A node in the wiki graph.
13#[derive(Debug, Serialize, Clone)]
14pub struct GraphNode {
15    pub slug: String,
16    pub title: String,
17    pub is_placeholder: bool,
18}
19
20/// A directed edge in the wiki graph (source page links to target page).
21#[derive(Debug, Serialize, Clone)]
22pub struct GraphEdge {
23    pub source: String,
24    pub target: String,
25}
26
27/// In-memory directed graph of wiki pages and their `[[wikilink]]` connections.
28pub struct WikiGraph {
29    graph: DiGraph<GraphNode, ()>,
30    index_map: HashMap<String, NodeIndex>,
31}
32
33impl Default for WikiGraph {
34    fn default() -> Self {
35        Self::new()
36    }
37}
38
39impl WikiGraph {
40    pub fn new() -> Self {
41        Self {
42            graph: DiGraph::new(),
43            index_map: HashMap::new(),
44        }
45    }
46
47    /// Add or update a node. Returns its `NodeIndex`.
48    pub fn upsert_node(&mut self, slug: &str, title: &str, is_placeholder: bool) -> NodeIndex {
49        if let Some(&idx) = self.index_map.get(slug) {
50            if let Some(node) = self.graph.node_weight_mut(idx) {
51                node.title = title.to_string();
52                node.is_placeholder = is_placeholder;
53            }
54            idx
55        } else {
56            let idx = self.graph.add_node(GraphNode {
57                slug: slug.to_string(),
58                title: title.to_string(),
59                is_placeholder,
60            });
61            self.index_map.insert(slug.to_string(), idx);
62            idx
63        }
64    }
65
66    /// Remove a node and all its edges.
67    pub fn remove_node(&mut self, slug: &str) {
68        if let Some(idx) = self.index_map.remove(slug) {
69            self.graph.remove_node(idx);
70            // petgraph may reuse indices after removal, so rebuild the index map
71            self.rebuild_index_map();
72        }
73    }
74
75    fn rebuild_index_map(&mut self) {
76        self.index_map.clear();
77        for idx in self.graph.node_indices() {
78            if let Some(node) = self.graph.node_weight(idx) {
79                self.index_map.insert(node.slug.clone(), idx);
80            }
81        }
82    }
83
84    /// Set outgoing edges FROM a source node.
85    ///
86    /// Removes all existing outgoing edges first, then adds new ones.
87    /// Creates placeholder nodes for targets that don't exist.
88    /// Duplicate targets are deduplicated — at most one edge per (source, target) pair.
89    pub fn set_outgoing_edges(&mut self, source_slug: &str, target_slugs: &[String]) {
90        let source_idx = match self.index_map.get(source_slug) {
91            Some(&idx) => idx,
92            None => return,
93        };
94
95        // Remove existing outgoing edges
96        let outgoing: Vec<_> = self
97            .graph
98            .neighbors_directed(source_idx, Direction::Outgoing)
99            .collect();
100        for target_idx in outgoing {
101            if let Some(edge) = self.graph.find_edge(source_idx, target_idx) {
102                self.graph.remove_edge(edge);
103            }
104        }
105
106        // Deduplicate targets — page-level graph, not occurrence graph
107        let mut seen = HashSet::new();
108        for target_slug in target_slugs {
109            if !seen.insert(target_slug.as_str()) {
110                continue;
111            }
112
113            let target_idx = if let Some(&idx) = self.index_map.get(target_slug.as_str()) {
114                idx
115            } else {
116                self.upsert_node(target_slug, target_slug, true)
117            };
118
119            self.graph.add_edge(source_idx, target_idx, ());
120        }
121    }
122
123    /// Get all slugs that link TO a given slug (backlinks / incoming).
124    /// Results are sorted by slug for deterministic output.
125    pub fn get_backlinks(&self, slug: &str) -> Vec<String> {
126        let idx = match self.index_map.get(slug) {
127            Some(&idx) => idx,
128            None => return Vec::new(),
129        };
130
131        let mut result: Vec<String> = self.graph
132            .neighbors_directed(idx, Direction::Incoming)
133            .filter_map(|n| self.graph.node_weight(n).map(|w| w.slug.clone()))
134            .collect();
135        result.sort();
136        result
137    }
138
139    /// Get all slugs that a given slug links TO (forward links / outgoing).
140    /// Results are sorted by slug for deterministic output.
141    pub fn get_forward_links(&self, slug: &str) -> Vec<String> {
142        let idx = match self.index_map.get(slug) {
143            Some(&idx) => idx,
144            None => return Vec::new(),
145        };
146
147        let mut result: Vec<String> = self.graph
148            .neighbors_directed(idx, Direction::Outgoing)
149            .filter_map(|n| self.graph.node_weight(n).map(|w| w.slug.clone()))
150            .collect();
151        result.sort();
152        result
153    }
154
155    /// Get the degree (total edges in + out) of a node.
156    pub fn degree(&self, slug: &str) -> usize {
157        let idx = match self.index_map.get(slug) {
158            Some(&idx) => idx,
159            None => return 0,
160        };
161
162        self.graph
163            .neighbors_directed(idx, Direction::Incoming)
164            .count()
165            + self
166                .graph
167                .neighbors_directed(idx, Direction::Outgoing)
168                .count()
169    }
170
171    /// Remove placeholder nodes that have zero edges.
172    /// Returns the slugs of removed nodes.
173    ///
174    /// Uses batched removal with a single index rebuild at the end,
175    /// avoiding the O(n * k) cost of rebuilding after each removal.
176    pub fn cleanup_orphaned_placeholders(&mut self) -> Vec<String> {
177        // Collect orphans using direct index access (no map lookup)
178        let orphan_indices: Vec<(NodeIndex, String)> = self
179            .graph
180            .node_indices()
181            .filter_map(|idx| {
182                let node = self.graph.node_weight(idx)?;
183                if node.is_placeholder {
184                    let incoming = self.graph.neighbors_directed(idx, Direction::Incoming).count();
185                    let outgoing = self.graph.neighbors_directed(idx, Direction::Outgoing).count();
186                    if incoming + outgoing == 0 {
187                        return Some((idx, node.slug.clone()));
188                    }
189                }
190                None
191            })
192            .collect();
193
194        let slugs: Vec<String> = orphan_indices.iter().map(|(_, s)| s.clone()).collect();
195
196        // Remove nodes in reverse index order to avoid invalidating earlier indices
197        // (petgraph::DiGraph::remove_node swaps with the last node)
198        let mut indices: Vec<NodeIndex> = orphan_indices.into_iter().map(|(idx, _)| idx).collect();
199        indices.sort_by_key(|idx| std::cmp::Reverse(idx.index()));
200
201        for idx in indices {
202            self.index_map.remove(&self.graph[idx].slug.clone());
203            self.graph.remove_node(idx);
204        }
205
206        // Single rebuild after all removals
207        if !slugs.is_empty() {
208            self.rebuild_index_map();
209        }
210
211        slugs
212    }
213
214    /// Get full graph data for serialization.
215    /// Nodes sorted by slug, edges sorted by (source, target) for deterministic output.
216    pub fn get_full_graph(&self) -> (Vec<GraphNode>, Vec<GraphEdge>) {
217        let mut nodes: Vec<GraphNode> = self
218            .graph
219            .node_indices()
220            .filter_map(|idx| self.graph.node_weight(idx).cloned())
221            .collect();
222        nodes.sort_by(|a, b| a.slug.cmp(&b.slug));
223
224        let mut edges: Vec<GraphEdge> = self
225            .graph
226            .edge_indices()
227            .filter_map(|eidx| {
228                let (src, tgt) = self.graph.edge_endpoints(eidx)?;
229                let src_slug = self.graph.node_weight(src)?.slug.clone();
230                let tgt_slug = self.graph.node_weight(tgt)?.slug.clone();
231                Some(GraphEdge {
232                    source: src_slug,
233                    target: tgt_slug,
234                })
235            })
236            .collect();
237        edges.sort_by(|a, b| (&a.source, &a.target).cmp(&(&b.source, &b.target)));
238
239        (nodes, edges)
240    }
241
242    /// Get node and edge counts.
243    pub fn stats(&self) -> (usize, usize) {
244        (self.graph.node_count(), self.graph.edge_count())
245    }
246}
247
248#[cfg(test)]
249mod tests {
250    use super::*;
251
252    #[test]
253    fn test_add_nodes_and_edges() {
254        let mut g = WikiGraph::new();
255        g.upsert_node("a", "Page A", false);
256        g.upsert_node("b", "Page B", false);
257        g.upsert_node("c", "Page C", false);
258
259        g.set_outgoing_edges("a", &["b".into(), "c".into()]);
260
261        assert_eq!(g.get_forward_links("a").len(), 2);
262        assert_eq!(g.stats(), (3, 2));
263    }
264
265    #[test]
266    fn test_backlinks() {
267        let mut g = WikiGraph::new();
268        g.upsert_node("a", "Page A", false);
269        g.upsert_node("b", "Page B", false);
270        g.upsert_node("c", "Page C", false);
271
272        g.set_outgoing_edges("a", &["b".into(), "c".into()]);
273
274        let backlinks_b = g.get_backlinks("b");
275        assert_eq!(backlinks_b, vec!["a".to_string()]);
276
277        let backlinks_c = g.get_backlinks("c");
278        assert_eq!(backlinks_c, vec!["a".to_string()]);
279
280        assert!(g.get_backlinks("a").is_empty());
281    }
282
283    #[test]
284    fn test_remove_node_cleans_edges() {
285        let mut g = WikiGraph::new();
286        g.upsert_node("a", "A", false);
287        g.upsert_node("b", "B", false);
288        g.set_outgoing_edges("a", &["b".into()]);
289
290        assert_eq!(g.stats(), (2, 1));
291
292        g.remove_node("a");
293        assert_eq!(g.stats(), (1, 0));
294        assert!(g.get_backlinks("b").is_empty());
295    }
296
297    #[test]
298    fn test_set_outgoing_replaces() {
299        let mut g = WikiGraph::new();
300        g.upsert_node("a", "A", false);
301        g.upsert_node("b", "B", false);
302        g.upsert_node("c", "C", false);
303
304        g.set_outgoing_edges("a", &["b".into(), "c".into()]);
305        assert_eq!(g.get_forward_links("a").len(), 2);
306
307        g.set_outgoing_edges("a", &["c".into()]);
308        let fwd = g.get_forward_links("a");
309        assert_eq!(fwd.len(), 1);
310        assert_eq!(fwd[0], "c");
311        assert!(g.get_backlinks("b").is_empty());
312    }
313
314    #[test]
315    fn test_placeholder_creation() {
316        let mut g = WikiGraph::new();
317        g.upsert_node("a", "A", false);
318
319        g.set_outgoing_edges("a", &["d".into()]);
320
321        let (nodes, _) = g.get_full_graph();
322        let d_node = nodes.iter().find(|n| n.slug == "d");
323        assert!(d_node.is_some());
324        assert!(d_node.expect("d exists").is_placeholder);
325    }
326
327    #[test]
328    fn test_cleanup_orphaned_placeholders() {
329        let mut g = WikiGraph::new();
330        g.upsert_node("a", "A", false);
331        g.upsert_node("b", "B", true);
332        g.set_outgoing_edges("a", &["b".into()]);
333        assert_eq!(g.stats(), (2, 1));
334
335        g.set_outgoing_edges("a", &[]);
336        assert_eq!(g.stats(), (2, 0));
337
338        let orphans = g.cleanup_orphaned_placeholders();
339        assert_eq!(orphans, vec!["b".to_string()]);
340        assert_eq!(g.stats(), (1, 0));
341
342        g.upsert_node("c", "C", false);
343        let orphans2 = g.cleanup_orphaned_placeholders();
344        assert!(orphans2.is_empty());
345        assert_eq!(g.stats(), (2, 0));
346    }
347
348    #[test]
349    fn test_degree() {
350        let mut g = WikiGraph::new();
351        g.upsert_node("a", "A", false);
352        g.upsert_node("b", "B", false);
353        g.upsert_node("c", "C", false);
354
355        g.set_outgoing_edges("a", &["b".into(), "c".into()]);
356        g.set_outgoing_edges("b", &["c".into()]);
357
358        assert_eq!(g.degree("a"), 2);
359        assert_eq!(g.degree("b"), 2);
360        assert_eq!(g.degree("c"), 2);
361    }
362
363    #[test]
364    fn test_duplicate_targets_deduplicated() {
365        let mut g = WikiGraph::new();
366        g.upsert_node("a", "A", false);
367        g.upsert_node("b", "B", false);
368
369        // Same target three times — should produce one edge, not three
370        g.set_outgoing_edges("a", &["b".into(), "b".into(), "b".into()]);
371
372        assert_eq!(g.stats(), (2, 1));
373        assert_eq!(g.degree("a"), 1);
374        assert_eq!(g.degree("b"), 1);
375    }
376
377    #[test]
378    fn test_backlinks_sorted() {
379        let mut g = WikiGraph::new();
380        g.upsert_node("target", "Target", false);
381        g.upsert_node("charlie", "Charlie", false);
382        g.upsert_node("alice", "Alice", false);
383        g.upsert_node("bob", "Bob", false);
384
385        g.set_outgoing_edges("charlie", &["target".into()]);
386        g.set_outgoing_edges("alice", &["target".into()]);
387        g.set_outgoing_edges("bob", &["target".into()]);
388
389        let backlinks = g.get_backlinks("target");
390        assert_eq!(backlinks, vec!["alice", "bob", "charlie"]);
391    }
392
393    #[test]
394    fn test_forward_links_sorted() {
395        let mut g = WikiGraph::new();
396        g.upsert_node("a", "A", false);
397        g.upsert_node("z", "Z", false);
398        g.upsert_node("m", "M", false);
399
400        g.set_outgoing_edges("a", &["z".into(), "m".into()]);
401
402        let forward = g.get_forward_links("a");
403        assert_eq!(forward, vec!["m", "z"]);
404    }
405
406    #[test]
407    fn test_full_graph_sorted() {
408        let mut g = WikiGraph::new();
409        g.upsert_node("c", "C", false);
410        g.upsert_node("a", "A", false);
411        g.upsert_node("b", "B", false);
412
413        g.set_outgoing_edges("c", &["a".into()]);
414        g.set_outgoing_edges("a", &["b".into()]);
415
416        let (nodes, edges) = g.get_full_graph();
417        let slugs: Vec<&str> = nodes.iter().map(|n| n.slug.as_str()).collect();
418        assert_eq!(slugs, vec!["a", "b", "c"]);
419
420        assert_eq!(edges[0].source, "a");
421        assert_eq!(edges[0].target, "b");
422        assert_eq!(edges[1].source, "c");
423        assert_eq!(edges[1].target, "a");
424    }
425
426    #[test]
427    fn test_batch_orphan_cleanup() {
428        let mut g = WikiGraph::new();
429        g.upsert_node("real", "Real", false);
430
431        // Create 10 placeholders linked from real
432        let targets: Vec<String> = (0..10).map(|i| format!("ph-{i}")).collect();
433        g.set_outgoing_edges("real", &targets);
434        assert_eq!(g.stats(), (11, 10));
435
436        // Remove all outgoing edges — placeholders become orphans
437        g.set_outgoing_edges("real", &[]);
438        assert_eq!(g.stats(), (11, 0));
439
440        let orphans = g.cleanup_orphaned_placeholders();
441        assert_eq!(orphans.len(), 10);
442        assert_eq!(g.stats(), (1, 0));
443    }
444
445    #[test]
446    fn test_set_outgoing_edges_missing_source_is_noop() {
447        let mut g = WikiGraph::new();
448        // Source doesn't exist — should not panic or create nodes
449        g.set_outgoing_edges("nonexistent", &["a".into()]);
450        assert_eq!(g.stats(), (0, 0));
451    }
452}