turbovault_graph/
graph.rs

1//! Link graph using petgraph for vault relationship analysis
2
3use turbovault_core::prelude::*;
4use petgraph::algo::kosaraju_scc;
5use petgraph::prelude::*;
6use std::collections::{HashMap, HashSet};
7use std::path::PathBuf;
8
9/// Node index type for graph
10type NodeIndex = petgraph::graph::NodeIndex;
11
12/// Link graph for analyzing vault relationships
13pub struct LinkGraph {
14    /// Directed graph: nodes are file paths, edges are links
15    graph: DiGraph<PathBuf, Link>,
16
17    /// Map from file name (stem) to node index
18    file_index: HashMap<String, NodeIndex>,
19
20    /// Map from aliases to node index
21    alias_index: HashMap<String, NodeIndex>,
22
23    /// Map from full path to node index (for quick lookups)
24    path_index: HashMap<PathBuf, NodeIndex>,
25}
26
27impl LinkGraph {
28    /// Create a new link graph
29    pub fn new() -> Self {
30        Self {
31            graph: DiGraph::new(),
32            file_index: HashMap::new(),
33            alias_index: HashMap::new(),
34            path_index: HashMap::new(),
35        }
36    }
37
38    /// Add a file to the graph
39    pub fn add_file(&mut self, file: &VaultFile) -> Result<()> {
40        let path = file.path.clone();
41
42        // Create node if not exists
43        let node_idx = if let Some(&idx) = self.path_index.get(&path) {
44            idx
45        } else {
46            let idx = self.graph.add_node(path.clone());
47            self.path_index.insert(path.clone(), idx);
48
49            // Add to file_index by stem
50            if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
51                self.file_index.insert(stem.to_string(), idx);
52            }
53
54            idx
55        };
56
57        // Register aliases from frontmatter
58        if let Some(fm) = &file.frontmatter {
59            for alias in fm.aliases() {
60                self.alias_index.insert(alias, node_idx);
61            }
62        }
63
64        Ok(())
65    }
66
67    /// Remove a file from the graph
68    pub fn remove_file(&mut self, path: &PathBuf) -> Result<()> {
69        if let Some(&idx) = self.path_index.get(path) {
70            // Remove from all indices
71            self.path_index.remove(path);
72
73            if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
74                self.file_index.remove(stem);
75            }
76
77            // Remove aliases pointing to this node
78            self.alias_index.retain(|_, &mut node_idx| node_idx != idx);
79
80            // Remove node and all edges
81            self.graph.remove_node(idx);
82        }
83
84        Ok(())
85    }
86
87    /// Add links from a parsed file to the graph
88    pub fn update_links(&mut self, file: &VaultFile) -> Result<()> {
89        let source_path = &file.path;
90
91        // Get or create source node
92        let source_idx = if let Some(&idx) = self.path_index.get(source_path) {
93            idx
94        } else {
95            let idx = self.graph.add_node(source_path.clone());
96            self.path_index.insert(source_path.clone(), idx);
97            idx
98        };
99
100        // Remove old outgoing edges
101        let outgoing: Vec<_> = self.graph.edges(source_idx).map(|e| e.id()).collect();
102        for edge_id in outgoing {
103            self.graph.remove_edge(edge_id);
104        }
105
106        // Add edges for each internal link (wikilinks and embeds)
107        for link in &file.links {
108            if matches!(link.type_, LinkType::WikiLink | LinkType::Embed)
109                && let Some(target_idx) = self.resolve_link(&link.target)
110            {
111                // Add edge (both nodes already exist from add_file)
112                self.graph.add_edge(source_idx, target_idx, link.clone());
113            }
114        }
115
116        Ok(())
117    }
118
119    /// Resolve a wikilink target to a file path and node index
120    fn resolve_link(&self, target: &str) -> Option<NodeIndex> {
121        // Remove block/heading references
122        let clean_target = target.split('#').next()?.trim();
123
124        // Try direct stem match
125        if let Some(&idx) = self.file_index.get(clean_target) {
126            return Some(idx);
127        }
128
129        // Try alias match
130        if let Some(&idx) = self.alias_index.get(clean_target) {
131            return Some(idx);
132        }
133
134        // Try path-like match (folder/Note)
135        let target_parts: Vec<&str> = clean_target.split('/').filter(|p| !p.is_empty()).collect();
136        if target_parts.is_empty() {
137            return None;
138        }
139
140        // Find file path that matches the tail of the target path
141        for (path, &idx) in self.path_index.iter() {
142            let path_parts: Vec<&str> = path.iter().filter_map(|p| p.to_str()).collect();
143
144            if path_parts.len() >= target_parts.len() {
145                let start = path_parts.len() - target_parts.len();
146                if path_parts[start..] == target_parts[..] {
147                    return Some(idx);
148                }
149            }
150        }
151
152        None
153    }
154
155    /// Get all backlinks to a file (files that link to this file)
156    pub fn backlinks(&self, path: &PathBuf) -> Result<Vec<(PathBuf, Vec<Link>)>> {
157        if let Some(&target_idx) = self.path_index.get(path) {
158            let backlinks: Vec<_> = self
159                .graph
160                .edges_directed(target_idx, Incoming)
161                .map(|edge| {
162                    let source_idx = edge.source();
163                    let source_path = self.graph[source_idx].clone();
164                    (source_path, edge.weight().clone())
165                })
166                .fold(HashMap::new(), |mut acc, (path, link)| {
167                    acc.entry(path).or_insert_with(Vec::new).push(link);
168                    acc
169                })
170                .into_iter()
171                .collect();
172
173            Ok(backlinks)
174        } else {
175            Ok(vec![])
176        }
177    }
178
179    /// Get all forward links from a file (files this file links to)
180    pub fn forward_links(&self, path: &PathBuf) -> Result<Vec<(PathBuf, Vec<Link>)>> {
181        if let Some(&source_idx) = self.path_index.get(path) {
182            let forward_links: Vec<_> = self
183                .graph
184                .edges(source_idx)
185                .map(|edge| {
186                    let target_idx = edge.target();
187                    let target_path = self.graph[target_idx].clone();
188                    (target_path, edge.weight().clone())
189                })
190                .fold(HashMap::new(), |mut acc, (path, link)| {
191                    acc.entry(path).or_insert_with(Vec::new).push(link);
192                    acc
193                })
194                .into_iter()
195                .collect();
196
197            Ok(forward_links)
198        } else {
199            Ok(vec![])
200        }
201    }
202
203    /// Find all orphaned notes (no incoming or outgoing links)
204    pub fn orphaned_notes(&self) -> Vec<PathBuf> {
205        self.graph
206            .node_indices()
207            .filter(|&idx| {
208                let in_degree = self.graph.edges_directed(idx, Incoming).count();
209                let out_degree = self.graph.edges(idx).count();
210                in_degree == 0 && out_degree == 0
211            })
212            .map(|idx| self.graph[idx].clone())
213            .collect()
214    }
215
216    /// Find related notes within N hops (breadth-first search)
217    pub fn related_notes(&self, path: &PathBuf, max_hops: usize) -> Result<Vec<PathBuf>> {
218        if let Some(&start_idx) = self.path_index.get(path) {
219            let mut visited = HashSet::new();
220            let mut queue = vec![(start_idx, 0)];
221            let mut related = Vec::new();
222
223            visited.insert(start_idx);
224
225            while let Some((idx, hops)) = queue.pop() {
226                if hops > 0 {
227                    related.push(self.graph[idx].clone());
228                }
229
230                if hops < max_hops {
231                    // Add all neighbors
232                    for neighbor_idx in self.graph.neighbors(idx) {
233                        if visited.insert(neighbor_idx) {
234                            queue.push((neighbor_idx, hops + 1));
235                        }
236                    }
237
238                    // Also traverse incoming edges
239                    for neighbor_idx in self.graph.edges_directed(idx, Incoming).map(|e| e.source())
240                    {
241                        if visited.insert(neighbor_idx) {
242                            queue.push((neighbor_idx, hops + 1));
243                        }
244                    }
245                }
246            }
247
248            Ok(related)
249        } else {
250            Ok(vec![])
251        }
252    }
253
254    /// Find strongly connected components (cycles in the graph)
255    pub fn cycles(&self) -> Vec<Vec<PathBuf>> {
256        let sccs = kosaraju_scc(&self.graph);
257        sccs.into_iter()
258            .filter(|scc| scc.len() > 1) // Only return actual cycles (size > 1)
259            .map(|scc| scc.iter().map(|&idx| self.graph[idx].clone()).collect())
260            .collect()
261    }
262
263    /// Get statistics about the graph
264    pub fn stats(&self) -> GraphStats {
265        let node_count = self.graph.node_count();
266        let edge_count = self.graph.edge_count();
267
268        let orphaned_count = self.orphaned_notes().len();
269
270        let avg_links_per_file = if node_count > 0 {
271            edge_count as f64 / node_count as f64
272        } else {
273            0.0
274        };
275
276        GraphStats {
277            total_files: node_count,
278            total_links: edge_count,
279            orphaned_files: orphaned_count,
280            average_links_per_file: avg_links_per_file,
281        }
282    }
283
284    /// Get all file paths in the graph
285    pub fn all_files(&self) -> Vec<PathBuf> {
286        self.graph
287            .node_indices()
288            .map(|idx| self.graph[idx].clone())
289            .collect()
290    }
291
292    /// Get node count
293    pub fn node_count(&self) -> usize {
294        self.graph.node_count()
295    }
296
297    /// Get edge count
298    pub fn edge_count(&self) -> usize {
299        self.graph.edge_count()
300    }
301
302    /// Get incoming links to a file (just the Link objects)
303    pub fn incoming_links(&self, path: &PathBuf) -> Result<Vec<Link>> {
304        if let Some(&target_idx) = self.path_index.get(path) {
305            let links: Vec<Link> = self
306                .graph
307                .edges_directed(target_idx, Incoming)
308                .map(|edge| edge.weight().clone())
309                .collect();
310            Ok(links)
311        } else {
312            Ok(vec![])
313        }
314    }
315
316    /// Get outgoing links from a file (just the Link objects)
317    pub fn outgoing_links(&self, path: &PathBuf) -> Result<Vec<Link>> {
318        if let Some(&source_idx) = self.path_index.get(path) {
319            let links: Vec<Link> = self
320                .graph
321                .edges(source_idx)
322                .map(|edge| edge.weight().clone())
323                .collect();
324            Ok(links)
325        } else {
326            Ok(vec![])
327        }
328    }
329
330    /// Get all links in the graph, grouped by source file
331    pub fn all_links(&self) -> HashMap<PathBuf, Vec<Link>> {
332        let mut result = HashMap::new();
333
334        for node_idx in self.graph.node_indices() {
335            let source_path = self.graph[node_idx].clone();
336            let links: Vec<Link> = self
337                .graph
338                .edges(node_idx)
339                .map(|edge| edge.weight().clone())
340                .collect();
341
342            if !links.is_empty() {
343                result.insert(source_path, links);
344            }
345        }
346
347        result
348    }
349
350    /// Find connected components in the graph (using undirected view)
351    pub fn connected_components(&self) -> Result<Vec<Vec<PathBuf>>> {
352        use petgraph::algo::tarjan_scc;
353
354        // Use Tarjan's algorithm for strongly connected components
355        let components = tarjan_scc(&self.graph);
356
357        let result: Vec<Vec<PathBuf>> = components
358            .into_iter()
359            .map(|component| {
360                component
361                    .iter()
362                    .map(|&idx| self.graph[idx].clone())
363                    .collect()
364            })
365            .collect();
366
367        Ok(result)
368    }
369}
370
371impl Default for LinkGraph {
372    fn default() -> Self {
373        Self::new()
374    }
375}
376
377/// Statistics about the graph
378#[derive(Debug, Clone)]
379pub struct GraphStats {
380    pub total_files: usize,
381    pub total_links: usize,
382    pub orphaned_files: usize,
383    pub average_links_per_file: f64,
384}
385
386#[cfg(test)]
387mod tests {
388    use super::*;
389
390    fn create_test_file(path: &str, links: Vec<&str>) -> VaultFile {
391        let parsed_links: Vec<Link> = links
392            .into_iter()
393            .enumerate()
394            .map(|(i, target)| Link {
395                type_: LinkType::WikiLink,
396                source_file: PathBuf::from(path),
397                target: target.to_string(),
398                display_text: None,
399                position: SourcePosition::new(0, 0, i * 10, 10),
400                resolved_target: None,
401                is_valid: true,
402            })
403            .collect();
404
405        let mut vault_file = VaultFile::new(
406            PathBuf::from(path),
407            String::new(),
408            FileMetadata {
409                path: PathBuf::from(path),
410                size: 0,
411                created_at: 0.0,
412                modified_at: 0.0,
413                checksum: String::new(),
414                is_attachment: false,
415            },
416        );
417        vault_file.links = parsed_links;
418        vault_file
419    }
420
421    #[test]
422    fn test_add_file() {
423        let mut graph = LinkGraph::new();
424        let file = create_test_file("note.md", vec![]);
425
426        assert!(graph.add_file(&file).is_ok());
427        assert_eq!(graph.node_count(), 1);
428    }
429
430    #[test]
431    fn test_add_multiple_files() {
432        let mut graph = LinkGraph::new();
433        let file1 = create_test_file("note1.md", vec![]);
434        let file2 = create_test_file("note2.md", vec![]);
435
436        graph.add_file(&file1).unwrap();
437        graph.add_file(&file2).unwrap();
438
439        assert_eq!(graph.node_count(), 2);
440    }
441
442    #[test]
443    fn test_update_links() {
444        let mut graph = LinkGraph::new();
445        let file1 = create_test_file("note1.md", vec![]);
446        let file2 = create_test_file("note2.md", vec!["note1"]);
447
448        graph.add_file(&file1).unwrap();
449        graph.add_file(&file2).unwrap();
450        graph.update_links(&file2).unwrap();
451
452        assert_eq!(graph.edge_count(), 1);
453    }
454
455    #[test]
456    fn test_orphaned_notes() {
457        let mut graph = LinkGraph::new();
458        let orphan = create_test_file("orphan.md", vec![]);
459        let linked1 = create_test_file("note1.md", vec![]);
460        let linked2 = create_test_file("note2.md", vec!["note1"]);
461
462        graph.add_file(&orphan).unwrap();
463        graph.add_file(&linked1).unwrap();
464        graph.add_file(&linked2).unwrap();
465        graph.update_links(&linked2).unwrap();
466
467        let orphans = graph.orphaned_notes();
468        assert_eq!(orphans.len(), 1);
469        assert_eq!(orphans[0], PathBuf::from("orphan.md"));
470    }
471
472    #[test]
473    fn test_graph_stats() {
474        let mut graph = LinkGraph::new();
475        let file1 = create_test_file("note1.md", vec![]);
476        let file2 = create_test_file("note2.md", vec!["note1"]);
477
478        graph.add_file(&file1).unwrap();
479        graph.add_file(&file2).unwrap();
480        graph.update_links(&file2).unwrap();
481
482        let stats = graph.stats();
483        assert_eq!(stats.total_files, 2);
484        assert_eq!(stats.total_links, 1);
485        assert_eq!(stats.orphaned_files, 0); // Both notes have links: note1 has incoming, note2 has outgoing
486    }
487}