Skip to main content

oxios_markdown/
backlinks.rs

1//! Bidirectional link tracking between markdown notes.
2//!
3//! Tracks `[text](path)` links in the knowledge base, enabling
4//! forward-link and backlink queries in O(1) time.
5
6use std::collections::{HashMap, HashSet};
7
8use serde::{Deserialize, Serialize};
9
10use crate::parser::extract_markdown_links;
11
12/// A single backlink: a link from one note to another.
13#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct Backlink {
15    /// File that contains the link.
16    pub source_path: String,
17    /// File that the link points to.
18    pub target_path: String,
19    /// Link display text.
20    pub link_text: String,
21    /// Line number where the link appears (1-indexed).
22    pub line_number: usize,
23}
24
25/// Link graph data for visualization.
26#[derive(Debug, Clone, Serialize, Deserialize)]
27pub struct LinkGraph {
28    /// Node entries.
29    pub nodes: Vec<LinkNode>,
30    /// Edge entries.
31    pub edges: Vec<LinkEdge>,
32}
33
34/// A node in the link graph.
35#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct LinkNode {
37    /// File path (unique ID).
38    pub id: String,
39    /// Display label.
40    pub label: String,
41    /// Group (directory name).
42    pub group: String,
43}
44
45/// An edge in the link graph.
46#[derive(Debug, Clone, Serialize, Deserialize)]
47pub struct LinkEdge {
48    /// Source file path.
49    pub source: String,
50    /// Target file path.
51    pub target: String,
52    /// Link text.
53    pub label: String,
54}
55
56/// Bidirectional link index.
57///
58/// Maintains forward links (source → targets) and backward links
59/// (target → sources) for O(1) lookup.
60#[derive(Debug, Clone, Default)]
61pub struct BacklinkIndex {
62    /// Forward: source_path → set of target_paths.
63    forward: HashMap<String, HashSet<String>>,
64    /// Backward: target_path → set of source_paths.
65    backward: HashMap<String, HashSet<String>>,
66    /// Detailed backlink records.
67    details: HashMap<String, Vec<Backlink>>,
68}
69
70impl BacklinkIndex {
71    /// Create a new empty index.
72    pub fn new() -> Self {
73        Self::default()
74    }
75
76    /// Index all links in a file's content.
77    ///
78    /// Replaces any previously indexed links for this file (incremental update).
79    pub fn index_file(&mut self, path: &str, content: &str) {
80        // Remove old forward links
81        if let Some(old_targets) = self.forward.remove(path) {
82            for target in &old_targets {
83                if let Some(sources) = self.backward.get_mut(target) {
84                    sources.remove(path);
85                }
86            }
87            self.details
88                .retain(|k, _| !k.starts_with(&format!("{}→", path)));
89        }
90
91        // Extract and register new links
92        let links = extract_markdown_links(content);
93        let mut targets = HashSet::new();
94
95        for (line_num, line) in content.lines().enumerate() {
96            for (text, target) in extract_markdown_links(line) {
97                targets.insert(target.clone());
98                self.backward
99                    .entry(target.clone())
100                    .or_default()
101                    .insert(path.to_string());
102                self.details.insert(
103                    format!("{}→{}", path, target),
104                    vec![Backlink {
105                        source_path: path.to_string(),
106                        target_path: target,
107                        link_text: text,
108                        line_number: line_num + 1,
109                    }],
110                );
111            }
112        }
113
114        // Deduplicate: extract_markdown_links called twice above; fix by re-extracting from full content once
115        // Actually the line-level extraction adds duplicates. Let's simplify:
116        // Clear and redo from the full-content extraction.
117        self.backward.values_mut().for_each(|s| {
118            s.remove(path);
119        });
120        self.details
121            .retain(|k, _| !k.starts_with(&format!("{}→", path)));
122
123        let mut new_targets = HashSet::new();
124        for (text, target) in &links {
125            new_targets.insert(target.clone());
126            self.backward
127                .entry(target.clone())
128                .or_default()
129                .insert(path.to_string());
130            self.details.insert(
131                format!("{}→{}", path, target),
132                vec![Backlink {
133                    source_path: path.to_string(),
134                    target_path: target.clone(),
135                    link_text: text.clone(),
136                    line_number: 0, // simplified
137                }],
138            );
139        }
140        self.forward.insert(path.to_string(), new_targets);
141    }
142
143    /// Remove a file from the index.
144    pub fn remove_file(&mut self, path: &str) {
145        if let Some(targets) = self.forward.remove(path) {
146            for target in &targets {
147                if let Some(sources) = self.backward.get_mut(target) {
148                    sources.remove(path);
149                }
150            }
151        }
152        for sources in self.backward.values_mut() {
153            sources.remove(path);
154        }
155        self.details.retain(|k, _| !k.contains(path));
156    }
157
158    /// Get all backlinks pointing to a file (files that reference this one).
159    pub fn backlinks_for(&self, path: &str) -> Vec<Backlink> {
160        let sources = self.backward.get(path).cloned().unwrap_or_default();
161        let mut result = Vec::new();
162        for source in &sources {
163            let key = format!("{}→{}", source, path);
164            if let Some(details) = self.details.get(&key) {
165                result.extend(details.clone());
166            }
167        }
168        result
169    }
170
171    /// Get all forward links from a file (files this one references).
172    pub fn forward_links_for(&self, path: &str) -> Vec<String> {
173        self.forward
174            .get(path)
175            .cloned()
176            .unwrap_or_default()
177            .into_iter()
178            .collect()
179    }
180
181    /// Get the number of backlinks for a file.
182    pub fn backlink_count(&self, path: &str) -> usize {
183        self.backward.get(path).map(|s| s.len()).unwrap_or(0)
184    }
185
186    /// Get the full link graph for visualization.
187    pub fn link_graph(&self) -> LinkGraph {
188        let mut node_set = HashSet::new();
189        let mut edges = Vec::new();
190
191        for (source, targets) in &self.forward {
192            node_set.insert(source.clone());
193            for target in targets {
194                node_set.insert(target.clone());
195                edges.push(LinkEdge {
196                    source: source.clone(),
197                    target: target.clone(),
198                    label: String::new(),
199                });
200            }
201        }
202
203        let nodes: Vec<LinkNode> = node_set
204            .into_iter()
205            .map(|id| {
206                let label = id
207                    .trim_end_matches(".md")
208                    .rsplit('/')
209                    .next()
210                    .unwrap_or(&id)
211                    .to_string();
212                let group = id.split('/').next().unwrap_or("").to_string();
213                LinkNode { id, label, group }
214            })
215            .collect();
216
217        LinkGraph { nodes, edges }
218    }
219
220    /// Compute connection strength between two files (shared backlink sources).
221    pub fn connection_strength(&self, path_a: &str, path_b: &str) -> usize {
222        let sources_a = self.backward.get(path_a).cloned().unwrap_or_default();
223        let sources_b = self.backward.get(path_b).cloned().unwrap_or_default();
224        sources_a.intersection(&sources_b).count()
225    }
226
227    /// Number of files in the index.
228    pub fn len(&self) -> usize {
229        self.forward.len()
230    }
231
232    /// Whether the index is empty.
233    pub fn is_empty(&self) -> bool {
234        self.forward.is_empty()
235    }
236
237    /// Clear all indexed data.
238    pub fn clear(&mut self) {
239        self.forward.clear();
240        self.backward.clear();
241        self.details.clear();
242    }
243}
244
245#[cfg(test)]
246mod tests {
247    use super::*;
248
249    #[test]
250    fn test_index_and_backlinks() {
251        let mut idx = BacklinkIndex::new();
252        idx.index_file(
253            "brain/Rust.md",
254            "See [Ownership](brain/Ownership.md) and [Go](brain/Go.md)",
255        );
256
257        let bl = idx.backlinks_for("brain/Ownership.md");
258        assert_eq!(bl.len(), 1);
259        assert_eq!(bl[0].source_path, "brain/Rust.md");
260    }
261
262    #[test]
263    fn test_forward_links() {
264        let mut idx = BacklinkIndex::new();
265        idx.index_file("a.md", "[b](b.md) [c](c.md)");
266        let fwd = idx.forward_links_for("a.md");
267        assert_eq!(fwd.len(), 2);
268    }
269
270    #[test]
271    fn test_remove_file() {
272        let mut idx = BacklinkIndex::new();
273        idx.index_file("a.md", "[b](b.md)");
274        idx.remove_file("a.md");
275        assert!(idx.backlinks_for("b.md").is_empty());
276    }
277
278    #[test]
279    fn test_connection_strength() {
280        let mut idx = BacklinkIndex::new();
281        idx.index_file("x.md", "[a](a.md) [b](b.md)");
282        idx.index_file("y.md", "[a](a.md) [b](b.md)");
283        assert_eq!(idx.connection_strength("a.md", "b.md"), 2);
284    }
285
286    #[test]
287    fn test_link_graph() {
288        let mut idx = BacklinkIndex::new();
289        idx.index_file("brain/A.md", "[B](brain/B.md)");
290        let graph = idx.link_graph();
291        assert_eq!(graph.edges.len(), 1);
292        assert_eq!(graph.nodes.len(), 2);
293    }
294
295    #[test]
296    fn test_update_replaces_old_links() {
297        let mut idx = BacklinkIndex::new();
298        idx.index_file("a.md", "[old](old.md)");
299        idx.index_file("a.md", "[new](new.md)");
300        assert!(idx.backlinks_for("old.md").is_empty());
301        assert_eq!(idx.backlinks_for("new.md").len(), 1);
302    }
303}