Skip to main content

oxios_markdown/
backlinks.rs

1//! Bidirectional link tracking between markdown notes.
2//!
3//! Tracks `[text](path)` links in the knowledge base, enabling
4//! forward-link and backlink queries in O(1) time.
5
6use std::collections::{HashMap, HashSet};
7
8use serde::{Deserialize, Serialize};
9
10use crate::parser::extract_markdown_links;
11
12/// A single backlink: a link from one note to another.
13#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct Backlink {
15    /// File that contains the link.
16    pub source_path: String,
17    /// File that the link points to.
18    pub target_path: String,
19    /// Link display text.
20    pub link_text: String,
21    /// Line number where the link appears (1-indexed).
22    pub line_number: usize,
23}
24
25/// Link graph data for visualization.
26#[derive(Debug, Clone, Serialize, Deserialize)]
27pub struct LinkGraph {
28    /// Node entries.
29    pub nodes: Vec<LinkNode>,
30    /// Edge entries.
31    pub edges: Vec<LinkEdge>,
32}
33
34/// A node in the link graph.
35#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct LinkNode {
37    /// File path (unique ID).
38    pub id: String,
39    /// Display label.
40    pub label: String,
41    /// Group (directory name).
42    pub group: String,
43}
44
45/// An edge in the link graph.
46#[derive(Debug, Clone, Serialize, Deserialize)]
47pub struct LinkEdge {
48    /// Source file path.
49    pub source: String,
50    /// Target file path.
51    pub target: String,
52    /// Link text.
53    pub label: String,
54}
55
56/// Bidirectional link index.
57///
58/// Maintains forward links (source → targets) and backward links
59/// (target → sources) for O(1) lookup.
60#[derive(Debug, Clone, Default)]
61pub struct BacklinkIndex {
62    /// Forward: source_path → set of target_paths.
63    forward: HashMap<String, HashSet<String>>,
64    /// Backward: target_path → set of source_paths.
65    backward: HashMap<String, HashSet<String>>,
66    /// Detailed backlink records.
67    details: HashMap<String, Vec<Backlink>>,
68}
69
70impl BacklinkIndex {
71    /// Create a new empty index.
72    pub fn new() -> Self {
73        Self::default()
74    }
75
76    /// Index all links in a file's content.
77    ///
78    /// Replaces any previously indexed links for this file (incremental update).
79    pub fn index_file(&mut self, path: &str, content: &str) {
80        // Remove old forward links
81        if let Some(old_targets) = self.forward.remove(path) {
82            for target in &old_targets {
83                if let Some(sources) = self.backward.get_mut(target) {
84                    sources.remove(path);
85                }
86            }
87            self.details
88                .retain(|k, _| !k.starts_with(&format!("{path}→")));
89        }
90
91        // Extract and register new links
92        let body = strip_frontmatter(content);
93        let links = extract_markdown_links(body);
94        let mut targets = HashSet::new();
95
96        for (line_num, line) in body.lines().enumerate() {
97            for (text, target) in extract_markdown_links(line) {
98                targets.insert(target.clone());
99                self.backward
100                    .entry(target.clone())
101                    .or_default()
102                    .insert(path.to_string());
103                self.details.insert(
104                    format!("{path}→{target}"),
105                    vec![Backlink {
106                        source_path: path.to_string(),
107                        target_path: target,
108                        link_text: text,
109                        line_number: line_num + 1,
110                    }],
111                );
112            }
113        }
114
115        // Deduplicate: extract_markdown_links called twice above; fix by re-extracting from full content once
116        // Actually the line-level extraction adds duplicates. Let's simplify:
117        // Clear and redo from the full-content extraction.
118        self.backward.values_mut().for_each(|s| {
119            s.remove(path);
120        });
121        self.details
122            .retain(|k, _| !k.starts_with(&format!("{path}→")));
123
124        let mut new_targets = HashSet::new();
125        for (text, target) in &links {
126            new_targets.insert(target.clone());
127            self.backward
128                .entry(target.clone())
129                .or_default()
130                .insert(path.to_string());
131            self.details.insert(
132                format!("{path}→{target}"),
133                vec![Backlink {
134                    source_path: path.to_string(),
135                    target_path: target.clone(),
136                    link_text: text.clone(),
137                    line_number: 0, // simplified
138                }],
139            );
140        }
141        self.forward.insert(path.to_string(), new_targets);
142    }
143
144    /// Remove a file from the index.
145    pub fn remove_file(&mut self, path: &str) {
146        if let Some(targets) = self.forward.remove(path) {
147            for target in &targets {
148                if let Some(sources) = self.backward.get_mut(target) {
149                    sources.remove(path);
150                }
151            }
152        }
153        for sources in self.backward.values_mut() {
154            sources.remove(path);
155        }
156        self.details.retain(|k, _| !k.contains(path));
157    }
158
159    /// Get all backlinks pointing to a file (files that reference this one).
160    pub fn backlinks_for(&self, path: &str) -> Vec<Backlink> {
161        let sources = self.backward.get(path).cloned().unwrap_or_default();
162        let mut result = Vec::new();
163        for source in &sources {
164            let key = format!("{source}→{path}");
165            if let Some(details) = self.details.get(&key) {
166                result.extend(details.clone());
167            }
168        }
169        result
170    }
171
172    /// Get all forward links from a file (files this one references).
173    pub fn forward_links_for(&self, path: &str) -> Vec<String> {
174        self.forward
175            .get(path)
176            .cloned()
177            .unwrap_or_default()
178            .into_iter()
179            .collect()
180    }
181
182    /// Get the number of backlinks for a file.
183    pub fn backlink_count(&self, path: &str) -> usize {
184        self.backward.get(path).map(|s| s.len()).unwrap_or(0)
185    }
186
187    /// Get the full link graph for visualization.
188    pub fn link_graph(&self) -> LinkGraph {
189        let mut node_set = HashSet::new();
190        let mut edges = Vec::new();
191
192        for (source, targets) in &self.forward {
193            node_set.insert(source.clone());
194            for target in targets {
195                node_set.insert(target.clone());
196                edges.push(LinkEdge {
197                    source: source.clone(),
198                    target: target.clone(),
199                    label: String::new(),
200                });
201            }
202        }
203
204        let nodes: Vec<LinkNode> = node_set
205            .into_iter()
206            .map(|id| {
207                let label = id
208                    .trim_end_matches(".md")
209                    .rsplit('/')
210                    .next()
211                    .unwrap_or(&id)
212                    .to_string();
213                let group = id.split('/').next().unwrap_or("").to_string();
214                LinkNode { id, label, group }
215            })
216            .collect();
217
218        LinkGraph { nodes, edges }
219    }
220
221    /// Compute connection strength between two files (shared backlink sources).
222    pub fn connection_strength(&self, path_a: &str, path_b: &str) -> usize {
223        let sources_a = self.backward.get(path_a).cloned().unwrap_or_default();
224        let sources_b = self.backward.get(path_b).cloned().unwrap_or_default();
225        sources_a.intersection(&sources_b).count()
226    }
227
228    /// Number of files in the index.
229    pub fn len(&self) -> usize {
230        self.forward.len()
231    }
232
233    /// Whether the index is empty.
234    pub fn is_empty(&self) -> bool {
235        self.forward.is_empty()
236    }
237
238    /// Clear all indexed data.
239    pub fn clear(&mut self) {
240        self.forward.clear();
241        self.backward.clear();
242        self.details.clear();
243    }
244}
245
246/// Strip YAML frontmatter from content, returning the body.
247/// If no frontmatter is found, returns the original content unchanged.
248pub fn strip_frontmatter(content: &str) -> &str {
249    let trimmed = content.trim_start();
250    if !trimmed.starts_with("---") {
251        return content;
252    }
253    // Skip the opening ---
254    let after_first = &trimmed[3..];
255    let rest = after_first.trim_start_matches(['-', '\n', '\r']);
256    if let Some(idx) = rest.find("\n---") {
257        let body_start = idx + 4;
258        rest[body_start..].trim_start()
259    } else {
260        content
261    }
262}
263
264#[cfg(test)]
265mod tests {
266    use super::*;
267
268    #[test]
269    fn test_index_and_backlinks() {
270        let mut idx = BacklinkIndex::new();
271        idx.index_file(
272            "brain/Rust.md",
273            "See [Ownership](brain/Ownership.md) and [Go](brain/Go.md)",
274        );
275
276        let bl = idx.backlinks_for("brain/Ownership.md");
277        assert_eq!(bl.len(), 1);
278        assert_eq!(bl[0].source_path, "brain/Rust.md");
279    }
280
281    #[test]
282    fn test_forward_links() {
283        let mut idx = BacklinkIndex::new();
284        idx.index_file("a.md", "[b](b.md) [c](c.md)");
285        let fwd = idx.forward_links_for("a.md");
286        assert_eq!(fwd.len(), 2);
287    }
288
289    #[test]
290    fn test_remove_file() {
291        let mut idx = BacklinkIndex::new();
292        idx.index_file("a.md", "[b](b.md)");
293        idx.remove_file("a.md");
294        assert!(idx.backlinks_for("b.md").is_empty());
295    }
296
297    #[test]
298    fn test_connection_strength() {
299        let mut idx = BacklinkIndex::new();
300        idx.index_file("x.md", "[a](a.md) [b](b.md)");
301        idx.index_file("y.md", "[a](a.md) [b](b.md)");
302        assert_eq!(idx.connection_strength("a.md", "b.md"), 2);
303    }
304
305    #[test]
306    fn test_link_graph() {
307        let mut idx = BacklinkIndex::new();
308        idx.index_file("brain/A.md", "[B](brain/B.md)");
309        let graph = idx.link_graph();
310        assert_eq!(graph.edges.len(), 1);
311        assert_eq!(graph.nodes.len(), 2);
312    }
313
314    #[test]
315    fn test_update_replaces_old_links() {
316        let mut idx = BacklinkIndex::new();
317        idx.index_file("a.md", "[old](old.md)");
318        idx.index_file("a.md", "[new](new.md)");
319        assert!(idx.backlinks_for("old.md").is_empty());
320        assert_eq!(idx.backlinks_for("new.md").len(), 1);
321    }
322}