Skip to main content

obsidian_core/
link.rs

1use std::sync::LazyLock;
2
3use crate::InlineLocation;
4use regex::Regex;
5
/// A link recognised in note text.
///
/// Three syntaxes are distinguished:
///
/// * `[[target#heading|alias]]` — a wiki link ([`Link::Wiki`]),
/// * `[text](url)` — a Markdown link ([`Link::Markdown`]),
/// * `![[target#heading|alias]]` — an embed ([`Link::Embed`]).
///
/// For the wiki and embed forms the `#heading` and `|alias` parts are optional
/// and are `None` when absent from the source text.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Link {
    /// A wiki-style link: `[[target]]`, `[[target#heading]]`, `[[target|alias]]`
    /// or `[[target#heading|alias]]`.
    Wiki {
        /// Text before any `#` or `|`.
        target: String,
        /// Text following `#`, if present.
        heading: Option<String>,
        /// Text following `|`, if present.
        alias: Option<String>,
    },
    /// A Markdown link: `[text](url)`.
    Markdown {
        /// Text between the square brackets.
        text: String,
        /// Text between the parentheses.
        url: String,
    },
    /// An embed: the wiki-link syntax prefixed with `!`, e.g. `![[image.png]]`.
    Embed {
        /// Text before any `#` or `|`.
        target: String,
        /// Text following `#`, if present.
        heading: Option<String>,
        /// Text following `|`, if present.
        alias: Option<String>,
    },
}
23
/// A [`Link`] together with the position at which it was found.
#[derive(Clone)]
pub struct LocatedLink {
    /// The parsed link.
    pub link: Link,
    /// Where the link sits in the text. As filled in by `parse_links`, `line`
    /// is 1-indexed, and `col_start`/`col_end` are 0-indexed character columns
    /// with `col_end` one past the link's last character.
    pub location: InlineLocation,
}
29
30pub(crate) static FENCED_CODE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?s)```[^\n]*\n.*?```").unwrap());
31
32pub(crate) static INLINE_CODE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"`[^`\n]+`").unwrap());
33
34// Combined link regex. Embed alternative is listed first so ![[...]] is consumed
35// before the wiki alternative can match [[...]] within it.
36// Groups: (1) full embed, (2) embed target, (3) embed heading, (4) embed alias,
37//         (5) full wiki,  (6) wiki target,  (7) wiki heading,  (8) wiki alias,
38//         (9) full md,   (10) md text,     (11) md url.
39static LINK_RE: LazyLock<Regex> = LazyLock::new(|| {
40    Regex::new(
41        r"(!\[\[([^\]#|]*?)(?:#([^\]|]*?))?(?:\|([^\]]*?))?\]\])|(\[\[([^\]#|]*?)(?:#([^\]|]*?))?(?:\|([^\]]*?))?\]\])|(\[([^\]]+?)\]\(([^)\n]+?)\))",
42    )
43    .unwrap()
44});
45
/// Converts a byte offset into `text` to a `(line, column)` pair.
///
/// The line is 1-indexed. The column is 0-indexed and counted in characters
/// (not bytes) from the start of that line.
///
/// # Panics
///
/// Panics if `byte_pos` is greater than `text.len()` or does not fall on a
/// UTF-8 character boundary, because `text` is sliced at that offset.
pub(crate) fn byte_to_line_col(text: &str, byte_pos: usize) -> (usize, usize) {
    let prefix = &text[..byte_pos];

    // Byte index at which the current line begins: just past the last newline,
    // or the start of the text when no newline precedes `byte_pos`.
    let line_begin = prefix.rfind('\n').map_or(0, |newline| newline + 1);

    // `\n` is a single ASCII byte and never occurs inside a multi-byte UTF-8
    // sequence, so counting bytes is the same as counting newline characters.
    let newlines_before = prefix.bytes().filter(|&byte| byte == b'\n').count();

    let column = prefix[line_begin..].chars().count();
    (newlines_before + 1, column)
}
56
57pub(crate) fn parse_links(content: &str) -> Vec<LocatedLink> {
58    // Replace code block content with spaces to neutralize links inside them
59    // while preserving byte positions.
60    let mut sanitized = content.to_string();
61    for m in FENCED_CODE_RE.find_iter(content) {
62        sanitized.replace_range(m.range(), &" ".repeat(m.len()));
63    }
64    for m in INLINE_CODE_RE.find_iter(&sanitized.clone()) {
65        sanitized.replace_range(m.range(), &" ".repeat(m.len()));
66    }
67
68    let mut links = Vec::new();
69    for caps in LINK_RE.captures_iter(&sanitized) {
70        let m = caps.get(0).unwrap();
71        let (line, col_start) = byte_to_line_col(content, m.start());
72        let col_end = col_start + content[m.start()..m.end()].chars().count();
73        let location = InlineLocation {
74            line,
75            col_start,
76            col_end,
77        };
78
79        if caps.get(1).is_some() {
80            // Embed
81            let target = caps.get(2).map_or("", |m| m.as_str()).to_string();
82            let heading = caps.get(3).map(|m| m.as_str().to_string());
83            let alias = caps.get(4).map(|m| m.as_str().to_string());
84            links.push(LocatedLink {
85                link: Link::Embed { target, heading, alias },
86                location,
87            });
88        } else if caps.get(5).is_some() {
89            // Wiki
90            let target = caps.get(6).map_or("", |m| m.as_str()).to_string();
91            let heading = caps.get(7).map(|m| m.as_str().to_string());
92            let alias = caps.get(8).map(|m| m.as_str().to_string());
93            links.push(LocatedLink {
94                link: Link::Wiki { target, heading, alias },
95                location,
96            });
97        } else if caps.get(9).is_some() {
98            // Markdown
99            let text = caps.get(10).map_or("", |m| m.as_str()).to_string();
100            let url = caps.get(11).map_or("", |m| m.as_str()).to_string();
101            links.push(LocatedLink {
102                link: Link::Markdown { text, url },
103                location,
104            });
105        }
106    }
107    links
108}
109
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Note;

    /// Parses `content`, checks that exactly one link was found, and returns it.
    fn sole_link(content: &str) -> LocatedLink {
        let mut found = parse_links(content);
        assert_eq!(found.len(), 1);
        found.remove(0)
    }

    /// Checks that `link` is a wiki link with the given parts.
    fn assert_wiki(link: &Link, target: &str, heading: Option<&str>, alias: Option<&str>) {
        let Link::Wiki {
            target: got_target,
            heading: got_heading,
            alias: got_alias,
        } = link
        else {
            panic!("expected Wiki link");
        };
        assert_eq!(got_target, target);
        assert_eq!(got_heading.as_deref(), heading);
        assert_eq!(got_alias.as_deref(), alias);
    }

    /// Checks that `link` is a Markdown link with the given text and URL.
    fn assert_md(link: &Link, text: &str, url: &str) {
        let Link::Markdown {
            text: got_text,
            url: got_url,
        } = link
        else {
            panic!("expected Markdown link");
        };
        assert_eq!(got_text, text);
        assert_eq!(got_url, url);
    }

    /// Checks that `link` is an embed with the given parts.
    fn assert_embed(link: &Link, target: &str, heading: Option<&str>, alias: Option<&str>) {
        let Link::Embed {
            target: got_target,
            heading: got_heading,
            alias: got_alias,
        } = link
        else {
            panic!("expected Embed link");
        };
        assert_eq!(got_target, target);
        assert_eq!(got_heading.as_deref(), heading);
        assert_eq!(got_alias.as_deref(), alias);
    }

    /// Checks the line and column span recorded for `located`.
    fn assert_location(located: &LocatedLink, line: usize, col_start: usize, col_end: usize) {
        let position = &located.location;
        assert_eq!(position.line, line);
        assert_eq!(position.col_start, col_start);
        assert_eq!(position.col_end, col_end);
    }

    #[test]
    fn wiki_basic() {
        let located = sole_link("See [[target]].");
        assert_wiki(&located.link, "target", None, None);
    }

    #[test]
    fn wiki_basic_multi_word() {
        let located = sole_link("See [[some target]].");
        assert_wiki(&located.link, "some target", None, None);
    }

    #[test]
    fn wiki_with_heading() {
        let located = sole_link("See [[target#heading]].");
        assert_wiki(&located.link, "target", Some("heading"), None);
    }

    #[test]
    fn wiki_with_alias() {
        let located = sole_link("See [[target|alias]].");
        assert_wiki(&located.link, "target", None, Some("alias"));
    }

    #[test]
    fn wiki_with_multi_word_alias() {
        let located = sole_link("See [[target|some alias]].");
        assert_wiki(&located.link, "target", None, Some("some alias"));
    }

    #[test]
    fn wiki_multi_word_with_alias() {
        let located = sole_link("See [[some target|alias]].");
        assert_wiki(&located.link, "some target", None, Some("alias"));
    }

    #[test]
    fn wiki_with_heading_and_alias() {
        let located = sole_link("See [[target#heading|alias]].");
        assert_wiki(&located.link, "target", Some("heading"), Some("alias"));
    }

    #[test]
    fn markdown_link() {
        let located = sole_link("See [some text](https://example.com).");
        assert_md(&located.link, "some text", "https://example.com");
    }

    #[test]
    fn embed_basic() {
        let located = sole_link("![[image.png]]");
        assert_embed(&located.link, "image.png", None, None);
    }

    #[test]
    fn embed_with_heading_and_alias() {
        let located = sole_link("![[note#section|caption]]");
        assert_embed(&located.link, "note", Some("section"), Some("caption"));
    }

    #[test]
    fn links_inside_fenced_code_block_excluded() {
        let found = parse_links("Before.\n```\n[[hidden]]\n```\nAfter.");
        assert!(found.is_empty(), "expected no links, got {}", found.len());
    }

    #[test]
    fn links_inside_inline_code_excluded() {
        let found = parse_links("Text `[[hidden]]` more.");
        assert!(found.is_empty(), "expected no links, got {}", found.len());
    }

    #[test]
    fn mixed_content() {
        let found = parse_links("[[wiki]] and [md](url) and ![[embed]]");
        assert_eq!(found.len(), 3);
        assert_wiki(&found[0].link, "wiki", None, None);
        assert_md(&found[1].link, "md", "url");
        assert_embed(&found[2].link, "embed", None, None);
    }

    #[test]
    fn empty_content() {
        assert!(parse_links("").is_empty());
    }

    #[test]
    fn location_first_line() {
        // The 10-character link occupies columns 0..10 of line 1.
        let located = sole_link("[[target]]");
        assert_location(&located, 1, 0, 10);
    }

    #[test]
    fn location_with_prefix() {
        // "See " pushes the link's start to column 4.
        let found = parse_links("See [[target]].");
        assert_location(&found[0], 1, 4, 14);
    }

    #[test]
    fn location_second_line() {
        let located = sole_link("First line.\n[[target]]");
        assert_location(&located, 2, 0, 10);
    }

    #[test]
    fn location_markdown_link() {
        // "[text](url)" is 11 characters long.
        let found = parse_links("[text](url)");
        assert_location(&found[0], 1, 0, 11);
    }

    #[test]
    fn note_links_delegates() {
        let note = Note::parse("/vault/note.md", "See [[target]] and [text](url).");
        assert_eq!(note.links.len(), 2);
        assert_wiki(&note.links[0].link, "target", None, None);
        assert_md(&note.links[1].link, "text", "url");
    }

    #[test]
    fn note_links_location_offset_by_frontmatter() {
        // Lines 1-3 are the frontmatter ("---", "title: T", "---"), so the
        // body's "[[target]]" is expected on line 4.
        let raw = "---\ntitle: T\n---\n[[target]]";
        let note = Note::parse("/vault/note.md", raw);
        assert_eq!(note.links.len(), 1);
        let position = &note.links[0].location;
        assert_eq!(position.line, 4);
        assert_eq!(position.col_start, 0);
        assert_eq!(position.col_end, 10);
    }
}