treemd/parser/
links.rs

1//! Link detection and parsing from markdown content.
2//!
3//! This module provides functionality to extract and parse various types of links
4//! from markdown documents, including relative file links, anchor links, wikilinks,
5//! and external URLs.
6//!
7//! All parsing is delegated to `turbovault-parser` for unified, code-block-aware
8//! link extraction.
9
10use std::path::PathBuf;
11use turbovault_parser::LinkType;
12
13/// Represents a link found in markdown content.
14#[derive(Debug, Clone, PartialEq)]
15pub struct Link {
16    /// Display text of the link
17    pub text: String,
18    /// The target this link points to
19    pub target: LinkTarget,
20    /// Byte offset in the source content where the link starts
21    pub offset: usize,
22}
23
/// The kinds of destination a [`Link`] can point to.
///
/// Derives `Eq` and `Hash` in addition to `PartialEq` so targets can be
/// de-duplicated or used as keys in hash-based collections; every payload
/// type (`String`, `PathBuf`, `Option<String>`) supports both.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum LinkTarget {
    /// Internal anchor link to a heading in the current document
    /// (e.g., `#installation`). The stored string omits the leading `#`.
    Anchor(String),

    /// Relative file path, optionally with an anchor
    /// (e.g., `./docs/api.md#usage`).
    RelativeFile {
        /// Path portion of the link (everything before any `#`).
        path: PathBuf,
        /// Heading anchor inside the target file, without the `#`.
        anchor: Option<String>,
    },

    /// Wikilink format used in Obsidian and other PKM tools
    /// (e.g., `[[filename]]` or `[[filename|alias]]`).
    WikiLink {
        /// Name of the note or file being linked to.
        target: String,
        /// Optional display text given after the `|` separator.
        alias: Option<String>,
    },

    /// External URL (e.g., `https://example.com`).
    External(String),
}
45
46impl LinkTarget {
47    /// Get a string representation of the link target for display/search
48    pub fn as_str(&self) -> String {
49        match self {
50            LinkTarget::Anchor(a) => format!("#{}", a),
51            LinkTarget::RelativeFile { path, anchor } => {
52                if let Some(a) = anchor {
53                    format!("{}#{}", path.display(), a)
54                } else {
55                    path.display().to_string()
56                }
57            }
58            LinkTarget::WikiLink { target, alias } => {
59                if let Some(a) = alias {
60                    format!("[[{}|{}]]", target, a)
61                } else {
62                    format!("[[{}]]", target)
63                }
64            }
65            LinkTarget::External(url) => url.clone(),
66        }
67    }
68}
69
70impl Link {
71    /// Create a new link.
72    pub fn new(text: String, target: LinkTarget, offset: usize) -> Self {
73        Self {
74            text,
75            target,
76            offset,
77        }
78    }
79}
80
81/// Extract all links from markdown content.
82///
83/// This function uses turbovault-parser to extract all link types with
84/// proper code-block awareness. Links inside fenced code blocks or
85/// inline code are correctly excluded.
86///
87/// Supported link types:
88/// - Standard markdown links: `[text](url)`
89/// - Wikilinks: `[[target]]` or `[[target|alias]]`
90/// - Anchor links: `[text](#section)`
91/// - External links: `[text](https://...)`
92///
93/// # Arguments
94///
95/// * `content` - The markdown content to parse
96///
97/// # Returns
98///
99/// A vector of `Link` structs representing all links found in the content.
100pub fn extract_links(content: &str) -> Vec<Link> {
101    let mut links = Vec::new();
102
103    // Extract standard markdown links via turbovault-parser
104    for md_link in turbovault_parser::parse_markdown_links(content) {
105        let text = md_link
106            .display_text
107            .clone()
108            .unwrap_or_else(|| md_link.target.clone());
109        let target = convert_link_type(&md_link.type_, &md_link.target);
110
111        links.push(Link::new(text, target, md_link.position.offset));
112    }
113
114    // Extract wikilinks via turbovault-parser
115    for wikilink in turbovault_parser::parse_wikilinks(content) {
116        let target = wikilink.target.clone();
117        let alias = wikilink.display_text.clone();
118        let display_text = alias.clone().unwrap_or_else(|| target.clone());
119
120        links.push(Link::new(
121            display_text,
122            LinkTarget::WikiLink { target, alias },
123            wikilink.position.offset,
124        ));
125    }
126
127    // Sort by offset for consistent ordering
128    links.sort_by_key(|l| l.offset);
129
130    links
131}
132
133/// Convert turbovault LinkType to treemd LinkTarget.
134fn convert_link_type(link_type: &LinkType, target: &str) -> LinkTarget {
135    match link_type {
136        LinkType::Anchor => {
137            // Pure anchor: #section
138            let anchor = target.strip_prefix('#').unwrap_or(target);
139            LinkTarget::Anchor(anchor.to_string())
140        }
141        LinkType::ExternalLink => LinkTarget::External(target.to_string()),
142        LinkType::HeadingRef => {
143            // File with anchor: file.md#section
144            if let Some((path, anchor)) = target.split_once('#') {
145                LinkTarget::RelativeFile {
146                    path: PathBuf::from(path),
147                    anchor: Some(anchor.to_string()),
148                }
149            } else {
150                // Shouldn't happen for HeadingRef, but handle gracefully
151                LinkTarget::RelativeFile {
152                    path: PathBuf::from(target),
153                    anchor: None,
154                }
155            }
156        }
157        LinkType::MarkdownLink => {
158            // Relative file without anchor
159            LinkTarget::RelativeFile {
160                path: PathBuf::from(target),
161                anchor: None,
162            }
163        }
164        LinkType::WikiLink | LinkType::Embed | LinkType::BlockRef => {
165            // These shouldn't come through parse_markdown_links, but handle anyway
166            LinkTarget::WikiLink {
167                target: target.to_string(),
168                alias: None,
169            }
170        }
171    }
172}
173
#[cfg(test)]
mod tests {
    use super::*;

    /// Extract links from `md`, assert exactly one was found, and return it.
    fn sole_link(md: &str) -> Link {
        let mut links = extract_links(md);
        assert_eq!(links.len(), 1);
        links.remove(0)
    }

    /// Unpack a `RelativeFile` target, panicking on any other variant.
    fn relative_file_parts(link_target: &LinkTarget) -> (&PathBuf, Option<&str>) {
        match link_target {
            LinkTarget::RelativeFile { path, anchor } => (path, anchor.as_deref()),
            _ => panic!("Expected RelativeFile link"),
        }
    }

    /// Unpack a `WikiLink` target, panicking on any other variant.
    fn wikilink_parts(link_target: &LinkTarget) -> (&str, Option<&str>) {
        match link_target {
            LinkTarget::WikiLink { target, alias } => (target.as_str(), alias.as_deref()),
            _ => panic!("Expected WikiLink"),
        }
    }

    /// Targets of every wikilink in `links`, in order of appearance.
    fn wikilink_targets(links: &[Link]) -> Vec<&str> {
        links
            .iter()
            .filter_map(|link| match &link.target {
                LinkTarget::WikiLink { target, .. } => Some(target.as_str()),
                _ => None,
            })
            .collect()
    }

    #[test]
    fn test_extract_anchor_link() {
        let link = sole_link("See [Installation](#installation) for details.");

        assert_eq!(link.text, "Installation");
        assert_eq!(link.target, LinkTarget::Anchor("installation".to_string()));
    }

    #[test]
    fn test_extract_relative_file_link() {
        let link = sole_link("Check [API docs](./docs/api.md) for more.");

        assert_eq!(link.text, "API docs");
        let (path, anchor) = relative_file_parts(&link.target);
        assert_eq!(*path, PathBuf::from("./docs/api.md"));
        assert_eq!(anchor, None);
    }

    #[test]
    fn test_extract_relative_file_link_with_anchor() {
        let link = sole_link("See [usage guide](../guide.md#usage) here.");

        assert_eq!(link.text, "usage guide");
        let (path, anchor) = relative_file_parts(&link.target);
        assert_eq!(*path, PathBuf::from("../guide.md"));
        assert_eq!(anchor, Some("usage"));
    }

    #[test]
    fn test_extract_external_link() {
        let link = sole_link("Visit [GitHub](https://github.com) now.");

        assert_eq!(link.text, "GitHub");
        assert_eq!(
            link.target,
            LinkTarget::External("https://github.com".to_string())
        );
    }

    #[test]
    fn test_extract_wikilink_simple() {
        let link = sole_link("See [[README]] for info.");

        assert_eq!(link.text, "README");
        let (target, alias) = wikilink_parts(&link.target);
        assert_eq!(target, "README");
        assert_eq!(alias, None);
    }

    #[test]
    fn test_extract_wikilink_with_alias() {
        let link = sole_link("Check [[README.md|readme file]] here.");

        assert_eq!(link.text, "readme file");
        let (target, alias) = wikilink_parts(&link.target);
        assert_eq!(target, "README.md");
        assert_eq!(alias, Some("readme file"));
    }

    #[test]
    fn test_extract_multiple_links() {
        let md = r#"
# Documentation

See [Installation](#installation) first.
Then check [API docs](./api.md) and [[contributing]].
Visit [GitHub](https://github.com/user/repo) for source.
"#;
        let links = extract_links(md);

        assert_eq!(links.len(), 4);

        // Results are expected in document order (sorted by offset).
        let texts: Vec<&str> = links.iter().map(|link| link.text.as_str()).collect();
        assert_eq!(texts, ["Installation", "API docs", "contributing", "GitHub"]);

        assert!(matches!(links[0].target, LinkTarget::Anchor(_)));
        assert!(matches!(links[1].target, LinkTarget::RelativeFile { .. }));
        assert!(matches!(links[2].target, LinkTarget::WikiLink { .. }));
        assert!(matches!(links[3].target, LinkTarget::External(_)));
    }

    #[test]
    fn test_empty_content() {
        assert!(extract_links("").is_empty());
    }

    #[test]
    fn test_no_links() {
        assert!(extract_links("This is just plain text with no links.").is_empty());
    }

    #[test]
    fn test_malformed_wikilink() {
        // An unterminated `[[` must not be reported as a link.
        assert!(extract_links("This has [[incomplete wikilink").is_empty());
    }

    #[test]
    fn test_wikilinks_excluded_from_code_blocks() {
        // A wikilink inside a fenced code block is not a real link.
        let md = r#"
# Test Document

[[Valid Link]] outside code block.

```rust
let x = "[[Fake Inside Code]]";
```

[[Another Valid]] after code block.
"#;
        let links = extract_links(md);
        let targets = wikilink_targets(&links);

        // Only the two wikilinks outside the fence should be reported.
        assert_eq!(
            targets.len(),
            2,
            "Should find exactly 2 wikilinks (not the one in code block)"
        );

        assert!(targets.contains(&"Valid Link"));
        assert!(targets.contains(&"Another Valid"));
        assert!(!targets.contains(&"Fake Inside Code"));
    }

    #[test]
    fn test_wikilinks_excluded_from_inline_code() {
        // A wikilink wrapped in backticks is code, not a link.
        let links = extract_links("This is `[[not a link]]` but [[this is]] a link.");

        assert_eq!(
            wikilink_targets(&links).len(),
            1,
            "Should find exactly 1 wikilink (not the one in inline code)"
        );
    }

    #[test]
    fn test_markdown_links_excluded_from_code_blocks() {
        // Standard markdown links inside a fenced code block are ignored too.
        let md = r#"
[Valid](https://example.com) outside.

```markdown
[Fake](https://fake.com) inside code
```

[Also Valid](./file.md) after.
"#;
        let links = extract_links(md);

        // Two real links remain once the fenced one is skipped.
        assert_eq!(links.len(), 2);
        assert!(matches!(links[0].target, LinkTarget::External(_)));
        assert!(matches!(links[1].target, LinkTarget::RelativeFile { .. }));
    }

    #[test]
    fn test_link_types_correctly_classified() {
        let md = r#"
[anchor](#section)
[external](https://example.com)
[file](./docs/api.md)
[file with anchor](./docs/api.md#usage)
[[wikilink]]
"#;
        let links = extract_links(md);

        assert_eq!(links.len(), 5);

        // One link per line, so index N corresponds to the Nth line above.
        let targets: Vec<&LinkTarget> = links.iter().map(|link| &link.target).collect();
        let api_doc = PathBuf::from("./docs/api.md");

        assert!(
            matches!(targets[0], LinkTarget::Anchor(a) if a == "section"),
            "Expected Anchor"
        );
        assert!(
            matches!(targets[1], LinkTarget::External(u) if u == "https://example.com"),
            "Expected External"
        );
        assert!(
            matches!(targets[2], LinkTarget::RelativeFile { path, anchor: None } if path == &api_doc),
            "Expected RelativeFile without anchor"
        );
        assert!(
            matches!(targets[3], LinkTarget::RelativeFile { path, anchor: Some(a) } if path == &api_doc && a == "usage"),
            "Expected RelativeFile with anchor"
        );
        assert!(
            matches!(targets[4], LinkTarget::WikiLink { target, .. } if target == "wikilink"),
            "Expected WikiLink"
        );
    }
}