Skip to main content

drft/parsers/
markdown.rs

1use super::{ParseResult, Parser};
2use pulldown_cmark::{Event, LinkType, Options, Parser as CmarkParser, Tag, TagEnd};
3
4/// Built-in markdown parser. Extracts inline/reference/autolinks and images.
5pub struct MarkdownParser {
6    /// File routing filter. None = receives all File nodes.
7    pub file_filter: Option<globset::GlobSet>,
8}
9
10impl Parser for MarkdownParser {
11    fn name(&self) -> &str {
12        "markdown"
13    }
14
15    fn matches(&self, path: &str) -> bool {
16        match &self.file_filter {
17            Some(set) => set.is_match(path),
18            None => true, // No filter = receives all File nodes
19        }
20    }
21
22    fn parse(&self, _path: &str, content: &str) -> ParseResult {
23        ParseResult {
24            links: extract_markdown_links(content),
25            metadata: None,
26        }
27    }
28}
29
30fn extract_markdown_links(content: &str) -> Vec<String> {
31    let mut links = Vec::new();
32    let mut options = Options::empty();
33    options.insert(Options::ENABLE_STRIKETHROUGH);
34    let parser = CmarkParser::new_ext(content, options);
35
36    let mut in_code_block = false;
37
38    for event in parser {
39        match event {
40            Event::Start(Tag::CodeBlock(_)) => in_code_block = true,
41            Event::End(TagEnd::CodeBlock) => in_code_block = false,
42            Event::Start(Tag::Link {
43                link_type,
44                dest_url,
45                ..
46            }) if !in_code_block => {
47                if link_type == LinkType::Email {
48                    continue;
49                }
50                let link = dest_url.trim();
51                if !link.is_empty() {
52                    links.push(link.to_string());
53                }
54            }
55            Event::Start(Tag::Image { dest_url, .. }) if !in_code_block => {
56                let link = dest_url.trim();
57                if !link.is_empty() {
58                    links.push(link.to_string());
59                }
60            }
61            _ => {}
62        }
63    }
64
65    links
66}
67
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs an unfiltered `MarkdownParser` over `content` and returns its links.
    fn links_in(content: &str) -> Vec<String> {
        MarkdownParser { file_filter: None }
            .parse("test.md", content)
            .links
    }

    #[test]
    fn extracts_inline_links() {
        assert_eq!(
            links_in("[setup](setup.md) and [faq](faq.md)"),
            ["setup.md", "faq.md"]
        );
    }

    #[test]
    fn preserves_fragments() {
        // Parser emits raw targets; graph builder strips fragments
        assert_eq!(
            links_in("[setup](setup.md#installation)"),
            ["setup.md#installation"]
        );
    }

    #[test]
    fn emits_external_urls() {
        assert_eq!(
            links_in("[google](https://google.com) and [local](setup.md)"),
            ["https://google.com", "setup.md"]
        );
    }

    #[test]
    fn emits_anchor_only() {
        // Parser emits raw targets; graph builder filters anchor-only links
        assert_eq!(
            links_in("[section](#heading) and [local](setup.md)"),
            ["#heading", "setup.md"]
        );
    }

    #[test]
    fn skips_email_links() {
        let found = links_in("Contact (<user@example.com>)");
        assert!(found.is_empty());
    }

    #[test]
    fn emits_mailto_links() {
        // mailto: from inline syntax is emitted raw; graph builder filters
        assert_eq!(
            links_in("[email](mailto:user@example.com) and [local](setup.md)"),
            ["mailto:user@example.com", "setup.md"]
        );
    }

    #[test]
    fn extracts_image_links() {
        assert_eq!(links_in("![diagram](assets/arch.png)"), ["assets/arch.png"]);
    }

    #[test]
    fn extracts_reference_links() {
        assert_eq!(links_in("[setup][ref]\n\n[ref]: setup.md\n"), ["setup.md"]);
    }

    #[test]
    fn no_filter_matches_everything() {
        let unfiltered = MarkdownParser { file_filter: None };
        for path in ["index.md", "main.rs", "docs/guide.md"] {
            assert!(unfiltered.matches(path));
        }
    }

    #[test]
    fn file_filter_restricts_matching() {
        let mut globs = globset::GlobSetBuilder::new();
        for pattern in ["*.md", "*.mdx"] {
            globs.add(globset::Glob::new(pattern).unwrap());
        }
        let filtered = MarkdownParser {
            file_filter: Some(globs.build().unwrap()),
        };

        assert!(filtered.matches("index.md"));
        assert!(filtered.matches("page.mdx"));
        assert!(!filtered.matches("main.rs"));
    }
}