1use super::{ParseResult, Parser};
2use pulldown_cmark::{Event, LinkType, Options, Parser as CmarkParser, Tag, TagEnd};
3
4pub struct MarkdownParser {
6 pub file_filter: Option<globset::GlobSet>,
8}
9
10impl Parser for MarkdownParser {
11 fn name(&self) -> &str {
12 "markdown"
13 }
14
15 fn matches(&self, path: &str) -> bool {
16 match &self.file_filter {
17 Some(set) => set.is_match(path),
18 None => true, }
20 }
21
22 fn parse(&self, _path: &str, content: &str) -> ParseResult {
23 ParseResult {
24 links: extract_markdown_links(content),
25 metadata: None,
26 }
27 }
28}
29
30fn extract_markdown_links(content: &str) -> Vec<String> {
31 let mut links = Vec::new();
32 let mut options = Options::empty();
33 options.insert(Options::ENABLE_STRIKETHROUGH);
34 let parser = CmarkParser::new_ext(content, options);
35
36 let mut in_code_block = false;
37
38 for event in parser {
39 match event {
40 Event::Start(Tag::CodeBlock(_)) => in_code_block = true,
41 Event::End(TagEnd::CodeBlock) => in_code_block = false,
42 Event::Start(Tag::Link {
43 link_type,
44 dest_url,
45 ..
46 }) if !in_code_block => {
47 if link_type == LinkType::Email {
48 continue;
49 }
50 let link = dest_url.trim();
51 if !link.is_empty() {
52 links.push(link.to_string());
53 }
54 }
55 Event::Start(Tag::Image { dest_url, .. }) if !in_code_block => {
56 let link = dest_url.trim();
57 if !link.is_empty() {
58 links.push(link.to_string());
59 }
60 }
61 _ => {}
62 }
63 }
64
65 links
66}
67
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs an unfiltered `MarkdownParser` over `content` and returns its links.
    fn parse(content: &str) -> Vec<String> {
        let parser = MarkdownParser { file_filter: None };
        parser.parse("test.md", content).links
    }

    #[test]
    fn extracts_inline_links() {
        let links = parse("[setup](setup.md) and [faq](faq.md)");
        assert_eq!(links.len(), 2);
        assert_eq!(links[0], "setup.md");
        assert_eq!(links[1], "faq.md");
    }

    #[test]
    fn preserves_fragments() {
        let links = parse("[setup](setup.md#installation)");
        assert_eq!(links.len(), 1);
        assert_eq!(links[0], "setup.md#installation");
    }

    #[test]
    fn emits_external_urls() {
        let links = parse("[google](https://google.com) and [local](setup.md)");
        assert_eq!(links.len(), 2);
        assert_eq!(links[0], "https://google.com");
        assert_eq!(links[1], "setup.md");
    }

    #[test]
    fn emits_anchor_only() {
        let links = parse("[section](#heading) and [local](setup.md)");
        assert_eq!(links.len(), 2);
        assert_eq!(links[0], "#heading");
        assert_eq!(links[1], "setup.md");
    }

    #[test]
    fn skips_email_links() {
        let links = parse("Contact (<user@example.com>)");
        assert!(links.is_empty());
    }

    #[test]
    fn emits_mailto_links() {
        // An explicit `mailto:` destination is an ordinary inline link, not an
        // email autolink, so it is reported.
        let links = parse("[email](mailto:user@example.com) and [local](setup.md)");
        assert_eq!(links.len(), 2);
        assert_eq!(links[0], "mailto:user@example.com");
        assert_eq!(links[1], "setup.md");
    }

    #[test]
    fn extracts_image_links() {
        // The input must actually contain an image; an empty document yields
        // no links and would make the assertions below fail.
        let links = parse("![architecture](assets/arch.png)");
        assert_eq!(links.len(), 1);
        assert_eq!(links[0], "assets/arch.png");
    }

    #[test]
    fn extracts_reference_links() {
        let links = parse("[setup][ref]\n\n[ref]: setup.md\n");
        assert_eq!(links.len(), 1);
        assert_eq!(links[0], "setup.md");
    }

    #[test]
    fn ignores_links_in_fenced_code_blocks() {
        let links = parse("```\n[setup](setup.md)\n```\n\n[faq](faq.md)\n");
        assert_eq!(links.len(), 1);
        assert_eq!(links[0], "faq.md");
    }

    #[test]
    fn no_filter_matches_everything() {
        let parser = MarkdownParser { file_filter: None };
        assert!(parser.matches("index.md"));
        assert!(parser.matches("main.rs"));
        assert!(parser.matches("docs/guide.md"));
    }

    #[test]
    fn file_filter_restricts_matching() {
        let mut builder = globset::GlobSetBuilder::new();
        builder.add(globset::Glob::new("*.md").unwrap());
        builder.add(globset::Glob::new("*.mdx").unwrap());
        let parser = MarkdownParser {
            file_filter: Some(builder.build().unwrap()),
        };
        assert!(parser.matches("index.md"));
        assert!(parser.matches("page.mdx"));
        assert!(!parser.matches("main.rs"));
    }
}