quickmark_core/rules/
md039.rs

1use std::rc::Rc;
2
3use once_cell::sync::Lazy;
4use regex::Regex;
5use tree_sitter::Node;
6
7use crate::{
8    linter::{range_from_tree_sitter, RuleViolation},
9    rules::{Context, Rule, RuleLinter, RuleType},
10};
11
12// Using once_cell::sync::Lazy for safe, one-time compilation of regexes.
13// Regular inline links: [text](url) - but NOT images ![text](url)
14static RE_INLINE_LINK: Lazy<Regex> =
15    Lazy::new(|| Regex::new(r"(?:^|[^!])\[([^\]]*)\]\(([^)]+)\)").unwrap());
16
17// Reference links: [text][ref] - but NOT images ![text][ref]
18static RE_REF_LINK: Lazy<Regex> =
19    Lazy::new(|| Regex::new(r"(?:^|[^!])\[([^\]]*)\]\[([^\]]+)\]").unwrap());
20
21// Collapsed reference links: [text][] - but NOT images ![text][]
22static RE_COLLAPSED_REF_LINK: Lazy<Regex> =
23    Lazy::new(|| Regex::new(r"(?:^|[^!])\[([^\]]+)\]\[\]").unwrap());
24
25/// MD039 - Spaces inside link text
26///
27/// This rule checks for unnecessary spaces at the beginning or end of link text.
28pub(crate) struct MD039Linter {
29    context: Rc<Context>,
30    violations: Vec<RuleViolation>,
31}
32
33impl MD039Linter {
34    pub fn new(context: Rc<Context>) -> Self {
35        Self {
36            context,
37            violations: Vec::new(),
38        }
39    }
40}
41
42impl RuleLinter for MD039Linter {
43    fn feed(&mut self, node: &Node) {
44        // Process different possible link node types
45        if node.kind() == "link" {
46            self.check_link_for_spaces(node);
47        } else if node.kind() == "inline" {
48            // Check if this inline node contains links
49            self.check_inline_for_links(node);
50        }
51    }
52
53    fn finalize(&mut self) -> Vec<RuleViolation> {
54        std::mem::take(&mut self.violations)
55    }
56}
57
58impl MD039Linter {
59    fn check_inline_for_links(&mut self, inline_node: &Node) {
60        // Look for links within inline content using the text
61        let link_text = {
62            let document_content = self.context.document_content.borrow();
63            inline_node
64                .utf8_text(document_content.as_bytes())
65                .unwrap_or("")
66                .to_string()
67        };
68
69        // Parse the inline content for markdown links
70        // Look for patterns like [text](url), [text][ref], [ref][], [ref]
71        self.check_text_for_link_patterns(&link_text, inline_node);
72    }
73
74    fn check_text_for_link_patterns(&mut self, text: &str, node: &Node) {
75        for caps in RE_INLINE_LINK.captures_iter(text) {
76            if let Some(label_match) = caps.get(1) {
77                let label_text = label_match.as_str();
78                self.check_label_for_spaces(label_text, node);
79            }
80        }
81
82        for caps in RE_REF_LINK.captures_iter(text) {
83            if let Some(label_match) = caps.get(1) {
84                let label_text = label_match.as_str();
85                self.check_label_for_spaces(label_text, node);
86            }
87        }
88
89        for caps in RE_COLLAPSED_REF_LINK.captures_iter(text) {
90            if let Some(label_match) = caps.get(1) {
91                let label_text = label_match.as_str();
92                self.check_label_for_spaces(label_text, node);
93            }
94        }
95
96        // Shortcut reference links: [text] - but only if there's a matching reference definition
97        // We need to be careful here to not match arbitrary brackets
98        // For now, let's only process shortcut links in specific contexts or skip them
99        // since they require document-level analysis to verify the reference exists
100    }
101
102    fn check_label_for_spaces(&mut self, label_text: &str, node: &Node) {
103        // Check for leading spaces
104        if label_text.len() != label_text.trim_start().len() {
105            self.create_space_violation(node, true);
106        }
107
108        // Check for trailing spaces
109        if label_text.len() != label_text.trim_end().len() {
110            self.create_space_violation(node, false);
111        }
112    }
113
114    fn check_link_for_spaces(&mut self, link_node: &Node) {
115        // Look for the link text within the link node
116        // In tree-sitter markdown, links have different structures
117        // We need to find the text content and check for leading/trailing spaces
118
119        let link_text = {
120            let document_content = self.context.document_content.borrow();
121            link_node
122                .utf8_text(document_content.as_bytes())
123                .unwrap_or("")
124                .to_string()
125        };
126
127        // Find the bracket part [text] in the link
128        if let Some(bracket_start) = link_text.find('[') {
129            if let Some(bracket_end) = link_text.find(']') {
130                if bracket_end > bracket_start {
131                    let label_text = &link_text[bracket_start + 1..bracket_end];
132
133                    // Check for leading spaces
134                    if label_text.len() != label_text.trim_start().len() {
135                        self.create_space_violation(link_node, true);
136                    }
137
138                    // Check for trailing spaces
139                    if label_text.len() != label_text.trim_end().len() {
140                        self.create_space_violation(link_node, false);
141                    }
142                }
143            }
144        }
145    }
146
147    fn create_space_violation(&mut self, node: &Node, is_leading: bool) {
148        let space_type = if is_leading { "leading" } else { "trailing" };
149        let message = format!("Spaces inside link text ({space_type})");
150
151        self.violations.push(RuleViolation::new(
152            &MD039,
153            message,
154            self.context.file_path.clone(),
155            range_from_tree_sitter(&node.range()),
156        ));
157    }
158}
159
160pub const MD039: Rule = Rule {
161    id: "MD039",
162    alias: "no-space-in-links",
163    tags: &["whitespace", "links"],
164    description: "Spaces inside link text",
165    rule_type: RuleType::Token,
166    required_nodes: &["link", "inline"], // We need link nodes to check for spaces in link text
167    new_linter: |context| Box::new(MD039Linter::new(context)),
168};
169
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::RuleSeverity;
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_rules;

    /// Configuration that reports MD039 as an error and switches the
    /// heading-style, heading-increment and line-length rules off.
    fn test_config() -> crate::config::QuickmarkConfig {
        test_config_with_rules(vec![
            ("no-space-in-links", RuleSeverity::Error),
            ("heading-style", RuleSeverity::Off),
            ("heading-increment", RuleSeverity::Off),
            ("line-length", RuleSeverity::Off),
        ])
    }

    /// Lints `input` as `test.md` and returns the message of every violation,
    /// asserting along the way that each one was raised by MD039.
    fn lint_messages(input: &str) -> Vec<String> {
        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);

        linter
            .analyze()
            .iter()
            .map(|violation| {
                assert_eq!("MD039", violation.rule().id);
                violation.message().to_string()
            })
            .collect()
    }

    /// Asserts that `messages` holds exactly `expected` MD039 violations, all
    /// carrying the rule's base message.
    fn assert_md039_count(messages: &[String], expected: usize) {
        assert_eq!(expected, messages.len());
        for message in messages {
            assert!(message.contains("Spaces inside link text"));
        }
    }

    #[test]
    fn test_no_spaces_in_link_text() {
        let messages = lint_messages("[link text](https://example.com)");

        assert_md039_count(&messages, 0);
    }

    #[test]
    fn test_leading_space_in_link_text() {
        let messages = lint_messages("[ link text](https://example.com)");

        assert_md039_count(&messages, 1);
        assert!(messages[0].contains("leading"));
    }

    #[test]
    fn test_trailing_space_in_link_text() {
        let messages = lint_messages("[link text ](https://example.com)");

        assert_md039_count(&messages, 1);
        assert!(messages[0].contains("trailing"));
    }

    #[test]
    fn test_both_leading_and_trailing_spaces() {
        let messages = lint_messages("[ link text ](https://example.com)");

        // Should report both leading and trailing space violations
        assert_md039_count(&messages, 2);
        assert!(messages.iter().any(|message| message.contains("leading")));
        assert!(messages.iter().any(|message| message.contains("trailing")));
    }

    #[test]
    fn test_reference_link_with_spaces() {
        let messages = lint_messages("[ link text ][ref]\n\n[ref]: https://example.com");

        // Should detect spaces in reference link text
        assert_md039_count(&messages, 2);
    }

    // `[text][]` is a collapsed reference link, not a shortcut link (`[text]`).
    #[test]
    fn test_collapsed_reference_link_with_spaces() {
        let messages = lint_messages("[ link text ][]\n\n[link text]: https://example.com");

        // Should detect spaces in collapsed reference link
        assert_md039_count(&messages, 2);
    }

    #[test]
    fn test_image_not_affected() {
        let messages = lint_messages("![ image alt text ](image.jpg)");

        // Images should not be affected by this rule
        assert_md039_count(&messages, 0);
    }

    #[test]
    fn test_empty_link_text_with_spaces() {
        let messages = lint_messages("[ ](https://example.com)");

        // A whitespace-only label counts as both leading and trailing padding
        assert_md039_count(&messages, 2);
    }

    #[test]
    fn test_multiple_links() {
        let messages =
            lint_messages("[good link](url1) and [ bad link ](url2) and [another good](url3)");

        // Should only detect violations in the bad link
        assert_md039_count(&messages, 2);
    }
}