quickmark_core/rules/
md011.rs

1use once_cell::sync::Lazy;
2use regex::Regex;
3use std::rc::Rc;
4use tree_sitter::Node;
5
6use crate::{
7    linter::{range_from_tree_sitter, RuleViolation},
8    rules::{Context, Rule, RuleLinter, RuleType},
9};
10
11static REVERSED_LINK_REGEX: Lazy<Regex> =
12    Lazy::new(|| Regex::new(r"(^|[^\\])\(([^()]+)\)\[([^\]^][^\]]*)\]").unwrap());
13
14static INLINE_CODE_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"`([^`]+)`").unwrap());
15
16/// MD011 Reversed Link Syntax Rule Linter
17///
18/// **SINGLE-USE CONTRACT**: This linter is designed for one-time use only.
19/// After processing a document (via feed() calls and finalize()), the linter
20/// should be discarded. The violations state is not cleared between uses.
21pub(crate) struct MD011Linter {
22    context: Rc<Context>,
23    violations: Vec<RuleViolation>,
24    line_offsets: Vec<usize>,
25}
26
27impl MD011Linter {
28    pub fn new(context: Rc<Context>) -> Self {
29        let line_offsets = context
30            .lines
31            .borrow()
32            .iter()
33            .scan(0, |state, line| {
34                let offset = *state;
35                // Assuming LF line endings. The +1 accounts for the newline character.
36                *state += line.len() + 1;
37                Some(offset)
38            })
39            .collect();
40
41        Self {
42            context,
43            violations: Vec::new(),
44            line_offsets,
45        }
46    }
47
48    /// Analyze all lines and store all violations for reporting via finalize().
49    /// Context cache is already initialized by MultiRuleLinter.
50    fn analyze_all_lines(&mut self) {
51        let lines = self.context.lines.borrow();
52        let excluded_lines = self.get_excluded_lines();
53
54        for (line_index, line) in lines.iter().enumerate() {
55            let line_number = line_index + 1;
56
57            if excluded_lines.contains(&line_number) {
58                continue;
59            }
60
61            // Find all reversed link patterns in the line and create violations.
62            for caps in REVERSED_LINK_REGEX.captures_iter(line) {
63                let full_match = caps.get(0).unwrap();
64                let pre_char = caps.get(1).unwrap().as_str();
65                let link_text = caps.get(2).unwrap().as_str();
66                let link_destination = caps.get(3).unwrap().as_str();
67
68                // Skip if either link text or destination ends with backslash (escaped)
69                if link_text.ends_with("\\") || link_destination.ends_with("\\") {
70                    continue;
71                }
72
73                // Manual negative lookahead: skip if followed by opening parenthesis
74                let match_end_byte = full_match.end();
75                if line.as_bytes().get(match_end_byte) == Some(&b'(') {
76                    continue;
77                }
78
79                // Calculate position accounting for pre_char
80                let match_start_byte = full_match.start() + pre_char.len();
81                let match_length_byte = full_match.len() - pre_char.len();
82
83                // Check if this match overlaps with any inline code spans
84                if self.overlaps_with_inline_code(line_index, match_start_byte, match_length_byte) {
85                    continue;
86                }
87
88                let violation =
89                    self.create_violation(line_index, match_start_byte, match_length_byte);
90                self.violations.push(violation);
91            }
92        }
93    }
94
95    /// Returns a set of line numbers that should be excluded from checking.
96    /// This includes code blocks.
97    fn get_excluded_lines(&self) -> std::collections::HashSet<usize> {
98        let node_cache = self.context.node_cache.borrow();
99
100        ["indented_code_block", "fenced_code_block"]
101            .iter()
102            .filter_map(|block_type| node_cache.get(*block_type))
103            .flatten()
104            .flat_map(|node_info| (node_info.line_start + 1)..=(node_info.line_end + 1))
105            .collect()
106    }
107
108    /// Check if a match overlaps with any inline code spans on the same line.
109    fn overlaps_with_inline_code(
110        &self,
111        line_index: usize,
112        match_start: usize,
113        match_length: usize,
114    ) -> bool {
115        let lines = self.context.lines.borrow();
116        if let Some(line) = lines.get(line_index) {
117            let match_end = match_start + match_length;
118
119            for code_match in INLINE_CODE_REGEX.find_iter(line) {
120                let code_start = code_match.start();
121                let code_end = code_match.end();
122
123                if match_start < code_end && match_end > code_start {
124                    return true;
125                }
126            }
127        }
128
129        false
130    }
131
132    /// Creates a RuleViolation for a reversed link at the specified position.
133    fn create_violation(
134        &self,
135        line_index: usize,
136        match_start: usize,
137        match_length: usize,
138    ) -> RuleViolation {
139        let message = "Reversed link syntax".to_string();
140        let line_start_byte = self.line_offsets[line_index];
141        let start_byte = line_start_byte + match_start;
142        let end_byte = line_start_byte + match_start + match_length;
143
144        RuleViolation::new(
145            &MD011,
146            message,
147            self.context.file_path.clone(),
148            range_from_tree_sitter(&tree_sitter::Range {
149                start_byte,
150                end_byte,
151                start_point: tree_sitter::Point {
152                    row: line_index,
153                    column: match_start,
154                },
155                end_point: tree_sitter::Point {
156                    row: line_index,
157                    column: match_start + match_length,
158                },
159            }),
160        )
161    }
162}
163
164impl RuleLinter for MD011Linter {
165    fn feed(&mut self, node: &Node) {
166        // This rule is line-based and only needs to run once.
167        // We trigger the analysis on seeing the top-level `document` node.
168        if node.kind() == "document" {
169            self.analyze_all_lines();
170        }
171    }
172
173    fn finalize(&mut self) -> Vec<RuleViolation> {
174        std::mem::take(&mut self.violations)
175    }
176}
177
178pub const MD011: Rule = Rule {
179    id: "MD011",
180    alias: "no-reversed-links",
181    tags: &["links"],
182    description: "Reversed link syntax",
183    rule_type: RuleType::Line,
184    required_nodes: &["indented_code_block", "fenced_code_block"],
185    new_linter: |context| Box::new(MD011Linter::new(context)),
186};
187
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::RuleSeverity;
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_rules;

    /// Configuration with MD011 reported as an error and the heading rules
    /// switched off.
    fn test_config() -> crate::config::QuickmarkConfig {
        test_config_with_rules(vec![
            ("no-reversed-links", RuleSeverity::Error),
            ("heading-style", RuleSeverity::Off),
            ("heading-increment", RuleSeverity::Off),
        ])
    }

    /// Lints `input` as `test.md` with `test_config()`, asserts that every
    /// reported violation is an MD011 "Reversed link syntax" violation, and
    /// returns how many violations were reported.
    fn count_reversed_links(input: &str) -> usize {
        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
        let violations = linter.analyze();

        for violation in &violations {
            assert_eq!("MD011", violation.rule().id);
            assert_eq!("Reversed link syntax", violation.message());
        }

        violations.len()
    }

    #[test]
    fn test_basic_reversed_link_violation() {
        assert_eq!(
            1,
            count_reversed_links("This is a (reversed)[link] example.")
        );
    }

    #[test]
    fn test_no_violations_correct_syntax() {
        assert_eq!(0, count_reversed_links("This is a [correct](link) example."));
    }

    #[test]
    fn test_multiple_reversed_links() {
        assert_eq!(
            2,
            count_reversed_links("Here is (one)[link] and (another)[example].")
        );
    }

    #[test]
    fn test_escaped_reversed_link_not_flagged() {
        assert_eq!(
            0,
            count_reversed_links(r"This is an escaped \(not)[a-link] example.")
        );
    }

    #[test]
    fn test_link_text_ending_with_backslash() {
        assert_eq!(
            0,
            count_reversed_links(r"(text\)[link] should not be flagged.")
        );
    }

    #[test]
    fn test_link_destination_ending_with_backslash() {
        assert_eq!(
            0,
            count_reversed_links(r"(text)[link\\] should not be flagged.")
        );
    }

    #[test]
    fn test_reversed_link_in_fenced_code_block_ignored() {
        let input = r###"```
This (reversed)[link] should be ignored in code block.
```"###;

        assert_eq!(0, count_reversed_links(input));
    }

    #[test]
    fn test_reversed_link_in_indented_code_block_ignored() {
        assert_eq!(
            0,
            count_reversed_links(
                "    This (reversed)[link] should be ignored in indented code block."
            )
        );
    }

    #[test]
    fn test_mixed_content_with_some_violations() {
        let input = r###"# Heading

This is a (reversed)[link] example.

```
This (code)[link] should be ignored.
```

And another [correct](link).

Another (bad)[example] here."###;

        // Only the two reversed links outside the code block count.
        assert_eq!(2, count_reversed_links(input));
    }

    #[test]
    fn test_markdown_extra_footnote_style() {
        // Footnote references like [^1] should not be flagged.
        assert_eq!(
            0,
            count_reversed_links("For (example)[^1] this should not be flagged.")
        );
    }

    #[test]
    fn test_complex_urls() {
        assert_eq!(
            1,
            count_reversed_links(
                "Visit (GitHub)[https://github.com/user/repo#section] for more info."
            )
        );
    }

    #[test]
    fn test_at_start_of_line() {
        assert_eq!(1, count_reversed_links("(reversed)[link] at start of line."));
    }

    #[test]
    fn test_nested_parentheses_not_matched() {
        // The regex excludes nested parentheses.
        assert_eq!(
            0,
            count_reversed_links(
                "This (text (with parens))[link] should not match because of nested parens."
            )
        );
    }

    #[test]
    fn test_link_destination_starting_with_caret_or_bracket() {
        // Link destinations starting with ] or ^ should not match.
        assert_eq!(0, count_reversed_links("(text)[^footnote] should not match."));
        assert_eq!(0, count_reversed_links("(text)[]bracket] should not match."));
    }

    #[test]
    fn test_followed_by_parenthesis_not_matched() {
        // A pattern followed by an opening parenthesis should not match.
        assert_eq!(
            0,
            count_reversed_links("(text)[link](more) should not match.")
        );
    }

    #[test]
    fn test_reversed_link_in_inline_code_ignored() {
        assert_eq!(
            0,
            count_reversed_links("This is `a (reversed)[link]` in inline code.")
        );
    }

    #[test]
    fn test_reversed_link_partially_in_inline_code_ignored() {
        assert_eq!(
            0,
            count_reversed_links("This is `a (reversed`)[link] in inline code.")
        );
    }
}