rumdl_lib/rules/
md011_no_reversed_links.rs

/// Rule MD011: No reversed link syntax
///
/// See [docs/md011.md](../../docs/md011.md) for full documentation, configuration, and examples.
4use crate::filtered_lines::FilteredLinesExt;
5use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
6use crate::utils::range_utils::calculate_match_range;
7use crate::utils::regex_cache::get_cached_regex;
8use crate::utils::skip_context::is_in_math_context;
9
/// Pattern that finds `(…)[…]` candidates for reversed links.
///
/// Capture groups:
/// 1. the start of the searched text, or the single character before `(` — which must
///    not be a backslash, so an escaped `\(` never starts a match;
/// 2. the text between the parentheses (no nested `(` or `)`);
/// 3. the text between the square brackets (no `]`).
const REVERSED_LINK_REGEX_STR: &str = r"(^|[^\\])\(([^()]+)\)\[([^\]]+)\]";
12
/// How one half of a `(…)[…]` candidate reads when deciding which half is the URL.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum LinkComponent {
    /// Unmistakably a URL: it starts with a scheme, `www.`, `mailto:`, a path prefix
    /// or a `#fragment`.
    ClearUrl,
    /// Contains a space, so it reads like link text rather than a URL.
    MultiWord,
    /// A single token that could be either the URL or the link text.
    Ambiguous,
}
23
24/// Information about a detected reversed link pattern
25#[derive(Debug, Clone)]
26struct ReversedLinkInfo {
27    line_num: usize,
28    column: usize,
29    /// Content found in parentheses
30    paren_content: String,
31    /// Content found in square brackets
32    bracket_content: String,
33    /// Classification of parentheses content
34    paren_type: LinkComponent,
35    /// Classification of bracket content
36    bracket_type: LinkComponent,
37}
38
39impl ReversedLinkInfo {
40    /// Determine the correct order: returns (text, url)
41    fn correct_order(&self) -> (&str, &str) {
42        use LinkComponent::*;
43
44        match (self.paren_type, self.bracket_type) {
45            // One side is clearly a URL - that's the URL
46            (ClearUrl, _) => (&self.bracket_content, &self.paren_content),
47            (_, ClearUrl) => (&self.paren_content, &self.bracket_content),
48
49            // One side is multi-word - that's the text, other is URL
50            (MultiWord, _) => (&self.paren_content, &self.bracket_content),
51            (_, MultiWord) => (&self.bracket_content, &self.paren_content),
52
53            // Both ambiguous: assume standard reversed pattern (url)[text]
54            (Ambiguous, Ambiguous) => (&self.bracket_content, &self.paren_content),
55        }
56    }
57
58    /// Get the original pattern as it appears in the source
59    fn original_pattern(&self) -> String {
60        format!("({})[{}]", self.paren_content, self.bracket_content)
61    }
62
63    /// Get the corrected pattern
64    fn corrected_pattern(&self) -> String {
65        let (text, url) = self.correct_order();
66        format!("[{text}]({url})")
67    }
68}
69
/// Lint rule MD011: reports links written as `(url)[text]` instead of `[text](url)`.
///
/// The rule carries no state or configuration.
#[derive(Clone)]
pub struct MD011NoReversedLinks;
72
73impl MD011NoReversedLinks {
74    /// Classify a link component as URL, multi-word text, or ambiguous
75    fn classify_component(s: &str) -> LinkComponent {
76        let trimmed = s.trim();
77
78        // Check for clear URL indicators
79        if trimmed.starts_with("http://")
80            || trimmed.starts_with("https://")
81            || trimmed.starts_with("ftp://")
82            || trimmed.starts_with("www.")
83            || (trimmed.starts_with("mailto:") && trimmed.contains('@'))
84            || (trimmed.starts_with('/') && trimmed.len() > 1)
85            || (trimmed.starts_with("./") || trimmed.starts_with("../"))
86            || (trimmed.starts_with('#') && trimmed.len() > 1 && !trimmed[1..].contains(' '))
87        {
88            return LinkComponent::ClearUrl;
89        }
90
91        // Multi-word text is likely a description, not a URL
92        if trimmed.contains(' ') {
93            return LinkComponent::MultiWord;
94        }
95
96        // Single word - could be either
97        LinkComponent::Ambiguous
98    }
99
100    fn find_reversed_links(content: &str) -> Vec<ReversedLinkInfo> {
101        let mut results = Vec::new();
102        let mut line_num = 1;
103
104        for line in content.lines() {
105            let mut last_end = 0;
106
107            while let Some(cap) = get_cached_regex(REVERSED_LINK_REGEX_STR)
108                .ok()
109                .and_then(|re| re.captures(&line[last_end..]))
110            {
111                let match_obj = cap.get(0).unwrap();
112                let prechar = &cap[1];
113                let paren_content = cap[2].to_string();
114                let bracket_content = cap[3].to_string();
115
116                // Skip wiki-link patterns: if bracket content starts with [ or ends with ]
117                // This handles cases like (url)[[wiki-link]] being misdetected
118                if bracket_content.starts_with('[') || bracket_content.ends_with(']') {
119                    last_end += match_obj.end();
120                    continue;
121                }
122
123                // Skip footnote references: [^footnote]
124                // This prevents false positives like [link](url)[^footnote]
125                if bracket_content.starts_with('^') {
126                    last_end += match_obj.end();
127                    continue;
128                }
129
130                // Check if the brackets at the end are escaped
131                if bracket_content.ends_with('\\') {
132                    last_end += match_obj.end();
133                    continue;
134                }
135
136                // Manual negative lookahead: skip if followed by (
137                // This prevents matching (text)[ref](url) patterns
138                let end_pos = last_end + match_obj.end();
139                if end_pos < line.len() && line[end_pos..].starts_with('(') {
140                    last_end += match_obj.end();
141                    continue;
142                }
143
144                // Classify both components
145                let paren_type = Self::classify_component(&paren_content);
146                let bracket_type = Self::classify_component(&bracket_content);
147
148                // Calculate the actual column (accounting for any prefix character)
149                let column = last_end + match_obj.start() + prechar.len() + 1;
150
151                results.push(ReversedLinkInfo {
152                    line_num,
153                    column,
154                    paren_content,
155                    bracket_content,
156                    paren_type,
157                    bracket_type,
158                });
159
160                last_end += match_obj.end();
161            }
162
163            line_num += 1;
164        }
165
166        results
167    }
168}
169
170impl Rule for MD011NoReversedLinks {
171    fn name(&self) -> &'static str {
172        "MD011"
173    }
174
175    fn description(&self) -> &'static str {
176        "Reversed link syntax"
177    }
178
179    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
180        let mut warnings = Vec::new();
181
182        let line_index = &ctx.line_index;
183
184        // Use filtered_lines() to automatically skip front-matter
185        for filtered_line in ctx.filtered_lines().skip_front_matter() {
186            let line_num = filtered_line.line_num;
187            let line = filtered_line.content;
188
189            let byte_pos = line_index.get_line_start_byte(line_num).unwrap_or(0);
190
191            let mut last_end = 0;
192
193            while let Some(cap) = get_cached_regex(REVERSED_LINK_REGEX_STR)
194                .ok()
195                .and_then(|re| re.captures(&line[last_end..]))
196            {
197                let match_obj = cap.get(0).unwrap();
198                let prechar = &cap[1];
199                let paren_content = cap[2].to_string();
200                let bracket_content = cap[3].to_string();
201
202                // Skip wiki-link patterns: if bracket content starts with [ or ends with ]
203                // This handles cases like (url)[[wiki-link]] being misdetected
204                if bracket_content.starts_with('[') || bracket_content.ends_with(']') {
205                    last_end += match_obj.end();
206                    continue;
207                }
208
209                // Skip footnote references: [^footnote]
210                // This prevents false positives like [link](url)[^footnote]
211                if bracket_content.starts_with('^') {
212                    last_end += match_obj.end();
213                    continue;
214                }
215
216                // Check if the brackets at the end are escaped
217                if bracket_content.ends_with('\\') {
218                    last_end += match_obj.end();
219                    continue;
220                }
221
222                // Manual negative lookahead: skip if followed by (
223                // This prevents matching (text)[ref](url) patterns
224                let end_pos = last_end + match_obj.end();
225                if end_pos < line.len() && line[end_pos..].starts_with('(') {
226                    last_end += match_obj.end();
227                    continue;
228                }
229
230                // Calculate the actual position
231                let match_start = last_end + match_obj.start() + prechar.len();
232                let match_byte_pos = byte_pos + match_start;
233
234                // Skip if in code block, inline code, HTML comments, math contexts, or Jinja templates
235                if ctx.is_in_code_block_or_span(match_byte_pos)
236                    || ctx.is_in_html_comment(match_byte_pos)
237                    || is_in_math_context(ctx, match_byte_pos)
238                    || ctx.is_in_jinja_range(match_byte_pos)
239                {
240                    last_end += match_obj.end();
241                    continue;
242                }
243
244                // Classify both components and determine correct order
245                let paren_type = Self::classify_component(&paren_content);
246                let bracket_type = Self::classify_component(&bracket_content);
247
248                let info = ReversedLinkInfo {
249                    line_num,
250                    column: match_start + 1,
251                    paren_content,
252                    bracket_content,
253                    paren_type,
254                    bracket_type,
255                };
256
257                let (text, url) = info.correct_order();
258
259                // Calculate the range for the actual reversed link (excluding prechar)
260                let actual_length = match_obj.len() - prechar.len();
261                let (start_line, start_col, end_line, end_col) =
262                    calculate_match_range(line_num, line, match_start, actual_length);
263
264                warnings.push(LintWarning {
265                    rule_name: Some(self.name().to_string()),
266                    message: format!("Reversed link syntax: use [{text}]({url}) instead"),
267                    line: start_line,
268                    column: start_col,
269                    end_line,
270                    end_column: end_col,
271                    severity: Severity::Error,
272                    fix: Some(Fix {
273                        range: {
274                            let match_start_byte = byte_pos + match_start;
275                            let match_end_byte = match_start_byte + actual_length;
276                            match_start_byte..match_end_byte
277                        },
278                        replacement: format!("[{text}]({url})"),
279                    }),
280                });
281
282                last_end += match_obj.end();
283            }
284        }
285
286        Ok(warnings)
287    }
288
289    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
290        let content = ctx.content;
291        let mut result = content.to_string();
292        let mut offset: isize = 0;
293
294        let line_index = &ctx.line_index;
295
296        for info in Self::find_reversed_links(content) {
297            // Calculate absolute position in original content using LineIndex
298            let line_start = line_index.get_line_start_byte(info.line_num).unwrap_or(0);
299            let pos = line_start + (info.column - 1);
300
301            // Skip if in front matter using centralized utility
302            if ctx.is_in_front_matter(pos) {
303                continue;
304            }
305
306            // Skip if in any skip context
307            if !ctx.is_in_code_block_or_span(pos)
308                && !ctx.is_in_html_comment(pos)
309                && !is_in_math_context(ctx, pos)
310                && !ctx.is_in_jinja_range(pos)
311            {
312                let adjusted_pos = (pos as isize + offset) as usize;
313
314                // Use the info struct to get both original and corrected patterns
315                let original = info.original_pattern();
316                let replacement = info.corrected_pattern();
317
318                // Make sure we have the right substring before replacing
319                let end_pos = adjusted_pos + original.len();
320                if end_pos <= result.len() && adjusted_pos < result.len() {
321                    result.replace_range(adjusted_pos..end_pos, &replacement);
322                    // Update offset based on the difference in lengths
323                    offset += replacement.len() as isize - original.len() as isize;
324                }
325            }
326        }
327
328        Ok(result)
329    }
330
331    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
332        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
333    }
334
335    fn as_any(&self) -> &dyn std::any::Any {
336        self
337    }
338
339    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
340    where
341        Self: Sized,
342    {
343        Box::new(MD011NoReversedLinks)
344    }
345}
346
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Lint `content` as standard-flavor Markdown and return the MD011 warnings.
    fn warnings_for(content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        MD011NoReversedLinks.check(&ctx).unwrap()
    }

    /// Run the MD011 auto-fix over `content` as standard-flavor Markdown.
    fn fixed(content: &str) -> String {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        MD011NoReversedLinks.fix(&ctx).unwrap()
    }

    #[test]
    fn test_md011_basic() {
        // A reversed link is reported on its line.
        let reversed = warnings_for("(http://example.com)[Example]\n");
        assert_eq!(reversed.len(), 1);
        assert_eq!(reversed[0].line, 1);

        // A well-formed link is left alone.
        let correct = warnings_for("[Example](http://example.com)\n");
        assert_eq!(correct.len(), 0);
    }

    #[test]
    fn test_md011_with_escaped_brackets() {
        // An escaped closing bracket means this is not a link at all.
        let warnings = warnings_for("(url)[text\\]\n");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_md011_no_false_positive_with_reference_link() {
        // `(text)[ref](url)` is parenthesised text followed by a real link.
        let warnings = warnings_for("(text)[ref](url)\n");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_md011_fix() {
        let output = fixed("(http://example.com)[Example]\n(another/url)[text]\n");
        assert_eq!(output, "[Example](http://example.com)\n[text](another/url)\n");
    }

    #[test]
    fn test_md011_in_code_block() {
        // Only the occurrence after the fenced block counts.
        let warnings = warnings_for("```\n(url)[text]\n```\n(url)[text]\n");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 4);
    }

    #[test]
    fn test_md011_inline_code() {
        // Only the occurrence outside the code span counts.
        let warnings = warnings_for("`(url)[text]` and (url)[text]\n");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].column, 19);
    }

    #[test]
    fn test_md011_no_false_positive_with_footnote() {
        // `[link](url)[^footnote]` is valid markdown: a link followed by a footnote
        // reference, not a reversed link.
        let single = warnings_for("Some text with [a link](https://example.com/)[^ft].\n\n[^ft]: Note.\n");
        assert_eq!(single.len(), 0);

        // Same with several footnotes on one line.
        let several = warnings_for("[link1](url1)[^1] and [link2](url2)[^2]\n\n[^1]: First\n[^2]: Second\n");
        assert_eq!(several.len(), 0);

        // A genuine reversed link next to a footnote is still reported.
        let mixed = warnings_for("(url)[text] and [link](url)[^footnote]\n");
        assert_eq!(mixed.len(), 1);
        assert_eq!(mixed[0].line, 1);
        assert_eq!(mixed[0].column, 1);
    }
}