rumdl_lib/rules/
md011_no_reversed_links.rs

/// Rule MD011: No reversed link syntax
///
/// See [docs/md011.md](../../docs/md011.md) for full documentation, configuration, and examples.
4use crate::filtered_lines::FilteredLinesExt;
5use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
6use crate::utils::range_utils::calculate_match_range;
7use crate::utils::regex_cache::get_cached_regex;
8use crate::utils::skip_context::is_in_math_context;
9
/// Pattern matching a reversed link written as `(paren)[bracket]`.
///
/// Capture groups:
/// 1. the character immediately before the opening parenthesis (empty when the
///    match begins at the start of the haystack); it may not be a backslash, so
///    an escaped `\(` never starts a match
/// 2. the text between the parentheses (nested parentheses are not allowed)
/// 3. the text between the square brackets
///
/// The bracket text may not begin with `^`, so a footnote reference that follows
/// a parenthetical, such as `(example)[^1]`, is not treated as a reversed link.
const REVERSED_LINK_REGEX_STR: &str = r"(^|[^\\])\(([^()]+)\)\[([^\]^][^\]]*)\]";
12
/// How one half of a `(…)[…]` pair looks when judged on its own.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LinkComponent {
    /// Carries an unmistakable URL marker (protocol, `www.`, `mailto:`, or a path prefix).
    ClearUrl,
    /// Several words or sentence-like content; most likely link text rather than a URL.
    MultiWord,
    /// A single word that could equally be the URL or the text.
    Ambiguous,
}

/// One `(paren)[bracket]` occurrence together with the classification of each half.
#[derive(Debug, Clone)]
struct ReversedLinkInfo {
    /// Line number the pair was found on.
    line_num: usize,
    /// Column of the opening parenthesis on that line (byte offset plus one).
    column: usize,
    /// Text found between the parentheses.
    paren_content: String,
    /// Text found between the square brackets.
    bracket_content: String,
    /// Classification of `paren_content`.
    paren_type: LinkComponent,
    /// Classification of `bracket_content`.
    bracket_type: LinkComponent,
}

impl ReversedLinkInfo {
    /// Decide which half is the link text and which is the URL.
    ///
    /// Returns `(text, url)`. The rules are tried in this order, first hit wins:
    ///
    /// 1. a half classified as [`LinkComponent::ClearUrl`] is the URL
    ///    (if both halves are, the parenthesised one is the URL);
    /// 2. a half classified as [`LinkComponent::MultiWord`] is the text
    ///    (if both halves are, the parenthesised one is the text);
    /// 3. otherwise both halves are ambiguous and the conventional reversed
    ///    form `(url)[text]` is assumed.
    fn correct_order(&self) -> (&str, &str) {
        let paren = self.paren_content.as_str();
        let bracket = self.bracket_content.as_str();

        let paren_is_text = match (self.paren_type, self.bracket_type) {
            (LinkComponent::ClearUrl, _) => false,
            (_, LinkComponent::ClearUrl) => true,
            (LinkComponent::MultiWord, _) => true,
            (_, LinkComponent::MultiWord) => false,
            (LinkComponent::Ambiguous, LinkComponent::Ambiguous) => false,
        };

        if paren_is_text {
            (paren, bracket)
        } else {
            (bracket, paren)
        }
    }

    /// Rebuild the pair exactly as it appears in the source: `(paren)[bracket]`.
    fn original_pattern(&self) -> String {
        let Self {
            paren_content,
            bracket_content,
            ..
        } = self;
        format!("({paren_content})[{bracket_content}]")
    }

    /// Build the corrected link `[text](url)` using the order chosen by `correct_order`.
    fn corrected_pattern(&self) -> String {
        let (text, url) = self.correct_order();
        format!("[{text}]({url})")
    }
}
69
/// Lint rule MD011: reports links written in reversed form, `(…)[…]`, and
/// offers the `[text](url)` form as the fix.
#[derive(Debug, Clone)]
pub struct MD011NoReversedLinks;
72
73impl MD011NoReversedLinks {
74    /// Classify a link component as URL, multi-word text, or ambiguous
75    fn classify_component(s: &str) -> LinkComponent {
76        let trimmed = s.trim();
77
78        // Check for clear URL indicators
79        if trimmed.starts_with("http://")
80            || trimmed.starts_with("https://")
81            || trimmed.starts_with("ftp://")
82            || trimmed.starts_with("www.")
83            || (trimmed.starts_with("mailto:") && trimmed.contains('@'))
84            || (trimmed.starts_with('/') && trimmed.len() > 1)
85            || (trimmed.starts_with("./") || trimmed.starts_with("../"))
86            || (trimmed.starts_with('#') && trimmed.len() > 1 && !trimmed[1..].contains(' '))
87        {
88            return LinkComponent::ClearUrl;
89        }
90
91        // Multi-word text is likely a description, not a URL
92        if trimmed.contains(' ') {
93            return LinkComponent::MultiWord;
94        }
95
96        // Single word - could be either
97        LinkComponent::Ambiguous
98    }
99
100    fn find_reversed_links(content: &str) -> Vec<ReversedLinkInfo> {
101        let mut results = Vec::new();
102        let mut line_num = 1;
103
104        for line in content.lines() {
105            let mut last_end = 0;
106
107            while let Some(cap) = get_cached_regex(REVERSED_LINK_REGEX_STR)
108                .ok()
109                .and_then(|re| re.captures(&line[last_end..]))
110            {
111                let match_obj = cap.get(0).unwrap();
112                let prechar = &cap[1];
113                let paren_content = cap[2].to_string();
114                let bracket_content = cap[3].to_string();
115
116                // Check if the brackets at the end are escaped
117                if bracket_content.ends_with('\\') {
118                    last_end += match_obj.end();
119                    continue;
120                }
121
122                // Manual negative lookahead: skip if followed by (
123                // This prevents matching (text)[ref](url) patterns
124                let end_pos = last_end + match_obj.end();
125                if end_pos < line.len() && line[end_pos..].starts_with('(') {
126                    last_end += match_obj.end();
127                    continue;
128                }
129
130                // Classify both components
131                let paren_type = Self::classify_component(&paren_content);
132                let bracket_type = Self::classify_component(&bracket_content);
133
134                // Calculate the actual column (accounting for any prefix character)
135                let column = last_end + match_obj.start() + prechar.len() + 1;
136
137                results.push(ReversedLinkInfo {
138                    line_num,
139                    column,
140                    paren_content,
141                    bracket_content,
142                    paren_type,
143                    bracket_type,
144                });
145
146                last_end += match_obj.end();
147            }
148
149            line_num += 1;
150        }
151
152        results
153    }
154}
155
156impl Rule for MD011NoReversedLinks {
157    fn name(&self) -> &'static str {
158        "MD011"
159    }
160
161    fn description(&self) -> &'static str {
162        "Reversed link syntax"
163    }
164
165    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
166        let mut warnings = Vec::new();
167
168        let line_index = &ctx.line_index;
169
170        // Use filtered_lines() to automatically skip front-matter
171        for filtered_line in ctx.filtered_lines().skip_front_matter() {
172            let line_num = filtered_line.line_num;
173            let line = filtered_line.content;
174
175            let byte_pos = line_index.get_line_start_byte(line_num).unwrap_or(0);
176
177            let mut last_end = 0;
178
179            while let Some(cap) = get_cached_regex(REVERSED_LINK_REGEX_STR)
180                .ok()
181                .and_then(|re| re.captures(&line[last_end..]))
182            {
183                let match_obj = cap.get(0).unwrap();
184                let prechar = &cap[1];
185                let paren_content = cap[2].to_string();
186                let bracket_content = cap[3].to_string();
187
188                // Check if the brackets at the end are escaped
189                if bracket_content.ends_with('\\') {
190                    last_end += match_obj.end();
191                    continue;
192                }
193
194                // Manual negative lookahead: skip if followed by (
195                // This prevents matching (text)[ref](url) patterns
196                let end_pos = last_end + match_obj.end();
197                if end_pos < line.len() && line[end_pos..].starts_with('(') {
198                    last_end += match_obj.end();
199                    continue;
200                }
201
202                // Calculate the actual position
203                let match_start = last_end + match_obj.start() + prechar.len();
204                let match_byte_pos = byte_pos + match_start;
205
206                // Skip if in code block, inline code, HTML comments, math contexts, or Jinja templates
207                if ctx.is_in_code_block_or_span(match_byte_pos)
208                    || ctx.is_in_html_comment(match_byte_pos)
209                    || is_in_math_context(ctx, match_byte_pos)
210                    || ctx.is_in_jinja_range(match_byte_pos)
211                {
212                    last_end += match_obj.end();
213                    continue;
214                }
215
216                // Classify both components and determine correct order
217                let paren_type = Self::classify_component(&paren_content);
218                let bracket_type = Self::classify_component(&bracket_content);
219
220                let info = ReversedLinkInfo {
221                    line_num,
222                    column: match_start + 1,
223                    paren_content,
224                    bracket_content,
225                    paren_type,
226                    bracket_type,
227                };
228
229                let (text, url) = info.correct_order();
230
231                // Calculate the range for the actual reversed link (excluding prechar)
232                let actual_length = match_obj.len() - prechar.len();
233                let (start_line, start_col, end_line, end_col) =
234                    calculate_match_range(line_num, line, match_start, actual_length);
235
236                warnings.push(LintWarning {
237                    rule_name: Some(self.name().to_string()),
238                    message: format!("Reversed link syntax: use [{text}]({url}) instead"),
239                    line: start_line,
240                    column: start_col,
241                    end_line,
242                    end_column: end_col,
243                    severity: Severity::Warning,
244                    fix: Some(Fix {
245                        range: {
246                            let match_start_byte = byte_pos + match_start;
247                            let match_end_byte = match_start_byte + actual_length;
248                            match_start_byte..match_end_byte
249                        },
250                        replacement: format!("[{text}]({url})"),
251                    }),
252                });
253
254                last_end += match_obj.end();
255            }
256        }
257
258        Ok(warnings)
259    }
260
261    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
262        let content = ctx.content;
263        let mut result = content.to_string();
264        let mut offset: isize = 0;
265
266        let line_index = &ctx.line_index;
267
268        for info in Self::find_reversed_links(content) {
269            // Calculate absolute position in original content using LineIndex
270            let line_start = line_index.get_line_start_byte(info.line_num).unwrap_or(0);
271            let pos = line_start + (info.column - 1);
272
273            // Skip if in front matter using centralized utility
274            if ctx.is_in_front_matter(pos) {
275                continue;
276            }
277
278            // Skip if in any skip context
279            if !ctx.is_in_code_block_or_span(pos)
280                && !ctx.is_in_html_comment(pos)
281                && !is_in_math_context(ctx, pos)
282                && !ctx.is_in_jinja_range(pos)
283            {
284                let adjusted_pos = (pos as isize + offset) as usize;
285
286                // Use the info struct to get both original and corrected patterns
287                let original = info.original_pattern();
288                let replacement = info.corrected_pattern();
289
290                // Make sure we have the right substring before replacing
291                let end_pos = adjusted_pos + original.len();
292                if end_pos <= result.len() && adjusted_pos < result.len() {
293                    result.replace_range(adjusted_pos..end_pos, &replacement);
294                    // Update offset based on the difference in lengths
295                    offset += replacement.len() as isize - original.len() as isize;
296                }
297            }
298        }
299
300        Ok(result)
301    }
302
303    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
304        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
305    }
306
307    fn as_any(&self) -> &dyn std::any::Any {
308        self
309    }
310
311    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
312    where
313        Self: Sized,
314    {
315        Box::new(MD011NoReversedLinks)
316    }
317}
318
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Lint `content` with the standard Markdown flavor and return the warnings.
    fn check_warnings(content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD011NoReversedLinks.check(&ctx).unwrap()
    }

    /// Run the rule's fixer over `content` with the standard Markdown flavor.
    fn apply_fix(content: &str) -> String {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD011NoReversedLinks.fix(&ctx).unwrap()
    }

    #[test]
    fn test_md011_basic() {
        // Should detect reversed links
        let reversed = check_warnings("(http://example.com)[Example]\n");
        assert_eq!(reversed.len(), 1);
        assert_eq!(reversed[0].line, 1);

        // Should not detect correct links
        let correct = check_warnings("[Example](http://example.com)\n");
        assert_eq!(correct.len(), 0);
    }

    #[test]
    fn test_md011_with_escaped_brackets() {
        // Should not detect if brackets are escaped
        let warnings = check_warnings("(url)[text\\]\n");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_md011_no_false_positive_with_reference_link() {
        // Should not detect (text)[ref](url) as reversed
        let warnings = check_warnings("(text)[ref](url)\n");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_md011_fix() {
        let fixed = apply_fix("(http://example.com)[Example]\n(another/url)[text]\n");
        assert_eq!(fixed, "[Example](http://example.com)\n[text](another/url)\n");
    }

    #[test]
    fn test_md011_fix_leaves_code_block_untouched() {
        // Only the occurrence outside the fenced block may be rewritten
        let fixed = apply_fix("```\n(url)[text]\n```\n(url)[text]\n");
        assert_eq!(fixed, "```\n(url)[text]\n```\n[text](url)\n");
    }

    #[test]
    fn test_md011_in_code_block() {
        let warnings = check_warnings("```\n(url)[text]\n```\n(url)[text]\n");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 4);
    }

    #[test]
    fn test_md011_inline_code() {
        let warnings = check_warnings("`(url)[text]` and (url)[text]\n");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].column, 19);
    }
}