Skip to main content

rumdl_lib/rules/
md011_no_reversed_links.rs

1/// Rule MD011: No reversed link syntax
2///
3/// See [docs/md011.md](../../docs/md011.md) for full documentation, configuration, and examples.
4use crate::filtered_lines::FilteredLinesExt;
5use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
6use crate::utils::range_utils::calculate_match_range;
7use crate::utils::regex_cache::get_cached_regex;
8use crate::utils::skip_context::is_in_math_context;
9
// Reversed link detection pattern: matches `(paren content)[bracket content]`.
//
// Capture groups:
//   1. the character right before `(` - empty at the start of the haystack, and never
//      a backslash, so a `(` preceded by `\` is not matched
//   2. the content between the parentheses (may not contain `(` or `)`)
//   3. the content between the square brackets (may not contain `]`)
const REVERSED_LINK_REGEX_STR: &str = r"(^|[^\\])\(([^()]+)\)\[([^\]]+)\]";
12
/// Classification of a link component
///
/// Produced by `MD011NoReversedLinks::classify_component` and consumed by
/// `ReversedLinkInfo::correct_order` to decide which side of a reversed link
/// is the URL and which is the link text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LinkComponent {
    /// Clear URL: has protocol, www., mailto:, or path prefix
    /// (a `#fragment` without spaces is also treated as a clear URL)
    ClearUrl,
    /// Multiple words or sentence-like (likely link text, not URL)
    MultiWord,
    /// Single word - could be either URL or text
    Ambiguous,
}
23
/// Information about a detected reversed link pattern
///
/// Holds the two captured halves of a `(paren)[bracket]` match together with
/// their classification, so the corrected `[text](url)` form can be derived.
#[derive(Debug, Clone)]
struct ReversedLinkInfo {
    /// Line on which the pattern was found (1-based in `find_reversed_links`)
    line_num: usize,
    /// 1-based byte column of the opening `(` within the line
    column: usize,
    /// Content found in parentheses
    paren_content: String,
    /// Content found in square brackets
    bracket_content: String,
    /// Classification of parentheses content
    paren_type: LinkComponent,
    /// Classification of bracket content
    bracket_type: LinkComponent,
}
38
39impl ReversedLinkInfo {
40    /// Determine the correct order: returns (text, url)
41    fn correct_order(&self) -> (&str, &str) {
42        use LinkComponent::*;
43
44        match (self.paren_type, self.bracket_type) {
45            // One side is clearly a URL - that's the URL
46            (ClearUrl, _) => (&self.bracket_content, &self.paren_content),
47            (_, ClearUrl) => (&self.paren_content, &self.bracket_content),
48
49            // One side is multi-word - that's the text, other is URL
50            (MultiWord, _) => (&self.paren_content, &self.bracket_content),
51            (_, MultiWord) => (&self.bracket_content, &self.paren_content),
52
53            // Both ambiguous: assume standard reversed pattern (url)[text]
54            (Ambiguous, Ambiguous) => (&self.bracket_content, &self.paren_content),
55        }
56    }
57
58    /// Get the original pattern as it appears in the source
59    fn original_pattern(&self) -> String {
60        format!("({})[{}]", self.paren_content, self.bracket_content)
61    }
62
63    /// Get the corrected pattern
64    fn corrected_pattern(&self) -> String {
65        let (text, url) = self.correct_order();
66        format!("[{text}]({url})")
67    }
68}
69
/// Rule MD011: flags reversed link syntax such as `(url)[text]` and offers
/// the corrected `[text](url)` form as a fix.
///
/// The rule carries no configuration or state.
#[derive(Clone)]
pub struct MD011NoReversedLinks;
72
73impl MD011NoReversedLinks {
74    /// Classify a link component as URL, multi-word text, or ambiguous
75    fn classify_component(s: &str) -> LinkComponent {
76        let trimmed = s.trim();
77
78        // Check for clear URL indicators
79        if trimmed.starts_with("http://")
80            || trimmed.starts_with("https://")
81            || trimmed.starts_with("ftp://")
82            || trimmed.starts_with("www.")
83            || (trimmed.starts_with("mailto:") && trimmed.contains('@'))
84            || (trimmed.starts_with('/') && trimmed.len() > 1)
85            || (trimmed.starts_with("./") || trimmed.starts_with("../"))
86            || (trimmed.starts_with('#') && trimmed.len() > 1 && !trimmed[1..].contains(' '))
87        {
88            return LinkComponent::ClearUrl;
89        }
90
91        // Multi-word text is likely a description, not a URL
92        if trimmed.contains(' ') {
93            return LinkComponent::MultiWord;
94        }
95
96        // Single word - could be either
97        LinkComponent::Ambiguous
98    }
99
100    fn find_reversed_links(content: &str) -> Vec<ReversedLinkInfo> {
101        let mut results = Vec::new();
102        let mut line_num = 1;
103
104        for line in content.lines() {
105            let mut last_end = 0;
106
107            while let Some(cap) = get_cached_regex(REVERSED_LINK_REGEX_STR)
108                .ok()
109                .and_then(|re| re.captures(&line[last_end..]))
110            {
111                let match_obj = cap.get(0).unwrap();
112                let prechar = &cap[1];
113                let paren_content = cap[2].to_string();
114                let bracket_content = cap[3].to_string();
115
116                // Skip wiki-link patterns: if bracket content starts with [ or ends with ]
117                // This handles cases like (url)[[wiki-link]] being misdetected
118                if bracket_content.starts_with('[') || bracket_content.ends_with(']') {
119                    last_end += match_obj.end();
120                    continue;
121                }
122
123                // Skip footnote references: [^footnote]
124                // This prevents false positives like [link](url)[^footnote]
125                if bracket_content.starts_with('^') {
126                    last_end += match_obj.end();
127                    continue;
128                }
129
130                // Check if the brackets at the end are escaped
131                if bracket_content.ends_with('\\') {
132                    last_end += match_obj.end();
133                    continue;
134                }
135
136                // Manual negative lookahead: skip if followed by (
137                // This prevents matching (text)[ref](url) patterns
138                let end_pos = last_end + match_obj.end();
139                if end_pos < line.len() && line[end_pos..].starts_with('(') {
140                    last_end += match_obj.end();
141                    continue;
142                }
143
144                // Classify both components
145                let paren_type = Self::classify_component(&paren_content);
146                let bracket_type = Self::classify_component(&bracket_content);
147
148                // Calculate the actual column (accounting for any prefix character)
149                let column = last_end + match_obj.start() + prechar.len() + 1;
150
151                results.push(ReversedLinkInfo {
152                    line_num,
153                    column,
154                    paren_content,
155                    bracket_content,
156                    paren_type,
157                    bracket_type,
158                });
159
160                last_end += match_obj.end();
161            }
162
163            line_num += 1;
164        }
165
166        results
167    }
168}
169
170impl Rule for MD011NoReversedLinks {
171    fn name(&self) -> &'static str {
172        "MD011"
173    }
174
175    fn description(&self) -> &'static str {
176        "Reversed link syntax"
177    }
178
179    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
180        let mut warnings = Vec::new();
181
182        let line_index = &ctx.line_index;
183
184        // Use filtered_lines() to automatically skip front-matter and Obsidian comments
185        for filtered_line in ctx.filtered_lines().skip_front_matter().skip_obsidian_comments() {
186            let line_num = filtered_line.line_num;
187            let line = filtered_line.content;
188
189            let byte_pos = line_index.get_line_start_byte(line_num).unwrap_or(0);
190
191            let mut last_end = 0;
192
193            while let Some(cap) = get_cached_regex(REVERSED_LINK_REGEX_STR)
194                .ok()
195                .and_then(|re| re.captures(&line[last_end..]))
196            {
197                let match_obj = cap.get(0).unwrap();
198                let prechar = &cap[1];
199                let paren_content = cap[2].to_string();
200                let bracket_content = cap[3].to_string();
201
202                // Skip wiki-link patterns: if bracket content starts with [ or ends with ]
203                // This handles cases like (url)[[wiki-link]] being misdetected
204                if bracket_content.starts_with('[') || bracket_content.ends_with(']') {
205                    last_end += match_obj.end();
206                    continue;
207                }
208
209                // Skip footnote references: [^footnote]
210                // This prevents false positives like [link](url)[^footnote]
211                if bracket_content.starts_with('^') {
212                    last_end += match_obj.end();
213                    continue;
214                }
215
216                // Skip Dataview inline fields in Obsidian flavor
217                // Pattern: (field:: value)[text] is valid Obsidian syntax, not a reversed link
218                if ctx.flavor == crate::config::MarkdownFlavor::Obsidian && paren_content.contains("::") {
219                    last_end += match_obj.end();
220                    continue;
221                }
222
223                // Check if the brackets at the end are escaped
224                if bracket_content.ends_with('\\') {
225                    last_end += match_obj.end();
226                    continue;
227                }
228
229                // Manual negative lookahead: skip if followed by (
230                // This prevents matching (text)[ref](url) patterns
231                let end_pos = last_end + match_obj.end();
232                if end_pos < line.len() && line[end_pos..].starts_with('(') {
233                    last_end += match_obj.end();
234                    continue;
235                }
236
237                // Calculate the actual position
238                let match_start = last_end + match_obj.start() + prechar.len();
239                let match_byte_pos = byte_pos + match_start;
240
241                // Skip if in code block, inline code, HTML comments, math contexts, or Jinja templates
242                if ctx.is_in_code_block_or_span(match_byte_pos)
243                    || ctx.is_in_html_comment(match_byte_pos)
244                    || is_in_math_context(ctx, match_byte_pos)
245                    || ctx.is_in_jinja_range(match_byte_pos)
246                {
247                    last_end += match_obj.end();
248                    continue;
249                }
250
251                // Classify both components and determine correct order
252                let paren_type = Self::classify_component(&paren_content);
253                let bracket_type = Self::classify_component(&bracket_content);
254
255                let info = ReversedLinkInfo {
256                    line_num,
257                    column: match_start + 1,
258                    paren_content,
259                    bracket_content,
260                    paren_type,
261                    bracket_type,
262                };
263
264                let (text, url) = info.correct_order();
265
266                // Calculate the range for the actual reversed link (excluding prechar)
267                let actual_length = match_obj.len() - prechar.len();
268                let (start_line, start_col, end_line, end_col) =
269                    calculate_match_range(line_num, line, match_start, actual_length);
270
271                warnings.push(LintWarning {
272                    rule_name: Some(self.name().to_string()),
273                    message: format!("Reversed link syntax: use [{text}]({url}) instead"),
274                    line: start_line,
275                    column: start_col,
276                    end_line,
277                    end_column: end_col,
278                    severity: Severity::Error,
279                    fix: Some(Fix {
280                        range: {
281                            let match_start_byte = byte_pos + match_start;
282                            let match_end_byte = match_start_byte + actual_length;
283                            match_start_byte..match_end_byte
284                        },
285                        replacement: format!("[{text}]({url})"),
286                    }),
287                });
288
289                last_end += match_obj.end();
290            }
291        }
292
293        Ok(warnings)
294    }
295
296    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
297        let content = ctx.content;
298        let mut result = content.to_string();
299        let mut offset: isize = 0;
300
301        let line_index = &ctx.line_index;
302
303        for info in Self::find_reversed_links(content) {
304            // Calculate absolute position in original content using LineIndex
305            let line_start = line_index.get_line_start_byte(info.line_num).unwrap_or(0);
306            let pos = line_start + (info.column - 1);
307
308            // Skip if in front matter using centralized utility
309            if ctx.is_in_front_matter(pos) {
310                continue;
311            }
312
313            // Skip if in any skip context
314            if !ctx.is_in_code_block_or_span(pos)
315                && !ctx.is_in_html_comment(pos)
316                && !is_in_math_context(ctx, pos)
317                && !ctx.is_in_jinja_range(pos)
318            {
319                let adjusted_pos = (pos as isize + offset) as usize;
320
321                // Use the info struct to get both original and corrected patterns
322                let original = info.original_pattern();
323                let replacement = info.corrected_pattern();
324
325                // Make sure we have the right substring before replacing
326                let end_pos = adjusted_pos + original.len();
327                if end_pos <= result.len() && adjusted_pos < result.len() {
328                    result.replace_range(adjusted_pos..end_pos, &replacement);
329                    // Update offset based on the difference in lengths
330                    offset += replacement.len() as isize - original.len() as isize;
331                }
332            }
333        }
334
335        Ok(result)
336    }
337
338    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
339        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
340    }
341
342    fn as_any(&self) -> &dyn std::any::Any {
343        self
344    }
345
346    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
347    where
348        Self: Sized,
349    {
350        Box::new(MD011NoReversedLinks)
351    }
352}
353
// Unit tests for MD011: detection, false-positive guards, fixing, and
// Obsidian-specific Dataview handling.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// A reversed link is reported; a correctly ordered link is not.
    #[test]
    fn test_md011_basic() {
        let rule = MD011NoReversedLinks;

        // Should detect reversed links
        let content = "(http://example.com)[Example]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);

        // Should not detect correct links
        let content = "[Example](http://example.com)\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 0);
    }

    /// An escaped closing bracket (`\]`) prevents a match.
    #[test]
    fn test_md011_with_escaped_brackets() {
        let rule = MD011NoReversedLinks;

        // Should not detect if brackets are escaped
        let content = "(url)[text\\]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 0);
    }

    /// `(text)[ref](url)` is parenthesised text followed by a normal link.
    #[test]
    fn test_md011_no_false_positive_with_reference_link() {
        let rule = MD011NoReversedLinks;

        // Should not detect (text)[ref](url) as reversed
        let content = "(text)[ref](url)\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 0);
    }

    /// `fix` rewrites every reversed link, including on later lines.
    #[test]
    fn test_md011_fix() {
        let rule = MD011NoReversedLinks;

        let content = "(http://example.com)[Example]\n(another/url)[text]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, "[Example](http://example.com)\n[text](another/url)\n");
    }

    /// Matches inside a fenced code block are ignored; the one after it is reported.
    #[test]
    fn test_md011_in_code_block() {
        let rule = MD011NoReversedLinks;

        let content = "```\n(url)[text]\n```\n(url)[text]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 4);
    }

    /// Matches inside inline code are ignored; the one after it is reported.
    #[test]
    fn test_md011_inline_code() {
        let rule = MD011NoReversedLinks;

        let content = "`(url)[text]` and (url)[text]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 1);
        // The 13-character code span plus " and " puts the second `(` at column 19
        assert_eq!(warnings[0].column, 19);
    }

    /// A footnote reference directly after a link is not a reversed link.
    #[test]
    fn test_md011_no_false_positive_with_footnote() {
        let rule = MD011NoReversedLinks;

        // Should not detect [link](url)[^footnote] as reversed - this is valid markdown
        // The [^footnote] is a footnote reference, not part of a reversed link
        let content = "Some text with [a link](https://example.com/)[^ft].\n\n[^ft]: Note.\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 0);

        // Also test with multiple footnotes
        let content = "[link1](url1)[^1] and [link2](url2)[^2]\n\n[^1]: First\n[^2]: Second\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 0);

        // But should still detect actual reversed links
        let content = "(url)[text] and [link](url)[^footnote]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[0].column, 1);
    }

    /// Dataview inline fields are not flagged in Obsidian flavor.
    #[test]
    fn test_md011_skip_dataview_inline_fields_obsidian() {
        let rule = MD011NoReversedLinks;

        // Dataview inline field pattern: (field:: value)[text]
        // In Obsidian flavor, this should NOT be flagged as a reversed link
        let content = "(status:: active)[link text]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(
            warnings.len(),
            0,
            "Should not flag Dataview inline field in Obsidian flavor"
        );

        // Multiple inline fields
        let content = "(author:: John)[read more] and (date:: 2024-01-01)[link]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 0, "Should not flag multiple Dataview inline fields");

        // Mixed content: Dataview field and actual reversed link
        let content = "(status:: done)[info] (url)[text]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 1, "Should flag reversed link but not Dataview field");
        // "(status:: done)[info] " is 22 characters, so the reversed link starts at column 23
        assert_eq!(warnings[0].column, 23);
    }

    /// Outside Obsidian flavor the same Dataview-like text is still flagged.
    #[test]
    fn test_md011_flag_dataview_in_standard_flavor() {
        let rule = MD011NoReversedLinks;

        // In Standard flavor, (field:: value)[text] is treated as a reversed link
        // because Dataview is Obsidian-specific
        let content = "(status:: active)[link text]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(
            warnings.len(),
            1,
            "Should flag Dataview-like pattern in Standard flavor"
        );
    }

    /// Dataview fields are skipped mid-sentence and with an empty value.
    #[test]
    fn test_md011_dataview_bracket_syntax_obsidian() {
        let rule = MD011NoReversedLinks;

        // Dataview also supports [field:: value] syntax inside brackets
        // The pattern (field:: value)[text] should be skipped in Obsidian
        let content = "Task has (priority:: high)[see details]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 0, "Should skip Dataview field with spaces");

        // Field with no value (just key::)
        let content = "(completed::)[marker]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 0, "Should skip Dataview field with empty value");
    }
}