Skip to main content

rumdl_lib/rules/
md011_no_reversed_links.rs

//! Rule MD011: No reversed link syntax
//!
//! See [docs/md011.md](../../docs/md011.md) for full documentation, configuration, and examples.
4use crate::filtered_lines::FilteredLinesExt;
5use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
6use crate::utils::range_utils::calculate_match_range;
7use crate::utils::regex_cache::get_cached_regex;
8use crate::utils::skip_context::is_in_math_context;
9
/// Pattern that finds `(paren)[bracket]` sequences, i.e. candidate reversed links.
///
/// Capture groups:
/// 1. the "pre-character": either the start of the searched text or a single
///    character that is not a backslash, so an escaped `\(` never starts a match;
/// 2. the content between `(` and `)` (non-empty, no nested parentheses);
/// 3. the content between `[` and `]` (non-empty, contains no `]`).
const REVERSED_LINK_REGEX_STR: &str = r"(^|[^\\])\(([^()]+)\)\[([^\]]+)\]";
12
/// How one half of a `(…)[…]` match looks when judged on its own.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LinkComponent {
    /// Unmistakably a link target: protocol, `www.`, `mailto:` or a path prefix
    ClearUrl,
    /// Several words, so it reads like link text rather than a target
    MultiWord,
    /// A lone word that could equally be a target or link text
    Ambiguous,
}

/// Both halves of a detected `(paren)[bracket]` pattern plus their classifications
#[derive(Debug, Clone)]
struct ReversedLinkInfo {
    /// Text captured between `(` and `)`
    paren_content: String,
    /// Text captured between `[` and `]`
    bracket_content: String,
    /// What the parenthesised text looks like
    paren_type: LinkComponent,
    /// What the bracketed text looks like
    bracket_type: LinkComponent,
}

impl ReversedLinkInfo {
    /// Work out which half is the link text and which is the target.
    ///
    /// Returns `(text, url)`, both borrowed from `self`.
    fn correct_order(&self) -> (&str, &str) {
        let paren = self.paren_content.as_str();
        let bracket = self.bracket_content.as_str();

        // The whole decision reduces to one question: is the parenthesised half the URL?
        let paren_is_url = match self.paren_type {
            // A clear URL in the parentheses always wins, whatever the brackets hold.
            LinkComponent::ClearUrl => true,
            // Multi-word parentheses are treated as the text, so the brackets become the URL.
            LinkComponent::MultiWord => false,
            // A single word defaults to the usual reversed form `(url)[text]`,
            // unless the bracketed half is itself a clear URL.
            LinkComponent::Ambiguous => self.bracket_type != LinkComponent::ClearUrl,
        };

        if paren_is_url {
            (bracket, paren)
        } else {
            (paren, bracket)
        }
    }
}
56
57#[derive(Clone)]
58pub struct MD011NoReversedLinks;
59
60impl MD011NoReversedLinks {
61    /// Classify a link component as URL, multi-word text, or ambiguous
62    fn classify_component(s: &str) -> LinkComponent {
63        let trimmed = s.trim();
64
65        // Check for clear URL indicators
66        if trimmed.starts_with("http://")
67            || trimmed.starts_with("https://")
68            || trimmed.starts_with("ftp://")
69            || trimmed.starts_with("www.")
70            || (trimmed.starts_with("mailto:") && trimmed.contains('@'))
71            || (trimmed.starts_with('/') && trimmed.len() > 1)
72            || (trimmed.starts_with("./") || trimmed.starts_with("../"))
73            || (trimmed.starts_with('#') && trimmed.len() > 1 && !trimmed[1..].contains(' '))
74        {
75            return LinkComponent::ClearUrl;
76        }
77
78        // Multi-word text is likely a description, not a URL
79        if trimmed.contains(' ') {
80            return LinkComponent::MultiWord;
81        }
82
83        // Single word - could be either
84        LinkComponent::Ambiguous
85    }
86}
87
88impl Rule for MD011NoReversedLinks {
89    fn name(&self) -> &'static str {
90        "MD011"
91    }
92
93    fn description(&self) -> &'static str {
94        "Reversed link syntax"
95    }
96
97    fn category(&self) -> RuleCategory {
98        RuleCategory::Link
99    }
100
101    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
102        let mut warnings = Vec::new();
103
104        let line_index = &ctx.line_index;
105
106        // Use filtered_lines() to automatically skip front-matter and Obsidian comments
107        for filtered_line in ctx
108            .filtered_lines()
109            .skip_front_matter()
110            .skip_jsx_expressions()
111            .skip_mdx_comments()
112            .skip_obsidian_comments()
113        {
114            let line_num = filtered_line.line_num;
115            let line = filtered_line.content;
116
117            let byte_pos = line_index.get_line_start_byte(line_num).unwrap_or(0);
118
119            let mut last_end = 0;
120
121            while let Some(cap) = get_cached_regex(REVERSED_LINK_REGEX_STR)
122                .ok()
123                .and_then(|re| re.captures(&line[last_end..]))
124            {
125                let match_obj = cap.get(0).unwrap();
126                let prechar = &cap[1];
127                let paren_content = cap[2].to_string();
128                let bracket_content = cap[3].to_string();
129
130                // Skip wiki-link patterns: if bracket content starts with [ or ends with ]
131                // This handles cases like (url)[[wiki-link]] being misdetected
132                if bracket_content.starts_with('[') || bracket_content.ends_with(']') {
133                    last_end += match_obj.end();
134                    continue;
135                }
136
137                // Skip footnote references: [^footnote]
138                // This prevents false positives like [link](url)[^footnote]
139                if bracket_content.starts_with('^') {
140                    last_end += match_obj.end();
141                    continue;
142                }
143
144                // Skip Dataview inline fields in Obsidian flavor
145                // Pattern: (field:: value)[text] is valid Obsidian syntax, not a reversed link
146                if ctx.flavor == crate::config::MarkdownFlavor::Obsidian && paren_content.contains("::") {
147                    last_end += match_obj.end();
148                    continue;
149                }
150
151                // Check if the brackets at the end are escaped
152                if bracket_content.ends_with('\\') {
153                    last_end += match_obj.end();
154                    continue;
155                }
156
157                // Manual negative lookahead: skip if followed by (
158                // This prevents matching (text)[ref](url) patterns
159                let end_pos = last_end + match_obj.end();
160                if end_pos < line.len() && line[end_pos..].starts_with('(') {
161                    last_end += match_obj.end();
162                    continue;
163                }
164
165                // Calculate the actual position
166                let match_start = last_end + match_obj.start() + prechar.len();
167                let match_byte_pos = byte_pos + match_start;
168
169                // Skip if in code block, inline code, HTML comments, math contexts, or Jinja templates
170                if ctx.is_in_code_block_or_span(match_byte_pos)
171                    || ctx.is_in_html_comment(match_byte_pos)
172                    || ctx.is_in_mdx_comment(match_byte_pos)
173                    || is_in_math_context(ctx, match_byte_pos)
174                    || ctx.is_in_jinja_range(match_byte_pos)
175                {
176                    last_end += match_obj.end();
177                    continue;
178                }
179
180                // Classify both components and determine correct order
181                let paren_type = Self::classify_component(&paren_content);
182                let bracket_type = Self::classify_component(&bracket_content);
183
184                let info = ReversedLinkInfo {
185                    paren_content,
186                    bracket_content,
187                    paren_type,
188                    bracket_type,
189                };
190
191                let (text, url) = info.correct_order();
192
193                // Calculate the range for the actual reversed link (excluding prechar)
194                let actual_length = match_obj.len() - prechar.len();
195                let (start_line, start_col, end_line, end_col) =
196                    calculate_match_range(line_num, line, match_start, actual_length);
197
198                warnings.push(LintWarning {
199                    rule_name: Some(self.name().to_string()),
200                    message: format!("Reversed link syntax: use [{text}]({url}) instead"),
201                    line: start_line,
202                    column: start_col,
203                    end_line,
204                    end_column: end_col,
205                    severity: Severity::Error,
206                    fix: Some(Fix {
207                        range: {
208                            let match_start_byte = byte_pos + match_start;
209                            let match_end_byte = match_start_byte + actual_length;
210                            match_start_byte..match_end_byte
211                        },
212                        replacement: format!("[{text}]({url})"),
213                    }),
214                });
215
216                last_end += match_obj.end();
217            }
218        }
219
220        Ok(warnings)
221    }
222
223    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
224        let warnings = self.check(ctx)?;
225        let warnings =
226            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
227        if warnings.is_empty() {
228            return Ok(ctx.content.to_string());
229        }
230
231        let mut content = ctx.content.to_string();
232        // Apply fixes in reverse order to preserve byte offsets
233        let mut fixes: Vec<_> = warnings.iter().filter_map(|w| w.fix.as_ref()).collect();
234        fixes.sort_by(|a, b| b.range.start.cmp(&a.range.start));
235
236        for fix in fixes {
237            if fix.range.start < content.len() && fix.range.end <= content.len() {
238                content.replace_range(fix.range.clone(), &fix.replacement);
239            }
240        }
241        Ok(content)
242    }
243
244    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
245        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
246    }
247
248    fn as_any(&self) -> &dyn std::any::Any {
249        self
250    }
251
252    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
253    where
254        Self: Sized,
255    {
256        Box::new(MD011NoReversedLinks)
257    }
258}
259
// Unit tests for MD011. Each test builds a `LintContext` from a literal
// document and checks the warnings from `check()` and/or the output of `fix()`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// A reversed link is reported; a correctly written link is not.
    #[test]
    fn test_md011_basic() {
        let rule = MD011NoReversedLinks;

        // Should detect reversed links
        let content = "(http://example.com)[Example]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);

        // Should not detect correct links
        let content = "[Example](http://example.com)\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 0);
    }

    /// An escaped closing bracket (`\]`) prevents a report.
    #[test]
    fn test_md011_with_escaped_brackets() {
        let rule = MD011NoReversedLinks;

        // Should not detect if brackets are escaped
        let content = "(url)[text\\]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 0);
    }

    /// `(text)[ref](url)` is parenthesised text followed by a normal link.
    #[test]
    fn test_md011_no_false_positive_with_reference_link() {
        let rule = MD011NoReversedLinks;

        // Should not detect (text)[ref](url) as reversed
        let content = "(text)[ref](url)\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 0);
    }

    /// `fix()` swaps both reversed links into `[text](url)` form.
    #[test]
    fn test_md011_fix() {
        let rule = MD011NoReversedLinks;

        let content = "(http://example.com)[Example]\n(another/url)[text]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, "[Example](http://example.com)\n[text](another/url)\n");
    }

    /// A match inside a fenced code block is ignored; the one after it is reported.
    #[test]
    fn test_md011_in_code_block() {
        let rule = MD011NoReversedLinks;

        let content = "```\n(url)[text]\n```\n(url)[text]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 4);
    }

    /// A match inside an inline code span is ignored; the one outside is reported.
    #[test]
    fn test_md011_inline_code() {
        let rule = MD011NoReversedLinks;

        let content = "`(url)[text]` and (url)[text]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].column, 19);
    }

    /// A footnote reference directly after a normal link is not a reversed link.
    #[test]
    fn test_md011_no_false_positive_with_footnote() {
        let rule = MD011NoReversedLinks;

        // Should not detect [link](url)[^footnote] as reversed - this is valid markdown
        // The [^footnote] is a footnote reference, not part of a reversed link
        let content = "Some text with [a link](https://example.com/)[^ft].\n\n[^ft]: Note.\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 0);

        // Also test with multiple footnotes
        let content = "[link1](url1)[^1] and [link2](url2)[^2]\n\n[^1]: First\n[^2]: Second\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 0);

        // But should still detect actual reversed links
        let content = "(url)[text] and [link](url)[^footnote]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[0].column, 1);
    }

    /// In Obsidian flavor, Dataview inline fields `(field:: value)[text]` are not reported.
    #[test]
    fn test_md011_skip_dataview_inline_fields_obsidian() {
        let rule = MD011NoReversedLinks;

        // Dataview inline field pattern: (field:: value)[text]
        // In Obsidian flavor, this should NOT be flagged as a reversed link
        let content = "(status:: active)[link text]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(
            warnings.len(),
            0,
            "Should not flag Dataview inline field in Obsidian flavor"
        );

        // Multiple inline fields
        let content = "(author:: John)[read more] and (date:: 2024-01-01)[link]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 0, "Should not flag multiple Dataview inline fields");

        // Mixed content: Dataview field and actual reversed link
        let content = "(status:: done)[info] (url)[text]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 1, "Should flag reversed link but not Dataview field");
        assert_eq!(warnings[0].column, 23);
    }

    /// In Standard flavor, the same Dataview-like pattern is reported.
    #[test]
    fn test_md011_flag_dataview_in_standard_flavor() {
        let rule = MD011NoReversedLinks;

        // In Standard flavor, (field:: value)[text] is treated as a reversed link
        // because Dataview is Obsidian-specific
        let content = "(status:: active)[link text]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(
            warnings.len(),
            1,
            "Should flag Dataview-like pattern in Standard flavor"
        );
    }

    /// Dataview fields are skipped mid-sentence and when the value is empty.
    #[test]
    fn test_md011_dataview_bracket_syntax_obsidian() {
        let rule = MD011NoReversedLinks;

        // Dataview also supports [field:: value] syntax inside brackets
        // The pattern (field:: value)[text] should be skipped in Obsidian
        let content = "Task has (priority:: high)[see details]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 0, "Should skip Dataview field with spaces");

        // Field with no value (just key::)
        let content = "(completed::)[marker]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 0, "Should skip Dataview field with empty value");
    }

    /// Content inside an Obsidian `%%` comment block is neither reported nor fixed.
    #[test]
    fn test_md011_fix_skips_obsidian_comments() {
        let rule = MD011NoReversedLinks;

        // Reversed link inside Obsidian comment block should not be modified by fix()
        let content = "%%\n(http://example.com)[hidden link]\n%%\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);

        // check() should produce no warnings (Obsidian comment is skipped)
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 0, "check() should skip Obsidian comment content");

        // fix() should not modify content inside Obsidian comments
        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(
            fixed, content,
            "fix() should not modify reversed links inside Obsidian comments"
        );
    }

    /// Only the reversed link outside the Obsidian comment is reported and fixed.
    #[test]
    fn test_md011_fix_skips_obsidian_comments_with_surrounding_content() {
        let rule = MD011NoReversedLinks;

        // Mix of Obsidian comment and real reversed link
        let content = "%%\n(http://example.com)[hidden]\n%%\n\n(http://real.com)[visible]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);

        // check() should only flag the visible one
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 1, "check() should only flag visible reversed link");
        assert_eq!(warnings[0].line, 5);

        // fix() should only fix the visible one, leaving comment content untouched
        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(
            fixed, "%%\n(http://example.com)[hidden]\n%%\n\n[visible](http://real.com)\n",
            "fix() should only modify visible reversed links"
        );
    }

    /// `fix()` leaves a Dataview field untouched while fixing a real reversed link.
    #[test]
    fn test_md011_fix_skips_dataview_fields_obsidian() {
        let rule = MD011NoReversedLinks;

        // Dataview inline field should not be modified by fix()
        let content = "(status:: active)[link text]\n(http://example.com)[real link]\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);

        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 1, "check() should only flag the real reversed link");

        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(
            fixed, "(status:: active)[link text]\n[real link](http://example.com)\n",
            "fix() should not modify Dataview inline fields"
        );
    }
}
484}