rumdl_lib/rules/
md054_link_image_style.rs

1//!
2//! Rule MD054: Link and image style should be consistent
3//!
4//! See [docs/md054.md](../../docs/md054.md) for full documentation, configuration, and examples.
5
6use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
7use crate::utils::range_utils::calculate_match_range;
8use lazy_static::lazy_static;
9use regex::Regex;
10
11mod md054_config;
12use md054_config::MD054Config;
13
14lazy_static! {
15    // Updated regex patterns that work with Unicode characters
16    static ref AUTOLINK_RE: Regex = Regex::new(r"<([^<>]+)>").unwrap();
17    static ref INLINE_RE: Regex = Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap();
18    static ref URL_INLINE_RE: Regex = Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap();
19    static ref SHORTCUT_RE: Regex = Regex::new(r"\[([^\]]+)\]").unwrap();
20    static ref COLLAPSED_RE: Regex = Regex::new(r"\[([^\]]+)\]\[\]").unwrap();
21    static ref FULL_RE: Regex = Regex::new(r"\[([^\]]+)\]\[([^\]]+)\]").unwrap();
22    static ref CODE_BLOCK_DELIMITER: Regex = Regex::new(r"^(```|~~~)").unwrap();
23    static ref REFERENCE_DEF_RE: Regex = Regex::new(r"^\s*\[([^\]]+)\]:\s+(.+)$").unwrap();
24}
25
26/// Rule MD054: Link and image style should be consistent
27///
28/// This rule is triggered when different link or image styles are used in the same document.
29/// Markdown supports various styles for links and images, and this rule enforces consistency.
30///
31/// ## Supported Link Styles
32///
33/// - **Autolink**: `<https://example.com>`
34/// - **Inline**: `[link text](https://example.com)`
35/// - **URL Inline**: Special case of inline links where the URL itself is also the link text: `[https://example.com](https://example.com)`
36/// - **Shortcut**: `[link text]` (requires a reference definition elsewhere in the document)
37/// - **Collapsed**: `[link text][]` (requires a reference definition with the same name)
38/// - **Full**: `[link text][reference]` (requires a reference definition for the reference)
39///
40/// ## Configuration Options
41///
42/// You can configure which link styles are allowed. By default, all styles are allowed:
43///
44/// ```yaml
45/// MD054:
46///   autolink: true    # Allow autolink style
47///   inline: true      # Allow inline style
48///   url_inline: true  # Allow URL inline style
49///   shortcut: true    # Allow shortcut style
50///   collapsed: true   # Allow collapsed style
51///   full: true        # Allow full style
52/// ```
53///
54/// To enforce a specific style, set only that style to `true` and all others to `false`.
55///
56/// ## Unicode Support
57///
58/// This rule fully supports Unicode characters in link text and URLs, including:
59/// - Combining characters (e.g., café)
60/// - Zero-width joiners (e.g., family emojis: 👨‍👩‍👧‍👦)
61/// - Right-to-left text (e.g., Arabic, Hebrew)
62/// - Emojis and other special characters
63///
64/// ## Rationale
65///
66/// Consistent link styles improve document readability and maintainability. Different link
67/// styles have different advantages (e.g., inline links are self-contained, reference links
68/// keep the content cleaner), but mixing styles can create confusion.
69///
70#[derive(Debug, Default, Clone)]
71pub struct MD054LinkImageStyle {
72    config: MD054Config,
73}
74
75impl MD054LinkImageStyle {
76    pub fn new(autolink: bool, collapsed: bool, full: bool, inline: bool, shortcut: bool, url_inline: bool) -> Self {
77        Self {
78            config: MD054Config {
79                autolink,
80                collapsed,
81                full,
82                inline,
83                shortcut,
84                url_inline,
85            },
86        }
87    }
88
89    pub fn from_config_struct(config: MD054Config) -> Self {
90        Self { config }
91    }
92
93    /// Check if a style is allowed based on configuration
94    fn is_style_allowed(&self, style: &str) -> bool {
95        match style {
96            "autolink" => self.config.autolink,
97            "collapsed" => self.config.collapsed,
98            "full" => self.config.full,
99            "inline" => self.config.inline,
100            "shortcut" => self.config.shortcut,
101            "url_inline" => self.config.url_inline,
102            _ => false,
103        }
104    }
105}
106
/// A single link-like span located on one line, together with its classified style.
#[derive(Debug)]
struct LinkMatch {
    /// Style name, e.g. `"inline"` or `"autolink"`.
    style: &'static str,
    /// Byte offset within the line where the match begins (inclusive).
    start: usize,
    /// Byte offset within the line where the match stops (exclusive).
    end: usize,
}
113
114impl Rule for MD054LinkImageStyle {
115    fn name(&self) -> &'static str {
116        "MD054"
117    }
118
119    fn description(&self) -> &'static str {
120        "Link and image style should be consistent"
121    }
122
123    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
124        let content = ctx.content;
125
126        // Early returns for performance
127        if content.is_empty() {
128            return Ok(Vec::new());
129        }
130
131        // Quick check for any link patterns before expensive processing
132        if !content.contains('[') && !content.contains('<') {
133            return Ok(Vec::new());
134        }
135
136        let mut warnings = Vec::new();
137        let lines: Vec<&str> = content.lines().collect();
138
139        for (line_num, line) in lines.iter().enumerate() {
140            // Skip code blocks and reference definitions early
141            if ctx.line_info(line_num + 1).is_some_and(|info| info.in_code_block) {
142                continue;
143            }
144            if REFERENCE_DEF_RE.is_match(line) {
145                continue;
146            }
147            if line.trim_start().starts_with("<!--") {
148                continue;
149            }
150
151            // Quick check for any link patterns in this line
152            if !line.contains('[') && !line.contains('<') {
153                continue;
154            }
155
156            // Find all matches in the line
157            let mut matches = Vec::new();
158
159            // Find all autolinks
160            for cap in AUTOLINK_RE.captures_iter(line) {
161                let m = cap.get(0).unwrap();
162                matches.push(LinkMatch {
163                    style: "autolink",
164                    start: m.start(),
165                    end: m.end(),
166                });
167            }
168
169            // Find all full references
170            for cap in FULL_RE.captures_iter(line) {
171                let m = cap.get(0).unwrap();
172                matches.push(LinkMatch {
173                    style: "full",
174                    start: m.start(),
175                    end: m.end(),
176                });
177            }
178
179            // Find all collapsed references
180            for cap in COLLAPSED_RE.captures_iter(line) {
181                let m = cap.get(0).unwrap();
182                matches.push(LinkMatch {
183                    style: "collapsed",
184                    start: m.start(),
185                    end: m.end(),
186                });
187            }
188
189            // Find all inline links
190            for cap in INLINE_RE.captures_iter(line) {
191                let m = cap.get(0).unwrap();
192                let text = cap.get(1).unwrap().as_str();
193                let url = cap.get(2).unwrap().as_str();
194                matches.push(LinkMatch {
195                    style: if text == url { "url_inline" } else { "inline" },
196                    start: m.start(),
197                    end: m.end(),
198                });
199            }
200
201            // Sort matches by start position to ensure we don't double-count
202            matches.sort_by_key(|m| m.start);
203
204            // Remove overlapping matches (keep the first one)
205            let mut filtered_matches = Vec::new();
206            let mut last_end = 0;
207            for m in matches {
208                if m.start >= last_end {
209                    last_end = m.end;
210                    filtered_matches.push(m);
211                }
212            }
213
214            // Now find shortcut references that don't overlap with other matches
215            for cap in SHORTCUT_RE.captures_iter(line) {
216                let m = cap.get(0).unwrap();
217                let start = m.start();
218                let end = m.end();
219
220                // Check if this overlaps with any existing match
221                let overlaps = filtered_matches.iter().any(|existing| {
222                    (start >= existing.start && start < existing.end) || (end > existing.start && end <= existing.end)
223                });
224
225                if !overlaps {
226                    // Check if followed by '(', '[', '[]', or ']['
227                    let after = &line[end..];
228                    if !after.starts_with('(') && !after.starts_with('[') {
229                        filtered_matches.push(LinkMatch {
230                            style: "shortcut",
231                            start,
232                            end,
233                        });
234                    }
235                }
236            }
237
238            // Sort again after adding shortcuts
239            filtered_matches.sort_by_key(|m| m.start);
240
241            // Check each match
242            for m in filtered_matches {
243                let match_start_char = line[..m.start].chars().count();
244
245                if !ctx.is_in_code_span(line_num + 1, match_start_char) && !self.is_style_allowed(m.style) {
246                    let match_len = line[m.start..m.end].chars().count();
247                    let (start_line, start_col, end_line, end_col) =
248                        calculate_match_range(line_num + 1, line, match_start_char, match_len);
249
250                    warnings.push(LintWarning {
251                        rule_name: Some(self.name().to_string()),
252                        line: start_line,
253                        column: start_col,
254                        end_line,
255                        end_column: end_col,
256                        message: format!("Link/image style '{}' is not consistent with document", m.style),
257                        severity: Severity::Warning,
258                        fix: None,
259                    });
260                }
261            }
262        }
263        Ok(warnings)
264    }
265
266    fn fix(&self, _ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
267        // Automatic fixing for link styles is not supported and could break content
268        Err(LintError::FixFailed(
269            "MD054 does not support automatic fixing of link/image style consistency.".to_string(),
270        ))
271    }
272
273    fn fix_capability(&self) -> crate::rule::FixCapability {
274        crate::rule::FixCapability::Unfixable
275    }
276
277    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
278        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
279    }
280
281    fn as_any(&self) -> &dyn std::any::Any {
282        self
283    }
284
285    fn default_config_section(&self) -> Option<(String, toml::Value)> {
286        let json_value = serde_json::to_value(&self.config).ok()?;
287        Some((
288            self.name().to_string(),
289            crate::rule_config_serde::json_to_toml_value(&json_value)?,
290        ))
291    }
292
293    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
294    where
295        Self: Sized,
296    {
297        let rule_config = crate::rule_config_serde::load_rule_config::<MD054Config>(config);
298        Box::new(Self::from_config_struct(rule_config))
299    }
300}
301
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Runs the rule over `content` using the standard Markdown flavor and returns
    /// the warnings it produced.
    fn lint(rule: &MD054LinkImageStyle, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    /// A rule that permits the inline style only.
    fn inline_only() -> MD054LinkImageStyle {
        MD054LinkImageStyle::new(false, false, false, true, false, false)
    }

    #[test]
    fn test_all_styles_allowed_by_default() {
        let rule = MD054LinkImageStyle::new(true, true, true, true, true, true);
        let result = lint(
            &rule,
            "[inline](url) [ref][] [ref] <autolink> [full][ref] [url](url)\n\n[ref]: url",
        );

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_only_inline_allowed() {
        let result = lint(
            &inline_only(),
            "[allowed](url) [not][ref] <https://bad.com> [bad][] [shortcut]\n\n[ref]: url\n[shortcut]: url",
        );

        assert_eq!(result.len(), 4);
        assert!(result[0].message.contains("'full'"));
        assert!(result[1].message.contains("'autolink'"));
        assert!(result[2].message.contains("'collapsed'"));
        assert!(result[3].message.contains("'shortcut'"));
    }

    #[test]
    fn test_only_autolink_allowed() {
        let rule = MD054LinkImageStyle::new(true, false, false, false, false, false);
        let result = lint(&rule, "<https://good.com> [bad](url) [bad][ref]\n\n[ref]: url");

        assert_eq!(result.len(), 2);
        assert!(result[0].message.contains("'inline'"));
        assert!(result[1].message.contains("'full'"));
    }

    #[test]
    fn test_url_inline_detection() {
        let rule = MD054LinkImageStyle::new(false, false, false, true, false, true);
        let result = lint(
            &rule,
            "[https://example.com](https://example.com) [text](https://example.com)",
        );

        // The first link is url_inline and the second is inline; both are allowed.
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_url_inline_not_allowed() {
        let result = lint(&inline_only(), "[https://example.com](https://example.com)");

        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("'url_inline'"));
    }

    #[test]
    fn test_shortcut_vs_full_detection() {
        let rule = MD054LinkImageStyle::new(false, false, true, false, false, false);
        let result = lint(&rule, "[shortcut] [full][ref]\n\n[shortcut]: url\n[ref]: url2");

        // Only the shortcut is flagged; the full reference is allowed.
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("'shortcut'"));
    }

    #[test]
    fn test_collapsed_reference() {
        let rule = MD054LinkImageStyle::new(false, true, false, false, false, false);
        let result = lint(&rule, "[collapsed][] [bad][ref]\n\n[collapsed]: url\n[ref]: url2");

        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("'full'"));
    }

    #[test]
    fn test_code_blocks_ignored() {
        let result = lint(
            &inline_only(),
            "```\n[ignored](url) <https://ignored.com>\n```\n\n[checked](url)",
        );

        // The autolink lives inside the fenced block and the remaining link is inline.
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_code_spans_ignored() {
        let result = lint(
            &inline_only(),
            "`[ignored](url)` and `<https://ignored.com>` but [checked](url)",
        );

        // The autolink lives inside a code span and the remaining link is inline.
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_reference_definitions_ignored() {
        let result = lint(
            &inline_only(),
            "[ref]: https://example.com\n[ref2]: <https://example2.com>",
        );

        // Reference definition lines are never inspected for links.
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_html_comments_ignored() {
        let result = lint(
            &inline_only(),
            "<!-- [ignored](url) -->\n  <!-- <https://ignored.com> -->",
        );

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_unicode_support() {
        let result = lint(
            &inline_only(),
            "[café ☕](https://café.com) [emoji 😀](url) [한글](url) [עברית](url)",
        );

        // Every link is classified as inline, which is allowed.
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_line_positions() {
        let result = lint(&inline_only(), "Line 1\n\nLine 3 with <https://bad.com> here");

        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 3);
        assert_eq!(result[0].column, 13); // Position of '<'
    }

    #[test]
    fn test_multiple_links_same_line() {
        let result = lint(&inline_only(), "[ok](url) but <bad> and [also][bad]\n\n[bad]: url");

        assert_eq!(result.len(), 2);
        assert!(result[0].message.contains("'autolink'"));
        assert!(result[1].message.contains("'full'"));
    }

    #[test]
    fn test_empty_content() {
        let result = lint(&inline_only(), "");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_no_links() {
        let result = lint(&inline_only(), "Just plain text without any links");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_fix_returns_error() {
        let rule = inline_only();
        let ctx = LintContext::new("[link](url)", crate::config::MarkdownFlavor::Standard);

        // Fail loudly on `Ok` or on any other error variant instead of silently
        // skipping the message assertion.
        let Err(LintError::FixFailed(msg)) = rule.fix(&ctx) else {
            panic!("fix() must fail with LintError::FixFailed");
        };
        assert!(msg.contains("does not support automatic fixing"));
    }

    #[test]
    fn test_priority_order() {
        // `[text][ref]` must be classified as full, not as a shortcut.
        let result = lint(
            &inline_only(),
            "[text][ref] not detected as [shortcut]\n\n[ref]: url\n[shortcut]: url2",
        );

        assert_eq!(result.len(), 2);
        assert!(result[0].message.contains("'full'"));
        assert!(result[1].message.contains("'shortcut'"));
    }

    #[test]
    fn test_not_shortcut_when_followed_by_bracket() {
        let rule = MD054LinkImageStyle::new(false, false, false, true, true, false);
        // `[text][` is not treated as a shortcut; the second line holds an inline link.
        let result = lint(&rule, "[text][ more text\n[text](url) is inline");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_complex_unicode_with_zwj() {
        // Zero-width joiners and other multi-codepoint sequences in the link text.
        let result = lint(
            &inline_only(),
            "[👨‍👩‍👧‍👦 family](url) [café☕](https://café.com)",
        );

        // Both links are classified as inline, which is allowed.
        assert_eq!(result.len(), 0);
    }
}