rumdl_lib/rules/
md054_link_image_style.rs

//!
//! Rule MD054: Link and image style should be consistent
//!
//! See [docs/md054.md](../../docs/md054.md) for full documentation, configuration, and examples.
5
6use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
7use crate::utils::range_utils::calculate_match_range;
8use regex::Regex;
9use std::sync::LazyLock;
10
11mod md054_config;
12use md054_config::MD054Config;
13
14// Updated regex patterns that work with Unicode characters
15static AUTOLINK_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"<([^<>]+)>").unwrap());
16static INLINE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap());
17static SHORTCUT_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]").unwrap());
18static COLLAPSED_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\[\]").unwrap());
19static FULL_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\[([^\]]+)\]").unwrap());
20static REFERENCE_DEF_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\[([^\]]+)\]:\s+(.+)$").unwrap());
21
22/// Rule MD054: Link and image style should be consistent
23///
24/// This rule is triggered when different link or image styles are used in the same document.
25/// Markdown supports various styles for links and images, and this rule enforces consistency.
26///
27/// ## Supported Link Styles
28///
29/// - **Autolink**: `<https://example.com>`
30/// - **Inline**: `[link text](https://example.com)`
31/// - **URL Inline**: Special case of inline links where the URL itself is also the link text: `[https://example.com](https://example.com)`
32/// - **Shortcut**: `[link text]` (requires a reference definition elsewhere in the document)
33/// - **Collapsed**: `[link text][]` (requires a reference definition with the same name)
34/// - **Full**: `[link text][reference]` (requires a reference definition for the reference)
35///
36/// ## Configuration Options
37///
38/// You can configure which link styles are allowed. By default, all styles are allowed:
39///
40/// ```yaml
41/// MD054:
42///   autolink: true    # Allow autolink style
43///   inline: true      # Allow inline style
44///   url_inline: true  # Allow URL inline style
45///   shortcut: true    # Allow shortcut style
46///   collapsed: true   # Allow collapsed style
47///   full: true        # Allow full style
48/// ```
49///
50/// To enforce a specific style, set only that style to `true` and all others to `false`.
51///
52/// ## Unicode Support
53///
54/// This rule fully supports Unicode characters in link text and URLs, including:
55/// - Combining characters (e.g., café)
56/// - Zero-width joiners (e.g., family emojis: 👨‍👩‍👧‍👦)
57/// - Right-to-left text (e.g., Arabic, Hebrew)
58/// - Emojis and other special characters
59///
60/// ## Rationale
61///
62/// Consistent link styles improve document readability and maintainability. Different link
63/// styles have different advantages (e.g., inline links are self-contained, reference links
64/// keep the content cleaner), but mixing styles can create confusion.
65///
66#[derive(Debug, Default, Clone)]
67pub struct MD054LinkImageStyle {
68    config: MD054Config,
69}
70
71impl MD054LinkImageStyle {
72    pub fn new(autolink: bool, collapsed: bool, full: bool, inline: bool, shortcut: bool, url_inline: bool) -> Self {
73        Self {
74            config: MD054Config {
75                autolink,
76                collapsed,
77                full,
78                inline,
79                shortcut,
80                url_inline,
81            },
82        }
83    }
84
85    pub fn from_config_struct(config: MD054Config) -> Self {
86        Self { config }
87    }
88
89    /// Check if a style is allowed based on configuration
90    fn is_style_allowed(&self, style: &str) -> bool {
91        match style {
92            "autolink" => self.config.autolink,
93            "collapsed" => self.config.collapsed,
94            "full" => self.config.full,
95            "inline" => self.config.inline,
96            "shortcut" => self.config.shortcut,
97            "url_inline" => self.config.url_inline,
98            _ => false,
99        }
100    }
101}
102
/// One link or image occurrence found on a single line.
#[derive(Debug)]
struct LinkMatch {
    /// Style identifier, e.g. `"inline"` or `"autolink"`; also used in warning messages.
    style: &'static str,
    /// Byte offset within the line where the match begins.
    start: usize,
    /// Byte offset within the line just past the end of the match.
    end: usize,
}
109
110impl Rule for MD054LinkImageStyle {
111    fn name(&self) -> &'static str {
112        "MD054"
113    }
114
115    fn description(&self) -> &'static str {
116        "Link and image style should be consistent"
117    }
118
119    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
120        let content = ctx.content;
121
122        // Early returns for performance
123        if content.is_empty() {
124            return Ok(Vec::new());
125        }
126
127        // Quick check for any link patterns before expensive processing
128        if !content.contains('[') && !content.contains('<') {
129            return Ok(Vec::new());
130        }
131
132        let mut warnings = Vec::new();
133        let lines: Vec<&str> = content.lines().collect();
134
135        for (line_num, line) in lines.iter().enumerate() {
136            // Skip code blocks and reference definitions early
137            if ctx.line_info(line_num + 1).is_some_and(|info| info.in_code_block) {
138                continue;
139            }
140            if REFERENCE_DEF_RE.is_match(line) {
141                continue;
142            }
143            if line.trim_start().starts_with("<!--") {
144                continue;
145            }
146
147            // Quick check for any link patterns in this line
148            if !line.contains('[') && !line.contains('<') {
149                continue;
150            }
151
152            // Find all matches in the line
153            let mut matches = Vec::new();
154
155            // Find all autolinks
156            for cap in AUTOLINK_RE.captures_iter(line) {
157                let m = cap.get(0).unwrap();
158                matches.push(LinkMatch {
159                    style: "autolink",
160                    start: m.start(),
161                    end: m.end(),
162                });
163            }
164
165            // Find all full references
166            for cap in FULL_RE.captures_iter(line) {
167                let m = cap.get(0).unwrap();
168                matches.push(LinkMatch {
169                    style: "full",
170                    start: m.start(),
171                    end: m.end(),
172                });
173            }
174
175            // Find all collapsed references
176            for cap in COLLAPSED_RE.captures_iter(line) {
177                let m = cap.get(0).unwrap();
178                matches.push(LinkMatch {
179                    style: "collapsed",
180                    start: m.start(),
181                    end: m.end(),
182                });
183            }
184
185            // Find all inline links
186            for cap in INLINE_RE.captures_iter(line) {
187                let m = cap.get(0).unwrap();
188                let text = cap.get(1).unwrap().as_str();
189                let url = cap.get(2).unwrap().as_str();
190                matches.push(LinkMatch {
191                    style: if text == url { "url_inline" } else { "inline" },
192                    start: m.start(),
193                    end: m.end(),
194                });
195            }
196
197            // Sort matches by start position to ensure we don't double-count
198            matches.sort_by_key(|m| m.start);
199
200            // Remove overlapping matches (keep the first one)
201            let mut filtered_matches = Vec::new();
202            let mut last_end = 0;
203            for m in matches {
204                if m.start >= last_end {
205                    last_end = m.end;
206                    filtered_matches.push(m);
207                }
208            }
209
210            // Now find shortcut references that don't overlap with other matches
211            for cap in SHORTCUT_RE.captures_iter(line) {
212                let m = cap.get(0).unwrap();
213                let start = m.start();
214                let end = m.end();
215
216                // Check if this overlaps with any existing match
217                let overlaps = filtered_matches.iter().any(|existing| {
218                    (start >= existing.start && start < existing.end) || (end > existing.start && end <= existing.end)
219                });
220
221                if !overlaps {
222                    // Check if followed by '(', '[', '[]', or ']['
223                    let after = &line[end..];
224                    if !after.starts_with('(') && !after.starts_with('[') {
225                        filtered_matches.push(LinkMatch {
226                            style: "shortcut",
227                            start,
228                            end,
229                        });
230                    }
231                }
232            }
233
234            // Sort again after adding shortcuts
235            filtered_matches.sort_by_key(|m| m.start);
236
237            // Check each match
238            for m in filtered_matches {
239                let match_start_char = line[..m.start].chars().count();
240
241                if !ctx.is_in_code_span(line_num + 1, match_start_char) && !self.is_style_allowed(m.style) {
242                    let match_len = line[m.start..m.end].chars().count();
243                    let (start_line, start_col, end_line, end_col) =
244                        calculate_match_range(line_num + 1, line, match_start_char, match_len);
245
246                    warnings.push(LintWarning {
247                        rule_name: Some(self.name().to_string()),
248                        line: start_line,
249                        column: start_col,
250                        end_line,
251                        end_column: end_col,
252                        message: format!("Link/image style '{}' is not allowed", m.style),
253                        severity: Severity::Warning,
254                        fix: None,
255                    });
256                }
257            }
258        }
259        Ok(warnings)
260    }
261
262    fn fix(&self, _ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
263        // Automatic fixing for link styles is not supported and could break content
264        Err(LintError::FixFailed(
265            "MD054 does not support automatic fixing of link/image style consistency.".to_string(),
266        ))
267    }
268
269    fn fix_capability(&self) -> crate::rule::FixCapability {
270        crate::rule::FixCapability::Unfixable
271    }
272
273    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
274        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
275    }
276
277    fn as_any(&self) -> &dyn std::any::Any {
278        self
279    }
280
281    fn default_config_section(&self) -> Option<(String, toml::Value)> {
282        let json_value = serde_json::to_value(&self.config).ok()?;
283        Some((
284            self.name().to_string(),
285            crate::rule_config_serde::json_to_toml_value(&json_value)?,
286        ))
287    }
288
289    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
290    where
291        Self: Sized,
292    {
293        let rule_config = crate::rule_config_serde::load_rule_config::<MD054Config>(config);
294        Box::new(Self::from_config_struct(rule_config))
295    }
296}
297
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Rule instance that allows plain inline links (`[text](url)`) and nothing else.
    fn inline_only() -> MD054LinkImageStyle {
        MD054LinkImageStyle::new(false, false, false, true, false, false)
    }

    /// Runs `rule` over `content` using the standard Markdown flavor.
    fn lint(rule: &MD054LinkImageStyle, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    /// Asserts that `warnings` names exactly `expected_styles`, in order.
    fn assert_flagged_styles(warnings: &[LintWarning], expected_styles: &[&str]) {
        let messages: Vec<&str> = warnings.iter().map(|w| w.message.as_str()).collect();
        assert_eq!(
            messages.len(),
            expected_styles.len(),
            "unexpected warnings: {messages:?}"
        );
        for (message, style) in messages.iter().zip(expected_styles) {
            let quoted = format!("'{style}'");
            assert!(
                message.contains(quoted.as_str()),
                "expected style {quoted} in message: {message}"
            );
        }
    }

    #[test]
    fn test_all_styles_allowed_by_default() {
        let rule = MD054LinkImageStyle::new(true, true, true, true, true, true);
        let content = "[inline](url) [ref][] [ref] <autolink> [full][ref] [url](url)\n\n[ref]: url";

        assert_flagged_styles(&lint(&rule, content), &[]);
    }

    #[test]
    fn test_only_inline_allowed() {
        let content = "[allowed](url) [not][ref] <https://bad.com> [bad][] [shortcut]\n\n[ref]: url\n[shortcut]: url";

        assert_flagged_styles(
            &lint(&inline_only(), content),
            &["full", "autolink", "collapsed", "shortcut"],
        );
    }

    #[test]
    fn test_only_autolink_allowed() {
        let rule = MD054LinkImageStyle::new(true, false, false, false, false, false);
        let content = "<https://good.com> [bad](url) [bad][ref]\n\n[ref]: url";

        assert_flagged_styles(&lint(&rule, content), &["inline", "full"]);
    }

    #[test]
    fn test_url_inline_detection() {
        let rule = MD054LinkImageStyle::new(false, false, false, true, false, true);
        let content = "[https://example.com](https://example.com) [text](https://example.com)";

        // First is url_inline (allowed), second is inline (allowed)
        assert_flagged_styles(&lint(&rule, content), &[]);
    }

    #[test]
    fn test_url_inline_not_allowed() {
        let content = "[https://example.com](https://example.com)";

        assert_flagged_styles(&lint(&inline_only(), content), &["url_inline"]);
    }

    #[test]
    fn test_shortcut_vs_full_detection() {
        let rule = MD054LinkImageStyle::new(false, false, true, false, false, false);
        let content = "[shortcut] [full][ref]\n\n[shortcut]: url\n[ref]: url2";

        // Only shortcut should be flagged
        assert_flagged_styles(&lint(&rule, content), &["shortcut"]);
    }

    #[test]
    fn test_collapsed_reference() {
        let rule = MD054LinkImageStyle::new(false, true, false, false, false, false);
        let content = "[collapsed][] [bad][ref]\n\n[collapsed]: url\n[ref]: url2";

        assert_flagged_styles(&lint(&rule, content), &["full"]);
    }

    #[test]
    fn test_code_blocks_ignored() {
        let content = "```\n[ignored](url) <https://ignored.com>\n```\n\n[checked](url)";

        // The fenced autolink is skipped; the inline link outside the fence is allowed
        assert_flagged_styles(&lint(&inline_only(), content), &[]);
    }

    #[test]
    fn test_code_spans_ignored() {
        let content = "`[ignored](url)` and `<https://ignored.com>` but [checked](url)";

        // The autolink sits in a code span; the inline link outside is allowed
        assert_flagged_styles(&lint(&inline_only(), content), &[]);
    }

    #[test]
    fn test_reference_definitions_ignored() {
        let content = "[ref]: https://example.com\n[ref2]: <https://example2.com>";

        // Reference definitions should be ignored
        assert_flagged_styles(&lint(&inline_only(), content), &[]);
    }

    #[test]
    fn test_html_comments_ignored() {
        let content = "<!-- [ignored](url) -->\n  <!-- <https://ignored.com> -->";

        assert_flagged_styles(&lint(&inline_only(), content), &[]);
    }

    #[test]
    fn test_unicode_support() {
        let content = "[café ☕](https://café.com) [emoji 😀](url) [한글](url) [עברית](url)";

        // All should be detected as inline (allowed)
        assert_flagged_styles(&lint(&inline_only(), content), &[]);
    }

    #[test]
    fn test_line_positions() {
        let content = "Line 1\n\nLine 3 with <https://bad.com> here";
        let result = lint(&inline_only(), content);

        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 3);
        assert_eq!(result[0].column, 13); // Position of '<'
    }

    #[test]
    fn test_multiple_links_same_line() {
        let content = "[ok](url) but <bad> and [also][bad]\n\n[bad]: url";

        assert_flagged_styles(&lint(&inline_only(), content), &["autolink", "full"]);
    }

    #[test]
    fn test_empty_content() {
        assert_flagged_styles(&lint(&inline_only(), ""), &[]);
    }

    #[test]
    fn test_no_links() {
        let content = "Just plain text without any links";

        assert_flagged_styles(&lint(&inline_only(), content), &[]);
    }

    #[test]
    fn test_fix_returns_error() {
        let ctx = LintContext::new("[link](url)", crate::config::MarkdownFlavor::Standard);

        // Fail on success *and* on any other error variant, not only on a wrong message.
        let Err(LintError::FixFailed(msg)) = inline_only().fix(&ctx) else {
            panic!("fix() must fail with LintError::FixFailed");
        };
        assert!(msg.contains("does not support automatic fixing"));
    }

    #[test]
    fn test_priority_order() {
        // Test that [text][ref] is detected as full, not shortcut
        let content = "[text][ref] not detected as [shortcut]\n\n[ref]: url\n[shortcut]: url2";

        assert_flagged_styles(&lint(&inline_only(), content), &["full", "shortcut"]);
    }

    #[test]
    fn test_not_shortcut_when_followed_by_bracket() {
        let rule = MD054LinkImageStyle::new(false, false, false, true, true, false);
        // [text][ should not be detected as shortcut
        let content = "[text][ more text\n[text](url) is inline";

        // Only second line should have inline link
        assert_flagged_styles(&lint(&rule, content), &[]);
    }

    #[test]
    fn test_complex_unicode_with_zwj() {
        // Zero-width joiners are written as escapes so they stay visible in the source
        let content = "[👨\u{200D}👩\u{200D}👧\u{200D}👦 family](url) [café☕](https://café.com)";

        // Both should be detected as inline (allowed)
        assert_flagged_styles(&lint(&inline_only(), content), &[]);
    }
}
531}