rumdl_lib/rules/
md054_link_image_style.rs

1//!
2//! Rule MD054: Link and image style should be consistent
3//!
4//! See [docs/md054.md](../../docs/md054.md) for full documentation, configuration, and examples.
5
6use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
7use crate::utils::range_utils::calculate_match_range;
8use lazy_static::lazy_static;
9use regex::Regex;
10use serde::{Deserialize, Serialize};
11
12mod md054_config;
13use md054_config::MD054Config;
14
15lazy_static! {
16    // Updated regex patterns that work with Unicode characters
17    static ref AUTOLINK_RE: Regex = Regex::new(r"<([^<>]+)>").unwrap();
18    static ref INLINE_RE: Regex = Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap();
19    static ref URL_INLINE_RE: Regex = Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap();
20    static ref SHORTCUT_RE: Regex = Regex::new(r"\[([^\]]+)\]").unwrap();
21    static ref COLLAPSED_RE: Regex = Regex::new(r"\[([^\]]+)\]\[\]").unwrap();
22    static ref FULL_RE: Regex = Regex::new(r"\[([^\]]+)\]\[([^\]]+)\]").unwrap();
23    static ref CODE_BLOCK_DELIMITER: Regex = Regex::new(r"^(```|~~~)").unwrap();
24    static ref REFERENCE_DEF_RE: Regex = Regex::new(r"^\s*\[([^\]]+)\]:\s+(.+)$").unwrap();
25}
26
27/// Rule MD054: Link and image style should be consistent
28///
29/// This rule is triggered when different link or image styles are used in the same document.
30/// Markdown supports various styles for links and images, and this rule enforces consistency.
31///
32/// ## Supported Link Styles
33///
34/// - **Autolink**: `<https://example.com>`
35/// - **Inline**: `[link text](https://example.com)`
36/// - **URL Inline**: Special case of inline links where the URL itself is also the link text: `[https://example.com](https://example.com)`
37/// - **Shortcut**: `[link text]` (requires a reference definition elsewhere in the document)
38/// - **Collapsed**: `[link text][]` (requires a reference definition with the same name)
39/// - **Full**: `[link text][reference]` (requires a reference definition for the reference)
40///
41/// ## Configuration Options
42///
43/// You can configure which link styles are allowed. By default, all styles are allowed:
44///
45/// ```yaml
46/// MD054:
47///   autolink: true    # Allow autolink style
48///   inline: true      # Allow inline style
49///   url_inline: true  # Allow URL inline style
50///   shortcut: true    # Allow shortcut style
51///   collapsed: true   # Allow collapsed style
52///   full: true        # Allow full style
53/// ```
54///
55/// To enforce a specific style, set only that style to `true` and all others to `false`.
56///
57/// ## Unicode Support
58///
59/// This rule fully supports Unicode characters in link text and URLs, including:
60/// - Combining characters (e.g., café)
61/// - Zero-width joiners (e.g., family emojis: 👨‍👩‍👧‍👦)
62/// - Right-to-left text (e.g., Arabic, Hebrew)
63/// - Emojis and other special characters
64///
65/// ## Rationale
66///
67/// Consistent link styles improve document readability and maintainability. Different link
68/// styles have different advantages (e.g., inline links are self-contained, reference links
69/// keep the content cleaner), but mixing styles can create confusion.
70///
71#[derive(Debug, Eq, PartialEq, Hash, Serialize, Deserialize, Clone)]
72pub enum LinkImageStyle {
73    Autolink,
74    Inline,
75    UrlInline,
76    Shortcut,
77    Collapsed,
78    Full,
79}
80
81#[derive(Debug, Default, Clone)]
82pub struct MD054LinkImageStyle {
83    config: MD054Config,
84}
85
86impl MD054LinkImageStyle {
87    pub fn new(autolink: bool, collapsed: bool, full: bool, inline: bool, shortcut: bool, url_inline: bool) -> Self {
88        Self {
89            config: MD054Config {
90                autolink,
91                collapsed,
92                full,
93                inline,
94                shortcut,
95                url_inline,
96            },
97        }
98    }
99
100    pub fn from_config_struct(config: MD054Config) -> Self {
101        Self { config }
102    }
103
104    /// Check if a style is allowed based on configuration
105    fn is_style_allowed(&self, style: &str) -> bool {
106        match style {
107            "autolink" => self.config.autolink,
108            "collapsed" => self.config.collapsed,
109            "full" => self.config.full,
110            "inline" => self.config.inline,
111            "shortcut" => self.config.shortcut,
112            "url_inline" => self.config.url_inline,
113            _ => false,
114        }
115    }
116}
117
/// One link-like span found on a single line.
#[derive(Debug)]
struct LinkMatch {
    /// Name of the detected style, e.g. `"inline"` or `"shortcut"`.
    style: &'static str,
    /// Byte offset within the line at which the span begins.
    start: usize,
    /// Byte offset within the line just past the end of the span.
    end: usize,
}
124
125impl Rule for MD054LinkImageStyle {
126    fn name(&self) -> &'static str {
127        "MD054"
128    }
129
130    fn description(&self) -> &'static str {
131        "Link and image style should be consistent"
132    }
133
134    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
135        let content = ctx.content;
136
137        // Early returns for performance
138        if content.is_empty() {
139            return Ok(Vec::new());
140        }
141
142        // Quick check for any link patterns before expensive processing
143        if !content.contains('[') && !content.contains('<') {
144            return Ok(Vec::new());
145        }
146
147        let mut warnings = Vec::new();
148        let lines: Vec<&str> = content.lines().collect();
149
150        for (line_num, line) in lines.iter().enumerate() {
151            // Skip code blocks and reference definitions early
152            if ctx.is_in_code_block(line_num + 1) {
153                continue;
154            }
155            if REFERENCE_DEF_RE.is_match(line) {
156                continue;
157            }
158            if line.trim_start().starts_with("<!--") {
159                continue;
160            }
161
162            // Quick check for any link patterns in this line
163            if !line.contains('[') && !line.contains('<') {
164                continue;
165            }
166
167            // Find all matches in the line
168            let mut matches = Vec::new();
169
170            // Find all autolinks
171            for cap in AUTOLINK_RE.captures_iter(line) {
172                let m = cap.get(0).unwrap();
173                matches.push(LinkMatch {
174                    style: "autolink",
175                    start: m.start(),
176                    end: m.end(),
177                });
178            }
179
180            // Find all full references
181            for cap in FULL_RE.captures_iter(line) {
182                let m = cap.get(0).unwrap();
183                matches.push(LinkMatch {
184                    style: "full",
185                    start: m.start(),
186                    end: m.end(),
187                });
188            }
189
190            // Find all collapsed references
191            for cap in COLLAPSED_RE.captures_iter(line) {
192                let m = cap.get(0).unwrap();
193                matches.push(LinkMatch {
194                    style: "collapsed",
195                    start: m.start(),
196                    end: m.end(),
197                });
198            }
199
200            // Find all inline links
201            for cap in INLINE_RE.captures_iter(line) {
202                let m = cap.get(0).unwrap();
203                let text = cap.get(1).unwrap().as_str();
204                let url = cap.get(2).unwrap().as_str();
205                matches.push(LinkMatch {
206                    style: if text == url { "url_inline" } else { "inline" },
207                    start: m.start(),
208                    end: m.end(),
209                });
210            }
211
212            // Sort matches by start position to ensure we don't double-count
213            matches.sort_by_key(|m| m.start);
214
215            // Remove overlapping matches (keep the first one)
216            let mut filtered_matches = Vec::new();
217            let mut last_end = 0;
218            for m in matches {
219                if m.start >= last_end {
220                    last_end = m.end;
221                    filtered_matches.push(m);
222                }
223            }
224
225            // Now find shortcut references that don't overlap with other matches
226            for cap in SHORTCUT_RE.captures_iter(line) {
227                let m = cap.get(0).unwrap();
228                let start = m.start();
229                let end = m.end();
230
231                // Check if this overlaps with any existing match
232                let overlaps = filtered_matches.iter().any(|existing| {
233                    (start >= existing.start && start < existing.end) || (end > existing.start && end <= existing.end)
234                });
235
236                if !overlaps {
237                    // Check if followed by '(', '[', '[]', or ']['
238                    let after = &line[end..];
239                    if !after.starts_with('(') && !after.starts_with('[') {
240                        filtered_matches.push(LinkMatch {
241                            style: "shortcut",
242                            start,
243                            end,
244                        });
245                    }
246                }
247            }
248
249            // Sort again after adding shortcuts
250            filtered_matches.sort_by_key(|m| m.start);
251
252            // Check each match
253            for m in filtered_matches {
254                let match_start_char = line[..m.start].chars().count();
255
256                if !ctx.is_in_code_span(line_num + 1, match_start_char) && !self.is_style_allowed(m.style) {
257                    let match_len = line[m.start..m.end].chars().count();
258                    let (start_line, start_col, end_line, end_col) =
259                        calculate_match_range(line_num + 1, line, match_start_char, match_len);
260
261                    warnings.push(LintWarning {
262                        rule_name: Some(self.name()),
263                        line: start_line,
264                        column: start_col,
265                        end_line,
266                        end_column: end_col,
267                        message: format!("Link/image style '{}' is not consistent with document", m.style),
268                        severity: Severity::Warning,
269                        fix: None,
270                    });
271                }
272            }
273        }
274        Ok(warnings)
275    }
276
277    fn fix(&self, _ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
278        // Automatic fixing for link styles is not supported and could break content
279        Err(LintError::FixFailed(
280            "MD054 does not support automatic fixing of link/image style consistency.".to_string(),
281        ))
282    }
283
284    fn fix_capability(&self) -> crate::rule::FixCapability {
285        crate::rule::FixCapability::Unfixable
286    }
287
288    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
289        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
290    }
291
292    fn as_any(&self) -> &dyn std::any::Any {
293        self
294    }
295
296    fn default_config_section(&self) -> Option<(String, toml::Value)> {
297        let json_value = serde_json::to_value(&self.config).ok()?;
298        Some((
299            self.name().to_string(),
300            crate::rule_config_serde::json_to_toml_value(&json_value)?,
301        ))
302    }
303
304    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
305    where
306        Self: Sized,
307    {
308        let rule_config = crate::rule_config_serde::load_rule_config::<MD054Config>(config);
309        Box::new(Self::from_config_struct(rule_config))
310    }
311}
312
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Rule configuration in which only the plain inline style is permitted.
    fn inline_only() -> MD054LinkImageStyle {
        MD054LinkImageStyle::new(false, false, false, true, false, false)
    }

    /// Runs `rule` over `content` with the standard flavor and returns its warnings.
    fn lint(rule: &MD054LinkImageStyle, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    /// Asserts that `warnings` mention exactly the `expected` styles, in order.
    fn assert_styles(warnings: &[LintWarning], expected: &[&str]) {
        assert_eq!(warnings.len(), expected.len());
        for (warning, style) in warnings.iter().zip(expected) {
            assert!(
                warning.message.contains(&format!("'{}'", style)),
                "expected style '{}' in message: {}",
                style,
                warning.message
            );
        }
    }

    #[test]
    fn test_all_styles_allowed_by_default() {
        let rule = MD054LinkImageStyle::new(true, true, true, true, true, true);
        let result = lint(
            &rule,
            "[inline](url) [ref][] [ref] <autolink> [full][ref] [url](url)\n\n[ref]: url",
        );

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_only_inline_allowed() {
        let result = lint(
            &inline_only(),
            "[allowed](url) [not][ref] <https://bad.com> [bad][] [shortcut]\n\n[ref]: url\n[shortcut]: url",
        );

        assert_styles(&result, &["full", "autolink", "collapsed", "shortcut"]);
    }

    #[test]
    fn test_only_autolink_allowed() {
        let rule = MD054LinkImageStyle::new(true, false, false, false, false, false);
        let result = lint(&rule, "<https://good.com> [bad](url) [bad][ref]\n\n[ref]: url");

        assert_styles(&result, &["inline", "full"]);
    }

    #[test]
    fn test_url_inline_detection() {
        let rule = MD054LinkImageStyle::new(false, false, false, true, false, true);
        let result = lint(
            &rule,
            "[https://example.com](https://example.com) [text](https://example.com)",
        );

        // First is url_inline (allowed), second is inline (allowed)
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_url_inline_not_allowed() {
        let result = lint(&inline_only(), "[https://example.com](https://example.com)");

        assert_styles(&result, &["url_inline"]);
    }

    #[test]
    fn test_shortcut_vs_full_detection() {
        let rule = MD054LinkImageStyle::new(false, false, true, false, false, false);
        let result = lint(&rule, "[shortcut] [full][ref]\n\n[shortcut]: url\n[ref]: url2");

        // Only shortcut should be flagged
        assert_styles(&result, &["shortcut"]);
    }

    #[test]
    fn test_collapsed_reference() {
        let rule = MD054LinkImageStyle::new(false, true, false, false, false, false);
        let result = lint(&rule, "[collapsed][] [bad][ref]\n\n[collapsed]: url\n[ref]: url2");

        assert_styles(&result, &["full"]);
    }

    #[test]
    fn test_code_blocks_ignored() {
        let result = lint(
            &inline_only(),
            "```\n[ignored](url) <https://ignored.com>\n```\n\n[checked](url)",
        );

        // The fenced content is skipped and the remaining link is inline (allowed)
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_code_spans_ignored() {
        let result = lint(
            &inline_only(),
            "`[ignored](url)` and `<https://ignored.com>` but [checked](url)",
        );

        // Links in code spans are skipped and the remaining link is inline (allowed)
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_reference_definitions_ignored() {
        let result = lint(
            &inline_only(),
            "[ref]: https://example.com\n[ref2]: <https://example2.com>",
        );

        // Reference definitions should be ignored
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_html_comments_ignored() {
        let result = lint(
            &inline_only(),
            "<!-- [ignored](url) -->\n  <!-- <https://ignored.com> -->",
        );

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_unicode_support() {
        let result = lint(
            &inline_only(),
            "[café ☕](https://café.com) [emoji 😀](url) [한글](url) [עברית](url)",
        );

        // All should be detected as inline (allowed)
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_line_positions() {
        let result = lint(&inline_only(), "Line 1\n\nLine 3 with <https://bad.com> here");

        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 3);
        assert_eq!(result[0].column, 13); // Position of '<'
    }

    #[test]
    fn test_multiple_links_same_line() {
        let result = lint(&inline_only(), "[ok](url) but <bad> and [also][bad]\n\n[bad]: url");

        assert_styles(&result, &["autolink", "full"]);
    }

    #[test]
    fn test_empty_content() {
        let result = lint(&inline_only(), "");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_no_links() {
        let result = lint(&inline_only(), "Just plain text without any links");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_fix_returns_error() {
        let rule = inline_only();
        let ctx = LintContext::new("[link](url)", crate::config::MarkdownFlavor::Standard);

        // Match exhaustively so that an `Ok` or an unexpected error variant
        // fails the test instead of silently skipping the message check.
        match rule.fix(&ctx) {
            Err(LintError::FixFailed(msg)) => {
                assert!(msg.contains("does not support automatic fixing"));
            }
            _ => panic!("expected LintError::FixFailed"),
        }
    }

    #[test]
    fn test_priority_order() {
        // Test that [text][ref] is detected as full, not shortcut
        let result = lint(
            &inline_only(),
            "[text][ref] not detected as [shortcut]\n\n[ref]: url\n[shortcut]: url2",
        );

        assert_styles(&result, &["full", "shortcut"]);
    }

    #[test]
    fn test_not_shortcut_when_followed_by_bracket() {
        let rule = MD054LinkImageStyle::new(false, false, false, true, true, false);
        // [text][ should not be detected as shortcut
        let result = lint(&rule, "[text][ more text\n[text](url) is inline");

        // Only second line should have inline link
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_complex_unicode_with_zwj() {
        // Test with zero-width joiners and complex Unicode
        let result = lint(&inline_only(), "[👨‍👩‍👧‍👦 family](url) [café☕](https://café.com)");

        // Both should be detected as inline (allowed)
        assert_eq!(result.len(), 0);
    }
}