rumdl_lib/rules/
md054_link_image_style.rs

1//!
2//! Rule MD054: Link and image style should be consistent
3//!
4//! See [docs/md054.md](../../docs/md054.md) for full documentation, configuration, and examples.
5
6use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
7use crate::utils::document_structure::DocumentStructure;
8use crate::utils::range_utils::calculate_match_range;
9use lazy_static::lazy_static;
10use regex::Regex;
11use serde::{Deserialize, Serialize};
12
13mod md054_config;
14use md054_config::MD054Config;
15
16lazy_static! {
17    // Updated regex patterns that work with Unicode characters
18    static ref AUTOLINK_RE: Regex = Regex::new(r"<([^<>]+)>").unwrap();
19    static ref INLINE_RE: Regex = Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap();
20    static ref URL_INLINE_RE: Regex = Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap();
21    static ref SHORTCUT_RE: Regex = Regex::new(r"\[([^\]]+)\]").unwrap();
22    static ref COLLAPSED_RE: Regex = Regex::new(r"\[([^\]]+)\]\[\]").unwrap();
23    static ref FULL_RE: Regex = Regex::new(r"\[([^\]]+)\]\[([^\]]+)\]").unwrap();
24    static ref CODE_BLOCK_DELIMITER: Regex = Regex::new(r"^(```|~~~)").unwrap();
25    static ref REFERENCE_DEF_RE: Regex = Regex::new(r"^\s*\[([^\]]+)\]:\s+(.+)$").unwrap();
26}
27
28/// Rule MD054: Link and image style should be consistent
29///
30/// This rule is triggered when different link or image styles are used in the same document.
31/// Markdown supports various styles for links and images, and this rule enforces consistency.
32///
33/// ## Supported Link Styles
34///
35/// - **Autolink**: `<https://example.com>`
36/// - **Inline**: `[link text](https://example.com)`
37/// - **URL Inline**: Special case of inline links where the URL itself is also the link text: `[https://example.com](https://example.com)`
38/// - **Shortcut**: `[link text]` (requires a reference definition elsewhere in the document)
39/// - **Collapsed**: `[link text][]` (requires a reference definition with the same name)
40/// - **Full**: `[link text][reference]` (requires a reference definition for the reference)
41///
42/// ## Configuration Options
43///
44/// You can configure which link styles are allowed. By default, all styles are allowed:
45///
46/// ```yaml
47/// MD054:
48///   autolink: true    # Allow autolink style
49///   inline: true      # Allow inline style
50///   url_inline: true  # Allow URL inline style
51///   shortcut: true    # Allow shortcut style
52///   collapsed: true   # Allow collapsed style
53///   full: true        # Allow full style
54/// ```
55///
56/// To enforce a specific style, set only that style to `true` and all others to `false`.
57///
58/// ## Unicode Support
59///
60/// This rule fully supports Unicode characters in link text and URLs, including:
61/// - Combining characters (e.g., café)
62/// - Zero-width joiners (e.g., family emojis: 👨‍👩‍👧‍👦)
63/// - Right-to-left text (e.g., Arabic, Hebrew)
64/// - Emojis and other special characters
65///
66/// ## Rationale
67///
68/// Consistent link styles improve document readability and maintainability. Different link
69/// styles have different advantages (e.g., inline links are self-contained, reference links
70/// keep the content cleaner), but mixing styles can create confusion.
71///
72#[derive(Debug, Eq, PartialEq, Hash, Serialize, Deserialize, Clone)]
73pub enum LinkImageStyle {
74    Autolink,
75    Inline,
76    UrlInline,
77    Shortcut,
78    Collapsed,
79    Full,
80}
81
82#[derive(Debug, Default, Clone)]
83pub struct MD054LinkImageStyle {
84    config: MD054Config,
85}
86
87impl MD054LinkImageStyle {
88    pub fn new(autolink: bool, collapsed: bool, full: bool, inline: bool, shortcut: bool, url_inline: bool) -> Self {
89        Self {
90            config: MD054Config {
91                autolink,
92                collapsed,
93                full,
94                inline,
95                shortcut,
96                url_inline,
97            },
98        }
99    }
100
101    pub fn from_config_struct(config: MD054Config) -> Self {
102        Self { config }
103    }
104
105    /// Check if a style is allowed based on configuration
106    fn is_style_allowed(&self, style: &str) -> bool {
107        match style {
108            "autolink" => self.config.autolink,
109            "collapsed" => self.config.collapsed,
110            "full" => self.config.full,
111            "inline" => self.config.inline,
112            "shortcut" => self.config.shortcut,
113            "url_inline" => self.config.url_inline,
114            _ => false,
115        }
116    }
117}
118
/// One link or image occurrence found on a single line.
#[derive(Debug)]
struct LinkMatch {
    /// Style name, e.g. `"inline"` or `"shortcut"`.
    style: &'static str,
    /// Byte offset within the line where the match begins.
    start: usize,
    /// Byte offset within the line just past the end of the match.
    end: usize,
}
125
126impl Rule for MD054LinkImageStyle {
127    fn name(&self) -> &'static str {
128        "MD054"
129    }
130
131    fn description(&self) -> &'static str {
132        "Link and image style should be consistent"
133    }
134
135    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
136        let content = ctx.content;
137
138        // Early returns for performance
139        if content.is_empty() {
140            return Ok(Vec::new());
141        }
142
143        // Quick check for any link patterns before expensive processing
144        if !content.contains('[') && !content.contains('<') {
145            return Ok(Vec::new());
146        }
147
148        let structure = DocumentStructure::new(content);
149        let mut warnings = Vec::new();
150        let lines: Vec<&str> = content.lines().collect();
151
152        for (line_num, line) in lines.iter().enumerate() {
153            // Skip code blocks and reference definitions early
154            if structure.is_in_code_block(line_num + 1) {
155                continue;
156            }
157            if REFERENCE_DEF_RE.is_match(line) {
158                continue;
159            }
160            if line.trim_start().starts_with("<!--") {
161                continue;
162            }
163
164            // Quick check for any link patterns in this line
165            if !line.contains('[') && !line.contains('<') {
166                continue;
167            }
168
169            // Find all matches in the line
170            let mut matches = Vec::new();
171
172            // Find all autolinks
173            for cap in AUTOLINK_RE.captures_iter(line) {
174                let m = cap.get(0).unwrap();
175                matches.push(LinkMatch {
176                    style: "autolink",
177                    start: m.start(),
178                    end: m.end(),
179                });
180            }
181
182            // Find all full references
183            for cap in FULL_RE.captures_iter(line) {
184                let m = cap.get(0).unwrap();
185                matches.push(LinkMatch {
186                    style: "full",
187                    start: m.start(),
188                    end: m.end(),
189                });
190            }
191
192            // Find all collapsed references
193            for cap in COLLAPSED_RE.captures_iter(line) {
194                let m = cap.get(0).unwrap();
195                matches.push(LinkMatch {
196                    style: "collapsed",
197                    start: m.start(),
198                    end: m.end(),
199                });
200            }
201
202            // Find all inline links
203            for cap in INLINE_RE.captures_iter(line) {
204                let m = cap.get(0).unwrap();
205                let text = cap.get(1).unwrap().as_str();
206                let url = cap.get(2).unwrap().as_str();
207                matches.push(LinkMatch {
208                    style: if text == url { "url_inline" } else { "inline" },
209                    start: m.start(),
210                    end: m.end(),
211                });
212            }
213
214            // Sort matches by start position to ensure we don't double-count
215            matches.sort_by_key(|m| m.start);
216
217            // Remove overlapping matches (keep the first one)
218            let mut filtered_matches = Vec::new();
219            let mut last_end = 0;
220            for m in matches {
221                if m.start >= last_end {
222                    last_end = m.end;
223                    filtered_matches.push(m);
224                }
225            }
226
227            // Now find shortcut references that don't overlap with other matches
228            for cap in SHORTCUT_RE.captures_iter(line) {
229                let m = cap.get(0).unwrap();
230                let start = m.start();
231                let end = m.end();
232
233                // Check if this overlaps with any existing match
234                let overlaps = filtered_matches.iter().any(|existing| {
235                    (start >= existing.start && start < existing.end) || (end > existing.start && end <= existing.end)
236                });
237
238                if !overlaps {
239                    // Check if followed by '(', '[', '[]', or ']['
240                    let after = &line[end..];
241                    if !after.starts_with('(') && !after.starts_with('[') {
242                        filtered_matches.push(LinkMatch {
243                            style: "shortcut",
244                            start,
245                            end,
246                        });
247                    }
248                }
249            }
250
251            // Sort again after adding shortcuts
252            filtered_matches.sort_by_key(|m| m.start);
253
254            // Check each match
255            for m in filtered_matches {
256                let match_start_char = line[..m.start].chars().count();
257
258                if !structure.is_in_code_span(line_num + 1, match_start_char + 1) && !self.is_style_allowed(m.style) {
259                    let match_len = line[m.start..m.end].chars().count();
260                    let (start_line, start_col, end_line, end_col) =
261                        calculate_match_range(line_num + 1, line, match_start_char, match_len);
262
263                    warnings.push(LintWarning {
264                        rule_name: Some(self.name()),
265                        line: start_line,
266                        column: start_col,
267                        end_line,
268                        end_column: end_col,
269                        message: format!("Link/image style '{}' is not consistent with document", m.style),
270                        severity: Severity::Warning,
271                        fix: None,
272                    });
273                }
274            }
275        }
276        Ok(warnings)
277    }
278
279    fn fix(&self, _ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
280        // Automatic fixing for link styles is not supported and could break content
281        Err(LintError::FixFailed(
282            "MD054 does not support automatic fixing of link/image style consistency.".to_string(),
283        ))
284    }
285
286    fn fix_capability(&self) -> crate::rule::FixCapability {
287        crate::rule::FixCapability::Unfixable
288    }
289
290    fn as_any(&self) -> &dyn std::any::Any {
291        self
292    }
293
294    fn default_config_section(&self) -> Option<(String, toml::Value)> {
295        let json_value = serde_json::to_value(&self.config).ok()?;
296        Some((
297            self.name().to_string(),
298            crate::rule_config_serde::json_to_toml_value(&json_value)?,
299        ))
300    }
301
302    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
303    where
304        Self: Sized,
305    {
306        let rule_config = crate::rule_config_serde::load_rule_config::<MD054Config>(config);
307        Box::new(Self::from_config_struct(rule_config))
308    }
309}
310
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Lints `content` with `rule` using the standard Markdown flavor and returns the warnings.
    fn run_check(rule: &MD054LinkImageStyle, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    /// Rule instance that allows plain inline links and nothing else.
    fn inline_only() -> MD054LinkImageStyle {
        MD054LinkImageStyle::new(false, false, false, true, false, false)
    }

    /// Asserts that exactly `expected.len()` warnings were produced and that the n-th warning
    /// names the n-th expected style in its message.
    fn assert_flagged_styles(warnings: &[LintWarning], expected: &[&str]) {
        assert_eq!(warnings.len(), expected.len());
        for (warning, style) in warnings.iter().zip(expected) {
            assert!(warning.message.contains(&format!("'{}'", style)));
        }
    }

    #[test]
    fn test_all_styles_allowed_by_default() {
        let rule = MD054LinkImageStyle::new(true, true, true, true, true, true);
        let warnings = run_check(
            &rule,
            "[inline](url) [ref][] [ref] <autolink> [full][ref] [url](url)\n\n[ref]: url",
        );

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_only_inline_allowed() {
        let warnings = run_check(
            &inline_only(),
            "[allowed](url) [not][ref] <https://bad.com> [bad][] [shortcut]\n\n[ref]: url\n[shortcut]: url",
        );

        assert_flagged_styles(&warnings, &["full", "autolink", "collapsed", "shortcut"]);
    }

    #[test]
    fn test_only_autolink_allowed() {
        let rule = MD054LinkImageStyle::new(true, false, false, false, false, false);
        let warnings = run_check(&rule, "<https://good.com> [bad](url) [bad][ref]\n\n[ref]: url");

        assert_flagged_styles(&warnings, &["inline", "full"]);
    }

    #[test]
    fn test_url_inline_detection() {
        let rule = MD054LinkImageStyle::new(false, false, false, true, false, true);
        let warnings = run_check(
            &rule,
            "[https://example.com](https://example.com) [text](https://example.com)",
        );

        // The first link is url_inline and the second is inline; both are allowed here.
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_url_inline_not_allowed() {
        let warnings = run_check(&inline_only(), "[https://example.com](https://example.com)");

        assert_flagged_styles(&warnings, &["url_inline"]);
    }

    #[test]
    fn test_shortcut_vs_full_detection() {
        let rule = MD054LinkImageStyle::new(false, false, true, false, false, false);
        let warnings = run_check(&rule, "[shortcut] [full][ref]\n\n[shortcut]: url\n[ref]: url2");

        // Full references are allowed, so the shortcut is the only offender.
        assert_flagged_styles(&warnings, &["shortcut"]);
    }

    #[test]
    fn test_collapsed_reference() {
        let rule = MD054LinkImageStyle::new(false, true, false, false, false, false);
        let warnings = run_check(&rule, "[collapsed][] [bad][ref]\n\n[collapsed]: url\n[ref]: url2");

        assert_flagged_styles(&warnings, &["full"]);
    }

    #[test]
    fn test_code_blocks_ignored() {
        let warnings = run_check(
            &inline_only(),
            "```\n[ignored](url) <https://ignored.com>\n```\n\n[checked](url)",
        );

        // Everything inside the fence is skipped; the link after it is inline and allowed.
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_code_spans_ignored() {
        let warnings = run_check(
            &inline_only(),
            "`[ignored](url)` and `<https://ignored.com>` but [checked](url)",
        );

        // Links inside code spans are skipped; the remaining link is inline and allowed.
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_reference_definitions_ignored() {
        let warnings = run_check(
            &inline_only(),
            "[ref]: https://example.com\n[ref2]: <https://example2.com>",
        );

        // Definition lines are never treated as links.
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_html_comments_ignored() {
        let warnings = run_check(
            &inline_only(),
            "<!-- [ignored](url) -->\n  <!-- <https://ignored.com> -->",
        );

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_unicode_support() {
        let warnings = run_check(
            &inline_only(),
            "[café ☕](https://café.com) [emoji 😀](url) [한글](url) [עברית](url)",
        );

        // Every link is recognised as inline, which is allowed.
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_line_positions() {
        let warnings = run_check(&inline_only(), "Line 1\n\nLine 3 with <https://bad.com> here");

        assert_eq!(warnings.len(), 1);
        let warning = &warnings[0];
        assert_eq!(warning.line, 3);
        assert_eq!(warning.column, 13); // Position of '<'
    }

    #[test]
    fn test_multiple_links_same_line() {
        let warnings = run_check(&inline_only(), "[ok](url) but <bad> and [also][bad]\n\n[bad]: url");

        assert_flagged_styles(&warnings, &["autolink", "full"]);
    }

    #[test]
    fn test_empty_content() {
        let warnings = run_check(&inline_only(), "");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_no_links() {
        let warnings = run_check(&inline_only(), "Just plain text without any links");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_fix_returns_error() {
        let ctx = LintContext::new("[link](url)", crate::config::MarkdownFlavor::Standard);

        match inline_only().fix(&ctx) {
            Ok(_) => panic!("fix() must not succeed for MD054"),
            Err(LintError::FixFailed(msg)) => {
                assert!(msg.contains("does not support automatic fixing"));
            }
            // Any other error variant still counts as "returns an error".
            Err(_) => {}
        }
    }

    #[test]
    fn test_priority_order() {
        // `[text][ref]` must be classified as full rather than as a shortcut.
        let warnings = run_check(
            &inline_only(),
            "[text][ref] not detected as [shortcut]\n\n[ref]: url\n[shortcut]: url2",
        );

        assert_flagged_styles(&warnings, &["full", "shortcut"]);
    }

    #[test]
    fn test_not_shortcut_when_followed_by_bracket() {
        let rule = MD054LinkImageStyle::new(false, false, false, true, true, false);
        // `[text][` must not be picked up as a shortcut.
        let warnings = run_check(&rule, "[text][ more text\n[text](url) is inline");

        // The second line holds an inline link, which is allowed.
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_complex_unicode_with_zwj() {
        // Zero-width joiners and other multi-codepoint sequences in the link text.
        let warnings = run_check(
            &inline_only(),
            "[👨‍👩‍👧‍👦 family](url) [café☕](https://café.com)",
        );

        // Both links are recognised as inline, which is allowed.
        assert_eq!(warnings.len(), 0);
    }
}
544}