Skip to main content

rumdl_lib/rules/
md054_link_image_style.rs

1//!
2//! Rule MD054: Link and image style should be consistent
3//!
4//! See [docs/md054.md](../../docs/md054.md) for full documentation, configuration, and examples.
5
6use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
7use crate::utils::range_utils::calculate_match_range;
8use regex::Regex;
9use std::collections::BTreeSet;
10use std::sync::LazyLock;
11
12mod md054_config;
13use md054_config::MD054Config;
14
15// Updated regex patterns that work with Unicode characters
16static AUTOLINK_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"<([^<>]+)>").unwrap());
17static INLINE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap());
18static SHORTCUT_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]").unwrap());
19static COLLAPSED_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\[\]").unwrap());
20static FULL_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\[([^\]]+)\]").unwrap());
21static REFERENCE_DEF_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\[([^\]]+)\]:\s+(.+)$").unwrap());
22
/// Rule MD054: Link and image style should be consistent
///
/// This rule reports every link or image that is written in a style the configuration
/// disables. Markdown supports various styles for links and images; restricting the set
/// of allowed styles is how this rule enforces consistency.
///
/// ## Supported Link Styles
///
/// - **Autolink**: `<https://example.com>`
/// - **Inline**: `[link text](https://example.com)`
/// - **URL Inline**: Special case of inline links where the URL itself is also the link text: `[https://example.com](https://example.com)`
/// - **Shortcut**: `[link text]` (requires a reference definition elsewhere in the document)
/// - **Collapsed**: `[link text][]` (requires a reference definition with the same name)
/// - **Full**: `[link text][reference]` (requires a reference definition for the reference)
///
/// ## Configuration Options
///
/// You can configure which link styles are allowed. By default, all styles are allowed:
///
/// ```yaml
/// MD054:
///   autolink: true    # Allow autolink style
///   inline: true      # Allow inline style
///   url_inline: true  # Allow URL inline style
///   shortcut: true    # Allow shortcut style
///   collapsed: true   # Allow collapsed style
///   full: true        # Allow full style
/// ```
///
/// To enforce a specific style, set only that style to `true` and all others to `false`.
///
/// ## Unicode Support
///
/// This rule fully supports Unicode characters in link text and URLs, including:
/// - Combining characters (e.g., café)
/// - Zero-width joiners (e.g., family emojis: 👨‍👩‍👧‍👦)
/// - Right-to-left text (e.g., Arabic, Hebrew)
/// - Emojis and other special characters
///
/// ## Rationale
///
/// Consistent link styles improve document readability and maintainability. Different link
/// styles have different advantages (e.g., inline links are self-contained, reference links
/// keep the content cleaner), but mixing styles can create confusion.
///
#[derive(Debug, Default, Clone)]
pub struct MD054LinkImageStyle {
    /// One boolean per link style; a style whose flag is `false` is reported by `check`.
    config: MD054Config,
}
71
72impl MD054LinkImageStyle {
73    pub fn new(autolink: bool, collapsed: bool, full: bool, inline: bool, shortcut: bool, url_inline: bool) -> Self {
74        Self {
75            config: MD054Config {
76                autolink,
77                collapsed,
78                full,
79                inline,
80                shortcut,
81                url_inline,
82            },
83        }
84    }
85
86    pub fn from_config_struct(config: MD054Config) -> Self {
87        Self { config }
88    }
89
90    /// Check if a style is allowed based on configuration
91    fn is_style_allowed(&self, style: &str) -> bool {
92        match style {
93            "autolink" => self.config.autolink,
94            "collapsed" => self.config.collapsed,
95            "full" => self.config.full,
96            "inline" => self.config.inline,
97            "shortcut" => self.config.shortcut,
98            "url-inline" => self.config.url_inline,
99            _ => false,
100        }
101    }
102}
103
/// A link or image occurrence found on a single line.
#[derive(Debug)]
struct LinkMatch {
    /// Style name as used in warning messages and in `is_style_allowed`
    /// (e.g. `"inline"`, `"url-inline"`, `"shortcut"`).
    style: &'static str,
    /// Byte offset of the match start within the line (a regex match start).
    start: usize,
    /// Byte offset one past the match end within the line (a regex match end).
    end: usize,
}
110
111impl Rule for MD054LinkImageStyle {
112    fn name(&self) -> &'static str {
113        "MD054"
114    }
115
116    fn description(&self) -> &'static str {
117        "Link and image style should be consistent"
118    }
119
120    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
121        let content = ctx.content;
122
123        // Early returns for performance
124        if content.is_empty() {
125            return Ok(Vec::new());
126        }
127
128        // Quick check for any link patterns before expensive processing
129        if !content.contains('[') && !content.contains('<') {
130            return Ok(Vec::new());
131        }
132
133        let mut warnings = Vec::new();
134        let lines = ctx.raw_lines();
135
136        for (line_num, line) in lines.iter().enumerate() {
137            // Skip code blocks and reference definitions early
138            if ctx.line_info(line_num + 1).is_some_and(|info| info.in_code_block) {
139                continue;
140            }
141            if REFERENCE_DEF_RE.is_match(line) {
142                continue;
143            }
144            if line.trim_start().starts_with("<!--") {
145                continue;
146            }
147
148            // Quick check for any link patterns in this line
149            if !line.contains('[') && !line.contains('<') {
150                continue;
151            }
152
153            // Use BTreeSet to efficiently track occupied byte ranges
154            let mut occupied_ranges = BTreeSet::new();
155            let mut filtered_matches = Vec::new();
156
157            // Collect all non-shortcut matches first and track their byte ranges
158            let mut all_matches = Vec::new();
159
160            // Find all autolinks
161            for cap in AUTOLINK_RE.captures_iter(line) {
162                let m = cap.get(0).unwrap();
163                let content = cap.get(1).unwrap().as_str();
164
165                // Filter out HTML tags: only match if content starts with a URL scheme
166                // HTML tags like <br> should not be flagged as autolinks
167                let is_url = content.starts_with("http://")
168                    || content.starts_with("https://")
169                    || content.starts_with("ftp://")
170                    || content.starts_with("ftps://")
171                    || content.starts_with("mailto:");
172
173                if is_url {
174                    all_matches.push(LinkMatch {
175                        style: "autolink",
176                        start: m.start(),
177                        end: m.end(),
178                    });
179                }
180            }
181
182            // Find all full references
183            for cap in FULL_RE.captures_iter(line) {
184                let m = cap.get(0).unwrap();
185                all_matches.push(LinkMatch {
186                    style: "full",
187                    start: m.start(),
188                    end: m.end(),
189                });
190            }
191
192            // Find all collapsed references
193            for cap in COLLAPSED_RE.captures_iter(line) {
194                let m = cap.get(0).unwrap();
195                all_matches.push(LinkMatch {
196                    style: "collapsed",
197                    start: m.start(),
198                    end: m.end(),
199                });
200            }
201
202            // Find all inline links
203            for cap in INLINE_RE.captures_iter(line) {
204                let m = cap.get(0).unwrap();
205                let text = cap.get(1).unwrap().as_str();
206                let url = cap.get(2).unwrap().as_str();
207                all_matches.push(LinkMatch {
208                    style: if text == url { "url-inline" } else { "inline" },
209                    start: m.start(),
210                    end: m.end(),
211                });
212            }
213
214            // Sort matches by start position to ensure we don't double-count
215            all_matches.sort_by_key(|m| m.start);
216
217            // Remove overlapping matches (keep the first one) and build occupied ranges set
218            let mut last_end = 0;
219            for m in all_matches {
220                if m.start >= last_end {
221                    last_end = m.end;
222                    // Add each byte in the range to the set
223                    for byte_pos in m.start..m.end {
224                        occupied_ranges.insert(byte_pos);
225                    }
226                    filtered_matches.push(m);
227                }
228            }
229
230            // Now find shortcut references that don't overlap with other matches
231            // Using BTreeSet for O(log n) lookups instead of O(n) iteration
232            for cap in SHORTCUT_RE.captures_iter(line) {
233                let m = cap.get(0).unwrap();
234                let start = m.start();
235                let end = m.end();
236                let link_text = cap.get(1).unwrap().as_str();
237
238                // Filter out task list checkboxes: [ ], [x], or [X]
239                // Task list checkboxes should not be flagged as shortcut links
240                // Task list pattern: list marker (*, -, +) followed by whitespace, then [ ], [x], or [X]
241                if link_text.trim() == "" || link_text == "x" || link_text == "X" {
242                    // Check if this is preceded by a list marker with whitespace
243                    if start > 0 {
244                        let before = &line[..start];
245                        // Trim leading whitespace to handle indentation
246                        let trimmed_before = before.trim_start();
247                        // Check if starts with list marker (*, -, +) followed by whitespace
248                        if let Some(marker_char) = trimmed_before.chars().next()
249                            && (marker_char == '*' || marker_char == '-' || marker_char == '+')
250                            && trimmed_before.len() > 1
251                        {
252                            let after_marker = &trimmed_before[1..];
253                            if after_marker.chars().next().is_some_and(|c| c.is_whitespace()) {
254                                // This is a task list checkbox: marker + whitespace + [ ]
255                                continue;
256                            }
257                        }
258                    }
259                }
260
261                // Check if any byte in this range is occupied (O(log n) per byte)
262                let overlaps = (start..end).any(|byte_pos| occupied_ranges.contains(&byte_pos));
263
264                if !overlaps {
265                    // Check if followed by '(', '[', '[]', or ']['
266                    let after = &line[end..];
267                    if !after.starts_with('(') && !after.starts_with('[') {
268                        // Add this range to occupied set
269                        for byte_pos in start..end {
270                            occupied_ranges.insert(byte_pos);
271                        }
272                        filtered_matches.push(LinkMatch {
273                            style: "shortcut",
274                            start,
275                            end,
276                        });
277                    }
278                }
279            }
280
281            // Sort again after adding shortcuts
282            filtered_matches.sort_by_key(|m| m.start);
283
284            // Check each match
285            for m in filtered_matches {
286                let match_start_char = line[..m.start].chars().count();
287
288                // is_in_code_span expects 1-indexed column
289                if !ctx.is_in_code_span(line_num + 1, match_start_char + 1) && !self.is_style_allowed(m.style) {
290                    // calculate_match_range expects byte positions, not character counts
291                    let match_byte_len = m.end - m.start;
292                    let (start_line, start_col, end_line, end_col) =
293                        calculate_match_range(line_num + 1, line, m.start, match_byte_len);
294
295                    warnings.push(LintWarning {
296                        rule_name: Some(self.name().to_string()),
297                        line: start_line,
298                        column: start_col,
299                        end_line,
300                        end_column: end_col,
301                        message: format!("Link/image style '{}' is not allowed", m.style),
302                        severity: Severity::Warning,
303                        fix: None,
304                    });
305                }
306            }
307        }
308        Ok(warnings)
309    }
310
311    fn fix(&self, _ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
312        // Automatic fixing for link styles is not supported and could break content
313        Err(LintError::FixFailed(
314            "MD054 does not support automatic fixing of link/image style consistency.".to_string(),
315        ))
316    }
317
318    fn fix_capability(&self) -> crate::rule::FixCapability {
319        crate::rule::FixCapability::Unfixable
320    }
321
322    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
323        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
324    }
325
326    fn as_any(&self) -> &dyn std::any::Any {
327        self
328    }
329
330    fn default_config_section(&self) -> Option<(String, toml::Value)> {
331        let json_value = serde_json::to_value(&self.config).ok()?;
332        Some((
333            self.name().to_string(),
334            crate::rule_config_serde::json_to_toml_value(&json_value)?,
335        ))
336    }
337
338    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
339    where
340        Self: Sized,
341    {
342        let rule_config = crate::rule_config_serde::load_rule_config::<MD054Config>(config);
343        Box::new(Self::from_config_struct(rule_config))
344    }
345}
346
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Rule that permits the plain inline style and nothing else.
    fn inline_only() -> MD054LinkImageStyle {
        MD054LinkImageStyle::new(false, false, false, true, false, false)
    }

    /// Lints `content` as standard-flavor Markdown and returns the warnings.
    fn lint(rule: &MD054LinkImageStyle, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        rule.check(&ctx).unwrap()
    }

    /// Asserts that `warnings` name exactly the quoted `styles`, in order.
    fn assert_styles(warnings: &[LintWarning], styles: &[&str]) {
        assert_eq!(warnings.len(), styles.len());
        for (warning, style) in warnings.iter().zip(styles) {
            assert!(warning.message.contains(&format!("'{style}'")));
        }
    }

    #[test]
    fn test_all_styles_allowed_by_default() {
        let rule = MD054LinkImageStyle::new(true, true, true, true, true, true);
        let warnings = lint(
            &rule,
            "[inline](url) [ref][] [ref] <autolink> [full][ref] [url](url)\n\n[ref]: url",
        );

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_only_inline_allowed() {
        let warnings = lint(
            &inline_only(),
            "[allowed](url) [not][ref] <https://bad.com> [bad][] [shortcut]\n\n[ref]: url\n[shortcut]: url",
        );

        assert_styles(&warnings, &["full", "autolink", "collapsed", "shortcut"]);
    }

    #[test]
    fn test_only_autolink_allowed() {
        let rule = MD054LinkImageStyle::new(true, false, false, false, false, false);
        let warnings = lint(&rule, "<https://good.com> [bad](url) [bad][ref]\n\n[ref]: url");

        assert_styles(&warnings, &["inline", "full"]);
    }

    #[test]
    fn test_url_inline_detection() {
        let rule = MD054LinkImageStyle::new(false, false, false, true, false, true);
        let warnings = lint(
            &rule,
            "[https://example.com](https://example.com) [text](https://example.com)",
        );

        // The first link is url-inline and the second is inline; both are allowed here.
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_url_inline_not_allowed() {
        let warnings = lint(&inline_only(), "[https://example.com](https://example.com)");

        assert_styles(&warnings, &["url-inline"]);
    }

    #[test]
    fn test_shortcut_vs_full_detection() {
        let rule = MD054LinkImageStyle::new(false, false, true, false, false, false);
        let warnings = lint(&rule, "[shortcut] [full][ref]\n\n[shortcut]: url\n[ref]: url2");

        // Full is allowed, so only the shortcut is reported.
        assert_styles(&warnings, &["shortcut"]);
    }

    #[test]
    fn test_collapsed_reference() {
        let rule = MD054LinkImageStyle::new(false, true, false, false, false, false);
        let warnings = lint(&rule, "[collapsed][] [bad][ref]\n\n[collapsed]: url\n[ref]: url2");

        assert_styles(&warnings, &["full"]);
    }

    #[test]
    fn test_code_blocks_ignored() {
        let warnings = lint(
            &inline_only(),
            "```\n[ignored](url) <https://ignored.com>\n```\n\n[checked](url)",
        );

        // The fenced content is skipped and the remaining link is an allowed inline link.
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_code_spans_ignored() {
        let warnings = lint(
            &inline_only(),
            "`[ignored](url)` and `<https://ignored.com>` but [checked](url)",
        );

        // The code spans are skipped and the remaining link is an allowed inline link.
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_reference_definitions_ignored() {
        let warnings = lint(
            &inline_only(),
            "[ref]: https://example.com\n[ref2]: <https://example2.com>",
        );

        // Definition lines are not links themselves.
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_html_comments_ignored() {
        let warnings = lint(
            &inline_only(),
            "<!-- [ignored](url) -->\n  <!-- <https://ignored.com> -->",
        );

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_unicode_support() {
        let warnings = lint(
            &inline_only(),
            "[café ☕](https://café.com) [emoji 😀](url) [한글](url) [עברית](url)",
        );

        // Every link is inline, which is allowed.
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_line_positions() {
        let warnings = lint(&inline_only(), "Line 1\n\nLine 3 with <https://bad.com> here");

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 3);
        assert_eq!(warnings[0].column, 13); // Position of '<'
    }

    #[test]
    fn test_multiple_links_same_line() {
        let warnings = lint(
            &inline_only(),
            "[ok](url) but <https://good.com> and [also][bad]\n\n[bad]: url",
        );

        assert_styles(&warnings, &["autolink", "full"]);
    }

    #[test]
    fn test_empty_content() {
        let warnings = lint(&inline_only(), "");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_no_links() {
        let warnings = lint(&inline_only(), "Just plain text without any links");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_fix_returns_error() {
        let rule = inline_only();
        let ctx = LintContext::new("[link](url)", crate::config::MarkdownFlavor::Standard, None);
        let outcome = rule.fix(&ctx);

        assert!(outcome.is_err());
        if let Err(LintError::FixFailed(msg)) = outcome {
            assert!(msg.contains("does not support automatic fixing"));
        }
    }

    #[test]
    fn test_priority_order() {
        // [text][ref] must be classified as full, not as a shortcut.
        let warnings = lint(
            &inline_only(),
            "[text][ref] not detected as [shortcut]\n\n[ref]: url\n[shortcut]: url2",
        );

        assert_styles(&warnings, &["full", "shortcut"]);
    }

    #[test]
    fn test_not_shortcut_when_followed_by_bracket() {
        let rule = MD054LinkImageStyle::new(false, false, false, true, true, false);
        // `[text][` is not a shortcut; the second line holds an ordinary inline link.
        let warnings = lint(&rule, "[text][ more text\n[text](url) is inline");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_cjk_correct_column_positions() {
        // Reported columns must be character positions even though matching works on
        // byte offsets, so CJK text before a link yields the right column.
        // "日本語テスト " = 7 chars, 19 bytes (6 CJK chars * 3 bytes + 1 space)
        let warnings = lint(&inline_only(), "日本語テスト <https://example.com>");

        assert_styles(&warnings, &["autolink"]);
        // '<' sits at byte offset 19, which is character position 8 (1-indexed).
        assert_eq!(
            warnings[0].column, 8,
            "Column should be 1-indexed character position of '<'"
        );
    }

    #[test]
    fn test_code_span_detection_with_cjk_prefix() {
        // The code-span lookup takes a 1-indexed character column; a link inside a code
        // span that follows CJK text must still be recognized as being inside the span.
        let warnings = lint(&inline_only(), "日本語 `[link](url)` text");

        assert_eq!(warnings.len(), 0, "Link inside code span should not be flagged");
    }

    #[test]
    fn test_complex_unicode_with_zwj() {
        // Zero-width joiners and other multi-code-point sequences in link text.
        let warnings = lint(
            &inline_only(),
            "[👨‍👩‍👧‍👦 family](url) [café☕](https://café.com)",
        );

        // Both links are inline, which is allowed.
        assert_eq!(warnings.len(), 0);
    }
}