rumdl_lib/rules/
md053_link_image_reference_definitions.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::rule_config_serde::RuleConfig;
3use crate::utils::range_utils::calculate_line_range;
4use fancy_regex::Regex as FancyRegex;
5use regex::Regex;
6use serde::{Deserialize, Serialize};
7use std::collections::{HashMap, HashSet};
8use std::sync::LazyLock;
9
// Candidate shortcut reference `[label]`: a bracketed run of one or more non-`]` characters
// (capture group 1) that is not immediately preceded by `!` and not immediately followed by `[`.
// Anything else may follow the closing bracket (":", ".", ",", "(", ...); the caller separately
// filters out definition lines, code spans and labels that do not look like references.
// Built with `fancy_regex` because the pattern relies on look-behind and look-ahead.
static SHORTCUT_REFERENCE_REGEX: LazyLock<FancyRegex> =
    LazyLock::new(|| FancyRegex::new(r"(?<!\!)\[([^\]]+)\](?!\[)").unwrap());
15
// Link/image reference definition line: optional leading whitespace, `[label]:`, at least one
// whitespace character, then the rest of the line.
// Capture group 1 is the label; group 2 is everything after the separator on that line.
static REFERENCE_DEFINITION_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*\[([^\]]+)\]:\s+(.+)$").unwrap());
19
// Candidate continuation line of a multi-line reference definition: leading whitespace followed
// by at least one more character. Indentation is the only criterion, so any indented line
// matches (including a line made of two or more whitespace characters).
static CONTINUATION_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s+(.+)$").unwrap());
22
/// Configuration for MD053 rule
///
/// Field names are (de)serialized in kebab-case, so the field below is spelled
/// `ignored-definitions` in serialized form.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "kebab-case")]
pub struct MD053Config {
    /// List of reference names to keep even if unused.
    ///
    /// Names are compared case-insensitively against the normalized definition labels, and
    /// listed names are also exempt from duplicate-definition reporting.
    /// Defaults to an empty list when the key is absent.
    #[serde(default = "default_ignored_definitions")]
    pub ignored_definitions: Vec<String>,
}
31
32impl Default for MD053Config {
33    fn default() -> Self {
34        Self {
35            ignored_definitions: default_ignored_definitions(),
36        }
37    }
38}
39
/// Default for `MD053Config::ignored_definitions`: no definitions are ignored.
fn default_ignored_definitions() -> Vec<String> {
    Vec::default()
}
43
impl RuleConfig for MD053Config {
    /// Identifier of the rule this configuration belongs to.
    const RULE_NAME: &'static str = "MD053";
}
47
/// Rule MD053: Link and image reference definitions should be used
///
/// See [docs/md053.md](../../docs/md053.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when a link or image reference definition is declared but not used
/// anywhere in the document. Unused reference definitions can create confusion and clutter.
/// Repeated definitions of the same (case-insensitive) label are reported as duplicates.
///
/// ## Supported Reference Formats
///
/// This rule handles the following reference formats:
///
/// - **Full reference links/images**: `[text][reference]` or `![text][reference]`
/// - **Collapsed reference links/images**: `[text][]` or `![text][]`
/// - **Shortcut reference links**: `[reference]` (must be defined elsewhere)
/// - **Footnote references**: `[^note]` (matched against a definition labelled `^note`)
/// - **Reference definitions**: `[reference]: URL "Optional Title"`
/// - **Multi-line reference definitions**:
///   ```markdown
///   [reference]: URL
///      "Optional title continued on next line"
///   ```
///
/// ## Configuration Options
///
/// The rule supports the following configuration options:
///
/// ```yaml
/// MD053:
///   ignored_definitions: []  # List of reference definitions to ignore (never report as unused)
/// ```
///
/// NOTE(review): [`MD053Config`] serializes this key in kebab-case (`ignored-definitions`);
/// confirm which spellings the configuration loader accepts.
///
/// ## How Matching Works
///
/// 1. **Definitions** are taken from the reference definitions collected by `LintContext`.
/// 2. **Usages** are taken from the reference links, reference images and footnote references
///    collected by `LintContext`, plus a regex scan of each line for shortcut references.
/// 3. **Lookup** uses a `HashMap` of definitions and a `HashSet` of usages keyed by the
///    normalized label.
/// 4. **Skipping**: the rule is skipped entirely for empty content or content that is unlikely
///    to contain links or images.
///
/// ## Edge Cases Handled
///
/// - **Case insensitivity**: Labels are lowercased before comparison
/// - **Escaped characters**: Backslashes are removed from labels before comparison
/// - **Unicode support**: Lowercasing is Unicode-aware
/// - **Code blocks**: Lines inside code blocks and front matter are ignored; shortcut
///   references that start inside a code span are ignored
/// - **Comment-style definitions**: Definitions such as `[//]: # (comment)` are never reported
/// - **Non-reference brackets**: Bracketed text such as `[1:3]`, `[*]` or `[!NOTE]` is not
///   counted as a usage
///
/// ## Fix Behavior
///
/// This rule does not provide automatic fixes. Unused references must be manually reviewed
/// and removed, as they may be intentionally kept for future use or as templates.
#[derive(Clone)]
pub struct MD053LinkImageReferenceDefinitions {
    config: MD053Config,
}
103
104impl MD053LinkImageReferenceDefinitions {
105    /// Create a new instance of the MD053 rule
106    pub fn new() -> Self {
107        Self {
108            config: MD053Config::default(),
109        }
110    }
111
112    /// Create a new instance with the given configuration
113    pub fn from_config_struct(config: MD053Config) -> Self {
114        Self { config }
115    }
116
117    /// Returns true if this pattern should be skipped during reference detection
118    fn should_skip_pattern(text: &str) -> bool {
119        // Don't skip pure numeric patterns - they could be footnote references like [1]
120        // Only skip numeric ranges like [1:3], [0:10], etc.
121        if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
122            return true;
123        }
124
125        // Skip glob/wildcard patterns like [*], [...], [**]
126        if text == "*" || text == "..." || text == "**" {
127            return true;
128        }
129
130        // Skip patterns that are just punctuation or operators
131        if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
132            return true;
133        }
134
135        // Skip very short non-word patterns (likely operators or syntax)
136        // But allow single digits (could be footnotes) and single letters
137        if text.len() <= 2 && !text.chars().all(|c| c.is_alphanumeric()) {
138            return true;
139        }
140
141        // Skip descriptive prose patterns with colon like [default: the project root]
142        // But allow reference-style patterns like [RFC: 1234], [Issue: 42], [See: Section 2]
143        // These are distinguished by having a short prefix (typically 1-2 words) before the colon
144        if text.contains(':') && text.contains(' ') && !text.contains('`') {
145            // Check if this looks like a reference pattern (short prefix before colon)
146            // vs a prose description (longer text before colon)
147            if let Some((before_colon, _)) = text.split_once(':') {
148                let before_trimmed = before_colon.trim();
149                // Count words before colon - references typically have 1-2 words
150                let word_count = before_trimmed.split_whitespace().count();
151                // If there are 3+ words before the colon, it's likely prose
152                if word_count >= 3 {
153                    return true;
154                }
155            }
156        }
157
158        // Skip alert/admonition patterns like [!WARN], [!NOTE], etc.
159        if text.starts_with('!') {
160            return true;
161        }
162
163        // Note: We don't filter out patterns with backticks because backticks in reference names
164        // are valid markdown syntax, e.g., [`dataclasses.InitVar`] is a valid reference name
165
166        // Also don't filter out references with dots - these are legitimate reference names
167        // like [tool.ruff] or [os.path] which are valid markdown references
168
169        // Note: We don't filter based on word count anymore because legitimate references
170        // can have many words, like "python language reference for import statements"
171        // Word count filtering was causing false positives where valid references were
172        // being incorrectly flagged as unused
173
174        false
175    }
176
177    /// Unescape a reference string by removing backslashes before special characters.
178    ///
179    /// This allows matching references like `[example\-reference]` with definitions like
180    /// `[example-reference]: http://example.com`
181    ///
182    /// Returns the unescaped reference string.
183    fn unescape_reference(reference: &str) -> String {
184        // Remove backslashes before special characters
185        reference.replace("\\", "")
186    }
187
188    /// Check if a reference definition is likely a comment-style reference.
189    ///
190    /// This recognizes common community patterns for comments in markdown:
191    /// - `[//]: # (comment)` - Most popular pattern
192    /// - `[comment]: # (text)` - Semantic pattern
193    /// - `[note]: # (text)` - Documentation pattern
194    /// - `[todo]: # (text)` - Task tracking pattern
195    /// - Any reference with just `#` as the URL (fragment-only, often unused)
196    ///
197    /// While not part of any official markdown spec (CommonMark, GFM), these patterns
198    /// are widely used across 23+ markdown implementations as documented in the community.
199    ///
200    /// # Arguments
201    /// * `ref_id` - The reference ID (already normalized to lowercase)
202    /// * `url` - The URL from the reference definition
203    ///
204    /// # Returns
205    /// `true` if this looks like a comment-style reference that should be ignored
206    fn is_likely_comment_reference(ref_id: &str, url: &str) -> bool {
207        // Common comment reference labels used in the community
208        const COMMENT_LABELS: &[&str] = &[
209            "//",      // [//]: # (comment) - most popular
210            "comment", // [comment]: # (text)
211            "note",    // [note]: # (text)
212            "todo",    // [todo]: # (text)
213            "fixme",   // [fixme]: # (text)
214            "hack",    // [hack]: # (text)
215        ];
216
217        let normalized_id = ref_id.trim().to_lowercase();
218        let normalized_url = url.trim();
219
220        // Pattern 1: Known comment labels with fragment URLs
221        // e.g., [//]: # (comment), [comment]: #section
222        if COMMENT_LABELS.contains(&normalized_id.as_str()) && normalized_url.starts_with('#') {
223            return true;
224        }
225
226        // Pattern 2: Any reference with just "#" as the URL
227        // This is often used as a comment placeholder or unused anchor
228        if normalized_url == "#" {
229            return true;
230        }
231
232        false
233    }
234
235    /// Find all link and image reference definitions in the content.
236    ///
237    /// This method returns a HashMap where the key is the normalized reference ID and the value is a vector of (start_line, end_line) tuples.
238    fn find_definitions(&self, ctx: &crate::lint_context::LintContext) -> HashMap<String, Vec<(usize, usize)>> {
239        let mut definitions: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
240
241        // First, add all reference definitions from context
242        for ref_def in &ctx.reference_defs {
243            // Skip comment-style references (e.g., [//]: # (comment))
244            if Self::is_likely_comment_reference(&ref_def.id, &ref_def.url) {
245                continue;
246            }
247
248            // Apply unescape to handle escaped characters in definitions
249            let normalized_id = Self::unescape_reference(&ref_def.id); // Already lowercase from context
250            definitions
251                .entry(normalized_id)
252                .or_default()
253                .push((ref_def.line - 1, ref_def.line - 1)); // Convert to 0-indexed
254        }
255
256        // Handle multi-line definitions that might not be fully captured by ctx.reference_defs
257        let lines = &ctx.lines;
258        let mut i = 0;
259        while i < lines.len() {
260            let line_info = &lines[i];
261            let line = line_info.content(ctx.content);
262
263            // Skip code blocks and front matter using line info
264            if line_info.in_code_block || line_info.in_front_matter {
265                i += 1;
266                continue;
267            }
268
269            // Check for multi-line continuation of existing definitions
270            if i > 0 && CONTINUATION_REGEX.is_match(line) {
271                // Find the reference definition this continues
272                let mut def_start = i - 1;
273                while def_start > 0 && !REFERENCE_DEFINITION_REGEX.is_match(lines[def_start].content(ctx.content)) {
274                    def_start -= 1;
275                }
276
277                if let Some(caps) = REFERENCE_DEFINITION_REGEX.captures(lines[def_start].content(ctx.content)) {
278                    let ref_id = caps.get(1).unwrap().as_str().trim();
279                    let normalized_id = Self::unescape_reference(ref_id).to_lowercase();
280
281                    // Update the end line for this definition
282                    if let Some(ranges) = definitions.get_mut(&normalized_id)
283                        && let Some(last_range) = ranges.last_mut()
284                        && last_range.0 == def_start
285                    {
286                        last_range.1 = i;
287                    }
288                }
289            }
290            i += 1;
291        }
292        definitions
293    }
294
295    /// Find all link and image reference reference usages in the content.
296    ///
297    /// This method returns a HashSet of all normalized reference IDs found in usage.
298    /// It leverages cached data from LintContext for efficiency.
299    fn find_usages(&self, ctx: &crate::lint_context::LintContext) -> HashSet<String> {
300        let mut usages: HashSet<String> = HashSet::new();
301
302        // 1. Add usages from cached reference links in LintContext
303        for link in &ctx.links {
304            if link.is_reference
305                && let Some(ref_id) = &link.reference_id
306            {
307                // Ensure the link itself is not inside a code block line
308                if !ctx.line_info(link.line).is_some_and(|info| info.in_code_block) {
309                    usages.insert(Self::unescape_reference(ref_id).to_lowercase());
310                }
311            }
312        }
313
314        // 2. Add usages from cached reference images in LintContext
315        for image in &ctx.images {
316            if image.is_reference
317                && let Some(ref_id) = &image.reference_id
318            {
319                // Ensure the image itself is not inside a code block line
320                if !ctx.line_info(image.line).is_some_and(|info| info.in_code_block) {
321                    usages.insert(Self::unescape_reference(ref_id).to_lowercase());
322                }
323            }
324        }
325
326        // 3. Add usages from footnote references (e.g., [^1], [^note])
327        // pulldown-cmark returns the id without the ^ prefix, but definitions have it
328        for footnote_ref in &ctx.footnote_refs {
329            // Ensure the footnote reference is not inside a code block line
330            if !ctx.line_info(footnote_ref.line).is_some_and(|info| info.in_code_block) {
331                // Add ^ prefix to match definition format
332                let ref_id = format!("^{}", footnote_ref.id);
333                usages.insert(ref_id.to_lowercase());
334            }
335        }
336
337        // 4. Find shortcut references [ref] not already handled by DocumentStructure.links
338        //    and ensure they are not within code spans or code blocks.
339        // Cache code spans once before the loop
340        let code_spans = ctx.code_spans();
341
342        for line_info in ctx.lines.iter() {
343            // Skip lines in code blocks or front matter
344            if line_info.in_code_block || line_info.in_front_matter {
345                continue;
346            }
347
348            // Skip lines that are reference definitions (start with [ref]: at beginning)
349            if REFERENCE_DEFINITION_REGEX.is_match(line_info.content(ctx.content)) {
350                continue;
351            }
352
353            // Find potential shortcut references
354            for caps in SHORTCUT_REFERENCE_REGEX
355                .captures_iter(line_info.content(ctx.content))
356                .flatten()
357            {
358                if let Some(full_match) = caps.get(0)
359                    && let Some(ref_id_match) = caps.get(1)
360                {
361                    // Check if the match is within a code span
362                    let match_byte_offset = line_info.byte_offset + full_match.start();
363                    let in_code_span = code_spans
364                        .iter()
365                        .any(|span| match_byte_offset >= span.byte_offset && match_byte_offset < span.byte_end);
366
367                    if !in_code_span {
368                        let ref_id = ref_id_match.as_str().trim();
369
370                        if !Self::should_skip_pattern(ref_id) {
371                            let normalized_id = Self::unescape_reference(ref_id).to_lowercase();
372                            usages.insert(normalized_id);
373                        }
374                    }
375                }
376            }
377        }
378
379        // NOTE: The complex recursive loop trying to find references within definitions
380        // has been removed as it's not standard Markdown behavior for finding *usages*.
381        // Usages refer to `[text][ref]`, `![alt][ref]`, `[ref]`, etc., in the main content,
382        // not references potentially embedded within the URL or title of another definition.
383
384        usages
385    }
386
387    /// Get unused references with their line ranges.
388    ///
389    /// This method uses the cached definitions to improve performance.
390    ///
391    /// Note: References that are only used inside code blocks are still considered unused,
392    /// as code blocks are treated as examples or documentation rather than actual content.
393    fn get_unused_references(
394        &self,
395        definitions: &HashMap<String, Vec<(usize, usize)>>,
396        usages: &HashSet<String>,
397    ) -> Vec<(String, usize, usize)> {
398        let mut unused = Vec::new();
399        for (id, ranges) in definitions {
400            // If this id is not used anywhere and is not in the ignored list
401            if !usages.contains(id) && !self.is_ignored_definition(id) {
402                // Only report as unused if there's exactly one definition
403                // Multiple definitions are already reported as duplicates
404                if ranges.len() == 1 {
405                    let (start, end) = ranges[0];
406                    unused.push((id.clone(), start, end));
407                }
408                // If there are multiple definitions (duplicates), don't report them as unused
409                // They're already being reported as duplicate definitions
410            }
411        }
412        unused
413    }
414
415    /// Check if a definition should be ignored (kept even if unused)
416    fn is_ignored_definition(&self, definition_id: &str) -> bool {
417        self.config
418            .ignored_definitions
419            .iter()
420            .any(|ignored| ignored.eq_ignore_ascii_case(definition_id))
421    }
422}
423
424impl Default for MD053LinkImageReferenceDefinitions {
425    fn default() -> Self {
426        Self::new()
427    }
428}
429
430impl Rule for MD053LinkImageReferenceDefinitions {
    /// Identifier of this rule; `check` attaches it to every warning it produces.
    fn name(&self) -> &'static str {
        "MD053"
    }
434
    /// One-line human-readable summary of the rule.
    fn description(&self) -> &'static str {
        "Link and image reference definitions should be needed"
    }
438
439    /// Check the content for unused and duplicate link/image reference definitions.
440    ///
441    /// This implementation uses caching for improved performance on large documents.
442    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
443        // Find definitions and usages using LintContext
444        let definitions = self.find_definitions(ctx);
445        let usages = self.find_usages(ctx);
446
447        // Get unused references by comparing definitions and usages
448        let unused_refs = self.get_unused_references(&definitions, &usages);
449
450        let mut warnings = Vec::new();
451
452        // Check for duplicate definitions (case-insensitive per CommonMark spec)
453        let mut seen_definitions: HashMap<String, (String, usize)> = HashMap::new(); // lowercase -> (original, first_line)
454
455        for (definition_id, ranges) in &definitions {
456            // Skip ignored definitions for duplicate checking
457            if self.is_ignored_definition(definition_id) {
458                continue;
459            }
460
461            if ranges.len() > 1 {
462                // Multiple definitions with exact same ID (already lowercase)
463                for (i, &(start_line, _)) in ranges.iter().enumerate() {
464                    if i > 0 {
465                        // Skip the first occurrence, report all others
466                        let line_num = start_line + 1;
467                        let line_content = ctx.lines.get(start_line).map(|l| l.content(ctx.content)).unwrap_or("");
468                        let (start_line_1idx, start_col, end_line, end_col) =
469                            calculate_line_range(line_num, line_content);
470
471                        warnings.push(LintWarning {
472                            rule_name: Some(self.name().to_string()),
473                            line: start_line_1idx,
474                            column: start_col,
475                            end_line,
476                            end_column: end_col,
477                            message: format!("Duplicate link or image reference definition: [{definition_id}]"),
478                            severity: Severity::Warning,
479                            fix: None,
480                        });
481                    }
482                }
483            }
484
485            // Track for case-variant duplicates
486            if let Some(&(start_line, _)) = ranges.first() {
487                // Find the original case version from the line
488                if let Some(line_info) = ctx.lines.get(start_line)
489                    && let Some(caps) = REFERENCE_DEFINITION_REGEX.captures(line_info.content(ctx.content))
490                {
491                    let original_id = caps.get(1).unwrap().as_str().trim();
492                    let lower_id = original_id.to_lowercase();
493
494                    if let Some((first_original, first_line)) = seen_definitions.get(&lower_id) {
495                        // Found a case-variant duplicate
496                        if first_original != original_id {
497                            let line_num = start_line + 1;
498                            let line_content = line_info.content(ctx.content);
499                            let (start_line_1idx, start_col, end_line, end_col) =
500                                calculate_line_range(line_num, line_content);
501
502                            warnings.push(LintWarning {
503                                    rule_name: Some(self.name().to_string()),
504                                    line: start_line_1idx,
505                                    column: start_col,
506                                    end_line,
507                                    end_column: end_col,
508                                    message: format!("Duplicate link or image reference definition: [{}] (conflicts with [{}] on line {})",
509                                                   original_id, first_original, first_line + 1),
510                                    severity: Severity::Warning,
511                                    fix: None,
512                                });
513                        }
514                    } else {
515                        seen_definitions.insert(lower_id, (original_id.to_string(), start_line));
516                    }
517                }
518            }
519        }
520
521        // Create warnings for unused references
522        for (definition, start, _end) in unused_refs {
523            let line_num = start + 1; // 1-indexed line numbers
524            let line_content = ctx.lines.get(start).map(|l| l.content(ctx.content)).unwrap_or("");
525
526            // Calculate precise character range for the entire reference definition line
527            let (start_line, start_col, end_line, end_col) = calculate_line_range(line_num, line_content);
528
529            warnings.push(LintWarning {
530                rule_name: Some(self.name().to_string()),
531                line: start_line,
532                column: start_col,
533                end_line,
534                end_column: end_col,
535                message: format!("Unused link/image reference: [{definition}]"),
536                severity: Severity::Warning,
537                fix: None, // MD053 is warning-only, no automatic fixes
538            });
539        }
540
541        Ok(warnings)
542    }
543
544    /// MD053 does not provide automatic fixes
545    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
546        // This rule is warning-only, no automatic fixes provided
547        Ok(ctx.content.to_string())
548    }
549
550    /// Check if this rule should be skipped for performance
551    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
552        // Skip if content is empty or has no links/images
553        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
554    }
555
    /// Expose the rule as `&dyn Any`, which allows downcasting to the concrete rule type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
559
560    fn default_config_section(&self) -> Option<(String, toml::Value)> {
561        let default_config = MD053Config::default();
562        let json_value = serde_json::to_value(&default_config).ok()?;
563        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
564        if let toml::Value::Table(table) = toml_value {
565            if !table.is_empty() {
566                Some((MD053Config::RULE_NAME.to_string(), toml::Value::Table(table)))
567            } else {
568                None
569            }
570        } else {
571            None
572        }
573    }
574
575    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
576    where
577        Self: Sized,
578    {
579        let rule_config = crate::rule_config_serde::load_rule_config::<MD053Config>(config);
580        Box::new(MD053LinkImageReferenceDefinitions::from_config_struct(rule_config))
581    }
582}
583
584#[cfg(test)]
585mod tests {
586    use super::*;
587    use crate::lint_context::LintContext;
588
589    #[test]
590    fn test_used_reference_link() {
591        let rule = MD053LinkImageReferenceDefinitions::new();
592        let content = "[text][ref]\n\n[ref]: https://example.com";
593        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
594        let result = rule.check(&ctx).unwrap();
595
596        assert_eq!(result.len(), 0);
597    }
598
599    #[test]
600    fn test_unused_reference_definition() {
601        let rule = MD053LinkImageReferenceDefinitions::new();
602        let content = "[unused]: https://example.com";
603        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
604        let result = rule.check(&ctx).unwrap();
605
606        assert_eq!(result.len(), 1);
607        assert!(result[0].message.contains("Unused link/image reference: [unused]"));
608    }
609
610    #[test]
611    fn test_used_reference_image() {
612        let rule = MD053LinkImageReferenceDefinitions::new();
613        let content = "![alt][img]\n\n[img]: image.jpg";
614        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
615        let result = rule.check(&ctx).unwrap();
616
617        assert_eq!(result.len(), 0);
618    }
619
620    #[test]
621    fn test_case_insensitive_matching() {
622        let rule = MD053LinkImageReferenceDefinitions::new();
623        let content = "[Text][REF]\n\n[ref]: https://example.com";
624        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
625        let result = rule.check(&ctx).unwrap();
626
627        assert_eq!(result.len(), 0);
628    }
629
630    #[test]
631    fn test_shortcut_reference() {
632        let rule = MD053LinkImageReferenceDefinitions::new();
633        let content = "[ref]\n\n[ref]: https://example.com";
634        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
635        let result = rule.check(&ctx).unwrap();
636
637        assert_eq!(result.len(), 0);
638    }
639
640    #[test]
641    fn test_collapsed_reference() {
642        let rule = MD053LinkImageReferenceDefinitions::new();
643        let content = "[ref][]\n\n[ref]: https://example.com";
644        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
645        let result = rule.check(&ctx).unwrap();
646
647        assert_eq!(result.len(), 0);
648    }
649
650    #[test]
651    fn test_multiple_unused_definitions() {
652        let rule = MD053LinkImageReferenceDefinitions::new();
653        let content = "[unused1]: url1\n[unused2]: url2\n[unused3]: url3";
654        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
655        let result = rule.check(&ctx).unwrap();
656
657        assert_eq!(result.len(), 3);
658
659        // The warnings might not be in the same order, so collect all messages
660        let messages: Vec<String> = result.iter().map(|w| w.message.clone()).collect();
661        assert!(messages.iter().any(|m| m.contains("unused1")));
662        assert!(messages.iter().any(|m| m.contains("unused2")));
663        assert!(messages.iter().any(|m| m.contains("unused3")));
664    }
665
666    #[test]
667    fn test_mixed_used_and_unused() {
668        let rule = MD053LinkImageReferenceDefinitions::new();
669        let content = "[used]\n\n[used]: url1\n[unused]: url2";
670        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
671        let result = rule.check(&ctx).unwrap();
672
673        assert_eq!(result.len(), 1);
674        assert!(result[0].message.contains("unused"));
675    }
676
677    #[test]
678    fn test_multiline_definition() {
679        let rule = MD053LinkImageReferenceDefinitions::new();
680        let content = "[ref]: https://example.com\n  \"Title on next line\"";
681        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
682        let result = rule.check(&ctx).unwrap();
683
684        assert_eq!(result.len(), 1); // Still unused
685    }
686
687    #[test]
688    fn test_reference_in_code_block() {
689        let rule = MD053LinkImageReferenceDefinitions::new();
690        let content = "```\n[ref]\n```\n\n[ref]: https://example.com";
691        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
692        let result = rule.check(&ctx).unwrap();
693
694        // Reference used only in code block is still considered unused
695        assert_eq!(result.len(), 1);
696    }
697
698    #[test]
699    fn test_reference_in_inline_code() {
700        let rule = MD053LinkImageReferenceDefinitions::new();
701        let content = "`[ref]`\n\n[ref]: https://example.com";
702        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
703        let result = rule.check(&ctx).unwrap();
704
705        // Reference in inline code is not a usage
706        assert_eq!(result.len(), 1);
707    }
708
709    #[test]
710    fn test_escaped_reference() {
711        let rule = MD053LinkImageReferenceDefinitions::new();
712        let content = "[example\\-ref]\n\n[example-ref]: https://example.com";
713        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
714        let result = rule.check(&ctx).unwrap();
715
716        // Should match despite escaping
717        assert_eq!(result.len(), 0);
718    }
719
720    #[test]
721    fn test_duplicate_definitions() {
722        let rule = MD053LinkImageReferenceDefinitions::new();
723        let content = "[ref]: url1\n[ref]: url2\n\n[ref]";
724        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
725        let result = rule.check(&ctx).unwrap();
726
727        // Should flag the duplicate definition even though it's used (matches markdownlint)
728        assert_eq!(result.len(), 1);
729    }
730
731    #[test]
732    fn test_fix_returns_original() {
733        // MD053 is warning-only, fix should return original content
734        let rule = MD053LinkImageReferenceDefinitions::new();
735        let content = "[used]\n\n[used]: url1\n[unused]: url2\n\nMore content";
736        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
737        let fixed = rule.fix(&ctx).unwrap();
738
739        assert_eq!(fixed, content);
740    }
741
742    #[test]
743    fn test_fix_preserves_content() {
744        // MD053 is warning-only, fix should preserve all content
745        let rule = MD053LinkImageReferenceDefinitions::new();
746        let content = "Content\n\n[unused]: url\n\nMore content";
747        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
748        let fixed = rule.fix(&ctx).unwrap();
749
750        assert_eq!(fixed, content);
751    }
752
753    #[test]
754    fn test_fix_does_not_remove() {
755        // MD053 is warning-only, fix should not remove anything
756        let rule = MD053LinkImageReferenceDefinitions::new();
757        let content = "[unused1]: url1\n[unused2]: url2\n[unused3]: url3";
758        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
759        let fixed = rule.fix(&ctx).unwrap();
760
761        assert_eq!(fixed, content);
762    }
763
764    #[test]
765    fn test_special_characters_in_reference() {
766        let rule = MD053LinkImageReferenceDefinitions::new();
767        let content = "[ref-with_special.chars]\n\n[ref-with_special.chars]: url";
768        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
769        let result = rule.check(&ctx).unwrap();
770
771        assert_eq!(result.len(), 0);
772    }
773
774    #[test]
775    fn test_find_definitions() {
776        let rule = MD053LinkImageReferenceDefinitions::new();
777        let content = "[ref1]: url1\n[ref2]: url2\nSome text\n[ref3]: url3";
778        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
779        let defs = rule.find_definitions(&ctx);
780
781        assert_eq!(defs.len(), 3);
782        assert!(defs.contains_key("ref1"));
783        assert!(defs.contains_key("ref2"));
784        assert!(defs.contains_key("ref3"));
785    }
786
787    #[test]
788    fn test_find_usages() {
789        let rule = MD053LinkImageReferenceDefinitions::new();
790        let content = "[text][ref1] and [ref2] and ![img][ref3]";
791        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
792        let usages = rule.find_usages(&ctx);
793
794        assert!(usages.contains("ref1"));
795        assert!(usages.contains("ref2"));
796        assert!(usages.contains("ref3"));
797    }
798
799    #[test]
800    fn test_ignored_definitions_config() {
801        // Test with ignored definitions
802        let config = MD053Config {
803            ignored_definitions: vec!["todo".to_string(), "draft".to_string()],
804        };
805        let rule = MD053LinkImageReferenceDefinitions::from_config_struct(config);
806
807        let content = "[todo]: https://example.com/todo\n[draft]: https://example.com/draft\n[unused]: https://example.com/unused";
808        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
809        let result = rule.check(&ctx).unwrap();
810
811        // Should only flag "unused", not "todo" or "draft"
812        assert_eq!(result.len(), 1);
813        assert!(result[0].message.contains("unused"));
814        assert!(!result[0].message.contains("todo"));
815        assert!(!result[0].message.contains("draft"));
816    }
817
818    #[test]
819    fn test_ignored_definitions_case_insensitive() {
820        // Test case-insensitive matching of ignored definitions
821        let config = MD053Config {
822            ignored_definitions: vec!["TODO".to_string()],
823        };
824        let rule = MD053LinkImageReferenceDefinitions::from_config_struct(config);
825
826        let content = "[todo]: https://example.com/todo\n[unused]: https://example.com/unused";
827        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
828        let result = rule.check(&ctx).unwrap();
829
830        // Should only flag "unused", not "todo" (matches "TODO" case-insensitively)
831        assert_eq!(result.len(), 1);
832        assert!(result[0].message.contains("unused"));
833        assert!(!result[0].message.contains("todo"));
834    }
835
836    #[test]
837    fn test_default_config_section() {
838        let rule = MD053LinkImageReferenceDefinitions::default();
839        let config_section = rule.default_config_section();
840
841        assert!(config_section.is_some());
842        let (name, value) = config_section.unwrap();
843        assert_eq!(name, "MD053");
844
845        // Should contain the ignored_definitions option with default empty array
846        if let toml::Value::Table(table) = value {
847            assert!(table.contains_key("ignored-definitions"));
848            assert_eq!(table["ignored-definitions"], toml::Value::Array(vec![]));
849        } else {
850            panic!("Expected TOML table");
851        }
852    }
853
854    #[test]
855    fn test_fix_with_ignored_definitions() {
856        // MD053 is warning-only, fix should not remove anything even with ignored definitions
857        let config = MD053Config {
858            ignored_definitions: vec!["template".to_string()],
859        };
860        let rule = MD053LinkImageReferenceDefinitions::from_config_struct(config);
861
862        let content = "[template]: https://example.com/template\n[unused]: https://example.com/unused\n\nSome content.";
863        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
864        let fixed = rule.fix(&ctx).unwrap();
865
866        // Should keep everything since MD053 doesn't fix
867        assert_eq!(fixed, content);
868    }
869
870    #[test]
871    fn test_duplicate_definitions_exact_case() {
872        let rule = MD053LinkImageReferenceDefinitions::new();
873        let content = "[ref]: url1\n[ref]: url2\n[ref]: url3";
874        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
875        let result = rule.check(&ctx).unwrap();
876
877        // Should have 2 duplicate warnings (for the 2nd and 3rd definitions)
878        // Plus 1 unused warning
879        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
880        assert_eq!(duplicate_warnings.len(), 2);
881        assert_eq!(duplicate_warnings[0].line, 2);
882        assert_eq!(duplicate_warnings[1].line, 3);
883    }
884
885    #[test]
886    fn test_duplicate_definitions_case_variants() {
887        let rule = MD053LinkImageReferenceDefinitions::new();
888        let content =
889            "[method resolution order]: url1\n[Method Resolution Order]: url2\n[METHOD RESOLUTION ORDER]: url3";
890        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
891        let result = rule.check(&ctx).unwrap();
892
893        // Should have 2 duplicate warnings (for the 2nd and 3rd definitions)
894        // Note: These are treated as exact duplicates since they normalize to the same ID
895        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
896        assert_eq!(duplicate_warnings.len(), 2);
897
898        // The exact duplicate messages don't include "conflicts with"
899        // Only case-variant duplicates with different normalized forms would
900        assert_eq!(duplicate_warnings[0].line, 2);
901        assert_eq!(duplicate_warnings[1].line, 3);
902    }
903
904    #[test]
905    fn test_duplicate_and_unused() {
906        let rule = MD053LinkImageReferenceDefinitions::new();
907        let content = "[used]\n[used]: url1\n[used]: url2\n[unused]: url3";
908        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
909        let result = rule.check(&ctx).unwrap();
910
911        // Should have 1 duplicate warning and 1 unused warning
912        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
913        let unused_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Unused")).collect();
914
915        assert_eq!(duplicate_warnings.len(), 1);
916        assert_eq!(unused_warnings.len(), 1);
917        assert_eq!(duplicate_warnings[0].line, 3); // Second [used] definition
918        assert_eq!(unused_warnings[0].line, 4); // [unused] definition
919    }
920
921    #[test]
922    fn test_duplicate_with_usage() {
923        let rule = MD053LinkImageReferenceDefinitions::new();
924        // Even if used, duplicates should still be reported
925        let content = "[ref]\n\n[ref]: url1\n[ref]: url2";
926        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
927        let result = rule.check(&ctx).unwrap();
928
929        // Should have 1 duplicate warning (no unused since it's referenced)
930        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
931        let unused_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Unused")).collect();
932
933        assert_eq!(duplicate_warnings.len(), 1);
934        assert_eq!(unused_warnings.len(), 0);
935        assert_eq!(duplicate_warnings[0].line, 4);
936    }
937
938    #[test]
939    fn test_no_duplicate_different_ids() {
940        let rule = MD053LinkImageReferenceDefinitions::new();
941        let content = "[ref1]: url1\n[ref2]: url2\n[ref3]: url3";
942        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
943        let result = rule.check(&ctx).unwrap();
944
945        // Should have no duplicate warnings, only unused warnings
946        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
947        assert_eq!(duplicate_warnings.len(), 0);
948    }
949
950    #[test]
951    fn test_comment_style_reference_double_slash() {
952        let rule = MD053LinkImageReferenceDefinitions::new();
953        // Most popular comment pattern: [//]: # (comment)
954        let content = "[//]: # (This is a comment)\n\nSome regular text.";
955        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
956        let result = rule.check(&ctx).unwrap();
957
958        // Should not report as unused - it's recognized as a comment
959        assert_eq!(result.len(), 0, "Comment-style reference [//]: # should not be flagged");
960    }
961
962    #[test]
963    fn test_comment_style_reference_comment_label() {
964        let rule = MD053LinkImageReferenceDefinitions::new();
965        // Semantic comment pattern: [comment]: # (text)
966        let content = "[comment]: # (This is a semantic comment)\n\n[note]: # (This is a note)";
967        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
968        let result = rule.check(&ctx).unwrap();
969
970        // Should not report either as unused
971        assert_eq!(result.len(), 0, "Comment-style references should not be flagged");
972    }
973
974    #[test]
975    fn test_comment_style_reference_todo_fixme() {
976        let rule = MD053LinkImageReferenceDefinitions::new();
977        // Task tracking patterns: [todo]: # and [fixme]: #
978        let content = "[todo]: # (Add more examples)\n[fixme]: # (Fix this later)\n[hack]: # (Temporary workaround)";
979        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
980        let result = rule.check(&ctx).unwrap();
981
982        // Should not report any as unused
983        assert_eq!(result.len(), 0, "TODO/FIXME comment patterns should not be flagged");
984    }
985
986    #[test]
987    fn test_comment_style_reference_fragment_only() {
988        let rule = MD053LinkImageReferenceDefinitions::new();
989        // Any reference with just "#" as URL should be treated as a comment
990        let content = "[anything]: #\n[ref]: #\n\nSome text.";
991        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
992        let result = rule.check(&ctx).unwrap();
993
994        // Should not report as unused - fragment-only URLs are often comments
995        assert_eq!(result.len(), 0, "References with just '#' URL should not be flagged");
996    }
997
998    #[test]
999    fn test_comment_vs_real_reference() {
1000        let rule = MD053LinkImageReferenceDefinitions::new();
1001        // Mix of comment and real reference - only real one should be flagged if unused
1002        let content = "[//]: # (This is a comment)\n[real-ref]: https://example.com\n\nSome text.";
1003        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1004        let result = rule.check(&ctx).unwrap();
1005
1006        // Should only report the real reference as unused
1007        assert_eq!(result.len(), 1, "Only real unused references should be flagged");
1008        assert!(result[0].message.contains("real-ref"), "Should flag the real reference");
1009    }
1010
1011    #[test]
1012    fn test_comment_with_fragment_section() {
1013        let rule = MD053LinkImageReferenceDefinitions::new();
1014        // Comment pattern with a fragment section (still a comment)
1015        let content = "[//]: #section (Comment about section)\n\nSome text.";
1016        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1017        let result = rule.check(&ctx).unwrap();
1018
1019        // Should not report as unused - it's still a comment pattern
1020        assert_eq!(result.len(), 0, "Comment with fragment section should not be flagged");
1021    }
1022
1023    #[test]
1024    fn test_is_likely_comment_reference_helper() {
1025        // Test the helper function directly
1026        assert!(
1027            MD053LinkImageReferenceDefinitions::is_likely_comment_reference("//", "#"),
1028            "[//]: # should be recognized as comment"
1029        );
1030        assert!(
1031            MD053LinkImageReferenceDefinitions::is_likely_comment_reference("comment", "#section"),
1032            "[comment]: #section should be recognized as comment"
1033        );
1034        assert!(
1035            MD053LinkImageReferenceDefinitions::is_likely_comment_reference("note", "#"),
1036            "[note]: # should be recognized as comment"
1037        );
1038        assert!(
1039            MD053LinkImageReferenceDefinitions::is_likely_comment_reference("todo", "#"),
1040            "[todo]: # should be recognized as comment"
1041        );
1042        assert!(
1043            MD053LinkImageReferenceDefinitions::is_likely_comment_reference("anything", "#"),
1044            "Any label with just '#' should be recognized as comment"
1045        );
1046        assert!(
1047            !MD053LinkImageReferenceDefinitions::is_likely_comment_reference("ref", "https://example.com"),
1048            "Real URL should not be recognized as comment"
1049        );
1050        assert!(
1051            !MD053LinkImageReferenceDefinitions::is_likely_comment_reference("link", "http://test.com"),
1052            "Real URL should not be recognized as comment"
1053        );
1054    }
1055
1056    #[test]
1057    fn test_reference_with_colon_in_name() {
1058        // References containing colons and spaces should be recognized as valid references
1059        let rule = MD053LinkImageReferenceDefinitions::new();
1060        let content = "Check [RFC: 1234] for specs.\n\n[RFC: 1234]: https://example.com\n";
1061        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1062        let result = rule.check(&ctx).unwrap();
1063
1064        assert!(
1065            result.is_empty(),
1066            "Reference with colon should be recognized as used, got warnings: {result:?}"
1067        );
1068    }
1069
1070    #[test]
1071    fn test_reference_with_colon_various_styles() {
1072        // Test various RFC-style and similar references with colons
1073        let rule = MD053LinkImageReferenceDefinitions::new();
1074        let content = r#"See [RFC: 1234] and [Issue: 42] and [PR: 100].
1075
1076[RFC: 1234]: https://example.com/rfc1234
1077[Issue: 42]: https://example.com/issue42
1078[PR: 100]: https://example.com/pr100
1079"#;
1080        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1081        let result = rule.check(&ctx).unwrap();
1082
1083        assert!(
1084            result.is_empty(),
1085            "All colon-style references should be recognized as used, got warnings: {result:?}"
1086        );
1087    }
1088
1089    #[test]
1090    fn test_should_skip_pattern_allows_rfc_style() {
1091        // Verify that should_skip_pattern does NOT skip RFC-style references with colons
1092        // This tests the fix for the bug where references with ": " were incorrectly skipped
1093        assert!(
1094            !MD053LinkImageReferenceDefinitions::should_skip_pattern("RFC: 1234"),
1095            "RFC-style references should NOT be skipped"
1096        );
1097        assert!(
1098            !MD053LinkImageReferenceDefinitions::should_skip_pattern("Issue: 42"),
1099            "Issue-style references should NOT be skipped"
1100        );
1101        assert!(
1102            !MD053LinkImageReferenceDefinitions::should_skip_pattern("PR: 100"),
1103            "PR-style references should NOT be skipped"
1104        );
1105        assert!(
1106            !MD053LinkImageReferenceDefinitions::should_skip_pattern("See: Section 2"),
1107            "References with 'See:' should NOT be skipped"
1108        );
1109        assert!(
1110            !MD053LinkImageReferenceDefinitions::should_skip_pattern("foo:bar"),
1111            "References without space after colon should NOT be skipped"
1112        );
1113    }
1114
1115    #[test]
1116    fn test_should_skip_pattern_skips_prose() {
1117        // Verify that prose-like patterns (3+ words before colon) are still skipped
1118        assert!(
1119            MD053LinkImageReferenceDefinitions::should_skip_pattern("default value is: something"),
1120            "Prose with 3+ words before colon SHOULD be skipped"
1121        );
1122        assert!(
1123            MD053LinkImageReferenceDefinitions::should_skip_pattern("this is a label: description"),
1124            "Prose with 4 words before colon SHOULD be skipped"
1125        );
1126        assert!(
1127            MD053LinkImageReferenceDefinitions::should_skip_pattern("the project root: path/to/dir"),
1128            "Prose-like descriptions SHOULD be skipped"
1129        );
1130    }
1131}