rumdl_lib/rules/
md053_link_image_reference_definitions.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::rule_config_serde::RuleConfig;
3use crate::utils::range_utils::calculate_line_range;
4use fancy_regex::Regex as FancyRegex;
5use lazy_static::lazy_static;
6use regex::Regex;
7use serde::{Deserialize, Serialize};
8use std::collections::{HashMap, HashSet};
9
lazy_static! {
    // Full (`[text][reference]`, `![text][reference]`) and collapsed (`[text][]`,
    // `![text][]`) references have no regex here: `find_usages` takes them from the
    // links and images already collected on the `LintContext`.

    // Shortcut reference links: `[reference]`.
    // The opening bracket must not be preceded by `!` and the closing bracket must not
    // be followed by `[`. Any other trailing character (`:`, `.`, `,`, ...) is accepted.
    // NOTE: this also matches the text part of an inline link such as `[text](url)`.
    static ref SHORTCUT_REFERENCE_REGEX: FancyRegex =
        FancyRegex::new(r"(?<!\!)\[([^\]]+)\](?!\[)").unwrap();

    // Link/image reference definition: `[reference]: URL`.
    // Allows leading whitespace, requires at least one whitespace character after the
    // colon and a non-empty remainder. Capture 1 is the label, capture 2 the remainder.
    static ref REFERENCE_DEFINITION_REGEX: Regex =
        Regex::new(r"^\s*\[([^\]]+)\]:\s+(.+)$").unwrap();

    // Continuation line of a multi-line reference definition: a line that starts with
    // whitespace and contains at least one more character.
    static ref CONTINUATION_REGEX: Regex = Regex::new(r"^\s+(.+)$").unwrap();

    // Code fences (three or more backticks or tildes). Leading indentation is allowed
    // to support indented code blocks for MkDocs tabs. The end pattern additionally
    // requires that only whitespace follows the fence.
    static ref CODE_BLOCK_START_REGEX: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap();
    static ref CODE_BLOCK_END_REGEX: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,})\s*$").unwrap();
}
38
/// Configuration for MD053 rule
///
/// Field names are (de)serialized in kebab-case, so `ignored_definitions` is
/// written as `ignored-definitions` by serde.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "kebab-case")]
pub struct MD053Config {
    /// List of reference names to keep even if unused.
    ///
    /// Defaults to an empty list when the key is absent.
    #[serde(default = "default_ignored_definitions")]
    pub ignored_definitions: Vec<String>,
}
47
48impl Default for MD053Config {
49    fn default() -> Self {
50        Self {
51            ignored_definitions: default_ignored_definitions(),
52        }
53    }
54}
55
/// Default value for `MD053Config::ignored_definitions`: an empty list.
fn default_ignored_definitions() -> Vec<String> {
    vec![]
}
59
/// Associates this configuration struct with the MD053 rule.
impl RuleConfig for MD053Config {
    /// Rule identifier; also used as the section name in `default_config_section`.
    const RULE_NAME: &'static str = "MD053";
}
63
/// Rule MD053: Link and image reference definitions should be used
///
/// See [docs/md053.md](../../docs/md053.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when a link or image reference definition is declared but not used
/// anywhere in the document. Unused reference definitions can create confusion and clutter.
/// A definition that appears more than once is reported as a duplicate instead.
///
/// ## Supported Reference Formats
///
/// This rule handles the following reference formats:
///
/// - **Full reference links/images**: `[text][reference]` or `![text][reference]`
/// - **Collapsed reference links/images**: `[text][]` or `![text][]`
/// - **Shortcut reference links**: `[reference]` (must be defined elsewhere)
/// - **Reference definitions**: `[reference]: URL "Optional Title"`
/// - **Multi-line reference definitions**:
///   ```markdown
///   [reference]: URL
///      "Optional title continued on next line"
///   ```
///
/// ## Configuration Options
///
/// The rule supports the following configuration options:
///
/// ```yaml
/// MD053:
///   ignored_definitions: []  # List of reference definitions to ignore (never report as unused)
/// ```
///
/// ## Performance Notes
///
/// 1. **Pre-parsed data**: Links, images, reference definitions and code spans are read
///    from the `LintContext` rather than being re-parsed by this rule
/// 2. **Efficient Reference Matching**: Definitions are kept in a `HashMap` and usages in a
///    `HashSet`, so matching a definition against its usages is a hash lookup
/// 3. **Code Block Handling**: Lines inside code blocks and front matter are skipped, and
///    shortcut references inside code spans are ignored
/// 4. **Early exit**: `should_skip` bypasses the rule for empty documents and documents that
///    are unlikely to contain links or images
///
/// ## Edge Cases Handled
///
/// - **Case insensitivity**: References are matched case-insensitively
/// - **Escaped characters**: Backslashes are stripped from labels before matching
/// - **Unicode support**: Handles non-ASCII characters in references and URLs
/// - **Code blocks**: Ignores references inside code blocks and spans
/// - **Special characters**: Properly handles references with special characters
///
/// ## Fix Behavior
///
/// This rule does not provide automatic fixes. Unused references must be manually reviewed
/// and removed, as they may be intentionally kept for future use or as templates.
#[derive(Clone)]
pub struct MD053LinkImageReferenceDefinitions {
    /// Rule settings; currently only the list of definitions that are never reported.
    config: MD053Config,
}
119
120impl MD053LinkImageReferenceDefinitions {
121    /// Create a new instance of the MD053 rule
122    pub fn new() -> Self {
123        Self {
124            config: MD053Config::default(),
125        }
126    }
127
128    /// Create a new instance with the given configuration
129    pub fn from_config_struct(config: MD053Config) -> Self {
130        Self { config }
131    }
132
133    /// Check if a pattern is likely NOT a markdown reference
134    /// Returns true if this pattern should be skipped
135    fn is_likely_not_reference(text: &str) -> bool {
136        // Don't skip pure numeric patterns - they could be footnote references like [1]
137        // Only skip numeric ranges like [1:3], [0:10], etc.
138        if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
139            return true;
140        }
141
142        // Skip glob/wildcard patterns like [*], [...], [**]
143        if text == "*" || text == "..." || text == "**" {
144            return true;
145        }
146
147        // Skip patterns that are just punctuation or operators
148        if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
149            return true;
150        }
151
152        // Skip very short non-word patterns (likely operators or syntax)
153        // But allow single digits (could be footnotes) and single letters
154        if text.len() <= 2 && !text.chars().all(|c| c.is_alphanumeric()) {
155            return true;
156        }
157
158        // Skip descriptive patterns with colon like [default: the project root]
159        // But allow simple numeric ranges which are handled above
160        if text.contains(':') && text.contains(' ') {
161            return true;
162        }
163
164        // Skip alert/admonition patterns like [!WARN], [!NOTE], etc.
165        if text.starts_with('!') {
166            return true;
167        }
168
169        // Note: We don't filter out patterns with backticks because backticks in reference names
170        // are valid markdown syntax, e.g., [`dataclasses.InitVar`] is a valid reference name
171
172        // Also don't filter out references with dots - these are legitimate reference names
173        // like [tool.ruff] or [os.path] which are valid markdown references
174
175        // Note: We don't filter based on word count anymore because legitimate references
176        // can have many words, like "python language reference for import statements"
177        // Word count filtering was causing false positives where valid references were
178        // being incorrectly flagged as unused
179
180        false
181    }
182
183    /// Unescape a reference string by removing backslashes before special characters.
184    ///
185    /// This allows matching references like `[example\-reference]` with definitions like
186    /// `[example-reference]: http://example.com`
187    ///
188    /// Returns the unescaped reference string.
189    fn unescape_reference(reference: &str) -> String {
190        // Remove backslashes before special characters
191        reference.replace("\\", "")
192    }
193
194    /// Find all link and image reference definitions in the content.
195    ///
196    /// This method returns a HashMap where the key is the normalized reference ID and the value is a vector of (start_line, end_line) tuples.
197    fn find_definitions(&self, ctx: &crate::lint_context::LintContext) -> HashMap<String, Vec<(usize, usize)>> {
198        let mut definitions: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
199
200        // First, add all reference definitions from context
201        for ref_def in &ctx.reference_defs {
202            // Apply unescape to handle escaped characters in definitions
203            let normalized_id = Self::unescape_reference(&ref_def.id); // Already lowercase from context
204            definitions
205                .entry(normalized_id)
206                .or_default()
207                .push((ref_def.line - 1, ref_def.line - 1)); // Convert to 0-indexed
208        }
209
210        // Handle multi-line definitions that might not be fully captured by ctx.reference_defs
211        let lines = &ctx.lines;
212        let mut i = 0;
213        while i < lines.len() {
214            let line_info = &lines[i];
215            let line = &line_info.content;
216
217            // Skip code blocks and front matter using line info
218            if line_info.in_code_block || ctx.is_in_front_matter(i + 1) {
219                i += 1;
220                continue;
221            }
222
223            // Check for multi-line continuation of existing definitions
224            if i > 0 && CONTINUATION_REGEX.is_match(line) {
225                // Find the reference definition this continues
226                let mut def_start = i - 1;
227                while def_start > 0 && !REFERENCE_DEFINITION_REGEX.is_match(&lines[def_start].content) {
228                    def_start -= 1;
229                }
230
231                if let Some(caps) = REFERENCE_DEFINITION_REGEX.captures(&lines[def_start].content) {
232                    let ref_id = caps.get(1).unwrap().as_str().trim();
233                    let normalized_id = Self::unescape_reference(ref_id).to_lowercase();
234
235                    // Update the end line for this definition
236                    if let Some(ranges) = definitions.get_mut(&normalized_id)
237                        && let Some(last_range) = ranges.last_mut()
238                        && last_range.0 == def_start
239                    {
240                        last_range.1 = i;
241                    }
242                }
243            }
244            i += 1;
245        }
246        definitions
247    }
248
249    /// Find all link and image reference reference usages in the content.
250    ///
251    /// This method returns a HashSet of all normalized reference IDs found in usage.
252    /// It leverages cached data from LintContext for efficiency.
253    fn find_usages(&self, ctx: &crate::lint_context::LintContext) -> HashSet<String> {
254        let mut usages: HashSet<String> = HashSet::new();
255
256        // 1. Add usages from cached reference links in LintContext
257        for link in &ctx.links {
258            if link.is_reference
259                && let Some(ref_id) = &link.reference_id
260            {
261                // Ensure the link itself is not inside a code block line
262                if !ctx.is_in_code_block(link.line) {
263                    usages.insert(Self::unescape_reference(ref_id).to_lowercase());
264                }
265            }
266        }
267
268        // 2. Add usages from cached reference images in LintContext
269        for image in &ctx.images {
270            if image.is_reference
271                && let Some(ref_id) = &image.reference_id
272            {
273                // Ensure the image itself is not inside a code block line
274                if !ctx.is_in_code_block(image.line) {
275                    usages.insert(Self::unescape_reference(ref_id).to_lowercase());
276                }
277            }
278        }
279
280        // 3. Find shortcut references [ref] not already handled by DocumentStructure.links
281        //    and ensure they are not within code spans or code blocks.
282        // Cache code spans once before the loop
283        let code_spans = ctx.code_spans();
284
285        for (i, line_info) in ctx.lines.iter().enumerate() {
286            let line_num = i + 1; // 1-indexed
287
288            // Skip lines in code blocks or front matter
289            if line_info.in_code_block || ctx.is_in_front_matter(line_num) {
290                continue;
291            }
292
293            // Skip lines that are reference definitions (start with [ref]: at beginning)
294            if REFERENCE_DEFINITION_REGEX.is_match(&line_info.content) {
295                continue;
296            }
297
298            // Find potential shortcut references
299            for caps in SHORTCUT_REFERENCE_REGEX.captures_iter(&line_info.content).flatten() {
300                if let Some(full_match) = caps.get(0)
301                    && let Some(ref_id_match) = caps.get(1)
302                {
303                    // Check if the match is within a code span
304                    let match_byte_offset = line_info.byte_offset + full_match.start();
305                    let in_code_span = code_spans
306                        .iter()
307                        .any(|span| match_byte_offset >= span.byte_offset && match_byte_offset < span.byte_end);
308
309                    if !in_code_span {
310                        let ref_id = ref_id_match.as_str().trim();
311
312                        // Skip patterns that are likely not markdown references
313                        if !Self::is_likely_not_reference(ref_id) {
314                            let normalized_id = Self::unescape_reference(ref_id).to_lowercase();
315                            usages.insert(normalized_id);
316                        }
317                    }
318                }
319            }
320        }
321
322        // NOTE: The complex recursive loop trying to find references within definitions
323        // has been removed as it's not standard Markdown behavior for finding *usages*.
324        // Usages refer to `[text][ref]`, `![alt][ref]`, `[ref]`, etc., in the main content,
325        // not references potentially embedded within the URL or title of another definition.
326
327        usages
328    }
329
330    /// Get unused references with their line ranges.
331    ///
332    /// This method uses the cached definitions to improve performance.
333    ///
334    /// Note: References that are only used inside code blocks are still considered unused,
335    /// as code blocks are treated as examples or documentation rather than actual content.
336    fn get_unused_references(
337        &self,
338        definitions: &HashMap<String, Vec<(usize, usize)>>,
339        usages: &HashSet<String>,
340    ) -> Vec<(String, usize, usize)> {
341        let mut unused = Vec::new();
342        for (id, ranges) in definitions {
343            // If this id is not used anywhere and is not in the ignored list
344            if !usages.contains(id) && !self.is_ignored_definition(id) {
345                // Only report as unused if there's exactly one definition
346                // Multiple definitions are already reported as duplicates
347                if ranges.len() == 1 {
348                    let (start, end) = ranges[0];
349                    unused.push((id.clone(), start, end));
350                }
351                // If there are multiple definitions (duplicates), don't report them as unused
352                // They're already being reported as duplicate definitions
353            }
354        }
355        unused
356    }
357
358    /// Check if a definition should be ignored (kept even if unused)
359    fn is_ignored_definition(&self, definition_id: &str) -> bool {
360        self.config
361            .ignored_definitions
362            .iter()
363            .any(|ignored| ignored.eq_ignore_ascii_case(definition_id))
364    }
365}
366
367impl Default for MD053LinkImageReferenceDefinitions {
368    fn default() -> Self {
369        Self::new()
370    }
371}
372
373impl Rule for MD053LinkImageReferenceDefinitions {
374    fn name(&self) -> &'static str {
375        "MD053"
376    }
377
    /// Returns the short, human-readable summary of the rule.
    fn description(&self) -> &'static str {
        "Link and image reference definitions should be needed"
    }
381
382    /// Check the content for unused and duplicate link/image reference definitions.
383    ///
384    /// This implementation uses caching for improved performance on large documents.
385    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
386        // Find definitions and usages using LintContext
387        let definitions = self.find_definitions(ctx);
388        let usages = self.find_usages(ctx);
389
390        // Get unused references by comparing definitions and usages
391        let unused_refs = self.get_unused_references(&definitions, &usages);
392
393        let mut warnings = Vec::new();
394
395        // Check for duplicate definitions (case-insensitive per CommonMark spec)
396        let mut seen_definitions: HashMap<String, (String, usize)> = HashMap::new(); // lowercase -> (original, first_line)
397
398        for (definition_id, ranges) in &definitions {
399            // Skip ignored definitions for duplicate checking
400            if self.is_ignored_definition(definition_id) {
401                continue;
402            }
403
404            if ranges.len() > 1 {
405                // Multiple definitions with exact same ID (already lowercase)
406                for (i, &(start_line, _)) in ranges.iter().enumerate() {
407                    if i > 0 {
408                        // Skip the first occurrence, report all others
409                        let line_num = start_line + 1;
410                        let line_content = ctx.lines.get(start_line).map(|l| l.content.as_str()).unwrap_or("");
411                        let (start_line_1idx, start_col, end_line, end_col) =
412                            calculate_line_range(line_num, line_content);
413
414                        warnings.push(LintWarning {
415                            rule_name: Some(self.name()),
416                            line: start_line_1idx,
417                            column: start_col,
418                            end_line,
419                            end_column: end_col,
420                            message: format!("Duplicate link or image reference definition: [{definition_id}]"),
421                            severity: Severity::Warning,
422                            fix: None,
423                        });
424                    }
425                }
426            }
427
428            // Track for case-variant duplicates
429            if let Some(&(start_line, _)) = ranges.first() {
430                // Find the original case version from the line
431                if let Some(line_info) = ctx.lines.get(start_line)
432                    && let Some(caps) = REFERENCE_DEFINITION_REGEX.captures(&line_info.content)
433                {
434                    let original_id = caps.get(1).unwrap().as_str().trim();
435                    let lower_id = original_id.to_lowercase();
436
437                    if let Some((first_original, first_line)) = seen_definitions.get(&lower_id) {
438                        // Found a case-variant duplicate
439                        if first_original != original_id {
440                            let line_num = start_line + 1;
441                            let line_content = &line_info.content;
442                            let (start_line_1idx, start_col, end_line, end_col) =
443                                calculate_line_range(line_num, line_content);
444
445                            warnings.push(LintWarning {
446                                    rule_name: Some(self.name()),
447                                    line: start_line_1idx,
448                                    column: start_col,
449                                    end_line,
450                                    end_column: end_col,
451                                    message: format!("Duplicate link or image reference definition: [{}] (conflicts with [{}] on line {})",
452                                                   original_id, first_original, first_line + 1),
453                                    severity: Severity::Warning,
454                                    fix: None,
455                                });
456                        }
457                    } else {
458                        seen_definitions.insert(lower_id, (original_id.to_string(), start_line));
459                    }
460                }
461            }
462        }
463
464        // Create warnings for unused references
465        for (definition, start, _end) in unused_refs {
466            let line_num = start + 1; // 1-indexed line numbers
467            let line_content = ctx.lines.get(start).map(|l| l.content.as_str()).unwrap_or("");
468
469            // Calculate precise character range for the entire reference definition line
470            let (start_line, start_col, end_line, end_col) = calculate_line_range(line_num, line_content);
471
472            warnings.push(LintWarning {
473                rule_name: Some(self.name()),
474                line: start_line,
475                column: start_col,
476                end_line,
477                end_column: end_col,
478                message: format!("Unused link/image reference: [{definition}]"),
479                severity: Severity::Warning,
480                fix: None, // MD053 is warning-only, no automatic fixes
481            });
482        }
483
484        Ok(warnings)
485    }
486
487    /// MD053 does not provide automatic fixes
488    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
489        // This rule is warning-only, no automatic fixes provided
490        Ok(ctx.content.to_string())
491    }
492
493    /// Check if this rule should be skipped for performance
494    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
495        // Skip if content is empty or has no links/images
496        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
497    }
498
    /// Exposes the rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
502
503    fn default_config_section(&self) -> Option<(String, toml::Value)> {
504        let default_config = MD053Config::default();
505        let json_value = serde_json::to_value(&default_config).ok()?;
506        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
507        if let toml::Value::Table(table) = toml_value {
508            if !table.is_empty() {
509                Some((MD053Config::RULE_NAME.to_string(), toml::Value::Table(table)))
510            } else {
511                None
512            }
513        } else {
514            None
515        }
516    }
517
518    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
519    where
520        Self: Sized,
521    {
522        let rule_config = crate::rule_config_serde::load_rule_config::<MD053Config>(config);
523        Box::new(MD053LinkImageReferenceDefinitions::from_config_struct(rule_config))
524    }
525}
526
527#[cfg(test)]
528mod tests {
529    use super::*;
530    use crate::lint_context::LintContext;
531
532    #[test]
533    fn test_used_reference_link() {
534        let rule = MD053LinkImageReferenceDefinitions::new();
535        let content = "[text][ref]\n\n[ref]: https://example.com";
536        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
537        let result = rule.check(&ctx).unwrap();
538
539        assert_eq!(result.len(), 0);
540    }
541
542    #[test]
543    fn test_unused_reference_definition() {
544        let rule = MD053LinkImageReferenceDefinitions::new();
545        let content = "[unused]: https://example.com";
546        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
547        let result = rule.check(&ctx).unwrap();
548
549        assert_eq!(result.len(), 1);
550        assert!(result[0].message.contains("Unused link/image reference: [unused]"));
551    }
552
553    #[test]
554    fn test_used_reference_image() {
555        let rule = MD053LinkImageReferenceDefinitions::new();
556        let content = "![alt][img]\n\n[img]: image.jpg";
557        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
558        let result = rule.check(&ctx).unwrap();
559
560        assert_eq!(result.len(), 0);
561    }
562
563    #[test]
564    fn test_case_insensitive_matching() {
565        let rule = MD053LinkImageReferenceDefinitions::new();
566        let content = "[Text][REF]\n\n[ref]: https://example.com";
567        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
568        let result = rule.check(&ctx).unwrap();
569
570        assert_eq!(result.len(), 0);
571    }
572
573    #[test]
574    fn test_shortcut_reference() {
575        let rule = MD053LinkImageReferenceDefinitions::new();
576        let content = "[ref]\n\n[ref]: https://example.com";
577        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
578        let result = rule.check(&ctx).unwrap();
579
580        assert_eq!(result.len(), 0);
581    }
582
583    #[test]
584    fn test_collapsed_reference() {
585        let rule = MD053LinkImageReferenceDefinitions::new();
586        let content = "[ref][]\n\n[ref]: https://example.com";
587        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
588        let result = rule.check(&ctx).unwrap();
589
590        assert_eq!(result.len(), 0);
591    }
592
593    #[test]
594    fn test_multiple_unused_definitions() {
595        let rule = MD053LinkImageReferenceDefinitions::new();
596        let content = "[unused1]: url1\n[unused2]: url2\n[unused3]: url3";
597        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
598        let result = rule.check(&ctx).unwrap();
599
600        assert_eq!(result.len(), 3);
601
602        // The warnings might not be in the same order, so collect all messages
603        let messages: Vec<String> = result.iter().map(|w| w.message.clone()).collect();
604        assert!(messages.iter().any(|m| m.contains("unused1")));
605        assert!(messages.iter().any(|m| m.contains("unused2")));
606        assert!(messages.iter().any(|m| m.contains("unused3")));
607    }
608
609    #[test]
610    fn test_mixed_used_and_unused() {
611        let rule = MD053LinkImageReferenceDefinitions::new();
612        let content = "[used]\n\n[used]: url1\n[unused]: url2";
613        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
614        let result = rule.check(&ctx).unwrap();
615
616        assert_eq!(result.len(), 1);
617        assert!(result[0].message.contains("unused"));
618    }
619
620    #[test]
621    fn test_multiline_definition() {
622        let rule = MD053LinkImageReferenceDefinitions::new();
623        let content = "[ref]: https://example.com\n  \"Title on next line\"";
624        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
625        let result = rule.check(&ctx).unwrap();
626
627        assert_eq!(result.len(), 1); // Still unused
628    }
629
630    #[test]
631    fn test_reference_in_code_block() {
632        let rule = MD053LinkImageReferenceDefinitions::new();
633        let content = "```\n[ref]\n```\n\n[ref]: https://example.com";
634        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
635        let result = rule.check(&ctx).unwrap();
636
637        // Reference used only in code block is still considered unused
638        assert_eq!(result.len(), 1);
639    }
640
641    #[test]
642    fn test_reference_in_inline_code() {
643        let rule = MD053LinkImageReferenceDefinitions::new();
644        let content = "`[ref]`\n\n[ref]: https://example.com";
645        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
646        let result = rule.check(&ctx).unwrap();
647
648        // Reference in inline code is not a usage
649        assert_eq!(result.len(), 1);
650    }
651
652    #[test]
653    fn test_escaped_reference() {
654        let rule = MD053LinkImageReferenceDefinitions::new();
655        let content = "[example\\-ref]\n\n[example-ref]: https://example.com";
656        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
657        let result = rule.check(&ctx).unwrap();
658
659        // Should match despite escaping
660        assert_eq!(result.len(), 0);
661    }
662
663    #[test]
664    fn test_duplicate_definitions() {
665        let rule = MD053LinkImageReferenceDefinitions::new();
666        let content = "[ref]: url1\n[ref]: url2\n\n[ref]";
667        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
668        let result = rule.check(&ctx).unwrap();
669
670        // Should flag the duplicate definition even though it's used (matches markdownlint)
671        assert_eq!(result.len(), 1);
672    }
673
674    #[test]
675    fn test_fix_returns_original() {
676        // MD053 is warning-only, fix should return original content
677        let rule = MD053LinkImageReferenceDefinitions::new();
678        let content = "[used]\n\n[used]: url1\n[unused]: url2\n\nMore content";
679        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
680        let fixed = rule.fix(&ctx).unwrap();
681
682        assert_eq!(fixed, content);
683    }
684
685    #[test]
686    fn test_fix_preserves_content() {
687        // MD053 is warning-only, fix should preserve all content
688        let rule = MD053LinkImageReferenceDefinitions::new();
689        let content = "Content\n\n[unused]: url\n\nMore content";
690        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
691        let fixed = rule.fix(&ctx).unwrap();
692
693        assert_eq!(fixed, content);
694    }
695
696    #[test]
697    fn test_fix_does_not_remove() {
698        // MD053 is warning-only, fix should not remove anything
699        let rule = MD053LinkImageReferenceDefinitions::new();
700        let content = "[unused1]: url1\n[unused2]: url2\n[unused3]: url3";
701        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
702        let fixed = rule.fix(&ctx).unwrap();
703
704        assert_eq!(fixed, content);
705    }
706
707    #[test]
708    fn test_special_characters_in_reference() {
709        let rule = MD053LinkImageReferenceDefinitions::new();
710        let content = "[ref-with_special.chars]\n\n[ref-with_special.chars]: url";
711        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
712        let result = rule.check(&ctx).unwrap();
713
714        assert_eq!(result.len(), 0);
715    }
716
717    #[test]
718    fn test_find_definitions() {
719        let rule = MD053LinkImageReferenceDefinitions::new();
720        let content = "[ref1]: url1\n[ref2]: url2\nSome text\n[ref3]: url3";
721        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
722        let defs = rule.find_definitions(&ctx);
723
724        assert_eq!(defs.len(), 3);
725        assert!(defs.contains_key("ref1"));
726        assert!(defs.contains_key("ref2"));
727        assert!(defs.contains_key("ref3"));
728    }
729
730    #[test]
731    fn test_find_usages() {
732        let rule = MD053LinkImageReferenceDefinitions::new();
733        let content = "[text][ref1] and [ref2] and ![img][ref3]";
734        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
735        let usages = rule.find_usages(&ctx);
736
737        assert!(usages.contains("ref1"));
738        assert!(usages.contains("ref2"));
739        assert!(usages.contains("ref3"));
740    }
741
742    #[test]
743    fn test_ignored_definitions_config() {
744        // Test with ignored definitions
745        let config = MD053Config {
746            ignored_definitions: vec!["todo".to_string(), "draft".to_string()],
747        };
748        let rule = MD053LinkImageReferenceDefinitions::from_config_struct(config);
749
750        let content = "[todo]: https://example.com/todo\n[draft]: https://example.com/draft\n[unused]: https://example.com/unused";
751        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
752        let result = rule.check(&ctx).unwrap();
753
754        // Should only flag "unused", not "todo" or "draft"
755        assert_eq!(result.len(), 1);
756        assert!(result[0].message.contains("unused"));
757        assert!(!result[0].message.contains("todo"));
758        assert!(!result[0].message.contains("draft"));
759    }
760
761    #[test]
762    fn test_ignored_definitions_case_insensitive() {
763        // Test case-insensitive matching of ignored definitions
764        let config = MD053Config {
765            ignored_definitions: vec!["TODO".to_string()],
766        };
767        let rule = MD053LinkImageReferenceDefinitions::from_config_struct(config);
768
769        let content = "[todo]: https://example.com/todo\n[unused]: https://example.com/unused";
770        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
771        let result = rule.check(&ctx).unwrap();
772
773        // Should only flag "unused", not "todo" (matches "TODO" case-insensitively)
774        assert_eq!(result.len(), 1);
775        assert!(result[0].message.contains("unused"));
776        assert!(!result[0].message.contains("todo"));
777    }
778
779    #[test]
780    fn test_default_config_section() {
781        let rule = MD053LinkImageReferenceDefinitions::default();
782        let config_section = rule.default_config_section();
783
784        assert!(config_section.is_some());
785        let (name, value) = config_section.unwrap();
786        assert_eq!(name, "MD053");
787
788        // Should contain the ignored_definitions option with default empty array
789        if let toml::Value::Table(table) = value {
790            assert!(table.contains_key("ignored-definitions"));
791            assert_eq!(table["ignored-definitions"], toml::Value::Array(vec![]));
792        } else {
793            panic!("Expected TOML table");
794        }
795    }
796
797    #[test]
798    fn test_fix_with_ignored_definitions() {
799        // MD053 is warning-only, fix should not remove anything even with ignored definitions
800        let config = MD053Config {
801            ignored_definitions: vec!["template".to_string()],
802        };
803        let rule = MD053LinkImageReferenceDefinitions::from_config_struct(config);
804
805        let content = "[template]: https://example.com/template\n[unused]: https://example.com/unused\n\nSome content.";
806        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
807        let fixed = rule.fix(&ctx).unwrap();
808
809        // Should keep everything since MD053 doesn't fix
810        assert_eq!(fixed, content);
811    }
812
813    #[test]
814    fn test_duplicate_definitions_exact_case() {
815        let rule = MD053LinkImageReferenceDefinitions::new();
816        let content = "[ref]: url1\n[ref]: url2\n[ref]: url3";
817        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
818        let result = rule.check(&ctx).unwrap();
819
820        // Should have 2 duplicate warnings (for the 2nd and 3rd definitions)
821        // Plus 1 unused warning
822        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
823        assert_eq!(duplicate_warnings.len(), 2);
824        assert_eq!(duplicate_warnings[0].line, 2);
825        assert_eq!(duplicate_warnings[1].line, 3);
826    }
827
828    #[test]
829    fn test_duplicate_definitions_case_variants() {
830        let rule = MD053LinkImageReferenceDefinitions::new();
831        let content =
832            "[method resolution order]: url1\n[Method Resolution Order]: url2\n[METHOD RESOLUTION ORDER]: url3";
833        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
834        let result = rule.check(&ctx).unwrap();
835
836        // Should have 2 duplicate warnings (for the 2nd and 3rd definitions)
837        // Note: These are treated as exact duplicates since they normalize to the same ID
838        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
839        assert_eq!(duplicate_warnings.len(), 2);
840
841        // The exact duplicate messages don't include "conflicts with"
842        // Only case-variant duplicates with different normalized forms would
843        assert_eq!(duplicate_warnings[0].line, 2);
844        assert_eq!(duplicate_warnings[1].line, 3);
845    }
846
847    #[test]
848    fn test_duplicate_and_unused() {
849        let rule = MD053LinkImageReferenceDefinitions::new();
850        let content = "[used]\n[used]: url1\n[used]: url2\n[unused]: url3";
851        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
852        let result = rule.check(&ctx).unwrap();
853
854        // Should have 1 duplicate warning and 1 unused warning
855        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
856        let unused_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Unused")).collect();
857
858        assert_eq!(duplicate_warnings.len(), 1);
859        assert_eq!(unused_warnings.len(), 1);
860        assert_eq!(duplicate_warnings[0].line, 3); // Second [used] definition
861        assert_eq!(unused_warnings[0].line, 4); // [unused] definition
862    }
863
864    #[test]
865    fn test_duplicate_with_usage() {
866        let rule = MD053LinkImageReferenceDefinitions::new();
867        // Even if used, duplicates should still be reported
868        let content = "[ref]\n\n[ref]: url1\n[ref]: url2";
869        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
870        let result = rule.check(&ctx).unwrap();
871
872        // Should have 1 duplicate warning (no unused since it's referenced)
873        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
874        let unused_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Unused")).collect();
875
876        assert_eq!(duplicate_warnings.len(), 1);
877        assert_eq!(unused_warnings.len(), 0);
878        assert_eq!(duplicate_warnings[0].line, 4);
879    }
880
881    #[test]
882    fn test_no_duplicate_different_ids() {
883        let rule = MD053LinkImageReferenceDefinitions::new();
884        let content = "[ref1]: url1\n[ref2]: url2\n[ref3]: url3";
885        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
886        let result = rule.check(&ctx).unwrap();
887
888        // Should have no duplicate warnings, only unused warnings
889        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
890        assert_eq!(duplicate_warnings.len(), 0);
891    }
892}