rumdl_lib/rules/
md053_link_image_reference_definitions.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::rule_config_serde::RuleConfig;
3use crate::utils::document_structure::DocumentStructure;
4use crate::utils::range_utils::calculate_line_range;
5use fancy_regex::Regex as FancyRegex;
6use lazy_static::lazy_static;
7use regex::Regex;
8use serde::{Deserialize, Serialize};
9use std::collections::{HashMap, HashSet};
10
lazy_static! {
    // Full (`[text][reference]`), collapsed (`[text][]`) and image
    // (`![alt][reference]`) references have no pattern in this block: `find_usages`
    // reads them from the `links` and `images` collections on `LintContext`.
    // The regexes that used to match them here were removed.

    // Shortcut reference: `[reference]`.
    // - `(?<!\!)` rejects a bracket that directly follows `!`.
    // - `(?!\[)` rejects a label that is directly followed by another `[`.
    // Any other trailing character (colon, period, comma, ...) is accepted, so
    // "[reference]:" in the middle of a line still matches. Lines that are
    // themselves definitions are filtered out separately by `find_usages`.
    // The look-around assertions are the reason this pattern uses `fancy_regex`.
    static ref SHORTCUT_REFERENCE_REGEX: FancyRegex =
        FancyRegex::new(r"(?<!\!)\[([^\]]+)\](?!\[)").unwrap();

    // Link/image reference definition: `[reference]: URL`, with optional leading
    // whitespace. Capture 1 is the label; capture 2 is the rest of the line after
    // the whitespace that must follow the colon.
    static ref REFERENCE_DEFINITION_REGEX: Regex =
        Regex::new(r"^\s*\[([^\]]+)\]:\s+(.+)$").unwrap();

    // Candidate continuation line of a multi-line definition: the line starts
    // with whitespace and contains at least one further character.
    static ref CONTINUATION_REGEX: Regex = Regex::new(r"^\s+(.+)$").unwrap();

    // Fenced code block delimiters: optional indentation (capture 1) followed by
    // three or more backticks or tildes (capture 2). Leading whitespace is accepted
    // to support indented code blocks for MkDocs tabs. The END variant additionally
    // requires that only whitespace follows the fence.
    static ref CODE_BLOCK_START_REGEX: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap();
    static ref CODE_BLOCK_END_REGEX: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,})\s*$").unwrap();
}
39
/// Configuration for MD053 rule
///
/// Field names are (de)serialized in kebab-case, so `ignored_definitions` appears
/// as `ignored-definitions` in serialized form.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "kebab-case")]
pub struct MD053Config {
    /// List of reference names to keep even if unused.
    ///
    /// Names are compared case-insensitively against definition labels. A matching
    /// definition is exempt from both the unused and the duplicate checks.
    /// Defaults to an empty list when the key is absent.
    #[serde(default = "default_ignored_definitions")]
    pub ignored_definitions: Vec<String>,
}
48
49impl Default for MD053Config {
50    fn default() -> Self {
51        Self {
52            ignored_definitions: default_ignored_definitions(),
53        }
54    }
55}
56
/// Default for `MD053Config::ignored_definitions`: no definition is exempt.
fn default_ignored_definitions() -> Vec<String> {
    Vec::default()
}
60
// Associates `MD053Config` with the rule name used for this rule's configuration.
impl RuleConfig for MD053Config {
    /// Name of the rule this configuration belongs to.
    const RULE_NAME: &'static str = "MD053";
}
64
/// Rule MD053: Link and image reference definitions should be used
///
/// See [docs/md053.md](../../docs/md053.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when a link or image reference definition is declared but not used
/// anywhere in the document. Unused reference definitions can create confusion and clutter.
/// It also reports a label that is defined more than once.
///
/// ## Supported Reference Formats
///
/// This rule handles the following reference formats:
///
/// - **Full reference links/images**: `[text][reference]` or `![text][reference]`
/// - **Collapsed reference links/images**: `[text][]` or `![text][]`
/// - **Shortcut reference links**: `[reference]` (must be defined elsewhere)
/// - **Reference definitions**: `[reference]: URL "Optional Title"`
/// - **Multi-line reference definitions**:
///   ```markdown
///   [reference]: URL
///      "Optional title continued on next line"
///   ```
///
/// ## Configuration Options
///
/// The rule supports the following configuration options:
///
/// ```yaml
/// MD053:
///   ignored_definitions: []  # List of reference definitions to ignore (never report as unused)
/// ```
///
/// NOTE(review): `MD053Config` is declared with `rename_all = "kebab-case"`, so the
/// serialized key is `ignored-definitions`; confirm which spelling the config loader
/// accepts and align the example above.
///
/// ## Performance Notes
///
/// 1. **Precomputed context**: definitions, links, images and code spans are taken from
///    `LintContext` instead of being re-parsed by this rule
/// 2. **Efficient Reference Matching**: definitions live in a `HashMap` and usages in a
///    `HashSet`, keyed by normalized label
/// 3. **Code Block Handling**: lines flagged as inside a code block, and matches inside
///    code spans, are skipped
/// 4. **Early exit**: `should_skip` bypasses documents that contain no `]:` sequence
///
/// ## Edge Cases Handled
///
/// - **Case insensitivity**: labels are lowercased before definitions and usages are compared
/// - **Escaped characters**: backslash escapes in labels are removed before comparison
/// - **Code blocks**: references inside code blocks and code spans do not count as usages
/// - **Non-reference brackets**: bracketed text such as `[1:3]`, `[*]` or `[!NOTE]` is not
///   treated as a shortcut reference
///
/// ## Fix Behavior
///
/// This rule does not provide automatic fixes. Unused references must be manually reviewed
/// and removed, as they may be intentionally kept for future use or as templates.
#[derive(Clone)]
pub struct MD053LinkImageReferenceDefinitions {
    /// User configuration; currently only the list of definitions to ignore.
    config: MD053Config,
}
120
121impl MD053LinkImageReferenceDefinitions {
122    /// Create a new instance of the MD053 rule
123    pub fn new() -> Self {
124        Self {
125            config: MD053Config::default(),
126        }
127    }
128
    /// Create a new instance with the given configuration.
    ///
    /// `config` is stored unchanged. Labels listed in its `ignored_definitions` are
    /// exempt from both the unused and the duplicate checks.
    pub fn from_config_struct(config: MD053Config) -> Self {
        Self { config }
    }
133
134    /// Check if a pattern is likely NOT a markdown reference
135    /// Returns true if this pattern should be skipped
136    fn is_likely_not_reference(text: &str) -> bool {
137        // Don't skip pure numeric patterns - they could be footnote references like [1]
138        // Only skip numeric ranges like [1:3], [0:10], etc.
139        if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
140            return true;
141        }
142
143        // Skip glob/wildcard patterns like [*], [...], [**]
144        if text == "*" || text == "..." || text == "**" {
145            return true;
146        }
147
148        // Skip patterns that are just punctuation or operators
149        if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
150            return true;
151        }
152
153        // Skip very short non-word patterns (likely operators or syntax)
154        // But allow single digits (could be footnotes) and single letters
155        if text.len() <= 2 && !text.chars().all(|c| c.is_alphanumeric()) {
156            return true;
157        }
158
159        // Skip descriptive patterns with colon like [default: the project root]
160        // But allow simple numeric ranges which are handled above
161        if text.contains(':') && text.contains(' ') {
162            return true;
163        }
164
165        // Skip alert/admonition patterns like [!WARN], [!NOTE], etc.
166        if text.starts_with('!') {
167            return true;
168        }
169
170        // Note: We don't filter out patterns with backticks because backticks in reference names
171        // are valid markdown syntax, e.g., [`dataclasses.InitVar`] is a valid reference name
172
173        // Also don't filter out references with dots - these are legitimate reference names
174        // like [tool.ruff] or [os.path] which are valid markdown references
175
176        // Note: We don't filter based on word count anymore because legitimate references
177        // can have many words, like "python language reference for import statements"
178        // Word count filtering was causing false positives where valid references were
179        // being incorrectly flagged as unused
180
181        false
182    }
183
184    /// Unescape a reference string by removing backslashes before special characters.
185    ///
186    /// This allows matching references like `[example\-reference]` with definitions like
187    /// `[example-reference]: http://example.com`
188    ///
189    /// Returns the unescaped reference string.
190    fn unescape_reference(reference: &str) -> String {
191        // Remove backslashes before special characters
192        reference.replace("\\", "")
193    }
194
195    /// Find all link and image reference definitions in the content.
196    ///
197    /// This method returns a HashMap where the key is the normalized reference ID and the value is a vector of (start_line, end_line) tuples.
198    fn find_definitions(
199        &self,
200        ctx: &crate::lint_context::LintContext,
201        doc_structure: &DocumentStructure,
202    ) -> HashMap<String, Vec<(usize, usize)>> {
203        let mut definitions: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
204
205        // First, add all reference definitions from context
206        for ref_def in &ctx.reference_defs {
207            // Apply unescape to handle escaped characters in definitions
208            let normalized_id = Self::unescape_reference(&ref_def.id); // Already lowercase from context
209            definitions
210                .entry(normalized_id)
211                .or_default()
212                .push((ref_def.line - 1, ref_def.line - 1)); // Convert to 0-indexed
213        }
214
215        // Handle multi-line definitions that might not be fully captured by ctx.reference_defs
216        let lines = &ctx.lines;
217        let mut i = 0;
218        while i < lines.len() {
219            let line_info = &lines[i];
220            let line = &line_info.content;
221
222            // Skip code blocks and front matter using line info
223            if line_info.in_code_block || doc_structure.is_in_front_matter(i + 1) {
224                i += 1;
225                continue;
226            }
227
228            // Check for multi-line continuation of existing definitions
229            if i > 0 && CONTINUATION_REGEX.is_match(line) {
230                // Find the reference definition this continues
231                let mut def_start = i - 1;
232                while def_start > 0 && !REFERENCE_DEFINITION_REGEX.is_match(&lines[def_start].content) {
233                    def_start -= 1;
234                }
235
236                if let Some(caps) = REFERENCE_DEFINITION_REGEX.captures(&lines[def_start].content) {
237                    let ref_id = caps.get(1).unwrap().as_str().trim();
238                    let normalized_id = Self::unescape_reference(ref_id).to_lowercase();
239
240                    // Update the end line for this definition
241                    if let Some(ranges) = definitions.get_mut(&normalized_id)
242                        && let Some(last_range) = ranges.last_mut()
243                        && last_range.0 == def_start
244                    {
245                        last_range.1 = i;
246                    }
247                }
248            }
249            i += 1;
250        }
251        definitions
252    }
253
254    /// Find all link and image reference reference usages in the content.
255    ///
256    /// This method returns a HashSet of all normalized reference IDs found in usage.
257    /// It leverages cached data from LintContext for efficiency.
258    fn find_usages(
259        &self,
260        doc_structure: &DocumentStructure,
261        ctx: &crate::lint_context::LintContext,
262    ) -> HashSet<String> {
263        let mut usages: HashSet<String> = HashSet::new();
264
265        // 1. Add usages from cached reference links in LintContext
266        for link in &ctx.links {
267            if link.is_reference
268                && let Some(ref_id) = &link.reference_id
269            {
270                // Ensure the link itself is not inside a code block line
271                if !doc_structure.is_in_code_block(link.line) {
272                    usages.insert(Self::unescape_reference(ref_id).to_lowercase());
273                }
274            }
275        }
276
277        // 2. Add usages from cached reference images in LintContext
278        for image in &ctx.images {
279            if image.is_reference
280                && let Some(ref_id) = &image.reference_id
281            {
282                // Ensure the image itself is not inside a code block line
283                if !doc_structure.is_in_code_block(image.line) {
284                    usages.insert(Self::unescape_reference(ref_id).to_lowercase());
285                }
286            }
287        }
288
289        // 3. Find shortcut references [ref] not already handled by DocumentStructure.links
290        //    and ensure they are not within code spans or code blocks.
291        // Cache code spans once before the loop
292        let code_spans = ctx.code_spans();
293
294        for (i, line_info) in ctx.lines.iter().enumerate() {
295            let line_num = i + 1; // 1-indexed
296
297            // Skip lines in code blocks or front matter
298            if line_info.in_code_block || doc_structure.is_in_front_matter(line_num) {
299                continue;
300            }
301
302            // Skip lines that are reference definitions (start with [ref]: at beginning)
303            if REFERENCE_DEFINITION_REGEX.is_match(&line_info.content) {
304                continue;
305            }
306
307            // Find potential shortcut references
308            for caps in SHORTCUT_REFERENCE_REGEX.captures_iter(&line_info.content).flatten() {
309                if let Some(full_match) = caps.get(0)
310                    && let Some(ref_id_match) = caps.get(1)
311                {
312                    // Check if the match is within a code span
313                    let match_byte_offset = line_info.byte_offset + full_match.start();
314                    let in_code_span = code_spans
315                        .iter()
316                        .any(|span| match_byte_offset >= span.byte_offset && match_byte_offset < span.byte_end);
317
318                    if !in_code_span {
319                        let ref_id = ref_id_match.as_str().trim();
320
321                        // Skip patterns that are likely not markdown references
322                        if !Self::is_likely_not_reference(ref_id) {
323                            let normalized_id = Self::unescape_reference(ref_id).to_lowercase();
324                            usages.insert(normalized_id);
325                        }
326                    }
327                }
328            }
329        }
330
331        // NOTE: The complex recursive loop trying to find references within definitions
332        // has been removed as it's not standard Markdown behavior for finding *usages*.
333        // Usages refer to `[text][ref]`, `![alt][ref]`, `[ref]`, etc., in the main content,
334        // not references potentially embedded within the URL or title of another definition.
335
336        usages
337    }
338
339    /// Get unused references with their line ranges.
340    ///
341    /// This method uses the cached definitions to improve performance.
342    ///
343    /// Note: References that are only used inside code blocks are still considered unused,
344    /// as code blocks are treated as examples or documentation rather than actual content.
345    fn get_unused_references(
346        &self,
347        definitions: &HashMap<String, Vec<(usize, usize)>>,
348        usages: &HashSet<String>,
349    ) -> Vec<(String, usize, usize)> {
350        let mut unused = Vec::new();
351        for (id, ranges) in definitions {
352            // If this id is not used anywhere and is not in the ignored list
353            if !usages.contains(id) && !self.is_ignored_definition(id) {
354                // Only report as unused if there's exactly one definition
355                // Multiple definitions are already reported as duplicates
356                if ranges.len() == 1 {
357                    let (start, end) = ranges[0];
358                    unused.push((id.clone(), start, end));
359                }
360                // If there are multiple definitions (duplicates), don't report them as unused
361                // They're already being reported as duplicate definitions
362            }
363        }
364        unused
365    }
366
367    /// Check if a definition should be ignored (kept even if unused)
368    fn is_ignored_definition(&self, definition_id: &str) -> bool {
369        self.config
370            .ignored_definitions
371            .iter()
372            .any(|ignored| ignored.eq_ignore_ascii_case(definition_id))
373    }
374}
375
376impl Default for MD053LinkImageReferenceDefinitions {
377    fn default() -> Self {
378        Self::new()
379    }
380}
381
382impl Rule for MD053LinkImageReferenceDefinitions {
383    fn name(&self) -> &'static str {
384        "MD053"
385    }
386
    /// One-line, human-readable summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Link and image reference definitions should be needed"
    }
390
391    /// Check the content for unused and duplicate link/image reference definitions.
392    ///
393    /// This implementation uses caching for improved performance on large documents.
394    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
395        let content = ctx.content;
396        // Compute DocumentStructure once
397        let doc_structure = DocumentStructure::new(content);
398
399        // Find definitions and usages using DocumentStructure
400        let definitions = self.find_definitions(ctx, &doc_structure);
401        let usages = self.find_usages(&doc_structure, ctx);
402
403        // Get unused references by comparing definitions and usages
404        let unused_refs = self.get_unused_references(&definitions, &usages);
405
406        let mut warnings = Vec::new();
407
408        // Check for duplicate definitions (case-insensitive per CommonMark spec)
409        let mut seen_definitions: HashMap<String, (String, usize)> = HashMap::new(); // lowercase -> (original, first_line)
410
411        for (definition_id, ranges) in &definitions {
412            // Skip ignored definitions for duplicate checking
413            if self.is_ignored_definition(definition_id) {
414                continue;
415            }
416
417            if ranges.len() > 1 {
418                // Multiple definitions with exact same ID (already lowercase)
419                for (i, &(start_line, _)) in ranges.iter().enumerate() {
420                    if i > 0 {
421                        // Skip the first occurrence, report all others
422                        let line_num = start_line + 1;
423                        let line_content = ctx.lines.get(start_line).map(|l| l.content.as_str()).unwrap_or("");
424                        let (start_line_1idx, start_col, end_line, end_col) =
425                            calculate_line_range(line_num, line_content);
426
427                        warnings.push(LintWarning {
428                            rule_name: Some(self.name()),
429                            line: start_line_1idx,
430                            column: start_col,
431                            end_line,
432                            end_column: end_col,
433                            message: format!("Duplicate link or image reference definition: [{definition_id}]"),
434                            severity: Severity::Warning,
435                            fix: None,
436                        });
437                    }
438                }
439            }
440
441            // Track for case-variant duplicates
442            if let Some(&(start_line, _)) = ranges.first() {
443                // Find the original case version from the line
444                if let Some(line_info) = ctx.lines.get(start_line)
445                    && let Some(caps) = REFERENCE_DEFINITION_REGEX.captures(&line_info.content)
446                {
447                    let original_id = caps.get(1).unwrap().as_str().trim();
448                    let lower_id = original_id.to_lowercase();
449
450                    if let Some((first_original, first_line)) = seen_definitions.get(&lower_id) {
451                        // Found a case-variant duplicate
452                        if first_original != original_id {
453                            let line_num = start_line + 1;
454                            let line_content = &line_info.content;
455                            let (start_line_1idx, start_col, end_line, end_col) =
456                                calculate_line_range(line_num, line_content);
457
458                            warnings.push(LintWarning {
459                                    rule_name: Some(self.name()),
460                                    line: start_line_1idx,
461                                    column: start_col,
462                                    end_line,
463                                    end_column: end_col,
464                                    message: format!("Duplicate link or image reference definition: [{}] (conflicts with [{}] on line {})",
465                                                   original_id, first_original, first_line + 1),
466                                    severity: Severity::Warning,
467                                    fix: None,
468                                });
469                        }
470                    } else {
471                        seen_definitions.insert(lower_id, (original_id.to_string(), start_line));
472                    }
473                }
474            }
475        }
476
477        // Create warnings for unused references
478        for (definition, start, _end) in unused_refs {
479            let line_num = start + 1; // 1-indexed line numbers
480            let line_content = ctx.lines.get(start).map(|l| l.content.as_str()).unwrap_or("");
481
482            // Calculate precise character range for the entire reference definition line
483            let (start_line, start_col, end_line, end_col) = calculate_line_range(line_num, line_content);
484
485            warnings.push(LintWarning {
486                rule_name: Some(self.name()),
487                line: start_line,
488                column: start_col,
489                end_line,
490                end_column: end_col,
491                message: format!("Unused link/image reference: [{definition}]"),
492                severity: Severity::Warning,
493                fix: None, // MD053 is warning-only, no automatic fixes
494            });
495        }
496
497        Ok(warnings)
498    }
499
500    /// MD053 does not provide automatic fixes
501    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
502        // This rule is warning-only, no automatic fixes provided
503        Ok(ctx.content.to_string())
504    }
505
506    /// Check if this rule should be skipped for performance
507    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
508        // Skip if content is empty or has no reference definitions
509        ctx.content.is_empty() || !ctx.content.contains("]:")
510    }
511
    /// Expose the rule as `&dyn Any`, which permits downcasting to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
515
516    fn default_config_section(&self) -> Option<(String, toml::Value)> {
517        let default_config = MD053Config::default();
518        let json_value = serde_json::to_value(&default_config).ok()?;
519        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
520        if let toml::Value::Table(table) = toml_value {
521            if !table.is_empty() {
522                Some((MD053Config::RULE_NAME.to_string(), toml::Value::Table(table)))
523            } else {
524                None
525            }
526        } else {
527            None
528        }
529    }
530
531    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
532    where
533        Self: Sized,
534    {
535        let rule_config = crate::rule_config_serde::load_rule_config::<MD053Config>(config);
536        Box::new(MD053LinkImageReferenceDefinitions::from_config_struct(rule_config))
537    }
538}
539
540#[cfg(test)]
541mod tests {
542    use super::*;
543    use crate::lint_context::LintContext;
544
545    #[test]
546    fn test_used_reference_link() {
547        let rule = MD053LinkImageReferenceDefinitions::new();
548        let content = "[text][ref]\n\n[ref]: https://example.com";
549        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
550        let result = rule.check(&ctx).unwrap();
551
552        assert_eq!(result.len(), 0);
553    }
554
555    #[test]
556    fn test_unused_reference_definition() {
557        let rule = MD053LinkImageReferenceDefinitions::new();
558        let content = "[unused]: https://example.com";
559        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
560        let result = rule.check(&ctx).unwrap();
561
562        assert_eq!(result.len(), 1);
563        assert!(result[0].message.contains("Unused link/image reference: [unused]"));
564    }
565
566    #[test]
567    fn test_used_reference_image() {
568        let rule = MD053LinkImageReferenceDefinitions::new();
569        let content = "![alt][img]\n\n[img]: image.jpg";
570        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
571        let result = rule.check(&ctx).unwrap();
572
573        assert_eq!(result.len(), 0);
574    }
575
576    #[test]
577    fn test_case_insensitive_matching() {
578        let rule = MD053LinkImageReferenceDefinitions::new();
579        let content = "[Text][REF]\n\n[ref]: https://example.com";
580        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
581        let result = rule.check(&ctx).unwrap();
582
583        assert_eq!(result.len(), 0);
584    }
585
586    #[test]
587    fn test_shortcut_reference() {
588        let rule = MD053LinkImageReferenceDefinitions::new();
589        let content = "[ref]\n\n[ref]: https://example.com";
590        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
591        let result = rule.check(&ctx).unwrap();
592
593        assert_eq!(result.len(), 0);
594    }
595
596    #[test]
597    fn test_collapsed_reference() {
598        let rule = MD053LinkImageReferenceDefinitions::new();
599        let content = "[ref][]\n\n[ref]: https://example.com";
600        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
601        let result = rule.check(&ctx).unwrap();
602
603        assert_eq!(result.len(), 0);
604    }
605
606    #[test]
607    fn test_multiple_unused_definitions() {
608        let rule = MD053LinkImageReferenceDefinitions::new();
609        let content = "[unused1]: url1\n[unused2]: url2\n[unused3]: url3";
610        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
611        let result = rule.check(&ctx).unwrap();
612
613        assert_eq!(result.len(), 3);
614
615        // The warnings might not be in the same order, so collect all messages
616        let messages: Vec<String> = result.iter().map(|w| w.message.clone()).collect();
617        assert!(messages.iter().any(|m| m.contains("unused1")));
618        assert!(messages.iter().any(|m| m.contains("unused2")));
619        assert!(messages.iter().any(|m| m.contains("unused3")));
620    }
621
622    #[test]
623    fn test_mixed_used_and_unused() {
624        let rule = MD053LinkImageReferenceDefinitions::new();
625        let content = "[used]\n\n[used]: url1\n[unused]: url2";
626        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
627        let result = rule.check(&ctx).unwrap();
628
629        assert_eq!(result.len(), 1);
630        assert!(result[0].message.contains("unused"));
631    }
632
633    #[test]
634    fn test_multiline_definition() {
635        let rule = MD053LinkImageReferenceDefinitions::new();
636        let content = "[ref]: https://example.com\n  \"Title on next line\"";
637        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
638        let result = rule.check(&ctx).unwrap();
639
640        assert_eq!(result.len(), 1); // Still unused
641    }
642
643    #[test]
644    fn test_reference_in_code_block() {
645        let rule = MD053LinkImageReferenceDefinitions::new();
646        let content = "```\n[ref]\n```\n\n[ref]: https://example.com";
647        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
648        let result = rule.check(&ctx).unwrap();
649
650        // Reference used only in code block is still considered unused
651        assert_eq!(result.len(), 1);
652    }
653
654    #[test]
655    fn test_reference_in_inline_code() {
656        let rule = MD053LinkImageReferenceDefinitions::new();
657        let content = "`[ref]`\n\n[ref]: https://example.com";
658        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
659        let result = rule.check(&ctx).unwrap();
660
661        // Reference in inline code is not a usage
662        assert_eq!(result.len(), 1);
663    }
664
665    #[test]
666    fn test_escaped_reference() {
667        let rule = MD053LinkImageReferenceDefinitions::new();
668        let content = "[example\\-ref]\n\n[example-ref]: https://example.com";
669        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
670        let result = rule.check(&ctx).unwrap();
671
672        // Should match despite escaping
673        assert_eq!(result.len(), 0);
674    }
675
676    #[test]
677    fn test_duplicate_definitions() {
678        let rule = MD053LinkImageReferenceDefinitions::new();
679        let content = "[ref]: url1\n[ref]: url2\n\n[ref]";
680        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
681        let result = rule.check(&ctx).unwrap();
682
683        // Should flag the duplicate definition even though it's used (matches markdownlint)
684        assert_eq!(result.len(), 1);
685    }
686
687    #[test]
688    fn test_fix_returns_original() {
689        // MD053 is warning-only, fix should return original content
690        let rule = MD053LinkImageReferenceDefinitions::new();
691        let content = "[used]\n\n[used]: url1\n[unused]: url2\n\nMore content";
692        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
693        let fixed = rule.fix(&ctx).unwrap();
694
695        assert_eq!(fixed, content);
696    }
697
698    #[test]
699    fn test_fix_preserves_content() {
700        // MD053 is warning-only, fix should preserve all content
701        let rule = MD053LinkImageReferenceDefinitions::new();
702        let content = "Content\n\n[unused]: url\n\nMore content";
703        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
704        let fixed = rule.fix(&ctx).unwrap();
705
706        assert_eq!(fixed, content);
707    }
708
709    #[test]
710    fn test_fix_does_not_remove() {
711        // MD053 is warning-only, fix should not remove anything
712        let rule = MD053LinkImageReferenceDefinitions::new();
713        let content = "[unused1]: url1\n[unused2]: url2\n[unused3]: url3";
714        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
715        let fixed = rule.fix(&ctx).unwrap();
716
717        assert_eq!(fixed, content);
718    }
719
720    #[test]
721    fn test_special_characters_in_reference() {
722        let rule = MD053LinkImageReferenceDefinitions::new();
723        let content = "[ref-with_special.chars]\n\n[ref-with_special.chars]: url";
724        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
725        let result = rule.check(&ctx).unwrap();
726
727        assert_eq!(result.len(), 0);
728    }
729
#[test]
fn test_find_definitions() {
    // Three definitions; the last one is separated from the others by a plain text line.
    let markdown = "[ref1]: url1\n[ref2]: url2\nSome text\n[ref3]: url3";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let structure = DocumentStructure::new(markdown);
    let linter = MD053LinkImageReferenceDefinitions::new();

    let found = linter.find_definitions(&ctx, &structure);

    assert_eq!(found.len(), 3);
    for id in &["ref1", "ref2", "ref3"] {
        assert!(found.contains_key(*id));
    }
}
743
#[test]
fn test_find_usages() {
    // One line exercising a full reference, a shortcut reference and an image reference.
    let markdown = "[text][ref1] and [ref2] and ![img][ref3]";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let structure = DocumentStructure::new(markdown);
    let linter = MD053LinkImageReferenceDefinitions::new();

    let found = linter.find_usages(&structure, &ctx);

    for id in &["ref1", "ref2", "ref3"] {
        assert!(found.contains(*id));
    }
}
756
#[test]
fn test_ignored_definitions_config() {
    // "todo" and "draft" are on the ignore list; "unused" is not.
    let linter = MD053LinkImageReferenceDefinitions::from_config_struct(MD053Config {
        ignored_definitions: vec![String::from("todo"), String::from("draft")],
    });

    let markdown = "[todo]: https://example.com/todo\n[draft]: https://example.com/draft\n[unused]: https://example.com/unused";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    // Exactly one warning is expected, and it must be about "unused" only.
    assert_eq!(warnings.len(), 1);
    let message = &warnings[0].message;
    assert!(message.contains("unused"));
    assert!(!message.contains("todo"));
    assert!(!message.contains("draft"));
}
775
#[test]
fn test_ignored_definitions_case_insensitive() {
    // The ignore list says "TODO" while the document defines "[todo]";
    // the two must match regardless of case.
    let linter = MD053LinkImageReferenceDefinitions::from_config_struct(MD053Config {
        ignored_definitions: vec![String::from("TODO")],
    });

    let markdown = "[todo]: https://example.com/todo\n[unused]: https://example.com/unused";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    // Only the "unused" definition may be reported.
    assert_eq!(warnings.len(), 1);
    let message = &warnings[0].message;
    assert!(message.contains("unused"));
    assert!(!message.contains("todo"));
}
793
#[test]
fn test_default_config_section() {
    let linter = MD053LinkImageReferenceDefinitions::default();

    // A missing section makes `unwrap` panic, which fails the test just like an explicit check.
    let (section_name, section_value) = linter.default_config_section().unwrap();
    assert_eq!(section_name, "MD053");

    // The section must be a table whose `ignored-definitions` entry defaults to an empty array.
    match section_value {
        toml::Value::Table(settings) => {
            assert_eq!(
                settings.get("ignored-definitions"),
                Some(&toml::Value::Array(Vec::new()))
            );
        }
        _ => panic!("Expected TOML table"),
    }
}
811
#[test]
fn test_fix_with_ignored_definitions() {
    // Configuring an ignore list must not turn `fix` into a rewriting operation:
    // both the ignored and the unused definition stay in place.
    let linter = MD053LinkImageReferenceDefinitions::from_config_struct(MD053Config {
        ignored_definitions: vec![String::from("template")],
    });

    let source = "[template]: https://example.com/template\n[unused]: https://example.com/unused\n\nSome content.";
    let ctx = LintContext::new(source, crate::config::MarkdownFlavor::Standard);

    assert_eq!(linter.fix(&ctx).unwrap(), source);
}
827
#[test]
fn test_duplicate_definitions_exact_case() {
    let markdown = "[ref]: url1\n[ref]: url2\n[ref]: url3";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let linter = MD053LinkImageReferenceDefinitions::new();
    let warnings = linter.check(&ctx).unwrap();

    // Only duplicate warnings are inspected here; anything else the rule reports is filtered out.
    // The 2nd and 3rd definitions repeat the first, so each is flagged on its own line.
    let duplicate_lines: Vec<_> = warnings
        .iter()
        .filter(|w| w.message.contains("Duplicate"))
        .map(|w| w.line)
        .collect();

    assert_eq!(duplicate_lines, vec![2, 3]);
}
842
#[test]
fn test_duplicate_definitions_case_variants() {
    // The three labels differ only in letter case.
    let markdown =
        "[method resolution order]: url1\n[Method Resolution Order]: url2\n[METHOD RESOLUTION ORDER]: url3";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let linter = MD053LinkImageReferenceDefinitions::new();
    let warnings = linter.check(&ctx).unwrap();

    // Case variants are expected to be reported as duplicates of the first definition,
    // i.e. one warning each for the definitions on lines 2 and 3.
    let duplicate_lines: Vec<_> = warnings
        .iter()
        .filter(|w| w.message.contains("Duplicate"))
        .map(|w| w.line)
        .collect();

    assert_eq!(duplicate_lines, vec![2, 3]);
}
861
#[test]
fn test_duplicate_and_unused() {
    let markdown = "[used]\n[used]: url1\n[used]: url2\n[unused]: url3";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let linter = MD053LinkImageReferenceDefinitions::new();
    let warnings = linter.check(&ctx).unwrap();

    // Split the warnings by kind and keep only the line each one points at.
    let duplicate_lines: Vec<_> = warnings
        .iter()
        .filter(|w| w.message.contains("Duplicate"))
        .map(|w| w.line)
        .collect();
    let unused_lines: Vec<_> = warnings
        .iter()
        .filter(|w| w.message.contains("Unused"))
        .map(|w| w.line)
        .collect();

    // One duplicate (the second `[used]` definition) and one unused (`[unused]`).
    assert_eq!(duplicate_lines, vec![3]);
    assert_eq!(unused_lines, vec![4]);
}
878
#[test]
fn test_duplicate_with_usage() {
    // The label is referenced, yet defined twice: the repeat must still be reported.
    let markdown = "[ref]\n\n[ref]: url1\n[ref]: url2";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let linter = MD053LinkImageReferenceDefinitions::new();
    let warnings = linter.check(&ctx).unwrap();

    let duplicate_lines: Vec<_> = warnings
        .iter()
        .filter(|w| w.message.contains("Duplicate"))
        .map(|w| w.line)
        .collect();
    let unused_total = warnings.iter().filter(|w| w.message.contains("Unused")).count();

    // A single duplicate on line 4, and no unused warning because the label is referenced.
    assert_eq!(duplicate_lines, vec![4]);
    assert_eq!(unused_total, 0);
}
895
#[test]
fn test_no_duplicate_different_ids() {
    // Three definitions with distinct labels and no references anywhere in the document.
    let markdown = "[ref1]: url1\n[ref2]: url2\n[ref3]: url3";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let linter = MD053LinkImageReferenceDefinitions::new();
    let warnings = linter.check(&ctx).unwrap();

    let duplicate_total = warnings.iter().filter(|w| w.message.contains("Duplicate")).count();
    let unused_total = warnings.iter().filter(|w| w.message.contains("Unused")).count();

    // Distinct labels must never be reported as duplicates.
    assert_eq!(duplicate_total, 0);

    // Each unreferenced definition is expected to yield one unused warning, and nothing else
    // should be reported. Without these checks the test would pass even if the rule
    // produced no warnings at all.
    assert_eq!(unused_total, 3);
    assert_eq!(warnings.len(), 3);
}
907}