rumdl_lib/rules/
md053_link_image_reference_definitions.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::rule_config_serde::RuleConfig;
3use crate::utils::range_utils::calculate_line_range;
4use fancy_regex::Regex as FancyRegex;
5use lazy_static::lazy_static;
6use regex::Regex;
7use serde::{Deserialize, Serialize};
8use std::collections::{HashMap, HashSet};
9
lazy_static! {
    // Full (`[text][reference]`), collapsed (`[text][]`) and image reference usages are not
    // matched by a regex in this file; `find_usages` reads them from the links and images
    // already collected on the `LintContext`.

    // Shortcut reference links: `[reference]`.
    // The look-behind rejects a directly preceding `!` and the look-ahead rejects a directly
    // following `[`. Trailing punctuation such as `:`, `.` or `,` is allowed by the pattern;
    // lines that are themselves reference definitions are filtered out by the caller.
    static ref SHORTCUT_REFERENCE_REGEX: FancyRegex =
        FancyRegex::new(r"(?<!\!)\[([^\]]+)\](?!\[)").unwrap();

    // Link/image reference definition format: `[reference]: URL`
    // Capture 1 is the label, capture 2 is the rest of the line after the colon.
    static ref REFERENCE_DEFINITION_REGEX: Regex =
        Regex::new(r"^\s*\[([^\]]+)\]:\s+(.+)$").unwrap();

    // Multi-line reference definition continuation pattern: a line that starts with whitespace
    // and has at least one more character after it.
    static ref CONTINUATION_REGEX: Regex = Regex::new(r"^\s+(.+)$").unwrap();

    // Fenced code block delimiters (three or more backticks or tildes). Leading whitespace is
    // accepted to support indented code blocks for MkDocs tabs.
    // NOTE(review): neither pattern is referenced in the part of the file reviewed here —
    // confirm they are still used before keeping them.
    static ref CODE_BLOCK_START_REGEX: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap();
    static ref CODE_BLOCK_END_REGEX: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,})\s*$").unwrap();
}
38
/// Configuration for MD053 rule
///
/// Field names are (de)serialized in kebab-case, so the single option is spelled
/// `ignored-definitions` in serialized form.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "kebab-case")]
pub struct MD053Config {
    /// List of reference names to keep even if unused.
    ///
    /// Definitions whose label matches an entry (ignoring case) are neither reported as
    /// unused nor checked for duplicates. Defaults to an empty list when the key is absent.
    #[serde(default = "default_ignored_definitions")]
    pub ignored_definitions: Vec<String>,
}
47
48impl Default for MD053Config {
49    fn default() -> Self {
50        Self {
51            ignored_definitions: default_ignored_definitions(),
52        }
53    }
54}
55
/// Default for `ignored-definitions`: an empty list (nothing is exempt).
fn default_ignored_definitions() -> Vec<String> {
    let nothing_ignored: Vec<String> = Vec::new();
    nothing_ignored
}
59
/// Associates this configuration struct with the rule identifier it belongs to.
impl RuleConfig for MD053Config {
    /// Rule identifier; the same string is used as the section name in
    /// `default_config_section`.
    const RULE_NAME: &'static str = "MD053";
}
63
/// Rule MD053: Link and image reference definitions should be used
///
/// See [docs/md053.md](../../docs/md053.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when a link or image reference definition is declared but not used
/// anywhere in the document. Unused reference definitions can create confusion and clutter.
/// Repeated definitions of the same label are reported as duplicates.
///
/// ## Supported Reference Formats
///
/// This rule handles the following reference formats:
///
/// - **Full reference links/images**: `[text][reference]` or `![text][reference]`
/// - **Collapsed reference links/images**: `[text][]` or `![text][]`
/// - **Shortcut reference links**: `[reference]` (must be defined elsewhere)
/// - **Reference definitions**: `[reference]: URL "Optional Title"`
/// - **Multi-line reference definitions**:
///   ```markdown
///   [reference]: URL
///      "Optional title continued on next line"
///   ```
///
/// ## Configuration Options
///
/// The rule supports the following configuration option. `MD053Config` is serialized with
/// kebab-case field names, so the key is written `ignored-definitions`.
/// NOTE(review): confirm whether the config loader also accepts the snake_case spelling.
///
/// ```yaml
/// MD053:
///   ignored-definitions: []  # List of reference definitions to ignore (never report as unused)
/// ```
///
/// ## Performance Notes
///
/// The rule itself holds no state besides its configuration:
///
/// 1. **Pre-parsed data**: Reference definitions, links, images and code spans are taken from
///    the `LintContext` instead of being re-parsed
/// 2. **Hash-based matching**: Definitions and usages are stored in a `HashMap` / `HashSet`
/// 3. **Code block handling**: Lines flagged as code block or front matter are skipped
/// 4. **Early exit**: `should_skip` bypasses documents that are empty or unlikely to contain
///    links or images
///
/// ## Edge Cases Handled
///
/// - **Case insensitivity**: References are matched case-insensitively
/// - **Escaped characters**: Backslash escapes in labels are removed before matching
/// - **Non-ASCII labels**: Labels are lowercased with `to_lowercase`, which is Unicode-aware
/// - **Code blocks**: Ignores references inside code blocks and spans
/// - **Comment-style definitions**: Definitions such as `[//]: # (comment)` are not reported
///
/// ## Fix Behavior
///
/// This rule does not provide automatic fixes. Unused references must be manually reviewed
/// and removed, as they may be intentionally kept for future use or as templates.
#[derive(Clone)]
pub struct MD053LinkImageReferenceDefinitions {
    // User configuration (currently only the list of ignored definition names).
    config: MD053Config,
}
119
120impl MD053LinkImageReferenceDefinitions {
121    /// Create a new instance of the MD053 rule
122    pub fn new() -> Self {
123        Self {
124            config: MD053Config::default(),
125        }
126    }
127
128    /// Create a new instance with the given configuration
129    pub fn from_config_struct(config: MD053Config) -> Self {
130        Self { config }
131    }
132
133    /// Returns true if this pattern should be skipped during reference detection
134    fn should_skip_pattern(text: &str) -> bool {
135        // Don't skip pure numeric patterns - they could be footnote references like [1]
136        // Only skip numeric ranges like [1:3], [0:10], etc.
137        if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
138            return true;
139        }
140
141        // Skip glob/wildcard patterns like [*], [...], [**]
142        if text == "*" || text == "..." || text == "**" {
143            return true;
144        }
145
146        // Skip patterns that are just punctuation or operators
147        if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
148            return true;
149        }
150
151        // Skip very short non-word patterns (likely operators or syntax)
152        // But allow single digits (could be footnotes) and single letters
153        if text.len() <= 2 && !text.chars().all(|c| c.is_alphanumeric()) {
154            return true;
155        }
156
157        // Skip descriptive patterns with colon like [default: the project root]
158        // But allow simple numeric ranges which are handled above
159        // And allow patterns with backticks (valid code references)
160        if text.contains(':') && text.contains(' ') && !text.contains('`') {
161            return true;
162        }
163
164        // Skip alert/admonition patterns like [!WARN], [!NOTE], etc.
165        if text.starts_with('!') {
166            return true;
167        }
168
169        // Note: We don't filter out patterns with backticks because backticks in reference names
170        // are valid markdown syntax, e.g., [`dataclasses.InitVar`] is a valid reference name
171
172        // Also don't filter out references with dots - these are legitimate reference names
173        // like [tool.ruff] or [os.path] which are valid markdown references
174
175        // Note: We don't filter based on word count anymore because legitimate references
176        // can have many words, like "python language reference for import statements"
177        // Word count filtering was causing false positives where valid references were
178        // being incorrectly flagged as unused
179
180        false
181    }
182
183    /// Unescape a reference string by removing backslashes before special characters.
184    ///
185    /// This allows matching references like `[example\-reference]` with definitions like
186    /// `[example-reference]: http://example.com`
187    ///
188    /// Returns the unescaped reference string.
189    fn unescape_reference(reference: &str) -> String {
190        // Remove backslashes before special characters
191        reference.replace("\\", "")
192    }
193
194    /// Check if a reference definition is likely a comment-style reference.
195    ///
196    /// This recognizes common community patterns for comments in markdown:
197    /// - `[//]: # (comment)` - Most popular pattern
198    /// - `[comment]: # (text)` - Semantic pattern
199    /// - `[note]: # (text)` - Documentation pattern
200    /// - `[todo]: # (text)` - Task tracking pattern
201    /// - Any reference with just `#` as the URL (fragment-only, often unused)
202    ///
203    /// While not part of any official markdown spec (CommonMark, GFM), these patterns
204    /// are widely used across 23+ markdown implementations as documented in the community.
205    ///
206    /// # Arguments
207    /// * `ref_id` - The reference ID (already normalized to lowercase)
208    /// * `url` - The URL from the reference definition
209    ///
210    /// # Returns
211    /// `true` if this looks like a comment-style reference that should be ignored
212    fn is_likely_comment_reference(ref_id: &str, url: &str) -> bool {
213        // Common comment reference labels used in the community
214        const COMMENT_LABELS: &[&str] = &[
215            "//",      // [//]: # (comment) - most popular
216            "comment", // [comment]: # (text)
217            "note",    // [note]: # (text)
218            "todo",    // [todo]: # (text)
219            "fixme",   // [fixme]: # (text)
220            "hack",    // [hack]: # (text)
221        ];
222
223        let normalized_id = ref_id.trim().to_lowercase();
224        let normalized_url = url.trim();
225
226        // Pattern 1: Known comment labels with fragment URLs
227        // e.g., [//]: # (comment), [comment]: #section
228        if COMMENT_LABELS.contains(&normalized_id.as_str()) && normalized_url.starts_with('#') {
229            return true;
230        }
231
232        // Pattern 2: Any reference with just "#" as the URL
233        // This is often used as a comment placeholder or unused anchor
234        if normalized_url == "#" {
235            return true;
236        }
237
238        false
239    }
240
241    /// Find all link and image reference definitions in the content.
242    ///
243    /// This method returns a HashMap where the key is the normalized reference ID and the value is a vector of (start_line, end_line) tuples.
244    fn find_definitions(&self, ctx: &crate::lint_context::LintContext) -> HashMap<String, Vec<(usize, usize)>> {
245        let mut definitions: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
246
247        // First, add all reference definitions from context
248        for ref_def in &ctx.reference_defs {
249            // Skip comment-style references (e.g., [//]: # (comment))
250            if Self::is_likely_comment_reference(&ref_def.id, &ref_def.url) {
251                continue;
252            }
253
254            // Apply unescape to handle escaped characters in definitions
255            let normalized_id = Self::unescape_reference(&ref_def.id); // Already lowercase from context
256            definitions
257                .entry(normalized_id)
258                .or_default()
259                .push((ref_def.line - 1, ref_def.line - 1)); // Convert to 0-indexed
260        }
261
262        // Handle multi-line definitions that might not be fully captured by ctx.reference_defs
263        let lines = &ctx.lines;
264        let mut i = 0;
265        while i < lines.len() {
266            let line_info = &lines[i];
267            let line = &line_info.content;
268
269            // Skip code blocks and front matter using line info
270            if line_info.in_code_block || line_info.in_front_matter {
271                i += 1;
272                continue;
273            }
274
275            // Check for multi-line continuation of existing definitions
276            if i > 0 && CONTINUATION_REGEX.is_match(line) {
277                // Find the reference definition this continues
278                let mut def_start = i - 1;
279                while def_start > 0 && !REFERENCE_DEFINITION_REGEX.is_match(&lines[def_start].content) {
280                    def_start -= 1;
281                }
282
283                if let Some(caps) = REFERENCE_DEFINITION_REGEX.captures(&lines[def_start].content) {
284                    let ref_id = caps.get(1).unwrap().as_str().trim();
285                    let normalized_id = Self::unescape_reference(ref_id).to_lowercase();
286
287                    // Update the end line for this definition
288                    if let Some(ranges) = definitions.get_mut(&normalized_id)
289                        && let Some(last_range) = ranges.last_mut()
290                        && last_range.0 == def_start
291                    {
292                        last_range.1 = i;
293                    }
294                }
295            }
296            i += 1;
297        }
298        definitions
299    }
300
301    /// Find all link and image reference reference usages in the content.
302    ///
303    /// This method returns a HashSet of all normalized reference IDs found in usage.
304    /// It leverages cached data from LintContext for efficiency.
305    fn find_usages(&self, ctx: &crate::lint_context::LintContext) -> HashSet<String> {
306        let mut usages: HashSet<String> = HashSet::new();
307
308        // 1. Add usages from cached reference links in LintContext
309        for link in &ctx.links {
310            if link.is_reference
311                && let Some(ref_id) = &link.reference_id
312            {
313                // Ensure the link itself is not inside a code block line
314                if !ctx.line_info(link.line).is_some_and(|info| info.in_code_block) {
315                    usages.insert(Self::unescape_reference(ref_id).to_lowercase());
316                }
317            }
318        }
319
320        // 2. Add usages from cached reference images in LintContext
321        for image in &ctx.images {
322            if image.is_reference
323                && let Some(ref_id) = &image.reference_id
324            {
325                // Ensure the image itself is not inside a code block line
326                if !ctx.line_info(image.line).is_some_and(|info| info.in_code_block) {
327                    usages.insert(Self::unescape_reference(ref_id).to_lowercase());
328                }
329            }
330        }
331
332        // 3. Find shortcut references [ref] not already handled by DocumentStructure.links
333        //    and ensure they are not within code spans or code blocks.
334        // Cache code spans once before the loop
335        let code_spans = ctx.code_spans();
336
337        for line_info in ctx.lines.iter() {
338            // Skip lines in code blocks or front matter
339            if line_info.in_code_block || line_info.in_front_matter {
340                continue;
341            }
342
343            // Skip lines that are reference definitions (start with [ref]: at beginning)
344            if REFERENCE_DEFINITION_REGEX.is_match(&line_info.content) {
345                continue;
346            }
347
348            // Find potential shortcut references
349            for caps in SHORTCUT_REFERENCE_REGEX.captures_iter(&line_info.content).flatten() {
350                if let Some(full_match) = caps.get(0)
351                    && let Some(ref_id_match) = caps.get(1)
352                {
353                    // Check if the match is within a code span
354                    let match_byte_offset = line_info.byte_offset + full_match.start();
355                    let in_code_span = code_spans
356                        .iter()
357                        .any(|span| match_byte_offset >= span.byte_offset && match_byte_offset < span.byte_end);
358
359                    if !in_code_span {
360                        let ref_id = ref_id_match.as_str().trim();
361
362                        if !Self::should_skip_pattern(ref_id) {
363                            let normalized_id = Self::unescape_reference(ref_id).to_lowercase();
364                            usages.insert(normalized_id);
365                        }
366                    }
367                }
368            }
369        }
370
371        // NOTE: The complex recursive loop trying to find references within definitions
372        // has been removed as it's not standard Markdown behavior for finding *usages*.
373        // Usages refer to `[text][ref]`, `![alt][ref]`, `[ref]`, etc., in the main content,
374        // not references potentially embedded within the URL or title of another definition.
375
376        usages
377    }
378
379    /// Get unused references with their line ranges.
380    ///
381    /// This method uses the cached definitions to improve performance.
382    ///
383    /// Note: References that are only used inside code blocks are still considered unused,
384    /// as code blocks are treated as examples or documentation rather than actual content.
385    fn get_unused_references(
386        &self,
387        definitions: &HashMap<String, Vec<(usize, usize)>>,
388        usages: &HashSet<String>,
389    ) -> Vec<(String, usize, usize)> {
390        let mut unused = Vec::new();
391        for (id, ranges) in definitions {
392            // If this id is not used anywhere and is not in the ignored list
393            if !usages.contains(id) && !self.is_ignored_definition(id) {
394                // Only report as unused if there's exactly one definition
395                // Multiple definitions are already reported as duplicates
396                if ranges.len() == 1 {
397                    let (start, end) = ranges[0];
398                    unused.push((id.clone(), start, end));
399                }
400                // If there are multiple definitions (duplicates), don't report them as unused
401                // They're already being reported as duplicate definitions
402            }
403        }
404        unused
405    }
406
407    /// Check if a definition should be ignored (kept even if unused)
408    fn is_ignored_definition(&self, definition_id: &str) -> bool {
409        self.config
410            .ignored_definitions
411            .iter()
412            .any(|ignored| ignored.eq_ignore_ascii_case(definition_id))
413    }
414}
415
416impl Default for MD053LinkImageReferenceDefinitions {
417    fn default() -> Self {
418        Self::new()
419    }
420}
421
422impl Rule for MD053LinkImageReferenceDefinitions {
    /// Rule identifier; also used as the `rule_name` of every warning emitted by `check`.
    fn name(&self) -> &'static str {
        "MD053"
    }
426
    /// One-line, human-readable summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Link and image reference definitions should be needed"
    }
430
431    /// Check the content for unused and duplicate link/image reference definitions.
432    ///
433    /// This implementation uses caching for improved performance on large documents.
434    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
435        // Find definitions and usages using LintContext
436        let definitions = self.find_definitions(ctx);
437        let usages = self.find_usages(ctx);
438
439        // Get unused references by comparing definitions and usages
440        let unused_refs = self.get_unused_references(&definitions, &usages);
441
442        let mut warnings = Vec::new();
443
444        // Check for duplicate definitions (case-insensitive per CommonMark spec)
445        let mut seen_definitions: HashMap<String, (String, usize)> = HashMap::new(); // lowercase -> (original, first_line)
446
447        for (definition_id, ranges) in &definitions {
448            // Skip ignored definitions for duplicate checking
449            if self.is_ignored_definition(definition_id) {
450                continue;
451            }
452
453            if ranges.len() > 1 {
454                // Multiple definitions with exact same ID (already lowercase)
455                for (i, &(start_line, _)) in ranges.iter().enumerate() {
456                    if i > 0 {
457                        // Skip the first occurrence, report all others
458                        let line_num = start_line + 1;
459                        let line_content = ctx.lines.get(start_line).map(|l| l.content.as_str()).unwrap_or("");
460                        let (start_line_1idx, start_col, end_line, end_col) =
461                            calculate_line_range(line_num, line_content);
462
463                        warnings.push(LintWarning {
464                            rule_name: Some(self.name().to_string()),
465                            line: start_line_1idx,
466                            column: start_col,
467                            end_line,
468                            end_column: end_col,
469                            message: format!("Duplicate link or image reference definition: [{definition_id}]"),
470                            severity: Severity::Warning,
471                            fix: None,
472                        });
473                    }
474                }
475            }
476
477            // Track for case-variant duplicates
478            if let Some(&(start_line, _)) = ranges.first() {
479                // Find the original case version from the line
480                if let Some(line_info) = ctx.lines.get(start_line)
481                    && let Some(caps) = REFERENCE_DEFINITION_REGEX.captures(&line_info.content)
482                {
483                    let original_id = caps.get(1).unwrap().as_str().trim();
484                    let lower_id = original_id.to_lowercase();
485
486                    if let Some((first_original, first_line)) = seen_definitions.get(&lower_id) {
487                        // Found a case-variant duplicate
488                        if first_original != original_id {
489                            let line_num = start_line + 1;
490                            let line_content = &line_info.content;
491                            let (start_line_1idx, start_col, end_line, end_col) =
492                                calculate_line_range(line_num, line_content);
493
494                            warnings.push(LintWarning {
495                                    rule_name: Some(self.name().to_string()),
496                                    line: start_line_1idx,
497                                    column: start_col,
498                                    end_line,
499                                    end_column: end_col,
500                                    message: format!("Duplicate link or image reference definition: [{}] (conflicts with [{}] on line {})",
501                                                   original_id, first_original, first_line + 1),
502                                    severity: Severity::Warning,
503                                    fix: None,
504                                });
505                        }
506                    } else {
507                        seen_definitions.insert(lower_id, (original_id.to_string(), start_line));
508                    }
509                }
510            }
511        }
512
513        // Create warnings for unused references
514        for (definition, start, _end) in unused_refs {
515            let line_num = start + 1; // 1-indexed line numbers
516            let line_content = ctx.lines.get(start).map(|l| l.content.as_str()).unwrap_or("");
517
518            // Calculate precise character range for the entire reference definition line
519            let (start_line, start_col, end_line, end_col) = calculate_line_range(line_num, line_content);
520
521            warnings.push(LintWarning {
522                rule_name: Some(self.name().to_string()),
523                line: start_line,
524                column: start_col,
525                end_line,
526                end_column: end_col,
527                message: format!("Unused link/image reference: [{definition}]"),
528                severity: Severity::Warning,
529                fix: None, // MD053 is warning-only, no automatic fixes
530            });
531        }
532
533        Ok(warnings)
534    }
535
536    /// MD053 does not provide automatic fixes
537    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
538        // This rule is warning-only, no automatic fixes provided
539        Ok(ctx.content.to_string())
540    }
541
542    /// Check if this rule should be skipped for performance
543    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
544        // Skip if content is empty or has no links/images
545        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
546    }
547
    /// Exposes the rule as `&dyn Any` so callers can downcast to the concrete rule type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
551
552    fn default_config_section(&self) -> Option<(String, toml::Value)> {
553        let default_config = MD053Config::default();
554        let json_value = serde_json::to_value(&default_config).ok()?;
555        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
556        if let toml::Value::Table(table) = toml_value {
557            if !table.is_empty() {
558                Some((MD053Config::RULE_NAME.to_string(), toml::Value::Table(table)))
559            } else {
560                None
561            }
562        } else {
563            None
564        }
565    }
566
567    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
568    where
569        Self: Sized,
570    {
571        let rule_config = crate::rule_config_serde::load_rule_config::<MD053Config>(config);
572        Box::new(MD053LinkImageReferenceDefinitions::from_config_struct(rule_config))
573    }
574}
575
576#[cfg(test)]
577mod tests {
578    use super::*;
579    use crate::lint_context::LintContext;
580
581    #[test]
582    fn test_used_reference_link() {
583        let rule = MD053LinkImageReferenceDefinitions::new();
584        let content = "[text][ref]\n\n[ref]: https://example.com";
585        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
586        let result = rule.check(&ctx).unwrap();
587
588        assert_eq!(result.len(), 0);
589    }
590
591    #[test]
592    fn test_unused_reference_definition() {
593        let rule = MD053LinkImageReferenceDefinitions::new();
594        let content = "[unused]: https://example.com";
595        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
596        let result = rule.check(&ctx).unwrap();
597
598        assert_eq!(result.len(), 1);
599        assert!(result[0].message.contains("Unused link/image reference: [unused]"));
600    }
601
602    #[test]
603    fn test_used_reference_image() {
604        let rule = MD053LinkImageReferenceDefinitions::new();
605        let content = "![alt][img]\n\n[img]: image.jpg";
606        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
607        let result = rule.check(&ctx).unwrap();
608
609        assert_eq!(result.len(), 0);
610    }
611
612    #[test]
613    fn test_case_insensitive_matching() {
614        let rule = MD053LinkImageReferenceDefinitions::new();
615        let content = "[Text][REF]\n\n[ref]: https://example.com";
616        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
617        let result = rule.check(&ctx).unwrap();
618
619        assert_eq!(result.len(), 0);
620    }
621
622    #[test]
623    fn test_shortcut_reference() {
624        let rule = MD053LinkImageReferenceDefinitions::new();
625        let content = "[ref]\n\n[ref]: https://example.com";
626        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
627        let result = rule.check(&ctx).unwrap();
628
629        assert_eq!(result.len(), 0);
630    }
631
632    #[test]
633    fn test_collapsed_reference() {
634        let rule = MD053LinkImageReferenceDefinitions::new();
635        let content = "[ref][]\n\n[ref]: https://example.com";
636        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
637        let result = rule.check(&ctx).unwrap();
638
639        assert_eq!(result.len(), 0);
640    }
641
642    #[test]
643    fn test_multiple_unused_definitions() {
644        let rule = MD053LinkImageReferenceDefinitions::new();
645        let content = "[unused1]: url1\n[unused2]: url2\n[unused3]: url3";
646        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
647        let result = rule.check(&ctx).unwrap();
648
649        assert_eq!(result.len(), 3);
650
651        // The warnings might not be in the same order, so collect all messages
652        let messages: Vec<String> = result.iter().map(|w| w.message.clone()).collect();
653        assert!(messages.iter().any(|m| m.contains("unused1")));
654        assert!(messages.iter().any(|m| m.contains("unused2")));
655        assert!(messages.iter().any(|m| m.contains("unused3")));
656    }
657
658    #[test]
659    fn test_mixed_used_and_unused() {
660        let rule = MD053LinkImageReferenceDefinitions::new();
661        let content = "[used]\n\n[used]: url1\n[unused]: url2";
662        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
663        let result = rule.check(&ctx).unwrap();
664
665        assert_eq!(result.len(), 1);
666        assert!(result[0].message.contains("unused"));
667    }
668
669    #[test]
670    fn test_multiline_definition() {
671        let rule = MD053LinkImageReferenceDefinitions::new();
672        let content = "[ref]: https://example.com\n  \"Title on next line\"";
673        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
674        let result = rule.check(&ctx).unwrap();
675
676        assert_eq!(result.len(), 1); // Still unused
677    }
678
679    #[test]
680    fn test_reference_in_code_block() {
681        let rule = MD053LinkImageReferenceDefinitions::new();
682        let content = "```\n[ref]\n```\n\n[ref]: https://example.com";
683        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
684        let result = rule.check(&ctx).unwrap();
685
686        // Reference used only in code block is still considered unused
687        assert_eq!(result.len(), 1);
688    }
689
690    #[test]
691    fn test_reference_in_inline_code() {
692        let rule = MD053LinkImageReferenceDefinitions::new();
693        let content = "`[ref]`\n\n[ref]: https://example.com";
694        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
695        let result = rule.check(&ctx).unwrap();
696
697        // Reference in inline code is not a usage
698        assert_eq!(result.len(), 1);
699    }
700
701    #[test]
702    fn test_escaped_reference() {
703        let rule = MD053LinkImageReferenceDefinitions::new();
704        let content = "[example\\-ref]\n\n[example-ref]: https://example.com";
705        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
706        let result = rule.check(&ctx).unwrap();
707
708        // Should match despite escaping
709        assert_eq!(result.len(), 0);
710    }
711
712    #[test]
713    fn test_duplicate_definitions() {
714        let rule = MD053LinkImageReferenceDefinitions::new();
715        let content = "[ref]: url1\n[ref]: url2\n\n[ref]";
716        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
717        let result = rule.check(&ctx).unwrap();
718
719        // Should flag the duplicate definition even though it's used (matches markdownlint)
720        assert_eq!(result.len(), 1);
721    }
722
723    #[test]
724    fn test_fix_returns_original() {
725        // MD053 is warning-only, fix should return original content
726        let rule = MD053LinkImageReferenceDefinitions::new();
727        let content = "[used]\n\n[used]: url1\n[unused]: url2\n\nMore content";
728        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
729        let fixed = rule.fix(&ctx).unwrap();
730
731        assert_eq!(fixed, content);
732    }
733
734    #[test]
735    fn test_fix_preserves_content() {
736        // MD053 is warning-only, fix should preserve all content
737        let rule = MD053LinkImageReferenceDefinitions::new();
738        let content = "Content\n\n[unused]: url\n\nMore content";
739        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
740        let fixed = rule.fix(&ctx).unwrap();
741
742        assert_eq!(fixed, content);
743    }
744
745    #[test]
746    fn test_fix_does_not_remove() {
747        // MD053 is warning-only, fix should not remove anything
748        let rule = MD053LinkImageReferenceDefinitions::new();
749        let content = "[unused1]: url1\n[unused2]: url2\n[unused3]: url3";
750        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
751        let fixed = rule.fix(&ctx).unwrap();
752
753        assert_eq!(fixed, content);
754    }
755
756    #[test]
757    fn test_special_characters_in_reference() {
758        let rule = MD053LinkImageReferenceDefinitions::new();
759        let content = "[ref-with_special.chars]\n\n[ref-with_special.chars]: url";
760        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
761        let result = rule.check(&ctx).unwrap();
762
763        assert_eq!(result.len(), 0);
764    }
765
766    #[test]
767    fn test_find_definitions() {
768        let rule = MD053LinkImageReferenceDefinitions::new();
769        let content = "[ref1]: url1\n[ref2]: url2\nSome text\n[ref3]: url3";
770        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
771        let defs = rule.find_definitions(&ctx);
772
773        assert_eq!(defs.len(), 3);
774        assert!(defs.contains_key("ref1"));
775        assert!(defs.contains_key("ref2"));
776        assert!(defs.contains_key("ref3"));
777    }
778
779    #[test]
780    fn test_find_usages() {
781        let rule = MD053LinkImageReferenceDefinitions::new();
782        let content = "[text][ref1] and [ref2] and ![img][ref3]";
783        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
784        let usages = rule.find_usages(&ctx);
785
786        assert!(usages.contains("ref1"));
787        assert!(usages.contains("ref2"));
788        assert!(usages.contains("ref3"));
789    }
790
791    #[test]
792    fn test_ignored_definitions_config() {
793        // Test with ignored definitions
794        let config = MD053Config {
795            ignored_definitions: vec!["todo".to_string(), "draft".to_string()],
796        };
797        let rule = MD053LinkImageReferenceDefinitions::from_config_struct(config);
798
799        let content = "[todo]: https://example.com/todo\n[draft]: https://example.com/draft\n[unused]: https://example.com/unused";
800        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
801        let result = rule.check(&ctx).unwrap();
802
803        // Should only flag "unused", not "todo" or "draft"
804        assert_eq!(result.len(), 1);
805        assert!(result[0].message.contains("unused"));
806        assert!(!result[0].message.contains("todo"));
807        assert!(!result[0].message.contains("draft"));
808    }
809
810    #[test]
811    fn test_ignored_definitions_case_insensitive() {
812        // Test case-insensitive matching of ignored definitions
813        let config = MD053Config {
814            ignored_definitions: vec!["TODO".to_string()],
815        };
816        let rule = MD053LinkImageReferenceDefinitions::from_config_struct(config);
817
818        let content = "[todo]: https://example.com/todo\n[unused]: https://example.com/unused";
819        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
820        let result = rule.check(&ctx).unwrap();
821
822        // Should only flag "unused", not "todo" (matches "TODO" case-insensitively)
823        assert_eq!(result.len(), 1);
824        assert!(result[0].message.contains("unused"));
825        assert!(!result[0].message.contains("todo"));
826    }
827
828    #[test]
829    fn test_default_config_section() {
830        let rule = MD053LinkImageReferenceDefinitions::default();
831        let config_section = rule.default_config_section();
832
833        assert!(config_section.is_some());
834        let (name, value) = config_section.unwrap();
835        assert_eq!(name, "MD053");
836
837        // Should contain the ignored_definitions option with default empty array
838        if let toml::Value::Table(table) = value {
839            assert!(table.contains_key("ignored-definitions"));
840            assert_eq!(table["ignored-definitions"], toml::Value::Array(vec![]));
841        } else {
842            panic!("Expected TOML table");
843        }
844    }
845
846    #[test]
847    fn test_fix_with_ignored_definitions() {
848        // MD053 is warning-only, fix should not remove anything even with ignored definitions
849        let config = MD053Config {
850            ignored_definitions: vec!["template".to_string()],
851        };
852        let rule = MD053LinkImageReferenceDefinitions::from_config_struct(config);
853
854        let content = "[template]: https://example.com/template\n[unused]: https://example.com/unused\n\nSome content.";
855        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
856        let fixed = rule.fix(&ctx).unwrap();
857
858        // Should keep everything since MD053 doesn't fix
859        assert_eq!(fixed, content);
860    }
861
862    #[test]
863    fn test_duplicate_definitions_exact_case() {
864        let rule = MD053LinkImageReferenceDefinitions::new();
865        let content = "[ref]: url1\n[ref]: url2\n[ref]: url3";
866        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
867        let result = rule.check(&ctx).unwrap();
868
869        // Should have 2 duplicate warnings (for the 2nd and 3rd definitions)
870        // Plus 1 unused warning
871        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
872        assert_eq!(duplicate_warnings.len(), 2);
873        assert_eq!(duplicate_warnings[0].line, 2);
874        assert_eq!(duplicate_warnings[1].line, 3);
875    }
876
877    #[test]
878    fn test_duplicate_definitions_case_variants() {
879        let rule = MD053LinkImageReferenceDefinitions::new();
880        let content =
881            "[method resolution order]: url1\n[Method Resolution Order]: url2\n[METHOD RESOLUTION ORDER]: url3";
882        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
883        let result = rule.check(&ctx).unwrap();
884
885        // Should have 2 duplicate warnings (for the 2nd and 3rd definitions)
886        // Note: These are treated as exact duplicates since they normalize to the same ID
887        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
888        assert_eq!(duplicate_warnings.len(), 2);
889
890        // The exact duplicate messages don't include "conflicts with"
891        // Only case-variant duplicates with different normalized forms would
892        assert_eq!(duplicate_warnings[0].line, 2);
893        assert_eq!(duplicate_warnings[1].line, 3);
894    }
895
896    #[test]
897    fn test_duplicate_and_unused() {
898        let rule = MD053LinkImageReferenceDefinitions::new();
899        let content = "[used]\n[used]: url1\n[used]: url2\n[unused]: url3";
900        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
901        let result = rule.check(&ctx).unwrap();
902
903        // Should have 1 duplicate warning and 1 unused warning
904        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
905        let unused_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Unused")).collect();
906
907        assert_eq!(duplicate_warnings.len(), 1);
908        assert_eq!(unused_warnings.len(), 1);
909        assert_eq!(duplicate_warnings[0].line, 3); // Second [used] definition
910        assert_eq!(unused_warnings[0].line, 4); // [unused] definition
911    }
912
913    #[test]
914    fn test_duplicate_with_usage() {
915        let rule = MD053LinkImageReferenceDefinitions::new();
916        // Even if used, duplicates should still be reported
917        let content = "[ref]\n\n[ref]: url1\n[ref]: url2";
918        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
919        let result = rule.check(&ctx).unwrap();
920
921        // Should have 1 duplicate warning (no unused since it's referenced)
922        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
923        let unused_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Unused")).collect();
924
925        assert_eq!(duplicate_warnings.len(), 1);
926        assert_eq!(unused_warnings.len(), 0);
927        assert_eq!(duplicate_warnings[0].line, 4);
928    }
929
930    #[test]
931    fn test_no_duplicate_different_ids() {
932        let rule = MD053LinkImageReferenceDefinitions::new();
933        let content = "[ref1]: url1\n[ref2]: url2\n[ref3]: url3";
934        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
935        let result = rule.check(&ctx).unwrap();
936
937        // Should have no duplicate warnings, only unused warnings
938        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
939        assert_eq!(duplicate_warnings.len(), 0);
940    }
941
942    #[test]
943    fn test_comment_style_reference_double_slash() {
944        let rule = MD053LinkImageReferenceDefinitions::new();
945        // Most popular comment pattern: [//]: # (comment)
946        let content = "[//]: # (This is a comment)\n\nSome regular text.";
947        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
948        let result = rule.check(&ctx).unwrap();
949
950        // Should not report as unused - it's recognized as a comment
951        assert_eq!(result.len(), 0, "Comment-style reference [//]: # should not be flagged");
952    }
953
954    #[test]
955    fn test_comment_style_reference_comment_label() {
956        let rule = MD053LinkImageReferenceDefinitions::new();
957        // Semantic comment pattern: [comment]: # (text)
958        let content = "[comment]: # (This is a semantic comment)\n\n[note]: # (This is a note)";
959        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
960        let result = rule.check(&ctx).unwrap();
961
962        // Should not report either as unused
963        assert_eq!(result.len(), 0, "Comment-style references should not be flagged");
964    }
965
966    #[test]
967    fn test_comment_style_reference_todo_fixme() {
968        let rule = MD053LinkImageReferenceDefinitions::new();
969        // Task tracking patterns: [todo]: # and [fixme]: #
970        let content = "[todo]: # (Add more examples)\n[fixme]: # (Fix this later)\n[hack]: # (Temporary workaround)";
971        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
972        let result = rule.check(&ctx).unwrap();
973
974        // Should not report any as unused
975        assert_eq!(result.len(), 0, "TODO/FIXME comment patterns should not be flagged");
976    }
977
978    #[test]
979    fn test_comment_style_reference_fragment_only() {
980        let rule = MD053LinkImageReferenceDefinitions::new();
981        // Any reference with just "#" as URL should be treated as a comment
982        let content = "[anything]: #\n[ref]: #\n\nSome text.";
983        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
984        let result = rule.check(&ctx).unwrap();
985
986        // Should not report as unused - fragment-only URLs are often comments
987        assert_eq!(result.len(), 0, "References with just '#' URL should not be flagged");
988    }
989
990    #[test]
991    fn test_comment_vs_real_reference() {
992        let rule = MD053LinkImageReferenceDefinitions::new();
993        // Mix of comment and real reference - only real one should be flagged if unused
994        let content = "[//]: # (This is a comment)\n[real-ref]: https://example.com\n\nSome text.";
995        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
996        let result = rule.check(&ctx).unwrap();
997
998        // Should only report the real reference as unused
999        assert_eq!(result.len(), 1, "Only real unused references should be flagged");
1000        assert!(result[0].message.contains("real-ref"), "Should flag the real reference");
1001    }
1002
1003    #[test]
1004    fn test_comment_with_fragment_section() {
1005        let rule = MD053LinkImageReferenceDefinitions::new();
1006        // Comment pattern with a fragment section (still a comment)
1007        let content = "[//]: #section (Comment about section)\n\nSome text.";
1008        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1009        let result = rule.check(&ctx).unwrap();
1010
1011        // Should not report as unused - it's still a comment pattern
1012        assert_eq!(result.len(), 0, "Comment with fragment section should not be flagged");
1013    }
1014
1015    #[test]
1016    fn test_is_likely_comment_reference_helper() {
1017        // Test the helper function directly
1018        assert!(
1019            MD053LinkImageReferenceDefinitions::is_likely_comment_reference("//", "#"),
1020            "[//]: # should be recognized as comment"
1021        );
1022        assert!(
1023            MD053LinkImageReferenceDefinitions::is_likely_comment_reference("comment", "#section"),
1024            "[comment]: #section should be recognized as comment"
1025        );
1026        assert!(
1027            MD053LinkImageReferenceDefinitions::is_likely_comment_reference("note", "#"),
1028            "[note]: # should be recognized as comment"
1029        );
1030        assert!(
1031            MD053LinkImageReferenceDefinitions::is_likely_comment_reference("todo", "#"),
1032            "[todo]: # should be recognized as comment"
1033        );
1034        assert!(
1035            MD053LinkImageReferenceDefinitions::is_likely_comment_reference("anything", "#"),
1036            "Any label with just '#' should be recognized as comment"
1037        );
1038        assert!(
1039            !MD053LinkImageReferenceDefinitions::is_likely_comment_reference("ref", "https://example.com"),
1040            "Real URL should not be recognized as comment"
1041        );
1042        assert!(
1043            !MD053LinkImageReferenceDefinitions::is_likely_comment_reference("link", "http://test.com"),
1044            "Real URL should not be recognized as comment"
1045        );
1046    }
1047}