rumdl_lib/rules/
md053_link_image_reference_definitions.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::rule_config_serde::RuleConfig;
3use crate::utils::range_utils::calculate_line_range;
4use fancy_regex::Regex as FancyRegex;
5use regex::Regex;
6use serde::{Deserialize, Serialize};
7use std::collections::{HashMap, HashSet};
8use std::sync::LazyLock;
9
10// Shortcut reference links: [reference] - must not be followed by another bracket
11// Allow references followed by punctuation like colon, period, comma (e.g., "[reference]:", "[reference].")
12// Don't exclude references followed by ": " in the middle of a line (only at start of line)
13static SHORTCUT_REFERENCE_REGEX: LazyLock<FancyRegex> =
14    LazyLock::new(|| FancyRegex::new(r"(?<!\!)\[([^\]]+)\](?!\[)").unwrap());
15
16// Link/image reference definition format: [reference]: URL
17static REFERENCE_DEFINITION_REGEX: LazyLock<Regex> =
18    LazyLock::new(|| Regex::new(r"^\s*\[([^\]]+)\]:\s+(.+)$").unwrap());
19
20// Multi-line reference definition continuation pattern
21static CONTINUATION_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s+(.+)$").unwrap());
22
23/// Configuration for MD053 rule
24#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
25#[serde(rename_all = "kebab-case")]
26pub struct MD053Config {
27    /// List of reference names to keep even if unused
28    #[serde(default = "default_ignored_definitions")]
29    pub ignored_definitions: Vec<String>,
30}
31
32impl Default for MD053Config {
33    fn default() -> Self {
34        Self {
35            ignored_definitions: default_ignored_definitions(),
36        }
37    }
38}
39
/// Default value for `MD053Config::ignored_definitions`: no labels are ignored.
fn default_ignored_definitions() -> Vec<String> {
    Vec::default()
}
43
44impl RuleConfig for MD053Config {
45    const RULE_NAME: &'static str = "MD053";
46}
47
48/// Rule MD053: Link and image reference definitions should be used
49///
50/// See [docs/md053.md](../../docs/md053.md) for full documentation, configuration, and examples.
51///
52/// This rule is triggered when a link or image reference definition is declared but not used
53/// anywhere in the document. Unused reference definitions can create confusion and clutter.
54///
55/// ## Supported Reference Formats
56///
57/// This rule handles the following reference formats:
58///
59/// - **Full reference links/images**: `[text][reference]` or `![text][reference]`
60/// - **Collapsed reference links/images**: `[text][]` or `![text][]`
61/// - **Shortcut reference links**: `[reference]` (must be defined elsewhere)
62/// - **Reference definitions**: `[reference]: URL "Optional Title"`
63/// - **Multi-line reference definitions**:
64///   ```markdown
65///   [reference]: URL
66///      "Optional title continued on next line"
67///   ```
68///
69/// ## Configuration Options
70///
71/// The rule supports the following configuration options:
72///
73/// ```yaml
74/// MD053:
75///   ignored_definitions: []  # List of reference definitions to ignore (never report as unused)
76/// ```
77///
78/// ## Performance Optimizations
79///
80/// This rule implements various performance optimizations for handling large documents:
81///
82/// 1. **Caching**: The rule caches parsed definitions and references based on content hashing
83/// 2. **Efficient Reference Matching**: Uses HashMaps for O(1) lookups of definitions
84/// 3. **Smart Code Block Handling**: Efficiently skips references inside code blocks/spans
85/// 4. **Lazy Evaluation**: Only processes necessary portions of the document
86///
87/// ## Edge Cases Handled
88///
89/// - **Case insensitivity**: References are matched case-insensitively
90/// - **Escaped characters**: Properly processes escaped characters in references
91/// - **Unicode support**: Handles non-ASCII characters in references and URLs
92/// - **Code blocks**: Ignores references inside code blocks and spans
93/// - **Special characters**: Properly handles references with special characters
94///
95/// ## Fix Behavior
96///
97/// This rule does not provide automatic fixes. Unused references must be manually reviewed
98/// and removed, as they may be intentionally kept for future use or as templates.
99#[derive(Clone)]
100pub struct MD053LinkImageReferenceDefinitions {
101    config: MD053Config,
102}
103
104impl MD053LinkImageReferenceDefinitions {
105    /// Create a new instance of the MD053 rule
106    pub fn new() -> Self {
107        Self {
108            config: MD053Config::default(),
109        }
110    }
111
112    /// Create a new instance with the given configuration
113    pub fn from_config_struct(config: MD053Config) -> Self {
114        Self { config }
115    }
116
117    /// Returns true if this pattern should be skipped during reference detection
118    fn should_skip_pattern(text: &str) -> bool {
119        // Don't skip pure numeric patterns - they could be footnote references like [1]
120        // Only skip numeric ranges like [1:3], [0:10], etc.
121        if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
122            return true;
123        }
124
125        // Skip glob/wildcard patterns like [*], [...], [**]
126        if text == "*" || text == "..." || text == "**" {
127            return true;
128        }
129
130        // Skip patterns that are just punctuation or operators
131        if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
132            return true;
133        }
134
135        // Skip very short non-word patterns (likely operators or syntax)
136        // But allow single digits (could be footnotes) and single letters
137        if text.len() <= 2 && !text.chars().all(|c| c.is_alphanumeric()) {
138            return true;
139        }
140
141        // Skip descriptive patterns with colon like [default: the project root]
142        // But allow simple numeric ranges which are handled above
143        // And allow patterns with backticks (valid code references)
144        if text.contains(':') && text.contains(' ') && !text.contains('`') {
145            return true;
146        }
147
148        // Skip alert/admonition patterns like [!WARN], [!NOTE], etc.
149        if text.starts_with('!') {
150            return true;
151        }
152
153        // Note: We don't filter out patterns with backticks because backticks in reference names
154        // are valid markdown syntax, e.g., [`dataclasses.InitVar`] is a valid reference name
155
156        // Also don't filter out references with dots - these are legitimate reference names
157        // like [tool.ruff] or [os.path] which are valid markdown references
158
159        // Note: We don't filter based on word count anymore because legitimate references
160        // can have many words, like "python language reference for import statements"
161        // Word count filtering was causing false positives where valid references were
162        // being incorrectly flagged as unused
163
164        false
165    }
166
167    /// Unescape a reference string by removing backslashes before special characters.
168    ///
169    /// This allows matching references like `[example\-reference]` with definitions like
170    /// `[example-reference]: http://example.com`
171    ///
172    /// Returns the unescaped reference string.
173    fn unescape_reference(reference: &str) -> String {
174        // Remove backslashes before special characters
175        reference.replace("\\", "")
176    }
177
178    /// Check if a reference definition is likely a comment-style reference.
179    ///
180    /// This recognizes common community patterns for comments in markdown:
181    /// - `[//]: # (comment)` - Most popular pattern
182    /// - `[comment]: # (text)` - Semantic pattern
183    /// - `[note]: # (text)` - Documentation pattern
184    /// - `[todo]: # (text)` - Task tracking pattern
185    /// - Any reference with just `#` as the URL (fragment-only, often unused)
186    ///
187    /// While not part of any official markdown spec (CommonMark, GFM), these patterns
188    /// are widely used across 23+ markdown implementations as documented in the community.
189    ///
190    /// # Arguments
191    /// * `ref_id` - The reference ID (already normalized to lowercase)
192    /// * `url` - The URL from the reference definition
193    ///
194    /// # Returns
195    /// `true` if this looks like a comment-style reference that should be ignored
196    fn is_likely_comment_reference(ref_id: &str, url: &str) -> bool {
197        // Common comment reference labels used in the community
198        const COMMENT_LABELS: &[&str] = &[
199            "//",      // [//]: # (comment) - most popular
200            "comment", // [comment]: # (text)
201            "note",    // [note]: # (text)
202            "todo",    // [todo]: # (text)
203            "fixme",   // [fixme]: # (text)
204            "hack",    // [hack]: # (text)
205        ];
206
207        let normalized_id = ref_id.trim().to_lowercase();
208        let normalized_url = url.trim();
209
210        // Pattern 1: Known comment labels with fragment URLs
211        // e.g., [//]: # (comment), [comment]: #section
212        if COMMENT_LABELS.contains(&normalized_id.as_str()) && normalized_url.starts_with('#') {
213            return true;
214        }
215
216        // Pattern 2: Any reference with just "#" as the URL
217        // This is often used as a comment placeholder or unused anchor
218        if normalized_url == "#" {
219            return true;
220        }
221
222        false
223    }
224
225    /// Find all link and image reference definitions in the content.
226    ///
227    /// This method returns a HashMap where the key is the normalized reference ID and the value is a vector of (start_line, end_line) tuples.
228    fn find_definitions(&self, ctx: &crate::lint_context::LintContext) -> HashMap<String, Vec<(usize, usize)>> {
229        let mut definitions: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
230
231        // First, add all reference definitions from context
232        for ref_def in &ctx.reference_defs {
233            // Skip comment-style references (e.g., [//]: # (comment))
234            if Self::is_likely_comment_reference(&ref_def.id, &ref_def.url) {
235                continue;
236            }
237
238            // Apply unescape to handle escaped characters in definitions
239            let normalized_id = Self::unescape_reference(&ref_def.id); // Already lowercase from context
240            definitions
241                .entry(normalized_id)
242                .or_default()
243                .push((ref_def.line - 1, ref_def.line - 1)); // Convert to 0-indexed
244        }
245
246        // Handle multi-line definitions that might not be fully captured by ctx.reference_defs
247        let lines = &ctx.lines;
248        let mut i = 0;
249        while i < lines.len() {
250            let line_info = &lines[i];
251            let line = line_info.content(ctx.content);
252
253            // Skip code blocks and front matter using line info
254            if line_info.in_code_block || line_info.in_front_matter {
255                i += 1;
256                continue;
257            }
258
259            // Check for multi-line continuation of existing definitions
260            if i > 0 && CONTINUATION_REGEX.is_match(line) {
261                // Find the reference definition this continues
262                let mut def_start = i - 1;
263                while def_start > 0 && !REFERENCE_DEFINITION_REGEX.is_match(lines[def_start].content(ctx.content)) {
264                    def_start -= 1;
265                }
266
267                if let Some(caps) = REFERENCE_DEFINITION_REGEX.captures(lines[def_start].content(ctx.content)) {
268                    let ref_id = caps.get(1).unwrap().as_str().trim();
269                    let normalized_id = Self::unescape_reference(ref_id).to_lowercase();
270
271                    // Update the end line for this definition
272                    if let Some(ranges) = definitions.get_mut(&normalized_id)
273                        && let Some(last_range) = ranges.last_mut()
274                        && last_range.0 == def_start
275                    {
276                        last_range.1 = i;
277                    }
278                }
279            }
280            i += 1;
281        }
282        definitions
283    }
284
285    /// Find all link and image reference reference usages in the content.
286    ///
287    /// This method returns a HashSet of all normalized reference IDs found in usage.
288    /// It leverages cached data from LintContext for efficiency.
289    fn find_usages(&self, ctx: &crate::lint_context::LintContext) -> HashSet<String> {
290        let mut usages: HashSet<String> = HashSet::new();
291
292        // 1. Add usages from cached reference links in LintContext
293        for link in &ctx.links {
294            if link.is_reference
295                && let Some(ref_id) = &link.reference_id
296            {
297                // Ensure the link itself is not inside a code block line
298                if !ctx.line_info(link.line).is_some_and(|info| info.in_code_block) {
299                    usages.insert(Self::unescape_reference(ref_id).to_lowercase());
300                }
301            }
302        }
303
304        // 2. Add usages from cached reference images in LintContext
305        for image in &ctx.images {
306            if image.is_reference
307                && let Some(ref_id) = &image.reference_id
308            {
309                // Ensure the image itself is not inside a code block line
310                if !ctx.line_info(image.line).is_some_and(|info| info.in_code_block) {
311                    usages.insert(Self::unescape_reference(ref_id).to_lowercase());
312                }
313            }
314        }
315
316        // 3. Add usages from footnote references (e.g., [^1], [^note])
317        // pulldown-cmark returns the id without the ^ prefix, but definitions have it
318        for footnote_ref in &ctx.footnote_refs {
319            // Ensure the footnote reference is not inside a code block line
320            if !ctx.line_info(footnote_ref.line).is_some_and(|info| info.in_code_block) {
321                // Add ^ prefix to match definition format
322                let ref_id = format!("^{}", footnote_ref.id);
323                usages.insert(ref_id.to_lowercase());
324            }
325        }
326
327        // 4. Find shortcut references [ref] not already handled by DocumentStructure.links
328        //    and ensure they are not within code spans or code blocks.
329        // Cache code spans once before the loop
330        let code_spans = ctx.code_spans();
331
332        for line_info in ctx.lines.iter() {
333            // Skip lines in code blocks or front matter
334            if line_info.in_code_block || line_info.in_front_matter {
335                continue;
336            }
337
338            // Skip lines that are reference definitions (start with [ref]: at beginning)
339            if REFERENCE_DEFINITION_REGEX.is_match(line_info.content(ctx.content)) {
340                continue;
341            }
342
343            // Find potential shortcut references
344            for caps in SHORTCUT_REFERENCE_REGEX
345                .captures_iter(line_info.content(ctx.content))
346                .flatten()
347            {
348                if let Some(full_match) = caps.get(0)
349                    && let Some(ref_id_match) = caps.get(1)
350                {
351                    // Check if the match is within a code span
352                    let match_byte_offset = line_info.byte_offset + full_match.start();
353                    let in_code_span = code_spans
354                        .iter()
355                        .any(|span| match_byte_offset >= span.byte_offset && match_byte_offset < span.byte_end);
356
357                    if !in_code_span {
358                        let ref_id = ref_id_match.as_str().trim();
359
360                        if !Self::should_skip_pattern(ref_id) {
361                            let normalized_id = Self::unescape_reference(ref_id).to_lowercase();
362                            usages.insert(normalized_id);
363                        }
364                    }
365                }
366            }
367        }
368
369        // NOTE: The complex recursive loop trying to find references within definitions
370        // has been removed as it's not standard Markdown behavior for finding *usages*.
371        // Usages refer to `[text][ref]`, `![alt][ref]`, `[ref]`, etc., in the main content,
372        // not references potentially embedded within the URL or title of another definition.
373
374        usages
375    }
376
377    /// Get unused references with their line ranges.
378    ///
379    /// This method uses the cached definitions to improve performance.
380    ///
381    /// Note: References that are only used inside code blocks are still considered unused,
382    /// as code blocks are treated as examples or documentation rather than actual content.
383    fn get_unused_references(
384        &self,
385        definitions: &HashMap<String, Vec<(usize, usize)>>,
386        usages: &HashSet<String>,
387    ) -> Vec<(String, usize, usize)> {
388        let mut unused = Vec::new();
389        for (id, ranges) in definitions {
390            // If this id is not used anywhere and is not in the ignored list
391            if !usages.contains(id) && !self.is_ignored_definition(id) {
392                // Only report as unused if there's exactly one definition
393                // Multiple definitions are already reported as duplicates
394                if ranges.len() == 1 {
395                    let (start, end) = ranges[0];
396                    unused.push((id.clone(), start, end));
397                }
398                // If there are multiple definitions (duplicates), don't report them as unused
399                // They're already being reported as duplicate definitions
400            }
401        }
402        unused
403    }
404
405    /// Check if a definition should be ignored (kept even if unused)
406    fn is_ignored_definition(&self, definition_id: &str) -> bool {
407        self.config
408            .ignored_definitions
409            .iter()
410            .any(|ignored| ignored.eq_ignore_ascii_case(definition_id))
411    }
412}
413
414impl Default for MD053LinkImageReferenceDefinitions {
415    fn default() -> Self {
416        Self::new()
417    }
418}
419
420impl Rule for MD053LinkImageReferenceDefinitions {
421    fn name(&self) -> &'static str {
422        "MD053"
423    }
424
425    fn description(&self) -> &'static str {
426        "Link and image reference definitions should be needed"
427    }
428
429    /// Check the content for unused and duplicate link/image reference definitions.
430    ///
431    /// This implementation uses caching for improved performance on large documents.
432    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
433        // Find definitions and usages using LintContext
434        let definitions = self.find_definitions(ctx);
435        let usages = self.find_usages(ctx);
436
437        // Get unused references by comparing definitions and usages
438        let unused_refs = self.get_unused_references(&definitions, &usages);
439
440        let mut warnings = Vec::new();
441
442        // Check for duplicate definitions (case-insensitive per CommonMark spec)
443        let mut seen_definitions: HashMap<String, (String, usize)> = HashMap::new(); // lowercase -> (original, first_line)
444
445        for (definition_id, ranges) in &definitions {
446            // Skip ignored definitions for duplicate checking
447            if self.is_ignored_definition(definition_id) {
448                continue;
449            }
450
451            if ranges.len() > 1 {
452                // Multiple definitions with exact same ID (already lowercase)
453                for (i, &(start_line, _)) in ranges.iter().enumerate() {
454                    if i > 0 {
455                        // Skip the first occurrence, report all others
456                        let line_num = start_line + 1;
457                        let line_content = ctx.lines.get(start_line).map(|l| l.content(ctx.content)).unwrap_or("");
458                        let (start_line_1idx, start_col, end_line, end_col) =
459                            calculate_line_range(line_num, line_content);
460
461                        warnings.push(LintWarning {
462                            rule_name: Some(self.name().to_string()),
463                            line: start_line_1idx,
464                            column: start_col,
465                            end_line,
466                            end_column: end_col,
467                            message: format!("Duplicate link or image reference definition: [{definition_id}]"),
468                            severity: Severity::Warning,
469                            fix: None,
470                        });
471                    }
472                }
473            }
474
475            // Track for case-variant duplicates
476            if let Some(&(start_line, _)) = ranges.first() {
477                // Find the original case version from the line
478                if let Some(line_info) = ctx.lines.get(start_line)
479                    && let Some(caps) = REFERENCE_DEFINITION_REGEX.captures(line_info.content(ctx.content))
480                {
481                    let original_id = caps.get(1).unwrap().as_str().trim();
482                    let lower_id = original_id.to_lowercase();
483
484                    if let Some((first_original, first_line)) = seen_definitions.get(&lower_id) {
485                        // Found a case-variant duplicate
486                        if first_original != original_id {
487                            let line_num = start_line + 1;
488                            let line_content = line_info.content(ctx.content);
489                            let (start_line_1idx, start_col, end_line, end_col) =
490                                calculate_line_range(line_num, line_content);
491
492                            warnings.push(LintWarning {
493                                    rule_name: Some(self.name().to_string()),
494                                    line: start_line_1idx,
495                                    column: start_col,
496                                    end_line,
497                                    end_column: end_col,
498                                    message: format!("Duplicate link or image reference definition: [{}] (conflicts with [{}] on line {})",
499                                                   original_id, first_original, first_line + 1),
500                                    severity: Severity::Warning,
501                                    fix: None,
502                                });
503                        }
504                    } else {
505                        seen_definitions.insert(lower_id, (original_id.to_string(), start_line));
506                    }
507                }
508            }
509        }
510
511        // Create warnings for unused references
512        for (definition, start, _end) in unused_refs {
513            let line_num = start + 1; // 1-indexed line numbers
514            let line_content = ctx.lines.get(start).map(|l| l.content(ctx.content)).unwrap_or("");
515
516            // Calculate precise character range for the entire reference definition line
517            let (start_line, start_col, end_line, end_col) = calculate_line_range(line_num, line_content);
518
519            warnings.push(LintWarning {
520                rule_name: Some(self.name().to_string()),
521                line: start_line,
522                column: start_col,
523                end_line,
524                end_column: end_col,
525                message: format!("Unused link/image reference: [{definition}]"),
526                severity: Severity::Warning,
527                fix: None, // MD053 is warning-only, no automatic fixes
528            });
529        }
530
531        Ok(warnings)
532    }
533
534    /// MD053 does not provide automatic fixes
535    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
536        // This rule is warning-only, no automatic fixes provided
537        Ok(ctx.content.to_string())
538    }
539
540    /// Check if this rule should be skipped for performance
541    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
542        // Skip if content is empty or has no links/images
543        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
544    }
545
546    fn as_any(&self) -> &dyn std::any::Any {
547        self
548    }
549
550    fn default_config_section(&self) -> Option<(String, toml::Value)> {
551        let default_config = MD053Config::default();
552        let json_value = serde_json::to_value(&default_config).ok()?;
553        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
554        if let toml::Value::Table(table) = toml_value {
555            if !table.is_empty() {
556                Some((MD053Config::RULE_NAME.to_string(), toml::Value::Table(table)))
557            } else {
558                None
559            }
560        } else {
561            None
562        }
563    }
564
565    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
566    where
567        Self: Sized,
568    {
569        let rule_config = crate::rule_config_serde::load_rule_config::<MD053Config>(config);
570        Box::new(MD053LinkImageReferenceDefinitions::from_config_struct(rule_config))
571    }
572}
573
574#[cfg(test)]
575mod tests {
576    use super::*;
577    use crate::lint_context::LintContext;
578
579    #[test]
580    fn test_used_reference_link() {
581        let rule = MD053LinkImageReferenceDefinitions::new();
582        let content = "[text][ref]\n\n[ref]: https://example.com";
583        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
584        let result = rule.check(&ctx).unwrap();
585
586        assert_eq!(result.len(), 0);
587    }
588
589    #[test]
590    fn test_unused_reference_definition() {
591        let rule = MD053LinkImageReferenceDefinitions::new();
592        let content = "[unused]: https://example.com";
593        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
594        let result = rule.check(&ctx).unwrap();
595
596        assert_eq!(result.len(), 1);
597        assert!(result[0].message.contains("Unused link/image reference: [unused]"));
598    }
599
600    #[test]
601    fn test_used_reference_image() {
602        let rule = MD053LinkImageReferenceDefinitions::new();
603        let content = "![alt][img]\n\n[img]: image.jpg";
604        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
605        let result = rule.check(&ctx).unwrap();
606
607        assert_eq!(result.len(), 0);
608    }
609
610    #[test]
611    fn test_case_insensitive_matching() {
612        let rule = MD053LinkImageReferenceDefinitions::new();
613        let content = "[Text][REF]\n\n[ref]: https://example.com";
614        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
615        let result = rule.check(&ctx).unwrap();
616
617        assert_eq!(result.len(), 0);
618    }
619
620    #[test]
621    fn test_shortcut_reference() {
622        let rule = MD053LinkImageReferenceDefinitions::new();
623        let content = "[ref]\n\n[ref]: https://example.com";
624        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
625        let result = rule.check(&ctx).unwrap();
626
627        assert_eq!(result.len(), 0);
628    }
629
630    #[test]
631    fn test_collapsed_reference() {
632        let rule = MD053LinkImageReferenceDefinitions::new();
633        let content = "[ref][]\n\n[ref]: https://example.com";
634        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
635        let result = rule.check(&ctx).unwrap();
636
637        assert_eq!(result.len(), 0);
638    }
639
640    #[test]
641    fn test_multiple_unused_definitions() {
642        let rule = MD053LinkImageReferenceDefinitions::new();
643        let content = "[unused1]: url1\n[unused2]: url2\n[unused3]: url3";
644        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
645        let result = rule.check(&ctx).unwrap();
646
647        assert_eq!(result.len(), 3);
648
649        // The warnings might not be in the same order, so collect all messages
650        let messages: Vec<String> = result.iter().map(|w| w.message.clone()).collect();
651        assert!(messages.iter().any(|m| m.contains("unused1")));
652        assert!(messages.iter().any(|m| m.contains("unused2")));
653        assert!(messages.iter().any(|m| m.contains("unused3")));
654    }
655
656    #[test]
657    fn test_mixed_used_and_unused() {
658        let rule = MD053LinkImageReferenceDefinitions::new();
659        let content = "[used]\n\n[used]: url1\n[unused]: url2";
660        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
661        let result = rule.check(&ctx).unwrap();
662
663        assert_eq!(result.len(), 1);
664        assert!(result[0].message.contains("unused"));
665    }
666
667    #[test]
668    fn test_multiline_definition() {
669        let rule = MD053LinkImageReferenceDefinitions::new();
670        let content = "[ref]: https://example.com\n  \"Title on next line\"";
671        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
672        let result = rule.check(&ctx).unwrap();
673
674        assert_eq!(result.len(), 1); // Still unused
675    }
676
677    #[test]
678    fn test_reference_in_code_block() {
679        let rule = MD053LinkImageReferenceDefinitions::new();
680        let content = "```\n[ref]\n```\n\n[ref]: https://example.com";
681        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
682        let result = rule.check(&ctx).unwrap();
683
684        // Reference used only in code block is still considered unused
685        assert_eq!(result.len(), 1);
686    }
687
688    #[test]
689    fn test_reference_in_inline_code() {
690        let rule = MD053LinkImageReferenceDefinitions::new();
691        let content = "`[ref]`\n\n[ref]: https://example.com";
692        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
693        let result = rule.check(&ctx).unwrap();
694
695        // Reference in inline code is not a usage
696        assert_eq!(result.len(), 1);
697    }
698
699    #[test]
700    fn test_escaped_reference() {
701        let rule = MD053LinkImageReferenceDefinitions::new();
702        let content = "[example\\-ref]\n\n[example-ref]: https://example.com";
703        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
704        let result = rule.check(&ctx).unwrap();
705
706        // Should match despite escaping
707        assert_eq!(result.len(), 0);
708    }
709
710    #[test]
711    fn test_duplicate_definitions() {
712        let rule = MD053LinkImageReferenceDefinitions::new();
713        let content = "[ref]: url1\n[ref]: url2\n\n[ref]";
714        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
715        let result = rule.check(&ctx).unwrap();
716
717        // Should flag the duplicate definition even though it's used (matches markdownlint)
718        assert_eq!(result.len(), 1);
719    }
720
721    #[test]
722    fn test_fix_returns_original() {
723        // MD053 is warning-only, fix should return original content
724        let rule = MD053LinkImageReferenceDefinitions::new();
725        let content = "[used]\n\n[used]: url1\n[unused]: url2\n\nMore content";
726        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
727        let fixed = rule.fix(&ctx).unwrap();
728
729        assert_eq!(fixed, content);
730    }
731
732    #[test]
733    fn test_fix_preserves_content() {
734        // MD053 is warning-only, fix should preserve all content
735        let rule = MD053LinkImageReferenceDefinitions::new();
736        let content = "Content\n\n[unused]: url\n\nMore content";
737        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
738        let fixed = rule.fix(&ctx).unwrap();
739
740        assert_eq!(fixed, content);
741    }
742
743    #[test]
744    fn test_fix_does_not_remove() {
745        // MD053 is warning-only, fix should not remove anything
746        let rule = MD053LinkImageReferenceDefinitions::new();
747        let content = "[unused1]: url1\n[unused2]: url2\n[unused3]: url3";
748        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
749        let fixed = rule.fix(&ctx).unwrap();
750
751        assert_eq!(fixed, content);
752    }
753
754    #[test]
755    fn test_special_characters_in_reference() {
756        let rule = MD053LinkImageReferenceDefinitions::new();
757        let content = "[ref-with_special.chars]\n\n[ref-with_special.chars]: url";
758        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
759        let result = rule.check(&ctx).unwrap();
760
761        assert_eq!(result.len(), 0);
762    }
763
764    #[test]
765    fn test_find_definitions() {
766        let rule = MD053LinkImageReferenceDefinitions::new();
767        let content = "[ref1]: url1\n[ref2]: url2\nSome text\n[ref3]: url3";
768        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
769        let defs = rule.find_definitions(&ctx);
770
771        assert_eq!(defs.len(), 3);
772        assert!(defs.contains_key("ref1"));
773        assert!(defs.contains_key("ref2"));
774        assert!(defs.contains_key("ref3"));
775    }
776
777    #[test]
778    fn test_find_usages() {
779        let rule = MD053LinkImageReferenceDefinitions::new();
780        let content = "[text][ref1] and [ref2] and ![img][ref3]";
781        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
782        let usages = rule.find_usages(&ctx);
783
784        assert!(usages.contains("ref1"));
785        assert!(usages.contains("ref2"));
786        assert!(usages.contains("ref3"));
787    }
788
789    #[test]
790    fn test_ignored_definitions_config() {
791        // Test with ignored definitions
792        let config = MD053Config {
793            ignored_definitions: vec!["todo".to_string(), "draft".to_string()],
794        };
795        let rule = MD053LinkImageReferenceDefinitions::from_config_struct(config);
796
797        let content = "[todo]: https://example.com/todo\n[draft]: https://example.com/draft\n[unused]: https://example.com/unused";
798        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
799        let result = rule.check(&ctx).unwrap();
800
801        // Should only flag "unused", not "todo" or "draft"
802        assert_eq!(result.len(), 1);
803        assert!(result[0].message.contains("unused"));
804        assert!(!result[0].message.contains("todo"));
805        assert!(!result[0].message.contains("draft"));
806    }
807
808    #[test]
809    fn test_ignored_definitions_case_insensitive() {
810        // Test case-insensitive matching of ignored definitions
811        let config = MD053Config {
812            ignored_definitions: vec!["TODO".to_string()],
813        };
814        let rule = MD053LinkImageReferenceDefinitions::from_config_struct(config);
815
816        let content = "[todo]: https://example.com/todo\n[unused]: https://example.com/unused";
817        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
818        let result = rule.check(&ctx).unwrap();
819
820        // Should only flag "unused", not "todo" (matches "TODO" case-insensitively)
821        assert_eq!(result.len(), 1);
822        assert!(result[0].message.contains("unused"));
823        assert!(!result[0].message.contains("todo"));
824    }
825
826    #[test]
827    fn test_default_config_section() {
828        let rule = MD053LinkImageReferenceDefinitions::default();
829        let config_section = rule.default_config_section();
830
831        assert!(config_section.is_some());
832        let (name, value) = config_section.unwrap();
833        assert_eq!(name, "MD053");
834
835        // Should contain the ignored_definitions option with default empty array
836        if let toml::Value::Table(table) = value {
837            assert!(table.contains_key("ignored-definitions"));
838            assert_eq!(table["ignored-definitions"], toml::Value::Array(vec![]));
839        } else {
840            panic!("Expected TOML table");
841        }
842    }
843
844    #[test]
845    fn test_fix_with_ignored_definitions() {
846        // MD053 is warning-only, fix should not remove anything even with ignored definitions
847        let config = MD053Config {
848            ignored_definitions: vec!["template".to_string()],
849        };
850        let rule = MD053LinkImageReferenceDefinitions::from_config_struct(config);
851
852        let content = "[template]: https://example.com/template\n[unused]: https://example.com/unused\n\nSome content.";
853        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
854        let fixed = rule.fix(&ctx).unwrap();
855
856        // Should keep everything since MD053 doesn't fix
857        assert_eq!(fixed, content);
858    }
859
860    #[test]
861    fn test_duplicate_definitions_exact_case() {
862        let rule = MD053LinkImageReferenceDefinitions::new();
863        let content = "[ref]: url1\n[ref]: url2\n[ref]: url3";
864        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
865        let result = rule.check(&ctx).unwrap();
866
867        // Should have 2 duplicate warnings (for the 2nd and 3rd definitions)
868        // Plus 1 unused warning
869        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
870        assert_eq!(duplicate_warnings.len(), 2);
871        assert_eq!(duplicate_warnings[0].line, 2);
872        assert_eq!(duplicate_warnings[1].line, 3);
873    }
874
875    #[test]
876    fn test_duplicate_definitions_case_variants() {
877        let rule = MD053LinkImageReferenceDefinitions::new();
878        let content =
879            "[method resolution order]: url1\n[Method Resolution Order]: url2\n[METHOD RESOLUTION ORDER]: url3";
880        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
881        let result = rule.check(&ctx).unwrap();
882
883        // Should have 2 duplicate warnings (for the 2nd and 3rd definitions)
884        // Note: These are treated as exact duplicates since they normalize to the same ID
885        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
886        assert_eq!(duplicate_warnings.len(), 2);
887
888        // The exact duplicate messages don't include "conflicts with"
889        // Only case-variant duplicates with different normalized forms would
890        assert_eq!(duplicate_warnings[0].line, 2);
891        assert_eq!(duplicate_warnings[1].line, 3);
892    }
893
894    #[test]
895    fn test_duplicate_and_unused() {
896        let rule = MD053LinkImageReferenceDefinitions::new();
897        let content = "[used]\n[used]: url1\n[used]: url2\n[unused]: url3";
898        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
899        let result = rule.check(&ctx).unwrap();
900
901        // Should have 1 duplicate warning and 1 unused warning
902        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
903        let unused_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Unused")).collect();
904
905        assert_eq!(duplicate_warnings.len(), 1);
906        assert_eq!(unused_warnings.len(), 1);
907        assert_eq!(duplicate_warnings[0].line, 3); // Second [used] definition
908        assert_eq!(unused_warnings[0].line, 4); // [unused] definition
909    }
910
911    #[test]
912    fn test_duplicate_with_usage() {
913        let rule = MD053LinkImageReferenceDefinitions::new();
914        // Even if used, duplicates should still be reported
915        let content = "[ref]\n\n[ref]: url1\n[ref]: url2";
916        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
917        let result = rule.check(&ctx).unwrap();
918
919        // Should have 1 duplicate warning (no unused since it's referenced)
920        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
921        let unused_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Unused")).collect();
922
923        assert_eq!(duplicate_warnings.len(), 1);
924        assert_eq!(unused_warnings.len(), 0);
925        assert_eq!(duplicate_warnings[0].line, 4);
926    }
927
928    #[test]
929    fn test_no_duplicate_different_ids() {
930        let rule = MD053LinkImageReferenceDefinitions::new();
931        let content = "[ref1]: url1\n[ref2]: url2\n[ref3]: url3";
932        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
933        let result = rule.check(&ctx).unwrap();
934
935        // Should have no duplicate warnings, only unused warnings
936        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
937        assert_eq!(duplicate_warnings.len(), 0);
938    }
939
940    #[test]
941    fn test_comment_style_reference_double_slash() {
942        let rule = MD053LinkImageReferenceDefinitions::new();
943        // Most popular comment pattern: [//]: # (comment)
944        let content = "[//]: # (This is a comment)\n\nSome regular text.";
945        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
946        let result = rule.check(&ctx).unwrap();
947
948        // Should not report as unused - it's recognized as a comment
949        assert_eq!(result.len(), 0, "Comment-style reference [//]: # should not be flagged");
950    }
951
952    #[test]
953    fn test_comment_style_reference_comment_label() {
954        let rule = MD053LinkImageReferenceDefinitions::new();
955        // Semantic comment pattern: [comment]: # (text)
956        let content = "[comment]: # (This is a semantic comment)\n\n[note]: # (This is a note)";
957        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
958        let result = rule.check(&ctx).unwrap();
959
960        // Should not report either as unused
961        assert_eq!(result.len(), 0, "Comment-style references should not be flagged");
962    }
963
964    #[test]
965    fn test_comment_style_reference_todo_fixme() {
966        let rule = MD053LinkImageReferenceDefinitions::new();
967        // Task tracking patterns: [todo]: # and [fixme]: #
968        let content = "[todo]: # (Add more examples)\n[fixme]: # (Fix this later)\n[hack]: # (Temporary workaround)";
969        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
970        let result = rule.check(&ctx).unwrap();
971
972        // Should not report any as unused
973        assert_eq!(result.len(), 0, "TODO/FIXME comment patterns should not be flagged");
974    }
975
976    #[test]
977    fn test_comment_style_reference_fragment_only() {
978        let rule = MD053LinkImageReferenceDefinitions::new();
979        // Any reference with just "#" as URL should be treated as a comment
980        let content = "[anything]: #\n[ref]: #\n\nSome text.";
981        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
982        let result = rule.check(&ctx).unwrap();
983
984        // Should not report as unused - fragment-only URLs are often comments
985        assert_eq!(result.len(), 0, "References with just '#' URL should not be flagged");
986    }
987
988    #[test]
989    fn test_comment_vs_real_reference() {
990        let rule = MD053LinkImageReferenceDefinitions::new();
991        // Mix of comment and real reference - only real one should be flagged if unused
992        let content = "[//]: # (This is a comment)\n[real-ref]: https://example.com\n\nSome text.";
993        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
994        let result = rule.check(&ctx).unwrap();
995
996        // Should only report the real reference as unused
997        assert_eq!(result.len(), 1, "Only real unused references should be flagged");
998        assert!(result[0].message.contains("real-ref"), "Should flag the real reference");
999    }
1000
1001    #[test]
1002    fn test_comment_with_fragment_section() {
1003        let rule = MD053LinkImageReferenceDefinitions::new();
1004        // Comment pattern with a fragment section (still a comment)
1005        let content = "[//]: #section (Comment about section)\n\nSome text.";
1006        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1007        let result = rule.check(&ctx).unwrap();
1008
1009        // Should not report as unused - it's still a comment pattern
1010        assert_eq!(result.len(), 0, "Comment with fragment section should not be flagged");
1011    }
1012
1013    #[test]
1014    fn test_is_likely_comment_reference_helper() {
1015        // Test the helper function directly
1016        assert!(
1017            MD053LinkImageReferenceDefinitions::is_likely_comment_reference("//", "#"),
1018            "[//]: # should be recognized as comment"
1019        );
1020        assert!(
1021            MD053LinkImageReferenceDefinitions::is_likely_comment_reference("comment", "#section"),
1022            "[comment]: #section should be recognized as comment"
1023        );
1024        assert!(
1025            MD053LinkImageReferenceDefinitions::is_likely_comment_reference("note", "#"),
1026            "[note]: # should be recognized as comment"
1027        );
1028        assert!(
1029            MD053LinkImageReferenceDefinitions::is_likely_comment_reference("todo", "#"),
1030            "[todo]: # should be recognized as comment"
1031        );
1032        assert!(
1033            MD053LinkImageReferenceDefinitions::is_likely_comment_reference("anything", "#"),
1034            "Any label with just '#' should be recognized as comment"
1035        );
1036        assert!(
1037            !MD053LinkImageReferenceDefinitions::is_likely_comment_reference("ref", "https://example.com"),
1038            "Real URL should not be recognized as comment"
1039        );
1040        assert!(
1041            !MD053LinkImageReferenceDefinitions::is_likely_comment_reference("link", "http://test.com"),
1042            "Real URL should not be recognized as comment"
1043        );
1044    }
1045}