Skip to main content

rumdl_lib/rules/
md053_link_image_reference_definitions.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::rule_config_serde::RuleConfig;
3use crate::utils::range_utils::calculate_line_range;
4use fancy_regex::Regex as FancyRegex;
5use regex::Regex;
6use serde::{Deserialize, Serialize};
7use std::collections::{HashMap, HashSet};
8use std::sync::LazyLock;
9
10// Shortcut reference links: [reference] - must not be followed by another bracket
11// Allow references followed by punctuation like colon, period, comma (e.g., "[reference]:", "[reference].")
12// Don't exclude references followed by ": " in the middle of a line (only at start of line)
13static SHORTCUT_REFERENCE_REGEX: LazyLock<FancyRegex> =
14    LazyLock::new(|| FancyRegex::new(r"(?<!\!)\[([^\]]+)\](?!\[)").unwrap());
15
16// Link/image reference definition format: [reference]: URL
17static REFERENCE_DEFINITION_REGEX: LazyLock<Regex> =
18    LazyLock::new(|| Regex::new(r"^\s*\[([^\]]+)\]:\s+(.+)$").unwrap());
19
20// Multi-line reference definition continuation pattern
21static CONTINUATION_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s+(.+)$").unwrap());
22
23/// Configuration for MD053 rule
24#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
25#[serde(rename_all = "kebab-case")]
26pub struct MD053Config {
27    /// List of reference names to keep even if unused
28    #[serde(default = "default_ignored_definitions")]
29    pub ignored_definitions: Vec<String>,
30}
31
32impl Default for MD053Config {
33    fn default() -> Self {
34        Self {
35            ignored_definitions: default_ignored_definitions(),
36        }
37    }
38}
39
/// Serde default for `MD053Config::ignored_definitions`: an empty list.
fn default_ignored_definitions() -> Vec<String> {
    Vec::default()
}
43
44impl RuleConfig for MD053Config {
45    const RULE_NAME: &'static str = "MD053";
46}
47
48/// Rule MD053: Link and image reference definitions should be used
49///
50/// See [docs/md053.md](../../docs/md053.md) for full documentation, configuration, and examples.
51///
52/// This rule is triggered when a link or image reference definition is declared but not used
53/// anywhere in the document. Unused reference definitions can create confusion and clutter.
54///
55/// ## Supported Reference Formats
56///
57/// This rule handles the following reference formats:
58///
59/// - **Full reference links/images**: `[text][reference]` or `![text][reference]`
60/// - **Collapsed reference links/images**: `[text][]` or `![text][]`
61/// - **Shortcut reference links**: `[reference]` (must be defined elsewhere)
62/// - **Reference definitions**: `[reference]: URL "Optional Title"`
63/// - **Multi-line reference definitions**:
64///   ```markdown
65///   [reference]: URL
66///      "Optional title continued on next line"
67///   ```
68///
69/// ## Configuration Options
70///
71/// The rule supports the following configuration options:
72///
73/// ```yaml
74/// MD053:
75///   ignored_definitions: []  # List of reference definitions to ignore (never report as unused)
76/// ```
77///
78/// ## Performance Optimizations
79///
80/// This rule implements various performance optimizations for handling large documents:
81///
82/// 1. **Caching**: The rule caches parsed definitions and references based on content hashing
83/// 2. **Efficient Reference Matching**: Uses HashMaps for O(1) lookups of definitions
84/// 3. **Smart Code Block Handling**: Efficiently skips references inside code blocks/spans
85/// 4. **Lazy Evaluation**: Only processes necessary portions of the document
86///
87/// ## Edge Cases Handled
88///
89/// - **Case insensitivity**: References are matched case-insensitively
90/// - **Escaped characters**: Properly processes escaped characters in references
91/// - **Unicode support**: Handles non-ASCII characters in references and URLs
92/// - **Code blocks**: Ignores references inside code blocks and spans
93/// - **Special characters**: Properly handles references with special characters
94///
95/// ## Fix Behavior
96///
97/// This rule does not provide automatic fixes. Unused references must be manually reviewed
98/// and removed, as they may be intentionally kept for future use or as templates.
99#[derive(Clone)]
100pub struct MD053LinkImageReferenceDefinitions {
101    config: MD053Config,
102}
103
104impl MD053LinkImageReferenceDefinitions {
105    /// Create a new instance of the MD053 rule
106    pub fn new() -> Self {
107        Self {
108            config: MD053Config::default(),
109        }
110    }
111
112    /// Create a new instance with the given configuration
113    pub fn from_config_struct(config: MD053Config) -> Self {
114        Self { config }
115    }
116
117    /// Returns true if this pattern should be skipped during reference detection
118    fn should_skip_pattern(text: &str) -> bool {
119        // Don't skip pure numeric patterns - they could be footnote references like [1]
120        // Only skip numeric ranges like [1:3], [0:10], etc.
121        if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
122            return true;
123        }
124
125        // Skip glob/wildcard patterns like [*], [...], [**]
126        if text == "*" || text == "..." || text == "**" {
127            return true;
128        }
129
130        // Skip patterns that are just punctuation or operators
131        if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
132            return true;
133        }
134
135        // Skip very short non-word patterns (likely operators or syntax)
136        // But allow single digits (could be footnotes) and single letters
137        if text.len() <= 2 && !text.chars().all(|c| c.is_alphanumeric()) {
138            return true;
139        }
140
141        // Skip descriptive prose patterns with colon like [default: the project root]
142        // But allow reference-style patterns like [RFC: 1234], [Issue: 42], [See: Section 2]
143        // These are distinguished by having a short prefix (typically 1-2 words) before the colon
144        if text.contains(':') && text.contains(' ') && !text.contains('`') {
145            // Check if this looks like a reference pattern (short prefix before colon)
146            // vs a prose description (longer text before colon)
147            if let Some((before_colon, _)) = text.split_once(':') {
148                let before_trimmed = before_colon.trim();
149                // Count words before colon - references typically have 1-2 words
150                let word_count = before_trimmed.split_whitespace().count();
151                // If there are 3+ words before the colon, it's likely prose
152                if word_count >= 3 {
153                    return true;
154                }
155            }
156        }
157
158        // Skip alert/admonition patterns like [!WARN], [!NOTE], etc.
159        if text.starts_with('!') {
160            return true;
161        }
162
163        // Note: We don't filter out patterns with backticks because backticks in reference names
164        // are valid markdown syntax, e.g., [`dataclasses.InitVar`] is a valid reference name
165
166        // Also don't filter out references with dots - these are legitimate reference names
167        // like [tool.ruff] or [os.path] which are valid markdown references
168
169        // Note: We don't filter based on word count anymore because legitimate references
170        // can have many words, like "python language reference for import statements"
171        // Word count filtering was causing false positives where valid references were
172        // being incorrectly flagged as unused
173
174        false
175    }
176
177    /// Unescape a reference string by removing backslashes before special characters.
178    ///
179    /// This allows matching references like `[example\-reference]` with definitions like
180    /// `[example-reference]: http://example.com`
181    ///
182    /// Returns the unescaped reference string.
183    fn unescape_reference(reference: &str) -> String {
184        // Remove backslashes before special characters
185        reference.replace("\\", "")
186    }
187
188    /// Check if a reference definition is likely a comment-style reference.
189    ///
190    /// This recognizes common community patterns for comments in markdown:
191    /// - `[//]: # (comment)` - Most popular pattern
192    /// - `[comment]: # (text)` - Semantic pattern
193    /// - `[note]: # (text)` - Documentation pattern
194    /// - `[todo]: # (text)` - Task tracking pattern
195    /// - Any reference with just `#` as the URL (fragment-only, often unused)
196    ///
197    /// While not part of any official markdown spec (CommonMark, GFM), these patterns
198    /// are widely used across 23+ markdown implementations as documented in the community.
199    ///
200    /// # Arguments
201    /// * `ref_id` - The reference ID (already normalized to lowercase)
202    /// * `url` - The URL from the reference definition
203    ///
204    /// # Returns
205    /// `true` if this looks like a comment-style reference that should be ignored
206    fn is_likely_comment_reference(ref_id: &str, url: &str) -> bool {
207        // Common comment reference labels used in the community
208        const COMMENT_LABELS: &[&str] = &[
209            "//",      // [//]: # (comment) - most popular
210            "comment", // [comment]: # (text)
211            "note",    // [note]: # (text)
212            "todo",    // [todo]: # (text)
213            "fixme",   // [fixme]: # (text)
214            "hack",    // [hack]: # (text)
215        ];
216
217        let normalized_id = ref_id.trim().to_lowercase();
218        let normalized_url = url.trim();
219
220        // Pattern 1: Known comment labels with fragment URLs
221        // e.g., [//]: # (comment), [comment]: #section
222        if COMMENT_LABELS.contains(&normalized_id.as_str()) && normalized_url.starts_with('#') {
223            return true;
224        }
225
226        // Pattern 2: Any reference with just "#" as the URL
227        // This is often used as a comment placeholder or unused anchor
228        if normalized_url == "#" {
229            return true;
230        }
231
232        false
233    }
234
235    /// Find all link and image reference definitions in the content.
236    ///
237    /// This method returns a HashMap where the key is the normalized reference ID and the value is a vector of (start_line, end_line) tuples.
238    fn find_definitions(&self, ctx: &crate::lint_context::LintContext) -> HashMap<String, Vec<(usize, usize)>> {
239        let mut definitions: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
240
241        // First, add all reference definitions from context
242        for ref_def in &ctx.reference_defs {
243            // Skip comment-style references (e.g., [//]: # (comment))
244            if Self::is_likely_comment_reference(&ref_def.id, &ref_def.url) {
245                continue;
246            }
247
248            // Apply unescape to handle escaped characters in definitions
249            let normalized_id = Self::unescape_reference(&ref_def.id); // Already lowercase from context
250            definitions
251                .entry(normalized_id)
252                .or_default()
253                .push((ref_def.line - 1, ref_def.line - 1)); // Convert to 0-indexed
254        }
255
256        // Handle multi-line definitions by tracking the last definition seen
257        let lines = &ctx.lines;
258        let mut last_def_line: Option<usize> = None;
259        let mut last_def_id: Option<String> = None;
260
261        for (i, line_info) in lines.iter().enumerate() {
262            if line_info.in_code_block || line_info.in_front_matter {
263                last_def_line = None;
264                last_def_id = None;
265                continue;
266            }
267
268            let line = line_info.content(ctx.content);
269
270            if let Some(caps) = REFERENCE_DEFINITION_REGEX.captures(line) {
271                // Track this definition for potential continuation
272                let ref_id = caps.get(1).unwrap().as_str().trim();
273                let normalized_id = Self::unescape_reference(ref_id).to_lowercase();
274                last_def_line = Some(i);
275                last_def_id = Some(normalized_id);
276            } else if let Some(def_start) = last_def_line
277                && let Some(ref def_id) = last_def_id
278                && CONTINUATION_REGEX.is_match(line)
279            {
280                // Extend the definition's end line
281                if let Some(ranges) = definitions.get_mut(def_id.as_str())
282                    && let Some(last_range) = ranges.last_mut()
283                    && last_range.0 == def_start
284                {
285                    last_range.1 = i;
286                }
287            } else {
288                // Non-continuation, non-definition line resets tracking
289                last_def_line = None;
290                last_def_id = None;
291            }
292        }
293        definitions
294    }
295
296    /// Find all link and image reference reference usages in the content.
297    ///
298    /// This method returns a HashSet of all normalized reference IDs found in usage.
299    /// It leverages cached data from LintContext for efficiency.
300    fn find_usages(&self, ctx: &crate::lint_context::LintContext) -> HashSet<String> {
301        let mut usages: HashSet<String> = HashSet::new();
302
303        // 1. Add usages from cached reference links in LintContext
304        for link in &ctx.links {
305            if link.is_reference
306                && let Some(ref_id) = &link.reference_id
307                && !ctx.line_info(link.line).is_some_and(|info| info.in_code_block)
308            {
309                usages.insert(Self::unescape_reference(ref_id).to_lowercase());
310            }
311        }
312
313        // 2. Add usages from cached reference images in LintContext
314        for image in &ctx.images {
315            if image.is_reference
316                && let Some(ref_id) = &image.reference_id
317                && !ctx.line_info(image.line).is_some_and(|info| info.in_code_block)
318            {
319                usages.insert(Self::unescape_reference(ref_id).to_lowercase());
320            }
321        }
322
323        // 3. Add usages from footnote references (e.g., [^1], [^note])
324        for footnote_ref in &ctx.footnote_refs {
325            if !ctx.line_info(footnote_ref.line).is_some_and(|info| info.in_code_block) {
326                let ref_id = format!("^{}", footnote_ref.id);
327                usages.insert(ref_id.to_lowercase());
328            }
329        }
330
331        // 4. Find shortcut references [ref] not already handled by DocumentStructure.links
332        //    and ensure they are not within code spans or code blocks.
333        let code_spans = ctx.code_spans();
334
335        // Build sorted array of code span byte ranges for binary search
336        let mut span_ranges: Vec<(usize, usize)> = code_spans
337            .iter()
338            .map(|span| (span.byte_offset, span.byte_end))
339            .collect();
340        span_ranges.sort_unstable_by_key(|&(start, _)| start);
341
342        for line_info in ctx.lines.iter() {
343            if line_info.in_code_block || line_info.in_front_matter {
344                continue;
345            }
346
347            let line_content = line_info.content(ctx.content);
348
349            // Quick check: skip lines without '[' (no possible references)
350            if !line_content.contains('[') {
351                continue;
352            }
353
354            // Skip lines that are reference definitions
355            if REFERENCE_DEFINITION_REGEX.is_match(line_content) {
356                continue;
357            }
358
359            for caps in SHORTCUT_REFERENCE_REGEX.captures_iter(line_content).flatten() {
360                if let Some(full_match) = caps.get(0)
361                    && let Some(ref_id_match) = caps.get(1)
362                {
363                    let match_byte_offset = line_info.byte_offset + full_match.start();
364
365                    // Binary search for code span containment
366                    let in_code_span = span_ranges
367                        .binary_search_by(|&(start, end)| {
368                            if match_byte_offset < start {
369                                std::cmp::Ordering::Greater
370                            } else if match_byte_offset >= end {
371                                std::cmp::Ordering::Less
372                            } else {
373                                std::cmp::Ordering::Equal
374                            }
375                        })
376                        .is_ok();
377
378                    if !in_code_span {
379                        let ref_id = ref_id_match.as_str().trim();
380
381                        if !Self::should_skip_pattern(ref_id) {
382                            let normalized_id = Self::unescape_reference(ref_id).to_lowercase();
383                            usages.insert(normalized_id);
384                        }
385                    }
386                }
387            }
388        }
389
390        usages
391    }
392
393    /// Get unused references with their line ranges.
394    ///
395    /// This method uses the cached definitions to improve performance.
396    ///
397    /// Note: References that are only used inside code blocks are still considered unused,
398    /// as code blocks are treated as examples or documentation rather than actual content.
399    fn get_unused_references(
400        &self,
401        definitions: &HashMap<String, Vec<(usize, usize)>>,
402        usages: &HashSet<String>,
403    ) -> Vec<(String, usize, usize)> {
404        let mut unused = Vec::new();
405        for (id, ranges) in definitions {
406            // If this id is not used anywhere and is not in the ignored list
407            if !usages.contains(id) && !self.is_ignored_definition(id) {
408                // Only report as unused if there's exactly one definition
409                // Multiple definitions are already reported as duplicates
410                if ranges.len() == 1 {
411                    let (start, end) = ranges[0];
412                    unused.push((id.clone(), start, end));
413                }
414                // If there are multiple definitions (duplicates), don't report them as unused
415                // They're already being reported as duplicate definitions
416            }
417        }
418        unused
419    }
420
421    /// Check if a definition should be ignored (kept even if unused)
422    fn is_ignored_definition(&self, definition_id: &str) -> bool {
423        self.config
424            .ignored_definitions
425            .iter()
426            .any(|ignored| ignored.eq_ignore_ascii_case(definition_id))
427    }
428}
429
430impl Default for MD053LinkImageReferenceDefinitions {
431    fn default() -> Self {
432        Self::new()
433    }
434}
435
436impl Rule for MD053LinkImageReferenceDefinitions {
437    fn name(&self) -> &'static str {
438        "MD053"
439    }
440
441    fn description(&self) -> &'static str {
442        "Link and image reference definitions should be needed"
443    }
444
445    /// Check the content for unused and duplicate link/image reference definitions.
446    ///
447    /// This implementation uses caching for improved performance on large documents.
448    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
449        // Find definitions and usages using LintContext
450        let definitions = self.find_definitions(ctx);
451        let usages = self.find_usages(ctx);
452
453        // Get unused references by comparing definitions and usages
454        let unused_refs = self.get_unused_references(&definitions, &usages);
455
456        let mut warnings = Vec::new();
457
458        // Check for duplicate definitions (case-insensitive per CommonMark spec)
459        let mut seen_definitions: HashMap<String, (String, usize)> = HashMap::new(); // lowercase -> (original, first_line)
460
461        for (definition_id, ranges) in &definitions {
462            // Skip ignored definitions for duplicate checking
463            if self.is_ignored_definition(definition_id) {
464                continue;
465            }
466
467            if ranges.len() > 1 {
468                // Multiple definitions with exact same ID (already lowercase)
469                for (i, &(start_line, _)) in ranges.iter().enumerate() {
470                    if i > 0 {
471                        // Skip the first occurrence, report all others
472                        let line_num = start_line + 1;
473                        let line_content = ctx.lines.get(start_line).map(|l| l.content(ctx.content)).unwrap_or("");
474                        let (start_line_1idx, start_col, end_line, end_col) =
475                            calculate_line_range(line_num, line_content);
476
477                        warnings.push(LintWarning {
478                            rule_name: Some(self.name().to_string()),
479                            line: start_line_1idx,
480                            column: start_col,
481                            end_line,
482                            end_column: end_col,
483                            message: format!("Duplicate link or image reference definition: [{definition_id}]"),
484                            severity: Severity::Warning,
485                            fix: None,
486                        });
487                    }
488                }
489            }
490
491            // Track for case-variant duplicates
492            if let Some(&(start_line, _)) = ranges.first() {
493                // Find the original case version from the line
494                if let Some(line_info) = ctx.lines.get(start_line)
495                    && let Some(caps) = REFERENCE_DEFINITION_REGEX.captures(line_info.content(ctx.content))
496                {
497                    let original_id = caps.get(1).unwrap().as_str().trim();
498                    let lower_id = original_id.to_lowercase();
499
500                    if let Some((first_original, first_line)) = seen_definitions.get(&lower_id) {
501                        // Found a case-variant duplicate
502                        if first_original != original_id {
503                            let line_num = start_line + 1;
504                            let line_content = line_info.content(ctx.content);
505                            let (start_line_1idx, start_col, end_line, end_col) =
506                                calculate_line_range(line_num, line_content);
507
508                            warnings.push(LintWarning {
509                                    rule_name: Some(self.name().to_string()),
510                                    line: start_line_1idx,
511                                    column: start_col,
512                                    end_line,
513                                    end_column: end_col,
514                                    message: format!("Duplicate link or image reference definition: [{}] (conflicts with [{}] on line {})",
515                                                   original_id, first_original, first_line + 1),
516                                    severity: Severity::Warning,
517                                    fix: None,
518                                });
519                        }
520                    } else {
521                        seen_definitions.insert(lower_id, (original_id.to_string(), start_line));
522                    }
523                }
524            }
525        }
526
527        // Create warnings for unused references
528        for (definition, start, _end) in unused_refs {
529            let line_num = start + 1; // 1-indexed line numbers
530            let line_content = ctx.lines.get(start).map(|l| l.content(ctx.content)).unwrap_or("");
531
532            // Calculate precise character range for the entire reference definition line
533            let (start_line, start_col, end_line, end_col) = calculate_line_range(line_num, line_content);
534
535            warnings.push(LintWarning {
536                rule_name: Some(self.name().to_string()),
537                line: start_line,
538                column: start_col,
539                end_line,
540                end_column: end_col,
541                message: format!("Unused link/image reference: [{definition}]"),
542                severity: Severity::Warning,
543                fix: None, // MD053 is warning-only, no automatic fixes
544            });
545        }
546
547        Ok(warnings)
548    }
549
550    /// MD053 does not provide automatic fixes
551    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
552        // This rule is warning-only, no automatic fixes provided
553        Ok(ctx.content.to_string())
554    }
555
556    /// Check if this rule should be skipped for performance
557    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
558        // Skip if content is empty or has no links/images
559        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
560    }
561
562    fn as_any(&self) -> &dyn std::any::Any {
563        self
564    }
565
566    fn default_config_section(&self) -> Option<(String, toml::Value)> {
567        let default_config = MD053Config::default();
568        let json_value = serde_json::to_value(&default_config).ok()?;
569        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
570        if let toml::Value::Table(table) = toml_value {
571            if !table.is_empty() {
572                Some((MD053Config::RULE_NAME.to_string(), toml::Value::Table(table)))
573            } else {
574                None
575            }
576        } else {
577            None
578        }
579    }
580
581    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
582    where
583        Self: Sized,
584    {
585        let rule_config = crate::rule_config_serde::load_rule_config::<MD053Config>(config);
586        Box::new(MD053LinkImageReferenceDefinitions::from_config_struct(rule_config))
587    }
588}
589
590#[cfg(test)]
591mod tests {
592    use super::*;
593    use crate::lint_context::LintContext;
594
595    #[test]
596    fn test_used_reference_link() {
597        let rule = MD053LinkImageReferenceDefinitions::new();
598        let content = "[text][ref]\n\n[ref]: https://example.com";
599        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
600        let result = rule.check(&ctx).unwrap();
601
602        assert_eq!(result.len(), 0);
603    }
604
605    #[test]
606    fn test_unused_reference_definition() {
607        let rule = MD053LinkImageReferenceDefinitions::new();
608        let content = "[unused]: https://example.com";
609        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
610        let result = rule.check(&ctx).unwrap();
611
612        assert_eq!(result.len(), 1);
613        assert!(result[0].message.contains("Unused link/image reference: [unused]"));
614    }
615
616    #[test]
617    fn test_used_reference_image() {
618        let rule = MD053LinkImageReferenceDefinitions::new();
619        let content = "![alt][img]\n\n[img]: image.jpg";
620        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
621        let result = rule.check(&ctx).unwrap();
622
623        assert_eq!(result.len(), 0);
624    }
625
626    #[test]
627    fn test_case_insensitive_matching() {
628        let rule = MD053LinkImageReferenceDefinitions::new();
629        let content = "[Text][REF]\n\n[ref]: https://example.com";
630        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
631        let result = rule.check(&ctx).unwrap();
632
633        assert_eq!(result.len(), 0);
634    }
635
636    #[test]
637    fn test_shortcut_reference() {
638        let rule = MD053LinkImageReferenceDefinitions::new();
639        let content = "[ref]\n\n[ref]: https://example.com";
640        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
641        let result = rule.check(&ctx).unwrap();
642
643        assert_eq!(result.len(), 0);
644    }
645
646    #[test]
647    fn test_collapsed_reference() {
648        let rule = MD053LinkImageReferenceDefinitions::new();
649        let content = "[ref][]\n\n[ref]: https://example.com";
650        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
651        let result = rule.check(&ctx).unwrap();
652
653        assert_eq!(result.len(), 0);
654    }
655
656    #[test]
657    fn test_multiple_unused_definitions() {
658        let rule = MD053LinkImageReferenceDefinitions::new();
659        let content = "[unused1]: url1\n[unused2]: url2\n[unused3]: url3";
660        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
661        let result = rule.check(&ctx).unwrap();
662
663        assert_eq!(result.len(), 3);
664
665        // The warnings might not be in the same order, so collect all messages
666        let messages: Vec<String> = result.iter().map(|w| w.message.clone()).collect();
667        assert!(messages.iter().any(|m| m.contains("unused1")));
668        assert!(messages.iter().any(|m| m.contains("unused2")));
669        assert!(messages.iter().any(|m| m.contains("unused3")));
670    }
671
672    #[test]
673    fn test_mixed_used_and_unused() {
674        let rule = MD053LinkImageReferenceDefinitions::new();
675        let content = "[used]\n\n[used]: url1\n[unused]: url2";
676        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
677        let result = rule.check(&ctx).unwrap();
678
679        assert_eq!(result.len(), 1);
680        assert!(result[0].message.contains("unused"));
681    }
682
683    #[test]
684    fn test_multiline_definition() {
685        let rule = MD053LinkImageReferenceDefinitions::new();
686        let content = "[ref]: https://example.com\n  \"Title on next line\"";
687        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
688        let result = rule.check(&ctx).unwrap();
689
690        assert_eq!(result.len(), 1); // Still unused
691    }
692
693    #[test]
694    fn test_reference_in_code_block() {
695        let rule = MD053LinkImageReferenceDefinitions::new();
696        let content = "```\n[ref]\n```\n\n[ref]: https://example.com";
697        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
698        let result = rule.check(&ctx).unwrap();
699
700        // Reference used only in code block is still considered unused
701        assert_eq!(result.len(), 1);
702    }
703
704    #[test]
705    fn test_reference_in_inline_code() {
706        let rule = MD053LinkImageReferenceDefinitions::new();
707        let content = "`[ref]`\n\n[ref]: https://example.com";
708        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
709        let result = rule.check(&ctx).unwrap();
710
711        // Reference in inline code is not a usage
712        assert_eq!(result.len(), 1);
713    }
714
715    #[test]
716    fn test_escaped_reference() {
717        let rule = MD053LinkImageReferenceDefinitions::new();
718        let content = "[example\\-ref]\n\n[example-ref]: https://example.com";
719        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
720        let result = rule.check(&ctx).unwrap();
721
722        // Should match despite escaping
723        assert_eq!(result.len(), 0);
724    }
725
726    #[test]
727    fn test_duplicate_definitions() {
728        let rule = MD053LinkImageReferenceDefinitions::new();
729        let content = "[ref]: url1\n[ref]: url2\n\n[ref]";
730        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
731        let result = rule.check(&ctx).unwrap();
732
733        // Should flag the duplicate definition even though it's used (matches markdownlint)
734        assert_eq!(result.len(), 1);
735    }
736
737    #[test]
738    fn test_fix_returns_original() {
739        // MD053 is warning-only, fix should return original content
740        let rule = MD053LinkImageReferenceDefinitions::new();
741        let content = "[used]\n\n[used]: url1\n[unused]: url2\n\nMore content";
742        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
743        let fixed = rule.fix(&ctx).unwrap();
744
745        assert_eq!(fixed, content);
746    }
747
748    #[test]
749    fn test_fix_preserves_content() {
750        // MD053 is warning-only, fix should preserve all content
751        let rule = MD053LinkImageReferenceDefinitions::new();
752        let content = "Content\n\n[unused]: url\n\nMore content";
753        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
754        let fixed = rule.fix(&ctx).unwrap();
755
756        assert_eq!(fixed, content);
757    }
758
759    #[test]
760    fn test_fix_does_not_remove() {
761        // MD053 is warning-only, fix should not remove anything
762        let rule = MD053LinkImageReferenceDefinitions::new();
763        let content = "[unused1]: url1\n[unused2]: url2\n[unused3]: url3";
764        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
765        let fixed = rule.fix(&ctx).unwrap();
766
767        assert_eq!(fixed, content);
768    }
769
770    #[test]
771    fn test_special_characters_in_reference() {
772        let rule = MD053LinkImageReferenceDefinitions::new();
773        let content = "[ref-with_special.chars]\n\n[ref-with_special.chars]: url";
774        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
775        let result = rule.check(&ctx).unwrap();
776
777        assert_eq!(result.len(), 0);
778    }
779
780    #[test]
781    fn test_find_definitions() {
782        let rule = MD053LinkImageReferenceDefinitions::new();
783        let content = "[ref1]: url1\n[ref2]: url2\nSome text\n[ref3]: url3";
784        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
785        let defs = rule.find_definitions(&ctx);
786
787        assert_eq!(defs.len(), 3);
788        assert!(defs.contains_key("ref1"));
789        assert!(defs.contains_key("ref2"));
790        assert!(defs.contains_key("ref3"));
791    }
792
793    #[test]
794    fn test_find_usages() {
795        let rule = MD053LinkImageReferenceDefinitions::new();
796        let content = "[text][ref1] and [ref2] and ![img][ref3]";
797        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
798        let usages = rule.find_usages(&ctx);
799
800        assert!(usages.contains("ref1"));
801        assert!(usages.contains("ref2"));
802        assert!(usages.contains("ref3"));
803    }
804
805    #[test]
806    fn test_ignored_definitions_config() {
807        // Test with ignored definitions
808        let config = MD053Config {
809            ignored_definitions: vec!["todo".to_string(), "draft".to_string()],
810        };
811        let rule = MD053LinkImageReferenceDefinitions::from_config_struct(config);
812
813        let content = "[todo]: https://example.com/todo\n[draft]: https://example.com/draft\n[unused]: https://example.com/unused";
814        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
815        let result = rule.check(&ctx).unwrap();
816
817        // Should only flag "unused", not "todo" or "draft"
818        assert_eq!(result.len(), 1);
819        assert!(result[0].message.contains("unused"));
820        assert!(!result[0].message.contains("todo"));
821        assert!(!result[0].message.contains("draft"));
822    }
823
824    #[test]
825    fn test_ignored_definitions_case_insensitive() {
826        // Test case-insensitive matching of ignored definitions
827        let config = MD053Config {
828            ignored_definitions: vec!["TODO".to_string()],
829        };
830        let rule = MD053LinkImageReferenceDefinitions::from_config_struct(config);
831
832        let content = "[todo]: https://example.com/todo\n[unused]: https://example.com/unused";
833        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
834        let result = rule.check(&ctx).unwrap();
835
836        // Should only flag "unused", not "todo" (matches "TODO" case-insensitively)
837        assert_eq!(result.len(), 1);
838        assert!(result[0].message.contains("unused"));
839        assert!(!result[0].message.contains("todo"));
840    }
841
842    #[test]
843    fn test_default_config_section() {
844        let rule = MD053LinkImageReferenceDefinitions::default();
845        let config_section = rule.default_config_section();
846
847        assert!(config_section.is_some());
848        let (name, value) = config_section.unwrap();
849        assert_eq!(name, "MD053");
850
851        // Should contain the ignored_definitions option with default empty array
852        if let toml::Value::Table(table) = value {
853            assert!(table.contains_key("ignored-definitions"));
854            assert_eq!(table["ignored-definitions"], toml::Value::Array(vec![]));
855        } else {
856            panic!("Expected TOML table");
857        }
858    }
859
860    #[test]
861    fn test_fix_with_ignored_definitions() {
862        // MD053 is warning-only, fix should not remove anything even with ignored definitions
863        let config = MD053Config {
864            ignored_definitions: vec!["template".to_string()],
865        };
866        let rule = MD053LinkImageReferenceDefinitions::from_config_struct(config);
867
868        let content = "[template]: https://example.com/template\n[unused]: https://example.com/unused\n\nSome content.";
869        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
870        let fixed = rule.fix(&ctx).unwrap();
871
872        // Should keep everything since MD053 doesn't fix
873        assert_eq!(fixed, content);
874    }
875
876    #[test]
877    fn test_duplicate_definitions_exact_case() {
878        let rule = MD053LinkImageReferenceDefinitions::new();
879        let content = "[ref]: url1\n[ref]: url2\n[ref]: url3";
880        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
881        let result = rule.check(&ctx).unwrap();
882
883        // Should have 2 duplicate warnings (for the 2nd and 3rd definitions)
884        // Plus 1 unused warning
885        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
886        assert_eq!(duplicate_warnings.len(), 2);
887        assert_eq!(duplicate_warnings[0].line, 2);
888        assert_eq!(duplicate_warnings[1].line, 3);
889    }
890
891    #[test]
892    fn test_duplicate_definitions_case_variants() {
893        let rule = MD053LinkImageReferenceDefinitions::new();
894        let content =
895            "[method resolution order]: url1\n[Method Resolution Order]: url2\n[METHOD RESOLUTION ORDER]: url3";
896        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
897        let result = rule.check(&ctx).unwrap();
898
899        // Should have 2 duplicate warnings (for the 2nd and 3rd definitions)
900        // Note: These are treated as exact duplicates since they normalize to the same ID
901        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
902        assert_eq!(duplicate_warnings.len(), 2);
903
904        // The exact duplicate messages don't include "conflicts with"
905        // Only case-variant duplicates with different normalized forms would
906        assert_eq!(duplicate_warnings[0].line, 2);
907        assert_eq!(duplicate_warnings[1].line, 3);
908    }
909
910    #[test]
911    fn test_duplicate_and_unused() {
912        let rule = MD053LinkImageReferenceDefinitions::new();
913        let content = "[used]\n[used]: url1\n[used]: url2\n[unused]: url3";
914        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
915        let result = rule.check(&ctx).unwrap();
916
917        // Should have 1 duplicate warning and 1 unused warning
918        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
919        let unused_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Unused")).collect();
920
921        assert_eq!(duplicate_warnings.len(), 1);
922        assert_eq!(unused_warnings.len(), 1);
923        assert_eq!(duplicate_warnings[0].line, 3); // Second [used] definition
924        assert_eq!(unused_warnings[0].line, 4); // [unused] definition
925    }
926
927    #[test]
928    fn test_duplicate_with_usage() {
929        let rule = MD053LinkImageReferenceDefinitions::new();
930        // Even if used, duplicates should still be reported
931        let content = "[ref]\n\n[ref]: url1\n[ref]: url2";
932        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
933        let result = rule.check(&ctx).unwrap();
934
935        // Should have 1 duplicate warning (no unused since it's referenced)
936        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
937        let unused_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Unused")).collect();
938
939        assert_eq!(duplicate_warnings.len(), 1);
940        assert_eq!(unused_warnings.len(), 0);
941        assert_eq!(duplicate_warnings[0].line, 4);
942    }
943
944    #[test]
945    fn test_no_duplicate_different_ids() {
946        let rule = MD053LinkImageReferenceDefinitions::new();
947        let content = "[ref1]: url1\n[ref2]: url2\n[ref3]: url3";
948        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
949        let result = rule.check(&ctx).unwrap();
950
951        // Should have no duplicate warnings, only unused warnings
952        let duplicate_warnings: Vec<_> = result.iter().filter(|w| w.message.contains("Duplicate")).collect();
953        assert_eq!(duplicate_warnings.len(), 0);
954    }
955
956    #[test]
957    fn test_comment_style_reference_double_slash() {
958        let rule = MD053LinkImageReferenceDefinitions::new();
959        // Most popular comment pattern: [//]: # (comment)
960        let content = "[//]: # (This is a comment)\n\nSome regular text.";
961        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
962        let result = rule.check(&ctx).unwrap();
963
964        // Should not report as unused - it's recognized as a comment
965        assert_eq!(result.len(), 0, "Comment-style reference [//]: # should not be flagged");
966    }
967
968    #[test]
969    fn test_comment_style_reference_comment_label() {
970        let rule = MD053LinkImageReferenceDefinitions::new();
971        // Semantic comment pattern: [comment]: # (text)
972        let content = "[comment]: # (This is a semantic comment)\n\n[note]: # (This is a note)";
973        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
974        let result = rule.check(&ctx).unwrap();
975
976        // Should not report either as unused
977        assert_eq!(result.len(), 0, "Comment-style references should not be flagged");
978    }
979
980    #[test]
981    fn test_comment_style_reference_todo_fixme() {
982        let rule = MD053LinkImageReferenceDefinitions::new();
983        // Task tracking patterns: [todo]: # and [fixme]: #
984        let content = "[todo]: # (Add more examples)\n[fixme]: # (Fix this later)\n[hack]: # (Temporary workaround)";
985        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
986        let result = rule.check(&ctx).unwrap();
987
988        // Should not report any as unused
989        assert_eq!(result.len(), 0, "TODO/FIXME comment patterns should not be flagged");
990    }
991
992    #[test]
993    fn test_comment_style_reference_fragment_only() {
994        let rule = MD053LinkImageReferenceDefinitions::new();
995        // Any reference with just "#" as URL should be treated as a comment
996        let content = "[anything]: #\n[ref]: #\n\nSome text.";
997        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
998        let result = rule.check(&ctx).unwrap();
999
1000        // Should not report as unused - fragment-only URLs are often comments
1001        assert_eq!(result.len(), 0, "References with just '#' URL should not be flagged");
1002    }
1003
1004    #[test]
1005    fn test_comment_vs_real_reference() {
1006        let rule = MD053LinkImageReferenceDefinitions::new();
1007        // Mix of comment and real reference - only real one should be flagged if unused
1008        let content = "[//]: # (This is a comment)\n[real-ref]: https://example.com\n\nSome text.";
1009        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1010        let result = rule.check(&ctx).unwrap();
1011
1012        // Should only report the real reference as unused
1013        assert_eq!(result.len(), 1, "Only real unused references should be flagged");
1014        assert!(result[0].message.contains("real-ref"), "Should flag the real reference");
1015    }
1016
1017    #[test]
1018    fn test_comment_with_fragment_section() {
1019        let rule = MD053LinkImageReferenceDefinitions::new();
1020        // Comment pattern with a fragment section (still a comment)
1021        let content = "[//]: #section (Comment about section)\n\nSome text.";
1022        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1023        let result = rule.check(&ctx).unwrap();
1024
1025        // Should not report as unused - it's still a comment pattern
1026        assert_eq!(result.len(), 0, "Comment with fragment section should not be flagged");
1027    }
1028
1029    #[test]
1030    fn test_is_likely_comment_reference_helper() {
1031        // Test the helper function directly
1032        assert!(
1033            MD053LinkImageReferenceDefinitions::is_likely_comment_reference("//", "#"),
1034            "[//]: # should be recognized as comment"
1035        );
1036        assert!(
1037            MD053LinkImageReferenceDefinitions::is_likely_comment_reference("comment", "#section"),
1038            "[comment]: #section should be recognized as comment"
1039        );
1040        assert!(
1041            MD053LinkImageReferenceDefinitions::is_likely_comment_reference("note", "#"),
1042            "[note]: # should be recognized as comment"
1043        );
1044        assert!(
1045            MD053LinkImageReferenceDefinitions::is_likely_comment_reference("todo", "#"),
1046            "[todo]: # should be recognized as comment"
1047        );
1048        assert!(
1049            MD053LinkImageReferenceDefinitions::is_likely_comment_reference("anything", "#"),
1050            "Any label with just '#' should be recognized as comment"
1051        );
1052        assert!(
1053            !MD053LinkImageReferenceDefinitions::is_likely_comment_reference("ref", "https://example.com"),
1054            "Real URL should not be recognized as comment"
1055        );
1056        assert!(
1057            !MD053LinkImageReferenceDefinitions::is_likely_comment_reference("link", "http://test.com"),
1058            "Real URL should not be recognized as comment"
1059        );
1060    }
1061
1062    #[test]
1063    fn test_reference_with_colon_in_name() {
1064        // References containing colons and spaces should be recognized as valid references
1065        let rule = MD053LinkImageReferenceDefinitions::new();
1066        let content = "Check [RFC: 1234] for specs.\n\n[RFC: 1234]: https://example.com\n";
1067        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1068        let result = rule.check(&ctx).unwrap();
1069
1070        assert!(
1071            result.is_empty(),
1072            "Reference with colon should be recognized as used, got warnings: {result:?}"
1073        );
1074    }
1075
1076    #[test]
1077    fn test_reference_with_colon_various_styles() {
1078        // Test various RFC-style and similar references with colons
1079        let rule = MD053LinkImageReferenceDefinitions::new();
1080        let content = r#"See [RFC: 1234] and [Issue: 42] and [PR: 100].
1081
1082[RFC: 1234]: https://example.com/rfc1234
1083[Issue: 42]: https://example.com/issue42
1084[PR: 100]: https://example.com/pr100
1085"#;
1086        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1087        let result = rule.check(&ctx).unwrap();
1088
1089        assert!(
1090            result.is_empty(),
1091            "All colon-style references should be recognized as used, got warnings: {result:?}"
1092        );
1093    }
1094
1095    #[test]
1096    fn test_should_skip_pattern_allows_rfc_style() {
1097        // Verify that should_skip_pattern does NOT skip RFC-style references with colons
1098        // This tests the fix for the bug where references with ": " were incorrectly skipped
1099        assert!(
1100            !MD053LinkImageReferenceDefinitions::should_skip_pattern("RFC: 1234"),
1101            "RFC-style references should NOT be skipped"
1102        );
1103        assert!(
1104            !MD053LinkImageReferenceDefinitions::should_skip_pattern("Issue: 42"),
1105            "Issue-style references should NOT be skipped"
1106        );
1107        assert!(
1108            !MD053LinkImageReferenceDefinitions::should_skip_pattern("PR: 100"),
1109            "PR-style references should NOT be skipped"
1110        );
1111        assert!(
1112            !MD053LinkImageReferenceDefinitions::should_skip_pattern("See: Section 2"),
1113            "References with 'See:' should NOT be skipped"
1114        );
1115        assert!(
1116            !MD053LinkImageReferenceDefinitions::should_skip_pattern("foo:bar"),
1117            "References without space after colon should NOT be skipped"
1118        );
1119    }
1120
1121    #[test]
1122    fn test_should_skip_pattern_skips_prose() {
1123        // Verify that prose-like patterns (3+ words before colon) are still skipped
1124        assert!(
1125            MD053LinkImageReferenceDefinitions::should_skip_pattern("default value is: something"),
1126            "Prose with 3+ words before colon SHOULD be skipped"
1127        );
1128        assert!(
1129            MD053LinkImageReferenceDefinitions::should_skip_pattern("this is a label: description"),
1130            "Prose with 4 words before colon SHOULD be skipped"
1131        );
1132        assert!(
1133            MD053LinkImageReferenceDefinitions::should_skip_pattern("the project root: path/to/dir"),
1134            "Prose-like descriptions SHOULD be skipped"
1135        );
1136    }
1137
1138    #[test]
1139    fn test_many_code_spans_with_shortcut_references() {
1140        // Exercises the binary search path for code span containment.
1141        // With many code spans, linear search would be slow; binary search stays O(log n).
1142        let rule = MD053LinkImageReferenceDefinitions::new();
1143
1144        let mut lines = Vec::new();
1145        // Generate many code spans interleaved with shortcut references
1146        for i in 0..100 {
1147            lines.push(format!("Some `code{i}` text and [used_ref] here"));
1148        }
1149        lines.push(String::new());
1150        lines.push("[used_ref]: https://example.com".to_string());
1151        lines.push("[unused_ref]: https://unused.com".to_string());
1152
1153        let content = lines.join("\n");
1154        let ctx = LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);
1155        let result = rule.check(&ctx).unwrap();
1156
1157        // used_ref is referenced 100 times, so only unused_ref should be reported
1158        assert_eq!(result.len(), 1);
1159        assert!(result[0].message.contains("unused_ref"));
1160    }
1161
1162    #[test]
1163    fn test_multiline_definition_continuation_tracking() {
1164        // Exercises the forward-tracking for multi-line definitions.
1165        // Definitions with title on the next line should be treated as a single unit.
1166        let rule = MD053LinkImageReferenceDefinitions::new();
1167        let content = "\
1168[ref1]: https://example.com
1169   \"Title on next line\"
1170
1171[ref2]: https://example2.com
1172   \"Another title\"
1173
1174Some text using [ref1] here.
1175";
1176        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1177        let result = rule.check(&ctx).unwrap();
1178
1179        // ref1 is used, ref2 is not
1180        assert_eq!(result.len(), 1);
1181        assert!(result[0].message.contains("ref2"));
1182    }
1183
1184    #[test]
1185    fn test_code_span_at_boundary_does_not_hide_reference() {
1186        // A reference immediately after a code span should still be detected
1187        let rule = MD053LinkImageReferenceDefinitions::new();
1188        let content = "`code`[ref]\n\n[ref]: https://example.com";
1189        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1190        let result = rule.check(&ctx).unwrap();
1191
1192        // [ref] is outside the code span, so it counts as a usage
1193        assert_eq!(result.len(), 0);
1194    }
1195
1196    #[test]
1197    fn test_reference_inside_code_span_not_counted() {
1198        // A reference inside a code span should NOT be counted as usage
1199        let rule = MD053LinkImageReferenceDefinitions::new();
1200        let content = "Use `[ref]` in code\n\n[ref]: https://example.com";
1201        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1202        let result = rule.check(&ctx).unwrap();
1203
1204        // [ref] is inside a code span, so the definition is unused
1205        assert_eq!(result.len(), 1);
1206    }
1207}