rumdl_lib/rules/
md053_link_image_reference_definitions.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
2use crate::rule_config_serde::RuleConfig;
3use crate::utils::document_structure::DocumentStructure;
4use crate::utils::range_utils::calculate_line_range;
5use fancy_regex::Regex as FancyRegex;
6use lazy_static::lazy_static;
7use regex::Regex;
8use serde::{Deserialize, Serialize};
9use std::collections::{HashMap, HashSet};
10
11lazy_static! {
12    // Link reference format: [text][reference]
13    // REMOVED: static ref LINK_REFERENCE_REGEX: FancyRegex = FancyRegex::new(r"\[([^\]]*)\]\s*\[([^\]]*)\]").unwrap();
14
15    // Image reference format: ![text][reference]
16    // REMOVED: static ref IMAGE_REFERENCE_REGEX: FancyRegex = FancyRegex::new(r"!\[([^\]]*)\]\s*\[([^\]]*)\]").unwrap();
17
18    // Shortcut reference links: [reference] - must not be followed by a colon to avoid matching definitions
19    static ref SHORTCUT_REFERENCE_REGEX: FancyRegex =
20        FancyRegex::new(r"(?<!\!)\[([^\]]+)\](?!\s*[\[(:])").unwrap();
21
22    // REMOVED: Empty reference links: [text][] or ![text][]
23    // static ref EMPTY_LINK_REFERENCE_REGEX: Regex = Regex::new(r"\[([^\]]+)\]\s*\[\s*\]").unwrap();
24    // static ref EMPTY_IMAGE_REFERENCE_REGEX: Regex = Regex::new(r"!\[([^\]]+)\]\s*\[\s*\]").unwrap();
25
26    // Link/image reference definition format: [reference]: URL
27    static ref REFERENCE_DEFINITION_REGEX: Regex =
28        Regex::new(r"^\s*\[([^\]]+)\]:\s+(.+)$").unwrap();
29
30    // Multi-line reference definition continuation pattern
31    static ref CONTINUATION_REGEX: Regex = Regex::new(r"^\s+(.+)$").unwrap();
32
33    // Code block regex
34    static ref CODE_BLOCK_START_REGEX: Regex = Regex::new(r"^```").unwrap();
35    static ref CODE_BLOCK_END_REGEX: Regex = Regex::new(r"^```\s*$").unwrap();
36}
37
38/// Configuration for MD053 rule
39#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
40#[serde(rename_all = "kebab-case")]
41pub struct MD053Config {
42    /// List of reference names to keep even if unused
43    #[serde(default = "default_ignored_definitions")]
44    pub ignored_definitions: Vec<String>,
45}
46
47impl Default for MD053Config {
48    fn default() -> Self {
49        Self {
50            ignored_definitions: default_ignored_definitions(),
51        }
52    }
53}
54
55fn default_ignored_definitions() -> Vec<String> {
56    Vec::new()
57}
58
59impl RuleConfig for MD053Config {
60    const RULE_NAME: &'static str = "MD053";
61}
62
63/// Rule MD053: Link and image reference definitions should be used
64///
65/// See [docs/md053.md](../../docs/md053.md) for full documentation, configuration, and examples.
66///
67/// This rule is triggered when a link or image reference definition is declared but not used
68/// anywhere in the document. Unused reference definitions can create confusion and clutter.
69///
70/// ## Supported Reference Formats
71///
72/// This rule handles the following reference formats:
73///
74/// - **Full reference links/images**: `[text][reference]` or `![text][reference]`
75/// - **Collapsed reference links/images**: `[text][]` or `![text][]`
76/// - **Shortcut reference links**: `[reference]` (must be defined elsewhere)
77/// - **Reference definitions**: `[reference]: URL "Optional Title"`
78/// - **Multi-line reference definitions**:
79///   ```markdown
80///   [reference]: URL
81///      "Optional title continued on next line"
82///   ```
83///
84/// ## Configuration Options
85///
86/// The rule supports the following configuration options:
87///
88/// ```yaml
89/// MD053:
90///   ignored_definitions: []  # List of reference definitions to ignore (never report as unused)
91/// ```
92///
93/// ## Performance Optimizations
94///
95/// This rule implements various performance optimizations for handling large documents:
96///
97/// 1. **Caching**: The rule caches parsed definitions and references based on content hashing
98/// 2. **Efficient Reference Matching**: Uses HashMaps for O(1) lookups of definitions
99/// 3. **Smart Code Block Handling**: Efficiently skips references inside code blocks/spans
100/// 4. **Lazy Evaluation**: Only processes necessary portions of the document
101///
102/// ## Edge Cases Handled
103///
104/// - **Case insensitivity**: References are matched case-insensitively
105/// - **Escaped characters**: Properly processes escaped characters in references
106/// - **Unicode support**: Handles non-ASCII characters in references and URLs
107/// - **Code blocks**: Ignores references inside code blocks and spans
108/// - **Special characters**: Properly handles references with special characters
109///
110/// ## Fix Behavior
111///
112/// When fixing issues, this rule removes unused reference definitions while preserving
113/// the document's structure, including handling proper blank line formatting around
114/// the removed definitions.
115#[derive(Clone)]
116pub struct MD053LinkImageReferenceDefinitions {
117    config: MD053Config,
118}
119
120impl MD053LinkImageReferenceDefinitions {
121    /// Create a new instance of the MD053 rule
122    pub fn new() -> Self {
123        Self {
124            config: MD053Config::default(),
125        }
126    }
127
128    /// Create a new instance with the given configuration
129    pub fn from_config_struct(config: MD053Config) -> Self {
130        Self { config }
131    }
132
133    /// Unescape a reference string by removing backslashes before special characters.
134    ///
135    /// This allows matching references like `[example\-reference]` with definitions like
136    /// `[example-reference]: http://example.com`
137    ///
138    /// Returns the unescaped reference string.
139    fn unescape_reference(reference: &str) -> String {
140        // Remove backslashes before special characters
141        reference.replace("\\", "")
142    }
143
144    /// Find all link and image reference definitions in the content.
145    ///
146    /// This method returns a HashMap where the key is the normalized reference ID and the value is a vector of (start_line, end_line) tuples.
147    fn find_definitions(
148        &self,
149        ctx: &crate::lint_context::LintContext,
150        doc_structure: &DocumentStructure,
151    ) -> HashMap<String, Vec<(usize, usize)>> {
152        let mut definitions: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
153
154        // First, add all reference definitions from context
155        for ref_def in &ctx.reference_defs {
156            // Apply unescape to handle escaped characters in definitions
157            let normalized_id = Self::unescape_reference(&ref_def.id); // Already lowercase from context
158            definitions
159                .entry(normalized_id)
160                .or_default()
161                .push((ref_def.line - 1, ref_def.line - 1)); // Convert to 0-indexed
162        }
163
164        // Handle multi-line definitions that might not be fully captured by ctx.reference_defs
165        let lines = &ctx.lines;
166        let mut i = 0;
167        while i < lines.len() {
168            let line_info = &lines[i];
169            let line = &line_info.content;
170
171            // Skip code blocks and front matter using line info
172            if line_info.in_code_block || doc_structure.is_in_front_matter(i + 1) {
173                i += 1;
174                continue;
175            }
176
177            // Check for multi-line continuation of existing definitions
178            if i > 0 && CONTINUATION_REGEX.is_match(line) {
179                // Find the reference definition this continues
180                let mut def_start = i - 1;
181                while def_start > 0 && !REFERENCE_DEFINITION_REGEX.is_match(&lines[def_start].content) {
182                    def_start -= 1;
183                }
184
185                if let Some(caps) = REFERENCE_DEFINITION_REGEX.captures(&lines[def_start].content) {
186                    let ref_id = caps.get(1).unwrap().as_str().trim();
187                    let normalized_id = Self::unescape_reference(ref_id).to_lowercase();
188
189                    // Update the end line for this definition
190                    if let Some(ranges) = definitions.get_mut(&normalized_id)
191                        && let Some(last_range) = ranges.last_mut()
192                        && last_range.0 == def_start
193                    {
194                        last_range.1 = i;
195                    }
196                }
197            }
198            i += 1;
199        }
200        definitions
201    }
202
203    /// Find all link and image reference reference usages in the content.
204    ///
205    /// This method returns a HashSet of all normalized reference IDs found in usage.
206    /// It leverages cached data from LintContext for efficiency.
207    fn find_usages(
208        &self,
209        doc_structure: &DocumentStructure,
210        ctx: &crate::lint_context::LintContext,
211    ) -> HashSet<String> {
212        let mut usages: HashSet<String> = HashSet::new();
213
214        // 1. Add usages from cached reference links in LintContext
215        for link in &ctx.links {
216            if link.is_reference
217                && let Some(ref_id) = &link.reference_id
218            {
219                // Ensure the link itself is not inside a code block line
220                if !doc_structure.is_in_code_block(link.line) {
221                    usages.insert(Self::unescape_reference(ref_id).to_lowercase());
222                }
223            }
224        }
225
226        // 2. Add usages from cached reference images in LintContext
227        for image in &ctx.images {
228            if image.is_reference
229                && let Some(ref_id) = &image.reference_id
230            {
231                // Ensure the image itself is not inside a code block line
232                if !doc_structure.is_in_code_block(image.line) {
233                    usages.insert(Self::unescape_reference(ref_id).to_lowercase());
234                }
235            }
236        }
237
238        // 3. Find shortcut references [ref] not already handled by DocumentStructure.links
239        //    and ensure they are not within code spans or code blocks.
240        // Cache code spans once before the loop
241        let code_spans = ctx.code_spans();
242
243        for (i, line_info) in ctx.lines.iter().enumerate() {
244            let line_num = i + 1; // 1-indexed
245
246            // Skip lines in code blocks or front matter
247            if line_info.in_code_block || doc_structure.is_in_front_matter(line_num) {
248                continue;
249            }
250
251            // Find potential shortcut references
252            for caps in SHORTCUT_REFERENCE_REGEX.captures_iter(&line_info.content).flatten() {
253                if let Some(full_match) = caps.get(0)
254                    && let Some(ref_id_match) = caps.get(1)
255                {
256                    // Check if the match is within a code span
257                    let match_byte_offset = line_info.byte_offset + full_match.start();
258                    let in_code_span = code_spans
259                        .iter()
260                        .any(|span| match_byte_offset >= span.byte_offset && match_byte_offset < span.byte_end);
261
262                    if !in_code_span {
263                        let ref_id = ref_id_match.as_str().trim();
264                        let normalized_id = Self::unescape_reference(ref_id).to_lowercase();
265                        usages.insert(normalized_id);
266                    }
267                }
268            }
269        }
270
271        // NOTE: The complex recursive loop trying to find references within definitions
272        // has been removed as it's not standard Markdown behavior for finding *usages*.
273        // Usages refer to `[text][ref]`, `![alt][ref]`, `[ref]`, etc., in the main content,
274        // not references potentially embedded within the URL or title of another definition.
275
276        usages
277    }
278
279    /// Get unused references with their line ranges.
280    ///
281    /// This method uses the cached definitions to improve performance.
282    ///
283    /// Note: References that are only used inside code blocks are still considered unused,
284    /// as code blocks are treated as examples or documentation rather than actual content.
285    fn get_unused_references(
286        &self,
287        definitions: &HashMap<String, Vec<(usize, usize)>>,
288        usages: &HashSet<String>,
289    ) -> Vec<(String, usize, usize)> {
290        let mut unused = Vec::new();
291        for (id, ranges) in definitions {
292            // If this id is not used anywhere and is not in the ignored list, all its ranges are unused
293            if !usages.contains(id) && !self.is_ignored_definition(id) {
294                for (start, end) in ranges {
295                    unused.push((id.clone(), *start, *end));
296                }
297            }
298        }
299        unused
300    }
301
302    /// Check if a definition should be ignored (kept even if unused)
303    fn is_ignored_definition(&self, definition_id: &str) -> bool {
304        self.config
305            .ignored_definitions
306            .iter()
307            .any(|ignored| ignored.eq_ignore_ascii_case(definition_id))
308    }
309
310    /// Clean up multiple consecutive blank lines that might be created after removing references
311    fn clean_up_blank_lines(&self, content: &str) -> String {
312        let lines: Vec<&str> = content.lines().collect();
313        let mut result_lines = Vec::new();
314        let mut consecutive_blanks = 0;
315
316        for line in lines {
317            if line.trim().is_empty() {
318                consecutive_blanks += 1;
319                if consecutive_blanks <= 1 {
320                    // Allow up to 1 consecutive blank line
321                    result_lines.push(line);
322                }
323            } else {
324                consecutive_blanks = 0;
325                result_lines.push(line);
326            }
327        }
328
329        // Remove leading and trailing blank lines
330        while !result_lines.is_empty() && result_lines[0].trim().is_empty() {
331            result_lines.remove(0);
332        }
333        while !result_lines.is_empty() && result_lines[result_lines.len() - 1].trim().is_empty() {
334            result_lines.pop();
335        }
336
337        // Don't add trailing newlines - let the content determine its own ending
338        result_lines.join("\n")
339    }
340}
341
342impl Default for MD053LinkImageReferenceDefinitions {
343    fn default() -> Self {
344        Self::new()
345    }
346}
347
348impl Rule for MD053LinkImageReferenceDefinitions {
349    fn name(&self) -> &'static str {
350        "MD053"
351    }
352
353    fn description(&self) -> &'static str {
354        "Link and image reference definitions should be needed"
355    }
356
357    /// Check the content for unused link/image reference definitions.
358    ///
359    /// This implementation uses caching for improved performance on large documents.
360    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
361        let content = ctx.content;
362        // Compute DocumentStructure once
363        let doc_structure = DocumentStructure::new(content);
364
365        // Find definitions and usages using DocumentStructure
366        let definitions = self.find_definitions(ctx, &doc_structure);
367        let usages = self.find_usages(&doc_structure, ctx);
368
369        // Get unused references by comparing definitions and usages
370        let unused_refs = self.get_unused_references(&definitions, &usages);
371
372        let mut warnings = Vec::new();
373
374        // Create warnings for unused references
375        for (definition, start, _end) in unused_refs {
376            let line_num = start + 1; // 1-indexed line numbers
377            let line_content = ctx.lines.get(start).map(|l| l.content.as_str()).unwrap_or("");
378
379            // Calculate precise character range for the entire reference definition line
380            let (start_line, start_col, end_line, end_col) = calculate_line_range(line_num, line_content);
381
382            warnings.push(LintWarning {
383                rule_name: Some(self.name()),
384                line: start_line,
385                column: start_col,
386                end_line,
387                end_column: end_col,
388                message: format!("Unused link/image reference: [{definition}]"),
389                severity: Severity::Warning,
390                fix: Some(Fix {
391                    // Remove the entire line including the newline
392                    range: {
393                        let line_start = ctx.line_to_byte_offset(line_num).unwrap_or(0);
394                        let line_end = if line_num < ctx.lines.len() {
395                            ctx.line_to_byte_offset(line_num + 1).unwrap_or(content.len())
396                        } else {
397                            content.len()
398                        };
399                        line_start..line_end
400                    },
401                    replacement: String::new(), // Remove the line
402                }),
403            });
404        }
405
406        Ok(warnings)
407    }
408
409    /// Fix the content by removing unused link/image reference definitions.
410    ///
411    /// This implementation uses caching for improved performance on large documents.
412    /// It optimizes the process by:
413    /// 1. Using cached definitions to avoid re-parsing the document
414    /// 2. Preserving document structure while removing unused references
415    /// 3. Cleaning up any formatting issues created by the removals
416    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
417        let content = ctx.content;
418        let doc_structure = DocumentStructure::new(content);
419
420        // Find definitions and usages using DocumentStructure
421        let definitions = self.find_definitions(ctx, &doc_structure);
422        let usages = self.find_usages(&doc_structure, ctx);
423
424        // Get unused references by comparing definitions and usages
425        let unused_refs = self.get_unused_references(&definitions, &usages);
426
427        // If no unused references, return original content
428        if unused_refs.is_empty() {
429            return Ok(content.to_string());
430        }
431
432        // Collect all line ranges to remove (sort by start line descending)
433        let mut lines_to_remove: Vec<(usize, usize)> =
434            unused_refs.iter().map(|(_, start, end)| (*start, *end)).collect();
435        lines_to_remove.sort_by(|a, b| b.0.cmp(&a.0)); // Sort descending by start line
436
437        // Remove lines from end to beginning to preserve line numbers
438        let lines: Vec<&str> = ctx.lines.iter().map(|l| l.content.as_str()).collect();
439        let mut result_lines: Vec<&str> = lines.clone();
440
441        for (start_line, end_line) in lines_to_remove {
442            // Remove lines from start_line to end_line (inclusive)
443            if start_line < result_lines.len() && end_line < result_lines.len() {
444                result_lines.drain(start_line..=end_line);
445            }
446        }
447
448        // Join the remaining lines
449        let mut result = result_lines.join("\n");
450
451        // Preserve original ending (with or without newline)
452        if content.ends_with('\n') && !result.ends_with('\n') {
453            result.push('\n');
454        }
455
456        // Clean up multiple consecutive blank lines that might have been created
457        let cleaned = self.clean_up_blank_lines(&result);
458
459        Ok(cleaned)
460    }
461
462    /// Check if this rule should be skipped for performance
463    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
464        // Skip if content is empty or has no reference definitions
465        ctx.content.is_empty() || !ctx.content.contains("]:")
466    }
467
468    fn as_any(&self) -> &dyn std::any::Any {
469        self
470    }
471
472    fn default_config_section(&self) -> Option<(String, toml::Value)> {
473        let default_config = MD053Config::default();
474        let json_value = serde_json::to_value(&default_config).ok()?;
475        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
476        if let toml::Value::Table(table) = toml_value {
477            if !table.is_empty() {
478                Some((MD053Config::RULE_NAME.to_string(), toml::Value::Table(table)))
479            } else {
480                None
481            }
482        } else {
483            None
484        }
485    }
486
487    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
488    where
489        Self: Sized,
490    {
491        let rule_config = crate::rule_config_serde::load_rule_config::<MD053Config>(config);
492        Box::new(MD053LinkImageReferenceDefinitions::from_config_struct(rule_config))
493    }
494}
495
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Lint `content` with `rule` (standard flavor) and return the warning messages in
    /// the order they were reported.
    fn messages_with(rule: &MD053LinkImageReferenceDefinitions, content: &str) -> Vec<String> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap().iter().map(|w| w.message.clone()).collect()
    }

    /// Lint `content` with a default-configured rule.
    fn messages(content: &str) -> Vec<String> {
        messages_with(&MD053LinkImageReferenceDefinitions::new(), content)
    }

    /// Run the fixer of `rule` over `content` (standard flavor).
    fn fixed_with(rule: &MD053LinkImageReferenceDefinitions, content: &str) -> String {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.fix(&ctx).unwrap()
    }

    /// Run the fixer of a default-configured rule over `content`.
    fn fixed(content: &str) -> String {
        fixed_with(&MD053LinkImageReferenceDefinitions::new(), content)
    }

    /// Build a rule whose configuration ignores the given labels.
    fn rule_ignoring(labels: &[&str]) -> MD053LinkImageReferenceDefinitions {
        let config = MD053Config {
            ignored_definitions: labels.iter().map(|label| label.to_string()).collect(),
        };
        MD053LinkImageReferenceDefinitions::from_config_struct(config)
    }

    #[test]
    fn test_used_reference_link() {
        let result = messages("[text][ref]\n\n[ref]: https://example.com");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_unused_reference_definition() {
        let result = messages("[unused]: https://example.com");

        assert_eq!(result.len(), 1);
        assert!(result[0].contains("Unused link/image reference: [unused]"));
    }

    #[test]
    fn test_used_reference_image() {
        let result = messages("![alt][img]\n\n[img]: image.jpg");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_case_insensitive_matching() {
        let result = messages("[Text][REF]\n\n[ref]: https://example.com");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_shortcut_reference() {
        let result = messages("[ref]\n\n[ref]: https://example.com");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_collapsed_reference() {
        let result = messages("[ref][]\n\n[ref]: https://example.com");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_multiple_unused_definitions() {
        let result = messages("[unused1]: url1\n[unused2]: url2\n[unused3]: url3");

        assert_eq!(result.len(), 3);

        // The assertions do not depend on the order in which the warnings are reported
        for label in ["unused1", "unused2", "unused3"] {
            assert!(result.iter().any(|m| m.contains(label)));
        }
    }

    #[test]
    fn test_mixed_used_and_unused() {
        let result = messages("[used]\n\n[used]: url1\n[unused]: url2");

        assert_eq!(result.len(), 1);
        assert!(result[0].contains("unused"));
    }

    #[test]
    fn test_multiline_definition() {
        let result = messages("[ref]: https://example.com\n  \"Title on next line\"");

        assert_eq!(result.len(), 1); // Still unused
    }

    #[test]
    fn test_reference_in_code_block() {
        let result = messages("```\n[ref]\n```\n\n[ref]: https://example.com");

        // Reference used only in code block is still considered unused
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_reference_in_inline_code() {
        let result = messages("`[ref]`\n\n[ref]: https://example.com");

        // Reference in inline code is not a usage
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_escaped_reference() {
        let result = messages("[example\\-ref]\n\n[example-ref]: https://example.com");

        // Should match despite escaping
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_duplicate_definitions() {
        let result = messages("[ref]: url1\n[ref]: url2\n\n[ref]");

        // Both definitions are used (Markdown uses the first one)
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_fix_removes_unused_definition() {
        let output = fixed("[used]\n\n[used]: url1\n[unused]: url2\n\nMore content");

        assert!(output.contains("[used]: url1"));
        assert!(!output.contains("[unused]: url2"));
        assert!(output.contains("More content"));
    }

    #[test]
    fn test_fix_preserves_blank_lines() {
        let output = fixed("Content\n\n[unused]: url\n\nMore content");

        assert_eq!(output, "Content\n\nMore content");
    }

    #[test]
    fn test_fix_multiple_consecutive_definitions() {
        let output = fixed("[unused1]: url1\n[unused2]: url2\n[unused3]: url3");

        assert_eq!(output, "");
    }

    #[test]
    fn test_special_characters_in_reference() {
        let result = messages("[ref-with_special.chars]\n\n[ref-with_special.chars]: url");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_find_definitions() {
        let rule = MD053LinkImageReferenceDefinitions::new();
        let content = "[ref1]: url1\n[ref2]: url2\nSome text\n[ref3]: url3";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let doc = DocumentStructure::new(content);
        let defs = rule.find_definitions(&ctx, &doc);

        assert_eq!(defs.len(), 3);
        for label in ["ref1", "ref2", "ref3"] {
            assert!(defs.contains_key(label));
        }
    }

    #[test]
    fn test_find_usages() {
        let rule = MD053LinkImageReferenceDefinitions::new();
        let content = "[text][ref1] and [ref2] and ![img][ref3]";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let doc = DocumentStructure::new(content);
        let usages = rule.find_usages(&doc, &ctx);

        for label in ["ref1", "ref2", "ref3"] {
            assert!(usages.contains(label));
        }
    }

    #[test]
    fn test_clean_up_blank_lines() {
        let rule = MD053LinkImageReferenceDefinitions::new();

        // Test multiple consecutive blank lines
        assert_eq!(rule.clean_up_blank_lines("text\n\n\n\nmore text"), "text\n\nmore text");

        // Test leading/trailing blank lines
        assert_eq!(rule.clean_up_blank_lines("\n\ntext\n\n"), "text");
    }

    #[test]
    fn test_ignored_definitions_config() {
        let rule = rule_ignoring(&["todo", "draft"]);
        let content = "[todo]: https://example.com/todo\n[draft]: https://example.com/draft\n[unused]: https://example.com/unused";
        let result = messages_with(&rule, content);

        // Should only flag "unused", not "todo" or "draft"
        assert_eq!(result.len(), 1);
        assert!(result[0].contains("unused"));
        assert!(!result[0].contains("todo"));
        assert!(!result[0].contains("draft"));
    }

    #[test]
    fn test_ignored_definitions_case_insensitive() {
        let rule = rule_ignoring(&["TODO"]);
        let content = "[todo]: https://example.com/todo\n[unused]: https://example.com/unused";
        let result = messages_with(&rule, content);

        // Should only flag "unused", not "todo" (matches "TODO" case-insensitively)
        assert_eq!(result.len(), 1);
        assert!(result[0].contains("unused"));
        assert!(!result[0].contains("todo"));
    }

    #[test]
    fn test_default_config_section() {
        let rule = MD053LinkImageReferenceDefinitions::default();
        let config_section = rule.default_config_section();

        assert!(config_section.is_some());
        let (name, value) = config_section.unwrap();
        assert_eq!(name, "MD053");

        // Should contain the ignored_definitions option with default empty array
        let toml::Value::Table(table) = value else {
            panic!("Expected TOML table");
        };
        assert!(table.contains_key("ignored-definitions"));
        assert_eq!(table["ignored-definitions"], toml::Value::Array(vec![]));
    }

    #[test]
    fn test_fix_respects_ignored_definitions() {
        let rule = rule_ignoring(&["template"]);
        let content = "[template]: https://example.com/template\n[unused]: https://example.com/unused\n\nSome content.";
        let output = fixed_with(&rule, content);

        // Should keep template but remove unused
        assert!(output.contains("[template]: https://example.com/template"));
        assert!(!output.contains("[unused]: https://example.com/unused"));
        assert!(output.contains("Some content."));
    }
}