rumdl_lib/rules/
md052_reference_links_images.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::{HTML_COMMENT_PATTERN, SHORTCUT_REF_REGEX};
5use crate::utils::skip_context::{is_in_front_matter, is_in_math_context, is_in_table_cell};
6use lazy_static::lazy_static;
7use regex::Regex;
8use std::collections::{HashMap, HashSet};
9
10lazy_static! {
11    // Pattern to match reference definitions [ref]: url (standard regex is fine)
12    // Note: \S* instead of \S+ to allow empty definitions like [ref]:
13    static ref REF_REGEX: Regex = Regex::new(r"^\s*\[([^\]]+)\]:\s*.*").unwrap();
14
15    // Pattern for list items to exclude from reference checks (standard regex is fine)
16    static ref LIST_ITEM_REGEX: Regex = Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap();
17
18    // Pattern for code blocks (standard regex is fine)
19    static ref FENCED_CODE_START: Regex = Regex::new(r"^(`{3,}|~{3,})").unwrap();
20
21    // Pattern for output example sections (standard regex is fine)
22    static ref OUTPUT_EXAMPLE_START: Regex = Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap();
23
24    // Pattern for GitHub alerts/callouts in blockquotes (e.g., > [!NOTE], > [!TIP], etc.)
25    static ref GITHUB_ALERT_REGEX: Regex = Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION)\]").unwrap();
26}
27
/// Rule MD052: Reference links and images should use a reference that exists
///
/// See [docs/md052.md](../../docs/md052.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when a reference link or image uses a reference that isn't defined.
/// The rule carries no configuration of its own, so the struct has no fields.
#[derive(Debug, Clone, Default)]
pub struct MD052ReferenceLinkImages {}
35
36impl MD052ReferenceLinkImages {
37    pub fn new() -> Self {
38        Self {}
39    }
40
41    /// Check if a position is inside any code span
42    fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
43        code_spans
44            .iter()
45            .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
46    }
47
48    /// Check if a byte position is within an HTML comment
49    fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
50        for m in HTML_COMMENT_PATTERN.find_iter(content) {
51            if m.start() <= byte_pos && byte_pos < m.end() {
52                return true;
53            }
54        }
55        false
56    }
57
58    /// Check if a byte position is within an HTML tag
59    fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
60        // Check HTML tags
61        for html_tag in ctx.html_tags().iter() {
62            if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
63                return true;
64            }
65        }
66        false
67    }
68
69    fn extract_references(&self, content: &str) -> HashSet<String> {
70        let mut references = HashSet::new();
71        let mut in_code_block = false;
72        let mut code_fence_marker = String::new();
73
74        for line in content.lines() {
75            // Handle code block boundaries
76            if let Some(cap) = FENCED_CODE_START.captures(line) {
77                if let Some(marker) = cap.get(0) {
78                    let marker_str = marker.as_str().to_string();
79                    if !in_code_block {
80                        in_code_block = true;
81                        code_fence_marker = marker_str;
82                    } else if line.trim().starts_with(&code_fence_marker) {
83                        in_code_block = false;
84                        code_fence_marker.clear();
85                    }
86                }
87                continue;
88            }
89
90            // Skip lines in code blocks
91            if in_code_block {
92                continue;
93            }
94
95            if let Some(cap) = REF_REGEX.captures(line) {
96                // Store references in lowercase for case-insensitive comparison
97                if let Some(reference) = cap.get(1) {
98                    references.insert(reference.as_str().to_lowercase());
99                }
100            }
101        }
102
103        references
104    }
105
106    fn find_undefined_references(
107        &self,
108        content: &str,
109        references: &HashSet<String>,
110        ctx: &crate::lint_context::LintContext,
111        mkdocs_mode: bool,
112    ) -> Vec<(usize, usize, usize, String)> {
113        let mut undefined = Vec::new();
114        let mut reported_refs = HashMap::new();
115        let mut in_code_block = false;
116        let mut code_fence_marker = String::new();
117        let mut in_example_section = false;
118
119        // Get code spans once for the entire function
120        let code_spans = ctx.code_spans();
121
122        // Use cached data for reference links and images
123        for link in &ctx.links {
124            if !link.is_reference {
125                continue; // Skip inline links
126            }
127
128            // Skip links inside code spans
129            if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
130                continue;
131            }
132
133            // Skip links inside HTML comments
134            if Self::is_in_html_comment(content, link.byte_offset) {
135                continue;
136            }
137
138            // Skip links inside math contexts
139            if is_in_math_context(ctx, link.byte_offset) {
140                continue;
141            }
142
143            // Skip links inside table cells
144            if is_in_table_cell(ctx, link.line, link.start_col) {
145                continue;
146            }
147
148            // Skip links inside frontmatter (convert from 1-based to 0-based line numbers)
149            if is_in_front_matter(content, link.line.saturating_sub(1)) {
150                continue;
151            }
152
153            if let Some(ref_id) = &link.reference_id {
154                let reference_lower = ref_id.to_lowercase();
155
156                // Skip MkDocs auto-references if in MkDocs mode
157                // Check both the reference_id and the link text for shorthand references
158                if mkdocs_mode && (is_mkdocs_auto_reference(ref_id) || is_mkdocs_auto_reference(&link.text)) {
159                    continue;
160                }
161
162                // Check if reference is defined
163                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
164                    // Check if the line is in an example section or list item
165                    if let Some(line_info) = ctx.line_info(link.line) {
166                        if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
167                            in_example_section = true;
168                            continue;
169                        }
170
171                        if in_example_section {
172                            continue;
173                        }
174
175                        // Skip list items
176                        if LIST_ITEM_REGEX.is_match(&line_info.content) {
177                            continue;
178                        }
179                    }
180
181                    let match_len = link.byte_end - link.byte_offset;
182                    undefined.push((link.line - 1, link.start_col, match_len, ref_id.clone()));
183                    reported_refs.insert(reference_lower, true);
184                }
185            }
186        }
187
188        // Use cached data for reference images
189        for image in &ctx.images {
190            if !image.is_reference {
191                continue; // Skip inline images
192            }
193
194            // Skip images inside code spans
195            if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
196                continue;
197            }
198
199            // Skip images inside HTML comments
200            if Self::is_in_html_comment(content, image.byte_offset) {
201                continue;
202            }
203
204            // Skip images inside math contexts
205            if is_in_math_context(ctx, image.byte_offset) {
206                continue;
207            }
208
209            // Skip images inside table cells
210            if is_in_table_cell(ctx, image.line, image.start_col) {
211                continue;
212            }
213
214            // Skip images inside frontmatter (convert from 1-based to 0-based line numbers)
215            if is_in_front_matter(content, image.line.saturating_sub(1)) {
216                continue;
217            }
218
219            if let Some(ref_id) = &image.reference_id {
220                let reference_lower = ref_id.to_lowercase();
221
222                // Skip MkDocs auto-references if in MkDocs mode
223                // Check both the reference_id and the alt text for shorthand references
224                if mkdocs_mode && (is_mkdocs_auto_reference(ref_id) || is_mkdocs_auto_reference(&image.alt_text)) {
225                    continue;
226                }
227
228                // Check if reference is defined
229                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
230                    // Check if the line is in an example section or list item
231                    if let Some(line_info) = ctx.line_info(image.line) {
232                        if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
233                            in_example_section = true;
234                            continue;
235                        }
236
237                        if in_example_section {
238                            continue;
239                        }
240
241                        // Skip list items
242                        if LIST_ITEM_REGEX.is_match(&line_info.content) {
243                            continue;
244                        }
245                    }
246
247                    let match_len = image.byte_end - image.byte_offset;
248                    undefined.push((image.line - 1, image.start_col, match_len, ref_id.clone()));
249                    reported_refs.insert(reference_lower, true);
250                }
251            }
252        }
253
254        // Build a set of byte ranges that are already covered by parsed links/images
255        let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
256
257        // Add ranges from parsed links
258        for link in &ctx.links {
259            covered_ranges.push((link.byte_offset, link.byte_end));
260        }
261
262        // Add ranges from parsed images
263        for image in &ctx.images {
264            covered_ranges.push((image.byte_offset, image.byte_end));
265        }
266
267        // Sort ranges by start position
268        covered_ranges.sort_by_key(|&(start, _)| start);
269
270        // Handle shortcut references [text] which aren't captured in ctx.links
271        // Need to use regex for these
272        let lines: Vec<&str> = content.lines().collect();
273        in_example_section = false; // Reset for line-by-line processing
274
275        for (line_num, line) in lines.iter().enumerate() {
276            // Skip lines in frontmatter (line_num is already 0-based)
277            if is_in_front_matter(content, line_num) {
278                continue;
279            }
280
281            // Handle code blocks
282            if let Some(cap) = FENCED_CODE_START.captures(line) {
283                if let Some(marker) = cap.get(0) {
284                    let marker_str = marker.as_str().to_string();
285                    if !in_code_block {
286                        in_code_block = true;
287                        code_fence_marker = marker_str;
288                    } else if line.trim().starts_with(&code_fence_marker) {
289                        in_code_block = false;
290                        code_fence_marker.clear();
291                    }
292                }
293                continue;
294            }
295
296            if in_code_block {
297                continue;
298            }
299
300            // Check for example sections
301            if OUTPUT_EXAMPLE_START.is_match(line) {
302                in_example_section = true;
303                continue;
304            }
305
306            if in_example_section {
307                // Check if we're exiting the example section (another heading)
308                if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
309                    in_example_section = false;
310                } else {
311                    continue;
312                }
313            }
314
315            // Skip list items
316            if LIST_ITEM_REGEX.is_match(line) {
317                continue;
318            }
319
320            // Skip GitHub alerts/callouts (e.g., > [!TIP])
321            if GITHUB_ALERT_REGEX.is_match(line) {
322                continue;
323            }
324
325            // Check shortcut references: [reference]
326            if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
327                for cap in captures {
328                    if let Some(ref_match) = cap.get(1) {
329                        let reference = ref_match.as_str();
330                        let reference_lower = reference.to_lowercase();
331
332                        // Skip GitHub alerts (e.g., !NOTE, !TIP, !WARNING, !IMPORTANT, !CAUTION)
333                        if let Some(alert_type) = reference.strip_prefix('!')
334                            && matches!(alert_type, "NOTE" | "TIP" | "WARNING" | "IMPORTANT" | "CAUTION")
335                        {
336                            continue;
337                        }
338
339                        // Skip MkDocs auto-references if in MkDocs mode
340                        if mkdocs_mode && is_mkdocs_auto_reference(reference) {
341                            continue;
342                        }
343
344                        if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
345                            let full_match = cap.get(0).unwrap();
346                            let col = full_match.start();
347
348                            // Skip if inside code span
349                            let code_spans = ctx.code_spans();
350                            if Self::is_in_code_span(line_num + 1, col, &code_spans) {
351                                continue;
352                            }
353
354                            // Check if this position is within a covered range
355                            let line_start_byte = ctx.line_offsets[line_num];
356                            let byte_pos = line_start_byte + col;
357
358                            // Skip if inside HTML comment
359                            if Self::is_in_html_comment(content, byte_pos) {
360                                continue;
361                            }
362
363                            // Skip if inside HTML tag
364                            if Self::is_in_html_tag(ctx, byte_pos) {
365                                continue;
366                            }
367
368                            // Skip if inside math context
369                            if is_in_math_context(ctx, byte_pos) {
370                                continue;
371                            }
372
373                            // Skip if inside table cell
374                            if is_in_table_cell(ctx, line_num + 1, col) {
375                                continue;
376                            }
377
378                            let byte_end = byte_pos + (full_match.end() - full_match.start());
379
380                            // Check if this shortcut ref overlaps with any parsed link/image
381                            let mut is_covered = false;
382                            for &(range_start, range_end) in &covered_ranges {
383                                if range_start <= byte_pos && byte_end <= range_end {
384                                    // This shortcut ref is completely within a parsed link/image
385                                    is_covered = true;
386                                    break;
387                                }
388                                if range_start > byte_end {
389                                    // No need to check further (ranges are sorted)
390                                    break;
391                                }
392                            }
393
394                            if is_covered {
395                                continue;
396                            }
397
398                            // More sophisticated checks to avoid false positives
399
400                            // Check 1: If preceded by ], this might be part of [text][ref]
401                            // Look for the pattern ...][ref] and check if there's a matching [ before
402                            if col > 0 && line.chars().nth(col.saturating_sub(1)) == Some(']') {
403                                // Look backwards for a [ that would make this [text][ref]
404                                let mut bracket_count = 1; // We already saw one ]
405                                let mut check_pos = col.saturating_sub(2);
406                                let mut found_opening = false;
407
408                                while check_pos > 0 {
409                                    match line.chars().nth(check_pos) {
410                                        Some(']') => bracket_count += 1,
411                                        Some('[') => {
412                                            bracket_count -= 1;
413                                            if bracket_count == 0 {
414                                                // Check if this [ is escaped
415                                                if check_pos == 0 || line.chars().nth(check_pos - 1) != Some('\\') {
416                                                    found_opening = true;
417                                                }
418                                                break;
419                                            }
420                                        }
421                                        _ => {}
422                                    }
423                                    if check_pos == 0 {
424                                        break;
425                                    }
426                                    check_pos = check_pos.saturating_sub(1);
427                                }
428
429                                if found_opening {
430                                    // This is part of [text][ref], skip it
431                                    continue;
432                                }
433                            }
434
435                            // Check 2: If there's an escaped bracket pattern before this
436                            // e.g., \[text\][ref], the [ref] shouldn't be treated as a shortcut
437                            let before_text = &line[..col];
438                            if before_text.contains("\\]") {
439                                // Check if there's a \[ before the \]
440                                if let Some(escaped_close_pos) = before_text.rfind("\\]") {
441                                    let search_text = &before_text[..escaped_close_pos];
442                                    if search_text.contains("\\[") {
443                                        // This looks like \[...\][ref], skip it
444                                        continue;
445                                    }
446                                }
447                            }
448
449                            let match_len = full_match.end() - full_match.start();
450                            undefined.push((line_num, col, match_len, reference.to_string()));
451                            reported_refs.insert(reference_lower, true);
452                        }
453                    }
454                }
455            }
456        }
457
458        undefined
459    }
460}
461
462impl Rule for MD052ReferenceLinkImages {
463    fn name(&self) -> &'static str {
464        "MD052"
465    }
466
467    fn description(&self) -> &'static str {
468        "Reference links and images should use a reference that exists"
469    }
470
471    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
472        let content = ctx.content;
473        let mut warnings = Vec::new();
474
475        // Check if we're in MkDocs mode from the context
476        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
477
478        let references = self.extract_references(content);
479
480        // Use optimized detection method with cached link/image data
481        for (line_num, col, match_len, reference) in
482            self.find_undefined_references(content, &references, ctx, mkdocs_mode)
483        {
484            let lines: Vec<&str> = content.lines().collect();
485            let line_content = lines.get(line_num).unwrap_or(&"");
486
487            // Calculate precise character range for the entire undefined reference
488            let (start_line, start_col, end_line, end_col) =
489                calculate_match_range(line_num + 1, line_content, col, match_len);
490
491            warnings.push(LintWarning {
492                rule_name: Some(self.name()),
493                line: start_line,
494                column: start_col,
495                end_line,
496                end_column: end_col,
497                message: format!("Reference '{reference}' not found"),
498                severity: Severity::Warning,
499                fix: None,
500            });
501        }
502
503        Ok(warnings)
504    }
505
506    /// Check if this rule should be skipped for performance
507    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
508        // Skip if content is empty or has no reference-style links/images
509        ctx.content.is_empty() || (!ctx.content.contains("](") && !ctx.content.contains("]["))
510    }
511
512    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
513        let content = ctx.content;
514        // No automatic fix available for undefined references
515        Ok(content.to_string())
516    }
517
518    fn as_any(&self) -> &dyn std::any::Any {
519        self
520    }
521
522    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
523    where
524        Self: Sized,
525    {
526        // Flavor is now accessed from LintContext during check
527        Box::new(MD052ReferenceLinkImages::new())
528    }
529}
530
531#[cfg(test)]
532mod tests {
533    use super::*;
534    use crate::lint_context::LintContext;
535
536    #[test]
537    fn test_valid_reference_link() {
538        let rule = MD052ReferenceLinkImages::new();
539        let content = "[text][ref]\n\n[ref]: https://example.com";
540        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
541        let result = rule.check(&ctx).unwrap();
542
543        assert_eq!(result.len(), 0);
544    }
545
546    #[test]
547    fn test_undefined_reference_link() {
548        let rule = MD052ReferenceLinkImages::new();
549        let content = "[text][undefined]";
550        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
551        let result = rule.check(&ctx).unwrap();
552
553        assert_eq!(result.len(), 1);
554        assert!(result[0].message.contains("Reference 'undefined' not found"));
555    }
556
557    #[test]
558    fn test_valid_reference_image() {
559        let rule = MD052ReferenceLinkImages::new();
560        let content = "![alt][img]\n\n[img]: image.jpg";
561        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
562        let result = rule.check(&ctx).unwrap();
563
564        assert_eq!(result.len(), 0);
565    }
566
567    #[test]
568    fn test_undefined_reference_image() {
569        let rule = MD052ReferenceLinkImages::new();
570        let content = "![alt][missing]";
571        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
572        let result = rule.check(&ctx).unwrap();
573
574        assert_eq!(result.len(), 1);
575        assert!(result[0].message.contains("Reference 'missing' not found"));
576    }
577
578    #[test]
579    fn test_case_insensitive_references() {
580        let rule = MD052ReferenceLinkImages::new();
581        let content = "[Text][REF]\n\n[ref]: https://example.com";
582        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
583        let result = rule.check(&ctx).unwrap();
584
585        assert_eq!(result.len(), 0);
586    }
587
588    #[test]
589    fn test_shortcut_reference_valid() {
590        let rule = MD052ReferenceLinkImages::new();
591        let content = "[ref]\n\n[ref]: https://example.com";
592        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
593        let result = rule.check(&ctx).unwrap();
594
595        assert_eq!(result.len(), 0);
596    }
597
598    #[test]
599    fn test_shortcut_reference_undefined() {
600        let rule = MD052ReferenceLinkImages::new();
601        let content = "[undefined]";
602        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
603        let result = rule.check(&ctx).unwrap();
604
605        assert_eq!(result.len(), 1);
606        assert!(result[0].message.contains("Reference 'undefined' not found"));
607    }
608
609    #[test]
610    fn test_inline_links_ignored() {
611        let rule = MD052ReferenceLinkImages::new();
612        let content = "[text](https://example.com)";
613        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
614        let result = rule.check(&ctx).unwrap();
615
616        assert_eq!(result.len(), 0);
617    }
618
619    #[test]
620    fn test_inline_images_ignored() {
621        let rule = MD052ReferenceLinkImages::new();
622        let content = "![alt](image.jpg)";
623        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
624        let result = rule.check(&ctx).unwrap();
625
626        assert_eq!(result.len(), 0);
627    }
628
629    #[test]
630    fn test_references_in_code_blocks_ignored() {
631        let rule = MD052ReferenceLinkImages::new();
632        let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
633        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
634        let result = rule.check(&ctx).unwrap();
635
636        assert_eq!(result.len(), 0);
637    }
638
639    #[test]
640    fn test_references_in_inline_code_ignored() {
641        let rule = MD052ReferenceLinkImages::new();
642        let content = "`[undefined]`";
643        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
644        let result = rule.check(&ctx).unwrap();
645
646        // References inside inline code spans should be ignored
647        assert_eq!(result.len(), 0);
648    }
649
650    #[test]
651    fn test_comprehensive_inline_code_detection() {
652        let rule = MD052ReferenceLinkImages::new();
653        let content = r#"# Test
654
655This `[inside]` should be ignored.
656This [outside] should be flagged.
657Reference links `[text][ref]` in code are ignored.
658Regular reference [text][missing] should be flagged.
659Images `![alt][img]` in code are ignored.
660Regular image ![alt][badimg] should be flagged.
661
662Multiple `[one]` and `[two]` in code ignored, but [three] is not.
663
664```
665[code block content] should be ignored
666```
667
668`Multiple [refs] in [same] code span` ignored."#;
669
670        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
671        let result = rule.check(&ctx).unwrap();
672
673        // Should only flag: outside, missing, badimg, three (4 total)
674        assert_eq!(result.len(), 4);
675
676        let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
677        assert!(messages.iter().any(|m| m.contains("outside")));
678        assert!(messages.iter().any(|m| m.contains("missing")));
679        assert!(messages.iter().any(|m| m.contains("badimg")));
680        assert!(messages.iter().any(|m| m.contains("three")));
681
682        // Should NOT flag any references inside code spans
683        assert!(!messages.iter().any(|m| m.contains("inside")));
684        assert!(!messages.iter().any(|m| m.contains("one")));
685        assert!(!messages.iter().any(|m| m.contains("two")));
686        assert!(!messages.iter().any(|m| m.contains("refs")));
687        assert!(!messages.iter().any(|m| m.contains("same")));
688    }
689
690    #[test]
691    fn test_multiple_undefined_references() {
692        let rule = MD052ReferenceLinkImages::new();
693        let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
694        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
695        let result = rule.check(&ctx).unwrap();
696
697        assert_eq!(result.len(), 3);
698        assert!(result[0].message.contains("ref1"));
699        assert!(result[1].message.contains("ref2"));
700        assert!(result[2].message.contains("ref3"));
701    }
702
703    #[test]
704    fn test_mixed_valid_and_undefined() {
705        let rule = MD052ReferenceLinkImages::new();
706        let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
707        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
708        let result = rule.check(&ctx).unwrap();
709
710        assert_eq!(result.len(), 1);
711        assert!(result[0].message.contains("missing"));
712    }
713
714    #[test]
715    fn test_empty_reference() {
716        let rule = MD052ReferenceLinkImages::new();
717        let content = "[text][]\n\n[ref]: https://example.com";
718        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
719        let result = rule.check(&ctx).unwrap();
720
721        // Empty reference should use the link text as reference
722        assert_eq!(result.len(), 1);
723    }
724
725    #[test]
726    fn test_escaped_brackets_ignored() {
727        let rule = MD052ReferenceLinkImages::new();
728        let content = "\\[not a link\\]";
729        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
730        let result = rule.check(&ctx).unwrap();
731
732        assert_eq!(result.len(), 0);
733    }
734
735    #[test]
736    fn test_list_items_ignored() {
737        let rule = MD052ReferenceLinkImages::new();
738        let content = "- [undefined]\n* [another]\n+ [third]";
739        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
740        let result = rule.check(&ctx).unwrap();
741
742        // List items that look like shortcut references should be ignored
743        assert_eq!(result.len(), 0);
744    }
745
746    #[test]
747    fn test_output_example_section_ignored() {
748        let rule = MD052ReferenceLinkImages::new();
749        let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
750        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
751        let result = rule.check(&ctx).unwrap();
752
753        // Only the reference outside the Output section should be flagged
754        assert_eq!(result.len(), 1);
755        assert!(result[0].message.contains("missing"));
756    }
757
758    #[test]
759    fn test_reference_definitions_in_code_blocks_ignored() {
760        let rule = MD052ReferenceLinkImages::new();
761        let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
762        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
763        let result = rule.check(&ctx).unwrap();
764
765        // Reference defined in code block should not count
766        assert_eq!(result.len(), 1);
767        assert!(result[0].message.contains("ref"));
768    }
769
770    #[test]
771    fn test_multiple_references_to_same_undefined() {
772        let rule = MD052ReferenceLinkImages::new();
773        let content = "[first][missing] [second][missing] [third][missing]";
774        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
775        let result = rule.check(&ctx).unwrap();
776
777        // Should only report once per unique reference
778        assert_eq!(result.len(), 1);
779        assert!(result[0].message.contains("missing"));
780    }
781
782    #[test]
783    fn test_reference_with_special_characters() {
784        let rule = MD052ReferenceLinkImages::new();
785        let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
786        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
787        let result = rule.check(&ctx).unwrap();
788
789        assert_eq!(result.len(), 0);
790    }
791
792    #[test]
793    fn test_issue_51_html_attribute_not_reference() {
794        // Test for issue #51 - HTML attributes with square brackets shouldn't be treated as references
795        let rule = MD052ReferenceLinkImages::new();
796        let content = r#"# Example
797
798## Test
799
800Want to fill out this form?
801
802<form method="post">
803    <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
804</form>"#;
805        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
806        let result = rule.check(&ctx).unwrap();
807
808        assert_eq!(
809            result.len(),
810            0,
811            "HTML attributes with square brackets should not be flagged as undefined references"
812        );
813    }
814
815    #[test]
816    fn test_extract_references() {
817        let rule = MD052ReferenceLinkImages::new();
818        let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
819        let refs = rule.extract_references(content);
820
821        assert_eq!(refs.len(), 3);
822        assert!(refs.contains("ref1"));
823        assert!(refs.contains("ref2"));
824        assert!(refs.contains("ref3"));
825    }
826
827    #[test]
828    fn test_inline_code_not_flagged() {
829        let rule = MD052ReferenceLinkImages::new();
830
831        // Test that arrays in inline code are not flagged as references
832        let content = r#"# Test
833
834Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.
835
836Also, `[todo]` is not a reference link.
837
838But this [reference] should be flagged.
839
840And this `[inline code]` should not be flagged.
841"#;
842
843        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
844        let warnings = rule.check(&ctx).unwrap();
845
846        // Should only flag [reference], not the ones in backticks
847        assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
848        assert!(warnings[0].message.contains("'reference'"));
849    }
850
851    #[test]
852    fn test_code_block_references_ignored() {
853        let rule = MD052ReferenceLinkImages::new();
854
855        let content = r#"# Test
856
857```markdown
858[undefined] reference in code block
859![undefined] image in code block
860```
861
862[real-undefined] reference outside
863"#;
864
865        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
866        let warnings = rule.check(&ctx).unwrap();
867
868        // Should only flag [real-undefined], not the ones in code block
869        assert_eq!(warnings.len(), 1);
870        assert!(warnings[0].message.contains("'real-undefined'"));
871    }
872
873    #[test]
874    fn test_html_comments_ignored() {
875        // Test for issue #20 - MD052 should not flag content inside HTML comments
876        let rule = MD052ReferenceLinkImages::new();
877
878        // Test the exact case from issue #20
879        let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
880<!--- set_env EDITOR 'python3 fake_editor.py' -->
881
882```bash
883$ python3 vote.py
8843 votes for: 2
8852 votes for: 3, 4
886```"#;
887        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
888        let result = rule.check(&ctx).unwrap();
889        assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");
890
891        // Test various reference patterns inside HTML comments
892        let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
893Normal [text][undefined]
894<!-- Another [comment][with] references -->"#;
895        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
896        let result = rule.check(&ctx).unwrap();
897        assert_eq!(
898            result.len(),
899            1,
900            "Should only flag the undefined reference outside comments"
901        );
902        assert!(result[0].message.contains("undefined"));
903
904        // Test multi-line HTML comments
905        let content = r#"<!--
906[ref1]
907[ref2][ref3]
908-->
909[actual][undefined]"#;
910        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
911        let result = rule.check(&ctx).unwrap();
912        assert_eq!(
913            result.len(),
914            1,
915            "Should not flag references in multi-line HTML comments"
916        );
917        assert!(result[0].message.contains("undefined"));
918
919        // Test mixed scenarios
920        let content = r#"<!-- Comment with [1:] pattern -->
921Valid [link][ref]
922<!-- More [refs][in][comments] -->
923![image][missing]
924
925[ref]: https://example.com"#;
926        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
927        let result = rule.check(&ctx).unwrap();
928        assert_eq!(result.len(), 1, "Should only flag missing image reference");
929        assert!(result[0].message.contains("missing"));
930    }
931
932    #[test]
933    fn test_frontmatter_ignored() {
934        // Test for issue #24 - MD052 should not flag content inside frontmatter
935        let rule = MD052ReferenceLinkImages::new();
936
937        // Test YAML frontmatter with arrays and references
938        let content = r#"---
939layout: post
940title: "My Jekyll Post"
941date: 2023-01-01
942categories: blog
943tags: ["test", "example"]
944author: John Doe
945---
946
947# My Blog Post
948
949This is the actual markdown content that should be linted.
950
951[undefined] reference should be flagged.
952
953## Section 1
954
955Some content here."#;
956        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
957        let result = rule.check(&ctx).unwrap();
958
959        // Should only flag [undefined] in the content, not the ["test", "example"] array in frontmatter
960        assert_eq!(
961            result.len(),
962            1,
963            "Should only flag the undefined reference outside frontmatter"
964        );
965        assert!(result[0].message.contains("undefined"));
966
967        // Test TOML frontmatter
968        let content = r#"+++
969title = "My Post"
970tags = ["example", "test"]
971+++
972
973# Content
974
975[missing] reference should be flagged."#;
976        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
977        let result = rule.check(&ctx).unwrap();
978        assert_eq!(
979            result.len(),
980            1,
981            "Should only flag the undefined reference outside TOML frontmatter"
982        );
983        assert!(result[0].message.contains("missing"));
984    }
985
986    #[test]
987    fn test_github_alerts_not_flagged() {
988        // Test for issue #60 - GitHub alerts should not be flagged as undefined references
989        let rule = MD052ReferenceLinkImages::new();
990
991        // Test various GitHub alert types
992        let content = r#"# Document with GitHub Alerts
993
994> [!NOTE]
995> This is a note alert.
996
997> [!TIP]
998> This is a tip alert.
999
1000> [!IMPORTANT]
1001> This is an important alert.
1002
1003> [!WARNING]
1004> This is a warning alert.
1005
1006> [!CAUTION]
1007> This is a caution alert.
1008
1009Regular content with [undefined] reference."#;
1010        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1011        let result = rule.check(&ctx).unwrap();
1012
1013        // Should only flag the undefined reference, not the GitHub alerts
1014        assert_eq!(
1015            result.len(),
1016            1,
1017            "Should only flag the undefined reference, not GitHub alerts"
1018        );
1019        assert!(result[0].message.contains("undefined"));
1020        assert_eq!(result[0].line, 18); // Line with [undefined]
1021
1022        // Test GitHub alerts with additional content
1023        let content = r#"> [!TIP]
1024> Here's a useful tip about [something].
1025> Multiple lines are allowed.
1026
1027[something] is mentioned but not defined."#;
1028        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1029        let result = rule.check(&ctx).unwrap();
1030
1031        // Should flag only the [something] outside blockquotes
1032        // The test shows we're only catching one, which might be correct behavior
1033        // matching markdownlint's approach
1034        assert_eq!(result.len(), 1, "Should flag undefined reference");
1035        assert!(result[0].message.contains("something"));
1036
1037        // Test GitHub alerts with proper references
1038        let content = r#"> [!NOTE]
1039> See [reference] for more details.
1040
1041[reference]: https://example.com"#;
1042        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1043        let result = rule.check(&ctx).unwrap();
1044
1045        // Should not flag anything - [!NOTE] is GitHub alert and [reference] is defined
1046        assert_eq!(result.len(), 0, "Should not flag GitHub alerts or defined references");
1047    }
1048}