rumdl_lib/rules/
md052_reference_links_images.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::{HTML_COMMENT_PATTERN, SHORTCUT_REF_REGEX};
5use crate::utils::skip_context::{is_in_front_matter, is_in_math_context, is_in_table_cell};
6use lazy_static::lazy_static;
7use regex::Regex;
8use std::collections::{HashMap, HashSet};
9
lazy_static! {
    // Matches a reference definition line such as `[ref]: url` and captures the label.
    // Note: the trailing `.*` accepts anything (including nothing) after the colon,
    // so empty definitions like `[ref]:` are recognised as well.
    static ref REF_REGEX: Regex = Regex::new(r"^\s*\[([^\]]+)\]:\s*.*").unwrap();

    // Matches the start of a bullet list item (`-`, `*` or `+`), optionally followed by a
    // task checkbox such as `[ ]` or `[x]`. Lines matching it are excluded from reference checks.
    static ref LIST_ITEM_REGEX: Regex = Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap();

    // Matches a code fence at the very start of a line: three or more backticks or tildes.
    static ref FENCED_CODE_START: Regex = Regex::new(r"^(`{3,}|~{3,})").unwrap();

    // Matches a heading whose entire text is "Output", "Example", "Output Style" or
    // "Output Format"; such headings open an output example section.
    static ref OUTPUT_EXAMPLE_START: Regex = Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap();

    // Pattern for GitHub alerts/callouts in blockquotes (e.g., > [!NOTE], > [!TIP], etc.)
    static ref GITHUB_ALERT_REGEX: Regex = Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION)\]").unwrap();
}
27
/// Rule MD052: Reference links and images should use a reference that exists
///
/// See [docs/md052.md](../../docs/md052.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when a reference link or image uses a reference that isn't defined.
#[derive(Clone, Default)]
pub struct MD052ReferenceLinkImages {}
35
36impl MD052ReferenceLinkImages {
37    pub fn new() -> Self {
38        Self {}
39    }
40
41    /// Check if a position is inside any code span
42    fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
43        code_spans
44            .iter()
45            .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
46    }
47
48    /// Check if a byte position is within an HTML comment
49    fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
50        for m in HTML_COMMENT_PATTERN.find_iter(content) {
51            if m.start() <= byte_pos && byte_pos < m.end() {
52                return true;
53            }
54        }
55        false
56    }
57
58    /// Check if a byte position is within an HTML tag
59    fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
60        // Check HTML tags
61        for html_tag in ctx.html_tags().iter() {
62            if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
63                return true;
64            }
65        }
66        false
67    }
68
69    fn extract_references(&self, content: &str, mkdocs_mode: bool) -> HashSet<String> {
70        use crate::config::MarkdownFlavor;
71        use crate::utils::skip_context::is_mkdocs_snippet_line;
72
73        let mut references = HashSet::new();
74        let mut in_code_block = false;
75        let mut code_fence_marker = String::new();
76
77        for line in content.lines() {
78            // Skip lines that look like MkDocs snippet markers (only in MkDocs mode)
79            if is_mkdocs_snippet_line(
80                line,
81                if mkdocs_mode {
82                    MarkdownFlavor::MkDocs
83                } else {
84                    MarkdownFlavor::Standard
85                },
86            ) {
87                continue;
88            }
89            // Handle code block boundaries
90            if let Some(cap) = FENCED_CODE_START.captures(line) {
91                if let Some(marker) = cap.get(0) {
92                    let marker_str = marker.as_str().to_string();
93                    if !in_code_block {
94                        in_code_block = true;
95                        code_fence_marker = marker_str;
96                    } else if line.trim().starts_with(&code_fence_marker) {
97                        in_code_block = false;
98                        code_fence_marker.clear();
99                    }
100                }
101                continue;
102            }
103
104            // Skip lines in code blocks
105            if in_code_block {
106                continue;
107            }
108
109            if let Some(cap) = REF_REGEX.captures(line) {
110                // Store references in lowercase for case-insensitive comparison
111                if let Some(reference) = cap.get(1) {
112                    references.insert(reference.as_str().to_lowercase());
113                }
114            }
115        }
116
117        references
118    }
119
120    fn find_undefined_references(
121        &self,
122        content: &str,
123        references: &HashSet<String>,
124        ctx: &crate::lint_context::LintContext,
125        mkdocs_mode: bool,
126    ) -> Vec<(usize, usize, usize, String)> {
127        let mut undefined = Vec::new();
128        let mut reported_refs = HashMap::new();
129        let mut in_code_block = false;
130        let mut code_fence_marker = String::new();
131        let mut in_example_section = false;
132
133        // Get code spans once for the entire function
134        let code_spans = ctx.code_spans();
135
136        // Use cached data for reference links and images
137        for link in &ctx.links {
138            if !link.is_reference {
139                continue; // Skip inline links
140            }
141
142            // Skip links inside code spans
143            if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
144                continue;
145            }
146
147            // Skip links inside HTML comments
148            if Self::is_in_html_comment(content, link.byte_offset) {
149                continue;
150            }
151
152            // Skip links inside math contexts
153            if is_in_math_context(ctx, link.byte_offset) {
154                continue;
155            }
156
157            // Skip links inside table cells
158            if is_in_table_cell(ctx, link.line, link.start_col) {
159                continue;
160            }
161
162            // Skip links inside frontmatter (convert from 1-based to 0-based line numbers)
163            if is_in_front_matter(content, link.line.saturating_sub(1)) {
164                continue;
165            }
166
167            if let Some(ref_id) = &link.reference_id {
168                let reference_lower = ref_id.to_lowercase();
169
170                // Skip MkDocs auto-references if in MkDocs mode
171                // Check both the reference_id and the link text for shorthand references
172                if mkdocs_mode && (is_mkdocs_auto_reference(ref_id) || is_mkdocs_auto_reference(&link.text)) {
173                    continue;
174                }
175
176                // Check if reference is defined
177                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
178                    // Check if the line is in an example section or list item
179                    if let Some(line_info) = ctx.line_info(link.line) {
180                        if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
181                            in_example_section = true;
182                            continue;
183                        }
184
185                        if in_example_section {
186                            continue;
187                        }
188
189                        // Skip list items
190                        if LIST_ITEM_REGEX.is_match(&line_info.content) {
191                            continue;
192                        }
193                    }
194
195                    let match_len = link.byte_end - link.byte_offset;
196                    undefined.push((link.line - 1, link.start_col, match_len, ref_id.clone()));
197                    reported_refs.insert(reference_lower, true);
198                }
199            }
200        }
201
202        // Use cached data for reference images
203        for image in &ctx.images {
204            if !image.is_reference {
205                continue; // Skip inline images
206            }
207
208            // Skip images inside code spans
209            if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
210                continue;
211            }
212
213            // Skip images inside HTML comments
214            if Self::is_in_html_comment(content, image.byte_offset) {
215                continue;
216            }
217
218            // Skip images inside math contexts
219            if is_in_math_context(ctx, image.byte_offset) {
220                continue;
221            }
222
223            // Skip images inside table cells
224            if is_in_table_cell(ctx, image.line, image.start_col) {
225                continue;
226            }
227
228            // Skip images inside frontmatter (convert from 1-based to 0-based line numbers)
229            if is_in_front_matter(content, image.line.saturating_sub(1)) {
230                continue;
231            }
232
233            if let Some(ref_id) = &image.reference_id {
234                let reference_lower = ref_id.to_lowercase();
235
236                // Skip MkDocs auto-references if in MkDocs mode
237                // Check both the reference_id and the alt text for shorthand references
238                if mkdocs_mode && (is_mkdocs_auto_reference(ref_id) || is_mkdocs_auto_reference(&image.alt_text)) {
239                    continue;
240                }
241
242                // Check if reference is defined
243                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
244                    // Check if the line is in an example section or list item
245                    if let Some(line_info) = ctx.line_info(image.line) {
246                        if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
247                            in_example_section = true;
248                            continue;
249                        }
250
251                        if in_example_section {
252                            continue;
253                        }
254
255                        // Skip list items
256                        if LIST_ITEM_REGEX.is_match(&line_info.content) {
257                            continue;
258                        }
259                    }
260
261                    let match_len = image.byte_end - image.byte_offset;
262                    undefined.push((image.line - 1, image.start_col, match_len, ref_id.clone()));
263                    reported_refs.insert(reference_lower, true);
264                }
265            }
266        }
267
268        // Build a set of byte ranges that are already covered by parsed links/images
269        let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
270
271        // Add ranges from parsed links
272        for link in &ctx.links {
273            covered_ranges.push((link.byte_offset, link.byte_end));
274        }
275
276        // Add ranges from parsed images
277        for image in &ctx.images {
278            covered_ranges.push((image.byte_offset, image.byte_end));
279        }
280
281        // Sort ranges by start position
282        covered_ranges.sort_by_key(|&(start, _)| start);
283
284        // Handle shortcut references [text] which aren't captured in ctx.links
285        // Need to use regex for these
286        let lines: Vec<&str> = content.lines().collect();
287        in_example_section = false; // Reset for line-by-line processing
288
289        for (line_num, line) in lines.iter().enumerate() {
290            // Skip lines in frontmatter (line_num is already 0-based)
291            if is_in_front_matter(content, line_num) {
292                continue;
293            }
294
295            // Handle code blocks
296            if let Some(cap) = FENCED_CODE_START.captures(line) {
297                if let Some(marker) = cap.get(0) {
298                    let marker_str = marker.as_str().to_string();
299                    if !in_code_block {
300                        in_code_block = true;
301                        code_fence_marker = marker_str;
302                    } else if line.trim().starts_with(&code_fence_marker) {
303                        in_code_block = false;
304                        code_fence_marker.clear();
305                    }
306                }
307                continue;
308            }
309
310            if in_code_block {
311                continue;
312            }
313
314            // Check for example sections
315            if OUTPUT_EXAMPLE_START.is_match(line) {
316                in_example_section = true;
317                continue;
318            }
319
320            if in_example_section {
321                // Check if we're exiting the example section (another heading)
322                if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
323                    in_example_section = false;
324                } else {
325                    continue;
326                }
327            }
328
329            // Skip list items
330            if LIST_ITEM_REGEX.is_match(line) {
331                continue;
332            }
333
334            // Skip GitHub alerts/callouts (e.g., > [!TIP])
335            if GITHUB_ALERT_REGEX.is_match(line) {
336                continue;
337            }
338
339            // Check shortcut references: [reference]
340            if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
341                for cap in captures {
342                    if let Some(ref_match) = cap.get(1) {
343                        let reference = ref_match.as_str();
344                        let reference_lower = reference.to_lowercase();
345
346                        // Skip GitHub alerts (e.g., !NOTE, !TIP, !WARNING, !IMPORTANT, !CAUTION)
347                        if let Some(alert_type) = reference.strip_prefix('!')
348                            && matches!(alert_type, "NOTE" | "TIP" | "WARNING" | "IMPORTANT" | "CAUTION")
349                        {
350                            continue;
351                        }
352
353                        // Skip MkDocs auto-references if in MkDocs mode
354                        if mkdocs_mode && is_mkdocs_auto_reference(reference) {
355                            continue;
356                        }
357
358                        if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
359                            let full_match = cap.get(0).unwrap();
360                            let col = full_match.start();
361
362                            // Skip if inside code span
363                            let code_spans = ctx.code_spans();
364                            if Self::is_in_code_span(line_num + 1, col, &code_spans) {
365                                continue;
366                            }
367
368                            // Check if this position is within a covered range
369                            let line_start_byte = ctx.line_offsets[line_num];
370                            let byte_pos = line_start_byte + col;
371
372                            // Skip if inside HTML comment
373                            if Self::is_in_html_comment(content, byte_pos) {
374                                continue;
375                            }
376
377                            // Skip if inside HTML tag
378                            if Self::is_in_html_tag(ctx, byte_pos) {
379                                continue;
380                            }
381
382                            // Skip if inside math context
383                            if is_in_math_context(ctx, byte_pos) {
384                                continue;
385                            }
386
387                            // Skip if inside table cell
388                            if is_in_table_cell(ctx, line_num + 1, col) {
389                                continue;
390                            }
391
392                            let byte_end = byte_pos + (full_match.end() - full_match.start());
393
394                            // Check if this shortcut ref overlaps with any parsed link/image
395                            let mut is_covered = false;
396                            for &(range_start, range_end) in &covered_ranges {
397                                if range_start <= byte_pos && byte_end <= range_end {
398                                    // This shortcut ref is completely within a parsed link/image
399                                    is_covered = true;
400                                    break;
401                                }
402                                if range_start > byte_end {
403                                    // No need to check further (ranges are sorted)
404                                    break;
405                                }
406                            }
407
408                            if is_covered {
409                                continue;
410                            }
411
412                            // More sophisticated checks to avoid false positives
413
414                            // Check 1: If preceded by ], this might be part of [text][ref]
415                            // Look for the pattern ...][ref] and check if there's a matching [ before
416                            if col > 0 && line.chars().nth(col.saturating_sub(1)) == Some(']') {
417                                // Look backwards for a [ that would make this [text][ref]
418                                let mut bracket_count = 1; // We already saw one ]
419                                let mut check_pos = col.saturating_sub(2);
420                                let mut found_opening = false;
421
422                                while check_pos > 0 {
423                                    match line.chars().nth(check_pos) {
424                                        Some(']') => bracket_count += 1,
425                                        Some('[') => {
426                                            bracket_count -= 1;
427                                            if bracket_count == 0 {
428                                                // Check if this [ is escaped
429                                                if check_pos == 0 || line.chars().nth(check_pos - 1) != Some('\\') {
430                                                    found_opening = true;
431                                                }
432                                                break;
433                                            }
434                                        }
435                                        _ => {}
436                                    }
437                                    if check_pos == 0 {
438                                        break;
439                                    }
440                                    check_pos = check_pos.saturating_sub(1);
441                                }
442
443                                if found_opening {
444                                    // This is part of [text][ref], skip it
445                                    continue;
446                                }
447                            }
448
449                            // Check 2: If there's an escaped bracket pattern before this
450                            // e.g., \[text\][ref], the [ref] shouldn't be treated as a shortcut
451                            let before_text = &line[..col];
452                            if before_text.contains("\\]") {
453                                // Check if there's a \[ before the \]
454                                if let Some(escaped_close_pos) = before_text.rfind("\\]") {
455                                    let search_text = &before_text[..escaped_close_pos];
456                                    if search_text.contains("\\[") {
457                                        // This looks like \[...\][ref], skip it
458                                        continue;
459                                    }
460                                }
461                            }
462
463                            let match_len = full_match.end() - full_match.start();
464                            undefined.push((line_num, col, match_len, reference.to_string()));
465                            reported_refs.insert(reference_lower, true);
466                        }
467                    }
468                }
469            }
470        }
471
472        undefined
473    }
474}
475
476impl Rule for MD052ReferenceLinkImages {
    /// Returns the rule identifier, `"MD052"`.
    fn name(&self) -> &'static str {
        "MD052"
    }
480
    /// Returns a one-line summary of what this rule enforces.
    fn description(&self) -> &'static str {
        "Reference links and images should use a reference that exists"
    }
484
485    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
486        let content = ctx.content;
487        let mut warnings = Vec::new();
488
489        // Check if we're in MkDocs mode from the context
490        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
491
492        let references = self.extract_references(content, mkdocs_mode);
493
494        // Use optimized detection method with cached link/image data
495        for (line_num, col, match_len, reference) in
496            self.find_undefined_references(content, &references, ctx, mkdocs_mode)
497        {
498            let lines: Vec<&str> = content.lines().collect();
499            let line_content = lines.get(line_num).unwrap_or(&"");
500
501            // Calculate precise character range for the entire undefined reference
502            let (start_line, start_col, end_line, end_col) =
503                calculate_match_range(line_num + 1, line_content, col, match_len);
504
505            warnings.push(LintWarning {
506                rule_name: Some(self.name()),
507                line: start_line,
508                column: start_col,
509                end_line,
510                end_column: end_col,
511                message: format!("Reference '{reference}' not found"),
512                severity: Severity::Warning,
513                fix: None,
514            });
515        }
516
517        Ok(warnings)
518    }
519
520    /// Check if this rule should be skipped for performance
521    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
522        // Skip if content is empty or has no reference-style links/images
523        ctx.content.is_empty() || (!ctx.content.contains("](") && !ctx.content.contains("]["))
524    }
525
526    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
527        let content = ctx.content;
528        // No automatic fix available for undefined references
529        Ok(content.to_string())
530    }
531
    /// Exposes this rule as `&dyn Any`.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
535
536    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
537    where
538        Self: Sized,
539    {
540        // Flavor is now accessed from LintContext during check
541        Box::new(MD052ReferenceLinkImages::new())
542    }
543}
544
545#[cfg(test)]
546mod tests {
547    use super::*;
548    use crate::lint_context::LintContext;
549
550    #[test]
551    fn test_valid_reference_link() {
552        let rule = MD052ReferenceLinkImages::new();
553        let content = "[text][ref]\n\n[ref]: https://example.com";
554        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
555        let result = rule.check(&ctx).unwrap();
556
557        assert_eq!(result.len(), 0);
558    }
559
560    #[test]
561    fn test_undefined_reference_link() {
562        let rule = MD052ReferenceLinkImages::new();
563        let content = "[text][undefined]";
564        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
565        let result = rule.check(&ctx).unwrap();
566
567        assert_eq!(result.len(), 1);
568        assert!(result[0].message.contains("Reference 'undefined' not found"));
569    }
570
571    #[test]
572    fn test_valid_reference_image() {
573        let rule = MD052ReferenceLinkImages::new();
574        let content = "![alt][img]\n\n[img]: image.jpg";
575        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
576        let result = rule.check(&ctx).unwrap();
577
578        assert_eq!(result.len(), 0);
579    }
580
581    #[test]
582    fn test_undefined_reference_image() {
583        let rule = MD052ReferenceLinkImages::new();
584        let content = "![alt][missing]";
585        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
586        let result = rule.check(&ctx).unwrap();
587
588        assert_eq!(result.len(), 1);
589        assert!(result[0].message.contains("Reference 'missing' not found"));
590    }
591
592    #[test]
593    fn test_case_insensitive_references() {
594        let rule = MD052ReferenceLinkImages::new();
595        let content = "[Text][REF]\n\n[ref]: https://example.com";
596        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
597        let result = rule.check(&ctx).unwrap();
598
599        assert_eq!(result.len(), 0);
600    }
601
602    #[test]
603    fn test_shortcut_reference_valid() {
604        let rule = MD052ReferenceLinkImages::new();
605        let content = "[ref]\n\n[ref]: https://example.com";
606        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
607        let result = rule.check(&ctx).unwrap();
608
609        assert_eq!(result.len(), 0);
610    }
611
612    #[test]
613    fn test_shortcut_reference_undefined() {
614        let rule = MD052ReferenceLinkImages::new();
615        let content = "[undefined]";
616        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
617        let result = rule.check(&ctx).unwrap();
618
619        assert_eq!(result.len(), 1);
620        assert!(result[0].message.contains("Reference 'undefined' not found"));
621    }
622
623    #[test]
624    fn test_inline_links_ignored() {
625        let rule = MD052ReferenceLinkImages::new();
626        let content = "[text](https://example.com)";
627        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
628        let result = rule.check(&ctx).unwrap();
629
630        assert_eq!(result.len(), 0);
631    }
632
633    #[test]
634    fn test_inline_images_ignored() {
635        let rule = MD052ReferenceLinkImages::new();
636        let content = "![alt](image.jpg)";
637        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
638        let result = rule.check(&ctx).unwrap();
639
640        assert_eq!(result.len(), 0);
641    }
642
643    #[test]
644    fn test_references_in_code_blocks_ignored() {
645        let rule = MD052ReferenceLinkImages::new();
646        let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
647        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
648        let result = rule.check(&ctx).unwrap();
649
650        assert_eq!(result.len(), 0);
651    }
652
653    #[test]
654    fn test_references_in_inline_code_ignored() {
655        let rule = MD052ReferenceLinkImages::new();
656        let content = "`[undefined]`";
657        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
658        let result = rule.check(&ctx).unwrap();
659
660        // References inside inline code spans should be ignored
661        assert_eq!(result.len(), 0);
662    }
663
664    #[test]
665    fn test_comprehensive_inline_code_detection() {
666        let rule = MD052ReferenceLinkImages::new();
667        let content = r#"# Test
668
669This `[inside]` should be ignored.
670This [outside] should be flagged.
671Reference links `[text][ref]` in code are ignored.
672Regular reference [text][missing] should be flagged.
673Images `![alt][img]` in code are ignored.
674Regular image ![alt][badimg] should be flagged.
675
676Multiple `[one]` and `[two]` in code ignored, but [three] is not.
677
678```
679[code block content] should be ignored
680```
681
682`Multiple [refs] in [same] code span` ignored."#;
683
684        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
685        let result = rule.check(&ctx).unwrap();
686
687        // Should only flag: outside, missing, badimg, three (4 total)
688        assert_eq!(result.len(), 4);
689
690        let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
691        assert!(messages.iter().any(|m| m.contains("outside")));
692        assert!(messages.iter().any(|m| m.contains("missing")));
693        assert!(messages.iter().any(|m| m.contains("badimg")));
694        assert!(messages.iter().any(|m| m.contains("three")));
695
696        // Should NOT flag any references inside code spans
697        assert!(!messages.iter().any(|m| m.contains("inside")));
698        assert!(!messages.iter().any(|m| m.contains("one")));
699        assert!(!messages.iter().any(|m| m.contains("two")));
700        assert!(!messages.iter().any(|m| m.contains("refs")));
701        assert!(!messages.iter().any(|m| m.contains("same")));
702    }
703
704    #[test]
705    fn test_multiple_undefined_references() {
706        let rule = MD052ReferenceLinkImages::new();
707        let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
708        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
709        let result = rule.check(&ctx).unwrap();
710
711        assert_eq!(result.len(), 3);
712        assert!(result[0].message.contains("ref1"));
713        assert!(result[1].message.contains("ref2"));
714        assert!(result[2].message.contains("ref3"));
715    }
716
717    #[test]
718    fn test_mixed_valid_and_undefined() {
719        let rule = MD052ReferenceLinkImages::new();
720        let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
721        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
722        let result = rule.check(&ctx).unwrap();
723
724        assert_eq!(result.len(), 1);
725        assert!(result[0].message.contains("missing"));
726    }
727
728    #[test]
729    fn test_empty_reference() {
730        let rule = MD052ReferenceLinkImages::new();
731        let content = "[text][]\n\n[ref]: https://example.com";
732        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
733        let result = rule.check(&ctx).unwrap();
734
735        // Empty reference should use the link text as reference
736        assert_eq!(result.len(), 1);
737    }
738
739    #[test]
740    fn test_escaped_brackets_ignored() {
741        let rule = MD052ReferenceLinkImages::new();
742        let content = "\\[not a link\\]";
743        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
744        let result = rule.check(&ctx).unwrap();
745
746        assert_eq!(result.len(), 0);
747    }
748
749    #[test]
750    fn test_list_items_ignored() {
751        let rule = MD052ReferenceLinkImages::new();
752        let content = "- [undefined]\n* [another]\n+ [third]";
753        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
754        let result = rule.check(&ctx).unwrap();
755
756        // List items that look like shortcut references should be ignored
757        assert_eq!(result.len(), 0);
758    }
759
760    #[test]
761    fn test_output_example_section_ignored() {
762        let rule = MD052ReferenceLinkImages::new();
763        let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
764        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
765        let result = rule.check(&ctx).unwrap();
766
767        // Only the reference outside the Output section should be flagged
768        assert_eq!(result.len(), 1);
769        assert!(result[0].message.contains("missing"));
770    }
771
772    #[test]
773    fn test_reference_definitions_in_code_blocks_ignored() {
774        let rule = MD052ReferenceLinkImages::new();
775        let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
776        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
777        let result = rule.check(&ctx).unwrap();
778
779        // Reference defined in code block should not count
780        assert_eq!(result.len(), 1);
781        assert!(result[0].message.contains("ref"));
782    }
783
784    #[test]
785    fn test_multiple_references_to_same_undefined() {
786        let rule = MD052ReferenceLinkImages::new();
787        let content = "[first][missing] [second][missing] [third][missing]";
788        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
789        let result = rule.check(&ctx).unwrap();
790
791        // Should only report once per unique reference
792        assert_eq!(result.len(), 1);
793        assert!(result[0].message.contains("missing"));
794    }
795
796    #[test]
797    fn test_reference_with_special_characters() {
798        let rule = MD052ReferenceLinkImages::new();
799        let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
800        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
801        let result = rule.check(&ctx).unwrap();
802
803        assert_eq!(result.len(), 0);
804    }
805
806    #[test]
807    fn test_issue_51_html_attribute_not_reference() {
808        // Test for issue #51 - HTML attributes with square brackets shouldn't be treated as references
809        let rule = MD052ReferenceLinkImages::new();
810        let content = r#"# Example
811
812## Test
813
814Want to fill out this form?
815
816<form method="post">
817    <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
818</form>"#;
819        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
820        let result = rule.check(&ctx).unwrap();
821
822        assert_eq!(
823            result.len(),
824            0,
825            "HTML attributes with square brackets should not be flagged as undefined references"
826        );
827    }
828
829    #[test]
830    fn test_extract_references() {
831        let rule = MD052ReferenceLinkImages::new();
832        let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
833        let refs = rule.extract_references(content, false);
834
835        assert_eq!(refs.len(), 3);
836        assert!(refs.contains("ref1"));
837        assert!(refs.contains("ref2"));
838        assert!(refs.contains("ref3"));
839    }
840
841    #[test]
842    fn test_inline_code_not_flagged() {
843        let rule = MD052ReferenceLinkImages::new();
844
845        // Test that arrays in inline code are not flagged as references
846        let content = r#"# Test
847
848Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.
849
850Also, `[todo]` is not a reference link.
851
852But this [reference] should be flagged.
853
854And this `[inline code]` should not be flagged.
855"#;
856
857        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
858        let warnings = rule.check(&ctx).unwrap();
859
860        // Should only flag [reference], not the ones in backticks
861        assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
862        assert!(warnings[0].message.contains("'reference'"));
863    }
864
865    #[test]
866    fn test_code_block_references_ignored() {
867        let rule = MD052ReferenceLinkImages::new();
868
869        let content = r#"# Test
870
871```markdown
872[undefined] reference in code block
873![undefined] image in code block
874```
875
876[real-undefined] reference outside
877"#;
878
879        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
880        let warnings = rule.check(&ctx).unwrap();
881
882        // Should only flag [real-undefined], not the ones in code block
883        assert_eq!(warnings.len(), 1);
884        assert!(warnings[0].message.contains("'real-undefined'"));
885    }
886
887    #[test]
888    fn test_html_comments_ignored() {
889        // Test for issue #20 - MD052 should not flag content inside HTML comments
890        let rule = MD052ReferenceLinkImages::new();
891
892        // Test the exact case from issue #20
893        let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
894<!--- set_env EDITOR 'python3 fake_editor.py' -->
895
896```bash
897$ python3 vote.py
8983 votes for: 2
8992 votes for: 3, 4
900```"#;
901        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
902        let result = rule.check(&ctx).unwrap();
903        assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");
904
905        // Test various reference patterns inside HTML comments
906        let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
907Normal [text][undefined]
908<!-- Another [comment][with] references -->"#;
909        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
910        let result = rule.check(&ctx).unwrap();
911        assert_eq!(
912            result.len(),
913            1,
914            "Should only flag the undefined reference outside comments"
915        );
916        assert!(result[0].message.contains("undefined"));
917
918        // Test multi-line HTML comments
919        let content = r#"<!--
920[ref1]
921[ref2][ref3]
922-->
923[actual][undefined]"#;
924        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
925        let result = rule.check(&ctx).unwrap();
926        assert_eq!(
927            result.len(),
928            1,
929            "Should not flag references in multi-line HTML comments"
930        );
931        assert!(result[0].message.contains("undefined"));
932
933        // Test mixed scenarios
934        let content = r#"<!-- Comment with [1:] pattern -->
935Valid [link][ref]
936<!-- More [refs][in][comments] -->
937![image][missing]
938
939[ref]: https://example.com"#;
940        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
941        let result = rule.check(&ctx).unwrap();
942        assert_eq!(result.len(), 1, "Should only flag missing image reference");
943        assert!(result[0].message.contains("missing"));
944    }
945
946    #[test]
947    fn test_frontmatter_ignored() {
948        // Test for issue #24 - MD052 should not flag content inside frontmatter
949        let rule = MD052ReferenceLinkImages::new();
950
951        // Test YAML frontmatter with arrays and references
952        let content = r#"---
953layout: post
954title: "My Jekyll Post"
955date: 2023-01-01
956categories: blog
957tags: ["test", "example"]
958author: John Doe
959---
960
961# My Blog Post
962
963This is the actual markdown content that should be linted.
964
965[undefined] reference should be flagged.
966
967## Section 1
968
969Some content here."#;
970        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
971        let result = rule.check(&ctx).unwrap();
972
973        // Should only flag [undefined] in the content, not the ["test", "example"] array in frontmatter
974        assert_eq!(
975            result.len(),
976            1,
977            "Should only flag the undefined reference outside frontmatter"
978        );
979        assert!(result[0].message.contains("undefined"));
980
981        // Test TOML frontmatter
982        let content = r#"+++
983title = "My Post"
984tags = ["example", "test"]
985+++
986
987# Content
988
989[missing] reference should be flagged."#;
990        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
991        let result = rule.check(&ctx).unwrap();
992        assert_eq!(
993            result.len(),
994            1,
995            "Should only flag the undefined reference outside TOML frontmatter"
996        );
997        assert!(result[0].message.contains("missing"));
998    }
999
1000    #[test]
1001    fn test_github_alerts_not_flagged() {
1002        // Test for issue #60 - GitHub alerts should not be flagged as undefined references
1003        let rule = MD052ReferenceLinkImages::new();
1004
1005        // Test various GitHub alert types
1006        let content = r#"# Document with GitHub Alerts
1007
1008> [!NOTE]
1009> This is a note alert.
1010
1011> [!TIP]
1012> This is a tip alert.
1013
1014> [!IMPORTANT]
1015> This is an important alert.
1016
1017> [!WARNING]
1018> This is a warning alert.
1019
1020> [!CAUTION]
1021> This is a caution alert.
1022
1023Regular content with [undefined] reference."#;
1024        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1025        let result = rule.check(&ctx).unwrap();
1026
1027        // Should only flag the undefined reference, not the GitHub alerts
1028        assert_eq!(
1029            result.len(),
1030            1,
1031            "Should only flag the undefined reference, not GitHub alerts"
1032        );
1033        assert!(result[0].message.contains("undefined"));
1034        assert_eq!(result[0].line, 18); // Line with [undefined]
1035
1036        // Test GitHub alerts with additional content
1037        let content = r#"> [!TIP]
1038> Here's a useful tip about [something].
1039> Multiple lines are allowed.
1040
1041[something] is mentioned but not defined."#;
1042        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1043        let result = rule.check(&ctx).unwrap();
1044
1045        // Should flag only the [something] outside blockquotes
1046        // The test shows we're only catching one, which might be correct behavior
1047        // matching markdownlint's approach
1048        assert_eq!(result.len(), 1, "Should flag undefined reference");
1049        assert!(result[0].message.contains("something"));
1050
1051        // Test GitHub alerts with proper references
1052        let content = r#"> [!NOTE]
1053> See [reference] for more details.
1054
1055[reference]: https://example.com"#;
1056        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1057        let result = rule.check(&ctx).unwrap();
1058
1059        // Should not flag anything - [!NOTE] is GitHub alert and [reference] is defined
1060        assert_eq!(result.len(), 0, "Should not flag GitHub alerts or defined references");
1061    }
1062}