rumdl_lib/rules/
md052_reference_links_images.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::range_utils::calculate_match_range;
3use crate::utils::regex_cache::{HTML_COMMENT_PATTERN, SHORTCUT_REF_REGEX};
4use crate::utils::skip_context::{is_in_front_matter, is_in_math_context, is_in_table_cell};
5use lazy_static::lazy_static;
6use regex::Regex;
7use std::collections::{HashMap, HashSet};
8
9lazy_static! {
10    // Pattern to match reference definitions [ref]: url (standard regex is fine)
11    // Note: \S* instead of \S+ to allow empty definitions like [ref]:
12    static ref REF_REGEX: Regex = Regex::new(r"^\s*\[([^\]]+)\]:\s*.*").unwrap();
13
14    // Pattern for list items to exclude from reference checks (standard regex is fine)
15    static ref LIST_ITEM_REGEX: Regex = Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap();
16
17    // Pattern for code blocks (standard regex is fine)
18    static ref FENCED_CODE_START: Regex = Regex::new(r"^(`{3,}|~{3,})").unwrap();
19
20    // Pattern for output example sections (standard regex is fine)
21    static ref OUTPUT_EXAMPLE_START: Regex = Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap();
22
23    // Pattern for GitHub alerts/callouts in blockquotes (e.g., > [!NOTE], > [!TIP], etc.)
24    static ref GITHUB_ALERT_REGEX: Regex = Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION)\]").unwrap();
25}
26
/// Rule MD052: Reference links and images should use a reference that exists
///
/// See [docs/md052.md](../../docs/md052.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when a reference link or image uses a reference that isn't defined.
#[derive(Clone, Debug)]
pub struct MD052ReferenceLinkImages;
34
35impl Default for MD052ReferenceLinkImages {
36    fn default() -> Self {
37        Self::new()
38    }
39}
40
41impl MD052ReferenceLinkImages {
42    pub fn new() -> Self {
43        Self
44    }
45
46    /// Check if a position is inside any code span
47    fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
48        code_spans
49            .iter()
50            .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
51    }
52
53    /// Check if a byte position is within an HTML comment
54    fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
55        for m in HTML_COMMENT_PATTERN.find_iter(content) {
56            if m.start() <= byte_pos && byte_pos < m.end() {
57                return true;
58            }
59        }
60        false
61    }
62
63    /// Check if a byte position is within an HTML tag
64    fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
65        // Check HTML tags
66        for html_tag in ctx.html_tags().iter() {
67            if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
68                return true;
69            }
70        }
71        false
72    }
73
74    fn extract_references(&self, content: &str) -> HashSet<String> {
75        let mut references = HashSet::new();
76        let mut in_code_block = false;
77        let mut code_fence_marker = String::new();
78
79        for line in content.lines() {
80            // Handle code block boundaries
81            if let Some(cap) = FENCED_CODE_START.captures(line) {
82                if let Some(marker) = cap.get(0) {
83                    let marker_str = marker.as_str().to_string();
84                    if !in_code_block {
85                        in_code_block = true;
86                        code_fence_marker = marker_str;
87                    } else if line.trim().starts_with(&code_fence_marker) {
88                        in_code_block = false;
89                        code_fence_marker.clear();
90                    }
91                }
92                continue;
93            }
94
95            // Skip lines in code blocks
96            if in_code_block {
97                continue;
98            }
99
100            if let Some(cap) = REF_REGEX.captures(line) {
101                // Store references in lowercase for case-insensitive comparison
102                if let Some(reference) = cap.get(1) {
103                    references.insert(reference.as_str().to_lowercase());
104                }
105            }
106        }
107
108        references
109    }
110
111    fn find_undefined_references(
112        &self,
113        content: &str,
114        references: &HashSet<String>,
115        ctx: &crate::lint_context::LintContext,
116    ) -> Vec<(usize, usize, usize, String)> {
117        let mut undefined = Vec::new();
118        let mut reported_refs = HashMap::new();
119        let mut in_code_block = false;
120        let mut code_fence_marker = String::new();
121        let mut in_example_section = false;
122
123        // Get code spans once for the entire function
124        let code_spans = ctx.code_spans();
125
126        // Use cached data for reference links and images
127        for link in &ctx.links {
128            if !link.is_reference {
129                continue; // Skip inline links
130            }
131
132            // Skip links inside code spans
133            if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
134                continue;
135            }
136
137            // Skip links inside HTML comments
138            if Self::is_in_html_comment(content, link.byte_offset) {
139                continue;
140            }
141
142            // Skip links inside math contexts
143            if is_in_math_context(ctx, link.byte_offset) {
144                continue;
145            }
146
147            // Skip links inside table cells
148            if is_in_table_cell(ctx, link.line, link.start_col) {
149                continue;
150            }
151
152            // Skip links inside frontmatter (convert from 1-based to 0-based line numbers)
153            if is_in_front_matter(content, link.line.saturating_sub(1)) {
154                continue;
155            }
156
157            if let Some(ref_id) = &link.reference_id {
158                let reference_lower = ref_id.to_lowercase();
159
160                // Check if reference is defined
161                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
162                    // Check if the line is in an example section or list item
163                    if let Some(line_info) = ctx.line_info(link.line) {
164                        if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
165                            in_example_section = true;
166                            continue;
167                        }
168
169                        if in_example_section {
170                            continue;
171                        }
172
173                        // Skip list items
174                        if LIST_ITEM_REGEX.is_match(&line_info.content) {
175                            continue;
176                        }
177                    }
178
179                    let match_len = link.byte_end - link.byte_offset;
180                    undefined.push((link.line - 1, link.start_col, match_len, ref_id.clone()));
181                    reported_refs.insert(reference_lower, true);
182                }
183            }
184        }
185
186        // Use cached data for reference images
187        for image in &ctx.images {
188            if !image.is_reference {
189                continue; // Skip inline images
190            }
191
192            // Skip images inside code spans
193            if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
194                continue;
195            }
196
197            // Skip images inside HTML comments
198            if Self::is_in_html_comment(content, image.byte_offset) {
199                continue;
200            }
201
202            // Skip images inside math contexts
203            if is_in_math_context(ctx, image.byte_offset) {
204                continue;
205            }
206
207            // Skip images inside table cells
208            if is_in_table_cell(ctx, image.line, image.start_col) {
209                continue;
210            }
211
212            // Skip images inside frontmatter (convert from 1-based to 0-based line numbers)
213            if is_in_front_matter(content, image.line.saturating_sub(1)) {
214                continue;
215            }
216
217            if let Some(ref_id) = &image.reference_id {
218                let reference_lower = ref_id.to_lowercase();
219
220                // Check if reference is defined
221                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
222                    // Check if the line is in an example section or list item
223                    if let Some(line_info) = ctx.line_info(image.line) {
224                        if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
225                            in_example_section = true;
226                            continue;
227                        }
228
229                        if in_example_section {
230                            continue;
231                        }
232
233                        // Skip list items
234                        if LIST_ITEM_REGEX.is_match(&line_info.content) {
235                            continue;
236                        }
237                    }
238
239                    let match_len = image.byte_end - image.byte_offset;
240                    undefined.push((image.line - 1, image.start_col, match_len, ref_id.clone()));
241                    reported_refs.insert(reference_lower, true);
242                }
243            }
244        }
245
246        // Build a set of byte ranges that are already covered by parsed links/images
247        let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
248
249        // Add ranges from parsed links
250        for link in &ctx.links {
251            covered_ranges.push((link.byte_offset, link.byte_end));
252        }
253
254        // Add ranges from parsed images
255        for image in &ctx.images {
256            covered_ranges.push((image.byte_offset, image.byte_end));
257        }
258
259        // Sort ranges by start position
260        covered_ranges.sort_by_key(|&(start, _)| start);
261
262        // Handle shortcut references [text] which aren't captured in ctx.links
263        // Need to use regex for these
264        let lines: Vec<&str> = content.lines().collect();
265        in_example_section = false; // Reset for line-by-line processing
266
267        for (line_num, line) in lines.iter().enumerate() {
268            // Skip lines in frontmatter (line_num is already 0-based)
269            if is_in_front_matter(content, line_num) {
270                continue;
271            }
272
273            // Handle code blocks
274            if let Some(cap) = FENCED_CODE_START.captures(line) {
275                if let Some(marker) = cap.get(0) {
276                    let marker_str = marker.as_str().to_string();
277                    if !in_code_block {
278                        in_code_block = true;
279                        code_fence_marker = marker_str;
280                    } else if line.trim().starts_with(&code_fence_marker) {
281                        in_code_block = false;
282                        code_fence_marker.clear();
283                    }
284                }
285                continue;
286            }
287
288            if in_code_block {
289                continue;
290            }
291
292            // Check for example sections
293            if OUTPUT_EXAMPLE_START.is_match(line) {
294                in_example_section = true;
295                continue;
296            }
297
298            if in_example_section {
299                // Check if we're exiting the example section (another heading)
300                if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
301                    in_example_section = false;
302                } else {
303                    continue;
304                }
305            }
306
307            // Skip list items
308            if LIST_ITEM_REGEX.is_match(line) {
309                continue;
310            }
311
312            // Skip GitHub alerts/callouts (e.g., > [!TIP])
313            if GITHUB_ALERT_REGEX.is_match(line) {
314                continue;
315            }
316
317            // Check shortcut references: [reference]
318            if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
319                for cap in captures {
320                    if let Some(ref_match) = cap.get(1) {
321                        let reference = ref_match.as_str();
322                        let reference_lower = reference.to_lowercase();
323
324                        // Skip GitHub alerts (e.g., !NOTE, !TIP, !WARNING, !IMPORTANT, !CAUTION)
325                        if let Some(alert_type) = reference.strip_prefix('!')
326                            && matches!(alert_type, "NOTE" | "TIP" | "WARNING" | "IMPORTANT" | "CAUTION")
327                        {
328                            continue;
329                        }
330
331                        if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
332                            let full_match = cap.get(0).unwrap();
333                            let col = full_match.start();
334
335                            // Skip if inside code span
336                            let code_spans = ctx.code_spans();
337                            if Self::is_in_code_span(line_num + 1, col, &code_spans) {
338                                continue;
339                            }
340
341                            // Check if this position is within a covered range
342                            let line_start_byte = ctx.line_offsets[line_num];
343                            let byte_pos = line_start_byte + col;
344
345                            // Skip if inside HTML comment
346                            if Self::is_in_html_comment(content, byte_pos) {
347                                continue;
348                            }
349
350                            // Skip if inside HTML tag
351                            if Self::is_in_html_tag(ctx, byte_pos) {
352                                continue;
353                            }
354
355                            // Skip if inside math context
356                            if is_in_math_context(ctx, byte_pos) {
357                                continue;
358                            }
359
360                            // Skip if inside table cell
361                            if is_in_table_cell(ctx, line_num + 1, col) {
362                                continue;
363                            }
364
365                            let byte_end = byte_pos + (full_match.end() - full_match.start());
366
367                            // Check if this shortcut ref overlaps with any parsed link/image
368                            let mut is_covered = false;
369                            for &(range_start, range_end) in &covered_ranges {
370                                if range_start <= byte_pos && byte_end <= range_end {
371                                    // This shortcut ref is completely within a parsed link/image
372                                    is_covered = true;
373                                    break;
374                                }
375                                if range_start > byte_end {
376                                    // No need to check further (ranges are sorted)
377                                    break;
378                                }
379                            }
380
381                            if is_covered {
382                                continue;
383                            }
384
385                            // More sophisticated checks to avoid false positives
386
387                            // Check 1: If preceded by ], this might be part of [text][ref]
388                            // Look for the pattern ...][ref] and check if there's a matching [ before
389                            if col > 0 && line.chars().nth(col.saturating_sub(1)) == Some(']') {
390                                // Look backwards for a [ that would make this [text][ref]
391                                let mut bracket_count = 1; // We already saw one ]
392                                let mut check_pos = col.saturating_sub(2);
393                                let mut found_opening = false;
394
395                                while check_pos > 0 {
396                                    match line.chars().nth(check_pos) {
397                                        Some(']') => bracket_count += 1,
398                                        Some('[') => {
399                                            bracket_count -= 1;
400                                            if bracket_count == 0 {
401                                                // Check if this [ is escaped
402                                                if check_pos == 0 || line.chars().nth(check_pos - 1) != Some('\\') {
403                                                    found_opening = true;
404                                                }
405                                                break;
406                                            }
407                                        }
408                                        _ => {}
409                                    }
410                                    if check_pos == 0 {
411                                        break;
412                                    }
413                                    check_pos = check_pos.saturating_sub(1);
414                                }
415
416                                if found_opening {
417                                    // This is part of [text][ref], skip it
418                                    continue;
419                                }
420                            }
421
422                            // Check 2: If there's an escaped bracket pattern before this
423                            // e.g., \[text\][ref], the [ref] shouldn't be treated as a shortcut
424                            let before_text = &line[..col];
425                            if before_text.contains("\\]") {
426                                // Check if there's a \[ before the \]
427                                if let Some(escaped_close_pos) = before_text.rfind("\\]") {
428                                    let search_text = &before_text[..escaped_close_pos];
429                                    if search_text.contains("\\[") {
430                                        // This looks like \[...\][ref], skip it
431                                        continue;
432                                    }
433                                }
434                            }
435
436                            let match_len = full_match.end() - full_match.start();
437                            undefined.push((line_num, col, match_len, reference.to_string()));
438                            reported_refs.insert(reference_lower, true);
439                        }
440                    }
441                }
442            }
443        }
444
445        undefined
446    }
447}
448
449impl Rule for MD052ReferenceLinkImages {
450    fn name(&self) -> &'static str {
451        "MD052"
452    }
453
454    fn description(&self) -> &'static str {
455        "Reference links and images should use a reference that exists"
456    }
457
458    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
459        let content = ctx.content;
460        let mut warnings = Vec::new();
461        let references = self.extract_references(content);
462
463        // Use optimized detection method with cached link/image data
464        for (line_num, col, match_len, reference) in self.find_undefined_references(content, &references, ctx) {
465            let lines: Vec<&str> = content.lines().collect();
466            let line_content = lines.get(line_num).unwrap_or(&"");
467
468            // Calculate precise character range for the entire undefined reference
469            let (start_line, start_col, end_line, end_col) =
470                calculate_match_range(line_num + 1, line_content, col, match_len);
471
472            warnings.push(LintWarning {
473                rule_name: Some(self.name()),
474                line: start_line,
475                column: start_col,
476                end_line,
477                end_column: end_col,
478                message: format!("Reference '{reference}' not found"),
479                severity: Severity::Warning,
480                fix: None,
481            });
482        }
483
484        Ok(warnings)
485    }
486
487    /// Check if this rule should be skipped for performance
488    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
489        // Skip if content is empty or has no reference-style links/images
490        ctx.content.is_empty() || (!ctx.content.contains("](") && !ctx.content.contains("]["))
491    }
492
493    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
494        let content = ctx.content;
495        // No automatic fix available for undefined references
496        Ok(content.to_string())
497    }
498
499    fn as_any(&self) -> &dyn std::any::Any {
500        self
501    }
502
503    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
504    where
505        Self: Sized,
506    {
507        Box::new(MD052ReferenceLinkImages::new())
508    }
509}
510
511#[cfg(test)]
512mod tests {
513    use super::*;
514    use crate::lint_context::LintContext;
515
516    #[test]
517    fn test_valid_reference_link() {
518        let rule = MD052ReferenceLinkImages::new();
519        let content = "[text][ref]\n\n[ref]: https://example.com";
520        let ctx = LintContext::new(content);
521        let result = rule.check(&ctx).unwrap();
522
523        assert_eq!(result.len(), 0);
524    }
525
526    #[test]
527    fn test_undefined_reference_link() {
528        let rule = MD052ReferenceLinkImages::new();
529        let content = "[text][undefined]";
530        let ctx = LintContext::new(content);
531        let result = rule.check(&ctx).unwrap();
532
533        assert_eq!(result.len(), 1);
534        assert!(result[0].message.contains("Reference 'undefined' not found"));
535    }
536
537    #[test]
538    fn test_valid_reference_image() {
539        let rule = MD052ReferenceLinkImages::new();
540        let content = "![alt][img]\n\n[img]: image.jpg";
541        let ctx = LintContext::new(content);
542        let result = rule.check(&ctx).unwrap();
543
544        assert_eq!(result.len(), 0);
545    }
546
547    #[test]
548    fn test_undefined_reference_image() {
549        let rule = MD052ReferenceLinkImages::new();
550        let content = "![alt][missing]";
551        let ctx = LintContext::new(content);
552        let result = rule.check(&ctx).unwrap();
553
554        assert_eq!(result.len(), 1);
555        assert!(result[0].message.contains("Reference 'missing' not found"));
556    }
557
558    #[test]
559    fn test_case_insensitive_references() {
560        let rule = MD052ReferenceLinkImages::new();
561        let content = "[Text][REF]\n\n[ref]: https://example.com";
562        let ctx = LintContext::new(content);
563        let result = rule.check(&ctx).unwrap();
564
565        assert_eq!(result.len(), 0);
566    }
567
568    #[test]
569    fn test_shortcut_reference_valid() {
570        let rule = MD052ReferenceLinkImages::new();
571        let content = "[ref]\n\n[ref]: https://example.com";
572        let ctx = LintContext::new(content);
573        let result = rule.check(&ctx).unwrap();
574
575        assert_eq!(result.len(), 0);
576    }
577
578    #[test]
579    fn test_shortcut_reference_undefined() {
580        let rule = MD052ReferenceLinkImages::new();
581        let content = "[undefined]";
582        let ctx = LintContext::new(content);
583        let result = rule.check(&ctx).unwrap();
584
585        assert_eq!(result.len(), 1);
586        assert!(result[0].message.contains("Reference 'undefined' not found"));
587    }
588
589    #[test]
590    fn test_inline_links_ignored() {
591        let rule = MD052ReferenceLinkImages::new();
592        let content = "[text](https://example.com)";
593        let ctx = LintContext::new(content);
594        let result = rule.check(&ctx).unwrap();
595
596        assert_eq!(result.len(), 0);
597    }
598
599    #[test]
600    fn test_inline_images_ignored() {
601        let rule = MD052ReferenceLinkImages::new();
602        let content = "![alt](image.jpg)";
603        let ctx = LintContext::new(content);
604        let result = rule.check(&ctx).unwrap();
605
606        assert_eq!(result.len(), 0);
607    }
608
609    #[test]
610    fn test_references_in_code_blocks_ignored() {
611        let rule = MD052ReferenceLinkImages::new();
612        let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
613        let ctx = LintContext::new(content);
614        let result = rule.check(&ctx).unwrap();
615
616        assert_eq!(result.len(), 0);
617    }
618
619    #[test]
620    fn test_references_in_inline_code_ignored() {
621        let rule = MD052ReferenceLinkImages::new();
622        let content = "`[undefined]`";
623        let ctx = LintContext::new(content);
624        let result = rule.check(&ctx).unwrap();
625
626        // References inside inline code spans should be ignored
627        assert_eq!(result.len(), 0);
628    }
629
630    #[test]
631    fn test_comprehensive_inline_code_detection() {
632        let rule = MD052ReferenceLinkImages::new();
633        let content = r#"# Test
634
635This `[inside]` should be ignored.
636This [outside] should be flagged.
637Reference links `[text][ref]` in code are ignored.
638Regular reference [text][missing] should be flagged.
639Images `![alt][img]` in code are ignored.
640Regular image ![alt][badimg] should be flagged.
641
642Multiple `[one]` and `[two]` in code ignored, but [three] is not.
643
644```
645[code block content] should be ignored
646```
647
648`Multiple [refs] in [same] code span` ignored."#;
649
650        let ctx = LintContext::new(content);
651        let result = rule.check(&ctx).unwrap();
652
653        // Should only flag: outside, missing, badimg, three (4 total)
654        assert_eq!(result.len(), 4);
655
656        let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
657        assert!(messages.iter().any(|m| m.contains("outside")));
658        assert!(messages.iter().any(|m| m.contains("missing")));
659        assert!(messages.iter().any(|m| m.contains("badimg")));
660        assert!(messages.iter().any(|m| m.contains("three")));
661
662        // Should NOT flag any references inside code spans
663        assert!(!messages.iter().any(|m| m.contains("inside")));
664        assert!(!messages.iter().any(|m| m.contains("one")));
665        assert!(!messages.iter().any(|m| m.contains("two")));
666        assert!(!messages.iter().any(|m| m.contains("refs")));
667        assert!(!messages.iter().any(|m| m.contains("same")));
668    }
669
670    #[test]
671    fn test_multiple_undefined_references() {
672        let rule = MD052ReferenceLinkImages::new();
673        let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
674        let ctx = LintContext::new(content);
675        let result = rule.check(&ctx).unwrap();
676
677        assert_eq!(result.len(), 3);
678        assert!(result[0].message.contains("ref1"));
679        assert!(result[1].message.contains("ref2"));
680        assert!(result[2].message.contains("ref3"));
681    }
682
683    #[test]
684    fn test_mixed_valid_and_undefined() {
685        let rule = MD052ReferenceLinkImages::new();
686        let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
687        let ctx = LintContext::new(content);
688        let result = rule.check(&ctx).unwrap();
689
690        assert_eq!(result.len(), 1);
691        assert!(result[0].message.contains("missing"));
692    }
693
694    #[test]
695    fn test_empty_reference() {
696        let rule = MD052ReferenceLinkImages::new();
697        let content = "[text][]\n\n[ref]: https://example.com";
698        let ctx = LintContext::new(content);
699        let result = rule.check(&ctx).unwrap();
700
701        // Empty reference should use the link text as reference
702        assert_eq!(result.len(), 1);
703    }
704
705    #[test]
706    fn test_escaped_brackets_ignored() {
707        let rule = MD052ReferenceLinkImages::new();
708        let content = "\\[not a link\\]";
709        let ctx = LintContext::new(content);
710        let result = rule.check(&ctx).unwrap();
711
712        assert_eq!(result.len(), 0);
713    }
714
715    #[test]
716    fn test_list_items_ignored() {
717        let rule = MD052ReferenceLinkImages::new();
718        let content = "- [undefined]\n* [another]\n+ [third]";
719        let ctx = LintContext::new(content);
720        let result = rule.check(&ctx).unwrap();
721
722        // List items that look like shortcut references should be ignored
723        assert_eq!(result.len(), 0);
724    }
725
726    #[test]
727    fn test_output_example_section_ignored() {
728        let rule = MD052ReferenceLinkImages::new();
729        let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
730        let ctx = LintContext::new(content);
731        let result = rule.check(&ctx).unwrap();
732
733        // Only the reference outside the Output section should be flagged
734        assert_eq!(result.len(), 1);
735        assert!(result[0].message.contains("missing"));
736    }
737
738    #[test]
739    fn test_reference_definitions_in_code_blocks_ignored() {
740        let rule = MD052ReferenceLinkImages::new();
741        let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
742        let ctx = LintContext::new(content);
743        let result = rule.check(&ctx).unwrap();
744
745        // Reference defined in code block should not count
746        assert_eq!(result.len(), 1);
747        assert!(result[0].message.contains("ref"));
748    }
749
750    #[test]
751    fn test_multiple_references_to_same_undefined() {
752        let rule = MD052ReferenceLinkImages::new();
753        let content = "[first][missing] [second][missing] [third][missing]";
754        let ctx = LintContext::new(content);
755        let result = rule.check(&ctx).unwrap();
756
757        // Should only report once per unique reference
758        assert_eq!(result.len(), 1);
759        assert!(result[0].message.contains("missing"));
760    }
761
762    #[test]
763    fn test_reference_with_special_characters() {
764        let rule = MD052ReferenceLinkImages::new();
765        let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
766        let ctx = LintContext::new(content);
767        let result = rule.check(&ctx).unwrap();
768
769        assert_eq!(result.len(), 0);
770    }
771
772    #[test]
773    fn test_issue_51_html_attribute_not_reference() {
774        // Test for issue #51 - HTML attributes with square brackets shouldn't be treated as references
775        let rule = MD052ReferenceLinkImages::new();
776        let content = r#"# Example
777
778## Test
779
780Want to fill out this form?
781
782<form method="post">
783    <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
784</form>"#;
785        let ctx = LintContext::new(content);
786        let result = rule.check(&ctx).unwrap();
787
788        assert_eq!(
789            result.len(),
790            0,
791            "HTML attributes with square brackets should not be flagged as undefined references"
792        );
793    }
794
795    #[test]
796    fn test_extract_references() {
797        let rule = MD052ReferenceLinkImages::new();
798        let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
799        let refs = rule.extract_references(content);
800
801        assert_eq!(refs.len(), 3);
802        assert!(refs.contains("ref1"));
803        assert!(refs.contains("ref2"));
804        assert!(refs.contains("ref3"));
805    }
806
807    #[test]
808    fn test_inline_code_not_flagged() {
809        let rule = MD052ReferenceLinkImages::new();
810
811        // Test that arrays in inline code are not flagged as references
812        let content = r#"# Test
813
814Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.
815
816Also, `[todo]` is not a reference link.
817
818But this [reference] should be flagged.
819
820And this `[inline code]` should not be flagged.
821"#;
822
823        let ctx = LintContext::new(content);
824        let warnings = rule.check(&ctx).unwrap();
825
826        // Should only flag [reference], not the ones in backticks
827        assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
828        assert!(warnings[0].message.contains("'reference'"));
829    }
830
831    #[test]
832    fn test_code_block_references_ignored() {
833        let rule = MD052ReferenceLinkImages::new();
834
835        let content = r#"# Test
836
837```markdown
838[undefined] reference in code block
839![undefined] image in code block
840```
841
842[real-undefined] reference outside
843"#;
844
845        let ctx = LintContext::new(content);
846        let warnings = rule.check(&ctx).unwrap();
847
848        // Should only flag [real-undefined], not the ones in code block
849        assert_eq!(warnings.len(), 1);
850        assert!(warnings[0].message.contains("'real-undefined'"));
851    }
852
853    #[test]
854    fn test_html_comments_ignored() {
855        // Test for issue #20 - MD052 should not flag content inside HTML comments
856        let rule = MD052ReferenceLinkImages::new();
857
858        // Test the exact case from issue #20
859        let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
860<!--- set_env EDITOR 'python3 fake_editor.py' -->
861
862```bash
863$ python3 vote.py
8643 votes for: 2
8652 votes for: 3, 4
866```"#;
867        let ctx = LintContext::new(content);
868        let result = rule.check(&ctx).unwrap();
869        assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");
870
871        // Test various reference patterns inside HTML comments
872        let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
873Normal [text][undefined]
874<!-- Another [comment][with] references -->"#;
875        let ctx = LintContext::new(content);
876        let result = rule.check(&ctx).unwrap();
877        assert_eq!(
878            result.len(),
879            1,
880            "Should only flag the undefined reference outside comments"
881        );
882        assert!(result[0].message.contains("undefined"));
883
884        // Test multi-line HTML comments
885        let content = r#"<!--
886[ref1]
887[ref2][ref3]
888-->
889[actual][undefined]"#;
890        let ctx = LintContext::new(content);
891        let result = rule.check(&ctx).unwrap();
892        assert_eq!(
893            result.len(),
894            1,
895            "Should not flag references in multi-line HTML comments"
896        );
897        assert!(result[0].message.contains("undefined"));
898
899        // Test mixed scenarios
900        let content = r#"<!-- Comment with [1:] pattern -->
901Valid [link][ref]
902<!-- More [refs][in][comments] -->
903![image][missing]
904
905[ref]: https://example.com"#;
906        let ctx = LintContext::new(content);
907        let result = rule.check(&ctx).unwrap();
908        assert_eq!(result.len(), 1, "Should only flag missing image reference");
909        assert!(result[0].message.contains("missing"));
910    }
911
912    #[test]
913    fn test_frontmatter_ignored() {
914        // Test for issue #24 - MD052 should not flag content inside frontmatter
915        let rule = MD052ReferenceLinkImages::new();
916
917        // Test YAML frontmatter with arrays and references
918        let content = r#"---
919layout: post
920title: "My Jekyll Post"
921date: 2023-01-01
922categories: blog
923tags: ["test", "example"]
924author: John Doe
925---
926
927# My Blog Post
928
929This is the actual markdown content that should be linted.
930
931[undefined] reference should be flagged.
932
933## Section 1
934
935Some content here."#;
936        let ctx = LintContext::new(content);
937        let result = rule.check(&ctx).unwrap();
938
939        // Should only flag [undefined] in the content, not the ["test", "example"] array in frontmatter
940        assert_eq!(
941            result.len(),
942            1,
943            "Should only flag the undefined reference outside frontmatter"
944        );
945        assert!(result[0].message.contains("undefined"));
946
947        // Test TOML frontmatter
948        let content = r#"+++
949title = "My Post"
950tags = ["example", "test"]
951+++
952
953# Content
954
955[missing] reference should be flagged."#;
956        let ctx = LintContext::new(content);
957        let result = rule.check(&ctx).unwrap();
958        assert_eq!(
959            result.len(),
960            1,
961            "Should only flag the undefined reference outside TOML frontmatter"
962        );
963        assert!(result[0].message.contains("missing"));
964    }
965
966    #[test]
967    fn test_github_alerts_not_flagged() {
968        // Test for issue #60 - GitHub alerts should not be flagged as undefined references
969        let rule = MD052ReferenceLinkImages::new();
970
971        // Test various GitHub alert types
972        let content = r#"# Document with GitHub Alerts
973
974> [!NOTE]
975> This is a note alert.
976
977> [!TIP]
978> This is a tip alert.
979
980> [!IMPORTANT]
981> This is an important alert.
982
983> [!WARNING]
984> This is a warning alert.
985
986> [!CAUTION]
987> This is a caution alert.
988
989Regular content with [undefined] reference."#;
990        let ctx = LintContext::new(content);
991        let result = rule.check(&ctx).unwrap();
992
993        // Should only flag the undefined reference, not the GitHub alerts
994        assert_eq!(
995            result.len(),
996            1,
997            "Should only flag the undefined reference, not GitHub alerts"
998        );
999        assert!(result[0].message.contains("undefined"));
1000        assert_eq!(result[0].line, 18); // Line with [undefined]
1001
1002        // Test GitHub alerts with additional content
1003        let content = r#"> [!TIP]
1004> Here's a useful tip about [something].
1005> Multiple lines are allowed.
1006
1007[something] is mentioned but not defined."#;
1008        let ctx = LintContext::new(content);
1009        let result = rule.check(&ctx).unwrap();
1010
1011        // Should flag only the [something] outside blockquotes
1012        // The test shows we're only catching one, which might be correct behavior
1013        // matching markdownlint's approach
1014        assert_eq!(result.len(), 1, "Should flag undefined reference");
1015        assert!(result[0].message.contains("something"));
1016
1017        // Test GitHub alerts with proper references
1018        let content = r#"> [!NOTE]
1019> See [reference] for more details.
1020
1021[reference]: https://example.com"#;
1022        let ctx = LintContext::new(content);
1023        let result = rule.check(&ctx).unwrap();
1024
1025        // Should not flag anything - [!NOTE] is GitHub alert and [reference] is defined
1026        assert_eq!(result.len(), 0, "Should not flag GitHub alerts or defined references");
1027    }
1028}