rumdl_lib/rules/
md052_reference_links_images.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::{HTML_COMMENT_PATTERN, SHORTCUT_REF_REGEX};
5use crate::utils::skip_context::{is_in_math_context, is_in_table_cell};
6use lazy_static::lazy_static;
7use regex::Regex;
8use std::collections::{HashMap, HashSet};
9
10lazy_static! {
11    // Pattern to match reference definitions [ref]: url
12    // Note: \S* instead of \S+ to allow empty definitions like [ref]:
13    // The capturing group handles nested brackets to support cases like [`union[t, none]`]:
14    static ref REF_REGEX: Regex = Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap();
15
16    // Pattern for list items to exclude from reference checks (standard regex is fine)
17    static ref LIST_ITEM_REGEX: Regex = Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap();
18
19    // Pattern for code blocks (standard regex is fine)
20    static ref FENCED_CODE_START: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap();
21
22    // Pattern for output example sections (standard regex is fine)
23    static ref OUTPUT_EXAMPLE_START: Regex = Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap();
24
25    // Pattern for GitHub alerts/callouts in blockquotes (e.g., > [!NOTE], > [!TIP], etc.)
26    // Extended to include additional common alert types
27    static ref GITHUB_ALERT_REGEX: Regex = Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]").unwrap();
28
29    // Pattern to detect URLs that may contain brackets (IPv6, API endpoints, etc.)
30    // This pattern specifically looks for:
31    // - IPv6 addresses: https://[::1] or https://[2001:db8::1]
32    // - IPv6 with zone IDs: https://[fe80::1%eth0]
33    // - IPv6 mixed notation: https://[::ffff:192.0.2.1]
34    // - API paths with array notation: https://api.example.com/users[0]
35    // But NOT markdown reference links that happen to follow URLs
36    static ref URL_WITH_BRACKETS: Regex = Regex::new(
37        r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])"
38    ).unwrap();
39}
40
/// Rule MD052: Reference links and images should use a label that is defined
///
/// See [docs/md052.md](../../docs/md052.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when a reference link or image uses a reference that isn't defined.
/// The struct has no fields, so instances carry no configuration or state.
#[derive(Debug, Clone, Default)]
pub struct MD052ReferenceLinkImages {}
48
49impl MD052ReferenceLinkImages {
50    pub fn new() -> Self {
51        Self {}
52    }
53
/// Remove every leading and trailing backtick from `s`; backticks in the interior are kept.
/// MkDocs auto-reference detection uses this so that `module.Class` written in backticks
/// is compared as plain module.Class.
fn strip_backticks(s: &str) -> &str {
    s.trim_matches('`')
}
59
/// Decide whether `s` has the shape of an ASCII Python identifier: a first character that is
/// an ASCII letter or `_`, followed only by ASCII letters, ASCII digits or `_`.
/// The empty string is rejected.
///
/// MkDocs auto-reference detection uses this to accept single-word identifiers written in
/// backticks (`str`, `int`, ...) as valid auto-references.
fn is_valid_python_identifier(s: &str) -> bool {
    let is_word_char = |c: char| c.is_ascii_alphanumeric() || c == '_';
    let mut rest = s.chars();
    match rest.next() {
        // The first character may not be a digit; everything after it may be.
        Some(first) if first.is_ascii_alphabetic() || first == '_' => rest.all(is_word_char),
        _ => false,
    }
}
73
/// Check if text matches a known non-reference pattern that should be skipped.
///
/// `text` is the content found between the square brackets. Each check below is a fixed
/// textual test (no heuristics such as word counts), and the function returns `true` as soon
/// as any one of them matches, so the order of the checks does not influence the result.
///
/// Returns true for:
/// - Empty text, digits-only text and digit/colon slices: `[]`, `[42]`, `[1:3]`
/// - Dotted names without space, hyphen, underscore or backtick: `[tool.ruff]`, `[...]`
/// - Glob markers `[*]` and `[**]`
/// - Paths without spaces that do not start with `http`: `[src/utils]`
/// - Anything containing a comma, square bracket or quote: `[int, str]`, `[Dict[str, Any]]`,
///   `["E501"]`
/// - Text made up solely of characters that are neither alphanumeric nor spaces: `[->]`
/// - Text of at most two bytes that is not purely alphabetic: `[a1]`
/// - Descriptive text containing both a colon and a space: `[default: value]`
/// - Markdown extensions: `[!alert]`, `[^footnote]`, `[@citation]`, `[TOC]`
/// - Single ASCII uppercase letters (`[T]`) and common type names / literals (`[i32]`, `[null]`)
///
/// Backtick-wrapped names such as [`typing.ClassVar`] are deliberately not filtered, because
/// backticks are valid inside a reference label. Hyphenated text such as `[0-9]` is not
/// matched by any check either and is therefore still treated as a potential reference.
fn is_known_non_reference_pattern(text: &str) -> bool {
    // Common programming type names, literals, and short identifiers that are
    // likely not markdown references.
    const COMMON_NON_REFS: &[&str] = &[
        // Programming types
        "object",
        "Object",
        "any",
        "Any",
        "inv",
        "void",
        "bool",
        "int",
        "float",
        "str",
        "char",
        "i8",
        "i16",
        "i32",
        "i64",
        "i128",
        "isize",
        "u8",
        "u16",
        "u32",
        "u64",
        "u128",
        "usize",
        "f32",
        "f64",
        // JavaScript/JSON literals (excluding "undefined" which is too ambiguous)
        "null",
        "true",
        "false",
        "NaN",
        "Infinity",
        // Common JavaScript output patterns
        "object Object",
    ];

    // Digits only (array indices). Empty text also returns here, since `all` is
    // vacuously true for an empty iterator.
    if text.chars().all(|c| c.is_ascii_digit()) {
        return true;
    }

    // Numeric slices like [1:3], [0:10]
    if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
        return true;
    }

    // Dotted names such as config sections [tool.something] or module paths
    // [dataclasses.initvar]. Spaces, hyphens and underscores disqualify the text, and so do
    // backticks, which signal intentional code formatting in a label (e.g. [`module.Class`]).
    // The literal [...] is caught here as well.
    if text.contains('.') && !text.contains(|c: char| matches!(c, ' ' | '-' | '_' | '`')) {
        return true;
    }

    // Glob/wildcard markers [*] and [**]
    if matches!(text, "*" | "**") {
        return true;
    }

    // File paths like [dir/file], [src/utils]
    if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
        return true;
    }

    // Type annotations like [int, str], [Dict[str, Any]]: any comma or nested bracket
    if text.contains(|c: char| matches!(c, ',' | '[' | ']')) {
        return true;
    }

    // Only punctuation/operators: no alphanumeric character and no space anywhere
    if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
        return true;
    }

    // Very short non-word text (likely operators or syntax). `len()` counts bytes.
    if text.len() <= 2 && !text.chars().all(|c| c.is_alphabetic()) {
        return true;
    }

    // Any quote character, which covers ["E501"], ["ALL"], ["E", "F"] and ['x']
    if text.contains(|c: char| matches!(c, '"' | '\'')) {
        return true;
    }

    // Descriptive text with a colon like [default: the project root];
    // pure numeric slices were handled above
    if text.contains(':') && text.contains(' ') {
        return true;
    }

    // Extension prefixes: alerts/admonitions [!NOTE], footnotes [^1] / [^note],
    // and Pandoc/RMarkdown/Quarto citations [@citation-key]
    if text.starts_with(|c: char| matches!(c, '!' | '^' | '@')) {
        return true;
    }

    // Table of contents marker [TOC] (Python-Markdown and other processors)
    if text == "TOC" {
        return true;
    }

    // Single uppercase letters (likely type parameters) like [T], [U], [K], [V]
    if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
        return true;
    }

    COMMON_NON_REFS.contains(&text)
}
240
241    /// Check if a position is inside any code span
242    fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
243        code_spans
244            .iter()
245            .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
246    }
247
248    /// Check if a byte position is within an HTML comment
249    fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
250        for m in HTML_COMMENT_PATTERN.find_iter(content) {
251            if m.start() <= byte_pos && byte_pos < m.end() {
252                return true;
253            }
254        }
255        false
256    }
257
258    /// Check if a byte position is within an HTML tag
259    fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
260        // Check HTML tags
261        for html_tag in ctx.html_tags().iter() {
262            if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
263                return true;
264            }
265        }
266        false
267    }
268
269    fn extract_references(&self, content: &str, mkdocs_mode: bool) -> HashSet<String> {
270        use crate::config::MarkdownFlavor;
271        use crate::utils::skip_context::is_mkdocs_snippet_line;
272
273        let mut references = HashSet::new();
274        let mut in_code_block = false;
275        let mut code_fence_marker = String::new();
276
277        for line in content.lines() {
278            // Skip lines that look like MkDocs snippet markers (only in MkDocs mode)
279            if is_mkdocs_snippet_line(
280                line,
281                if mkdocs_mode {
282                    MarkdownFlavor::MkDocs
283                } else {
284                    MarkdownFlavor::Standard
285                },
286            ) {
287                continue;
288            }
289            // Handle code block boundaries
290            if let Some(cap) = FENCED_CODE_START.captures(line) {
291                if let Some(fence) = cap.get(2) {
292                    // Get the fence marker (``` or ~~~) without the indentation
293                    let fence_str = fence.as_str();
294                    if !in_code_block {
295                        in_code_block = true;
296                        code_fence_marker = fence_str.to_string();
297                    } else if line.trim_start().starts_with(&code_fence_marker) {
298                        // Check if this could be a closing fence
299                        let trimmed = line.trim_start();
300                        // A closing fence should be just the fence characters, possibly with trailing whitespace
301                        if trimmed.starts_with(&code_fence_marker) {
302                            let after_fence = &trimmed[code_fence_marker.len()..];
303                            if after_fence.trim().is_empty() {
304                                in_code_block = false;
305                                code_fence_marker.clear();
306                            }
307                        }
308                    }
309                }
310                continue;
311            }
312
313            // Skip lines in code blocks
314            if in_code_block {
315                continue;
316            }
317
318            // Check for abbreviation syntax (*[ABBR]: Definition) and skip it
319            // Abbreviations are not reference links and should not be tracked
320            if line.trim_start().starts_with("*[") {
321                continue;
322            }
323
324            if let Some(cap) = REF_REGEX.captures(line) {
325                // Store references in lowercase for case-insensitive comparison
326                if let Some(reference) = cap.get(1) {
327                    references.insert(reference.as_str().to_lowercase());
328                }
329            }
330        }
331
332        references
333    }
334
335    fn find_undefined_references(
336        &self,
337        content: &str,
338        references: &HashSet<String>,
339        ctx: &crate::lint_context::LintContext,
340        mkdocs_mode: bool,
341    ) -> Vec<(usize, usize, usize, String)> {
342        let mut undefined = Vec::new();
343        let mut reported_refs = HashMap::new();
344        let mut in_code_block = false;
345        let mut code_fence_marker = String::new();
346        let mut in_example_section = false;
347
348        // Get code spans once for the entire function
349        let code_spans = ctx.code_spans();
350
351        // Use cached data for reference links and images
352        for link in &ctx.links {
353            if !link.is_reference {
354                continue; // Skip inline links
355            }
356
357            // Skip links inside code spans
358            if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
359                continue;
360            }
361
362            // Skip links inside HTML comments
363            if Self::is_in_html_comment(content, link.byte_offset) {
364                continue;
365            }
366
367            // Skip links inside HTML tags
368            if Self::is_in_html_tag(ctx, link.byte_offset) {
369                continue;
370            }
371
372            // Skip links inside math contexts
373            if is_in_math_context(ctx, link.byte_offset) {
374                continue;
375            }
376
377            // Skip links inside table cells
378            if is_in_table_cell(ctx, link.line, link.start_col) {
379                continue;
380            }
381
382            // Skip links inside frontmatter
383            if ctx.line_info(link.line).is_some_and(|info| info.in_front_matter) {
384                continue;
385            }
386
387            if let Some(ref_id) = &link.reference_id {
388                let reference_lower = ref_id.to_lowercase();
389
390                // Skip known non-reference patterns (markdown extensions, code examples)
391                if Self::is_known_non_reference_pattern(ref_id) {
392                    continue;
393                }
394
395                // Skip MkDocs auto-references if in MkDocs mode
396                // Check both the reference_id and the link text for shorthand references
397                // Strip backticks since MkDocs resolves `module.Class` as module.Class
398                let stripped_ref = Self::strip_backticks(ref_id);
399                let stripped_text = Self::strip_backticks(&link.text);
400                if mkdocs_mode
401                    && (is_mkdocs_auto_reference(stripped_ref)
402                        || is_mkdocs_auto_reference(stripped_text)
403                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
404                        || (link.text.as_str() != stripped_text && Self::is_valid_python_identifier(stripped_text)))
405                {
406                    continue;
407                }
408
409                // Check if reference is defined
410                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
411                    // Check if the line is in an example section or list item
412                    if let Some(line_info) = ctx.line_info(link.line) {
413                        if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
414                            in_example_section = true;
415                            continue;
416                        }
417
418                        if in_example_section {
419                            continue;
420                        }
421
422                        // Skip list items
423                        if LIST_ITEM_REGEX.is_match(&line_info.content) {
424                            continue;
425                        }
426
427                        // Skip lines that are HTML content
428                        let trimmed = line_info.content.trim_start();
429                        if trimmed.starts_with('<') {
430                            continue;
431                        }
432                    }
433
434                    let match_len = link.byte_end - link.byte_offset;
435                    undefined.push((link.line - 1, link.start_col, match_len, ref_id.clone()));
436                    reported_refs.insert(reference_lower, true);
437                }
438            }
439        }
440
441        // Use cached data for reference images
442        for image in &ctx.images {
443            if !image.is_reference {
444                continue; // Skip inline images
445            }
446
447            // Skip images inside code spans
448            if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
449                continue;
450            }
451
452            // Skip images inside HTML comments
453            if Self::is_in_html_comment(content, image.byte_offset) {
454                continue;
455            }
456
457            // Skip images inside HTML tags
458            if Self::is_in_html_tag(ctx, image.byte_offset) {
459                continue;
460            }
461
462            // Skip images inside math contexts
463            if is_in_math_context(ctx, image.byte_offset) {
464                continue;
465            }
466
467            // Skip images inside table cells
468            if is_in_table_cell(ctx, image.line, image.start_col) {
469                continue;
470            }
471
472            // Skip images inside frontmatter
473            if ctx.line_info(image.line).is_some_and(|info| info.in_front_matter) {
474                continue;
475            }
476
477            if let Some(ref_id) = &image.reference_id {
478                let reference_lower = ref_id.to_lowercase();
479
480                // Skip known non-reference patterns (markdown extensions, code examples)
481                if Self::is_known_non_reference_pattern(ref_id) {
482                    continue;
483                }
484
485                // Skip MkDocs auto-references if in MkDocs mode
486                // Check both the reference_id and the alt text for shorthand references
487                // Strip backticks since MkDocs resolves `module.Class` as module.Class
488                let stripped_ref = Self::strip_backticks(ref_id);
489                let stripped_alt = Self::strip_backticks(&image.alt_text);
490                if mkdocs_mode
491                    && (is_mkdocs_auto_reference(stripped_ref)
492                        || is_mkdocs_auto_reference(stripped_alt)
493                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
494                        || (image.alt_text.as_str() != stripped_alt && Self::is_valid_python_identifier(stripped_alt)))
495                {
496                    continue;
497                }
498
499                // Check if reference is defined
500                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
501                    // Check if the line is in an example section or list item
502                    if let Some(line_info) = ctx.line_info(image.line) {
503                        if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
504                            in_example_section = true;
505                            continue;
506                        }
507
508                        if in_example_section {
509                            continue;
510                        }
511
512                        // Skip list items
513                        if LIST_ITEM_REGEX.is_match(&line_info.content) {
514                            continue;
515                        }
516
517                        // Skip lines that are HTML content
518                        let trimmed = line_info.content.trim_start();
519                        if trimmed.starts_with('<') {
520                            continue;
521                        }
522                    }
523
524                    let match_len = image.byte_end - image.byte_offset;
525                    undefined.push((image.line - 1, image.start_col, match_len, ref_id.clone()));
526                    reported_refs.insert(reference_lower, true);
527                }
528            }
529        }
530
531        // Build a set of byte ranges that are already covered by parsed links/images
532        let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
533
534        // Add ranges from parsed links
535        for link in &ctx.links {
536            covered_ranges.push((link.byte_offset, link.byte_end));
537        }
538
539        // Add ranges from parsed images
540        for image in &ctx.images {
541            covered_ranges.push((image.byte_offset, image.byte_end));
542        }
543
544        // Sort ranges by start position
545        covered_ranges.sort_by_key(|&(start, _)| start);
546
547        // Handle shortcut references [text] which aren't captured in ctx.links
548        // Need to use regex for these
549        let lines: Vec<&str> = content.lines().collect();
550        in_example_section = false; // Reset for line-by-line processing
551
552        for (line_num, line) in lines.iter().enumerate() {
553            // Skip lines in frontmatter (convert 0-based to 1-based for line_info)
554            if ctx.line_info(line_num + 1).is_some_and(|info| info.in_front_matter) {
555                continue;
556            }
557
558            // Handle code blocks
559            if let Some(cap) = FENCED_CODE_START.captures(line) {
560                if let Some(fence) = cap.get(2) {
561                    // Get the fence marker (``` or ~~~) without the indentation
562                    let fence_str = fence.as_str();
563                    if !in_code_block {
564                        in_code_block = true;
565                        code_fence_marker = fence_str.to_string();
566                    } else if line.trim_start().starts_with(&code_fence_marker) {
567                        // Check if this could be a closing fence
568                        let trimmed = line.trim_start();
569                        // A closing fence should be just the fence characters, possibly with trailing whitespace
570                        if trimmed.starts_with(&code_fence_marker) {
571                            let after_fence = &trimmed[code_fence_marker.len()..];
572                            if after_fence.trim().is_empty() {
573                                in_code_block = false;
574                                code_fence_marker.clear();
575                            }
576                        }
577                    }
578                }
579                continue;
580            }
581
582            if in_code_block {
583                continue;
584            }
585
586            // Check for example sections
587            if OUTPUT_EXAMPLE_START.is_match(line) {
588                in_example_section = true;
589                continue;
590            }
591
592            if in_example_section {
593                // Check if we're exiting the example section (another heading)
594                if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
595                    in_example_section = false;
596                } else {
597                    continue;
598                }
599            }
600
601            // Skip list items
602            if LIST_ITEM_REGEX.is_match(line) {
603                continue;
604            }
605
606            // Skip lines that are HTML content
607            let trimmed_line = line.trim_start();
608            if trimmed_line.starts_with('<') {
609                continue;
610            }
611
612            // Skip GitHub alerts/callouts (e.g., > [!TIP])
613            if GITHUB_ALERT_REGEX.is_match(line) {
614                continue;
615            }
616
617            // Skip abbreviation definitions (*[ABBR]: Definition)
618            // These are not reference links and should not be checked
619            if trimmed_line.starts_with("*[") {
620                continue;
621            }
622
623            // Collect positions of brackets that are part of URLs (IPv6, etc.)
624            // so we can exclude them from reference checking
625            let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
626            for mat in URL_WITH_BRACKETS.find_iter(line) {
627                // Find all bracket pairs within this URL match
628                let url_str = mat.as_str();
629                let url_start = mat.start();
630
631                // Find brackets within the URL (e.g., in https://[::1]:8080)
632                let mut idx = 0;
633                while idx < url_str.len() {
634                    if let Some(bracket_start) = url_str[idx..].find('[') {
635                        let bracket_start_abs = url_start + idx + bracket_start;
636                        if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
637                            let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
638                            url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
639                            idx += bracket_start + bracket_end + 2;
640                        } else {
641                            break;
642                        }
643                    } else {
644                        break;
645                    }
646                }
647            }
648
649            // Check shortcut references: [reference]
650            if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
651                for cap in captures {
652                    if let Some(ref_match) = cap.get(1) {
653                        // Check if this bracket is part of a URL (IPv6, etc.)
654                        let bracket_start = cap.get(0).unwrap().start();
655                        let bracket_end = cap.get(0).unwrap().end();
656
657                        // Skip if this bracket pair is within any URL bracket range
658                        let is_in_url = url_bracket_ranges
659                            .iter()
660                            .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
661
662                        if is_in_url {
663                            continue;
664                        }
665
666                        // Skip Pandoc/RMarkdown inline footnotes: ^[text]
667                        // Check if there's a ^ immediately before the opening bracket
668                        if bracket_start > 0 {
669                            // bracket_start is a byte offset, so we need to check the byte before
670                            if let Some(byte) = line.as_bytes().get(bracket_start.saturating_sub(1))
671                                && *byte == b'^'
672                            {
673                                continue; // This is an inline footnote, skip it
674                            }
675                        }
676
677                        let reference = ref_match.as_str();
678                        let reference_lower = reference.to_lowercase();
679
680                        // Skip known non-reference patterns (markdown extensions, code examples)
681                        if Self::is_known_non_reference_pattern(reference) {
682                            continue;
683                        }
684
685                        // Skip GitHub alerts (including extended types)
686                        if let Some(alert_type) = reference.strip_prefix('!')
687                            && matches!(
688                                alert_type,
689                                "NOTE"
690                                    | "TIP"
691                                    | "WARNING"
692                                    | "IMPORTANT"
693                                    | "CAUTION"
694                                    | "INFO"
695                                    | "SUCCESS"
696                                    | "FAILURE"
697                                    | "DANGER"
698                                    | "BUG"
699                                    | "EXAMPLE"
700                                    | "QUOTE"
701                            )
702                        {
703                            continue;
704                        }
705
706                        // Skip MkDocs snippet section markers like [start:section] or [end:section]
707                        // when they appear as part of snippet syntax (e.g., # -8<- [start:section])
708                        if mkdocs_mode
709                            && (reference.starts_with("start:") || reference.starts_with("end:"))
710                            && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
711                                || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
712                        {
713                            continue;
714                        }
715
716                        // Skip MkDocs auto-references if in MkDocs mode
717                        // Strip backticks since MkDocs resolves `module.Class` as module.Class
718                        let stripped_ref = Self::strip_backticks(reference);
719                        if mkdocs_mode
720                            && (is_mkdocs_auto_reference(stripped_ref)
721                                || (reference != stripped_ref && Self::is_valid_python_identifier(stripped_ref)))
722                        {
723                            continue;
724                        }
725
726                        if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
727                            let full_match = cap.get(0).unwrap();
728                            let col = full_match.start();
729
730                            // Skip if inside code span
731                            let code_spans = ctx.code_spans();
732                            if Self::is_in_code_span(line_num + 1, col, &code_spans) {
733                                continue;
734                            }
735
736                            // Check if this position is within a covered range
737                            let line_start_byte = ctx.line_offsets[line_num];
738                            let byte_pos = line_start_byte + col;
739
740                            // Skip if inside code block
741                            if crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block(
742                                &ctx.code_blocks,
743                                byte_pos,
744                            ) {
745                                continue;
746                            }
747
748                            // Skip if inside HTML comment
749                            if Self::is_in_html_comment(content, byte_pos) {
750                                continue;
751                            }
752
753                            // Skip if inside HTML tag
754                            if Self::is_in_html_tag(ctx, byte_pos) {
755                                continue;
756                            }
757
758                            // Skip if inside math context
759                            if is_in_math_context(ctx, byte_pos) {
760                                continue;
761                            }
762
763                            // Skip if inside table cell
764                            if is_in_table_cell(ctx, line_num + 1, col) {
765                                continue;
766                            }
767
768                            let byte_end = byte_pos + (full_match.end() - full_match.start());
769
770                            // Check if this shortcut ref overlaps with any parsed link/image
771                            let mut is_covered = false;
772                            for &(range_start, range_end) in &covered_ranges {
773                                if range_start <= byte_pos && byte_end <= range_end {
774                                    // This shortcut ref is completely within a parsed link/image
775                                    is_covered = true;
776                                    break;
777                                }
778                                if range_start > byte_end {
779                                    // No need to check further (ranges are sorted)
780                                    break;
781                                }
782                            }
783
784                            if is_covered {
785                                continue;
786                            }
787
788                            // More sophisticated checks to avoid false positives
789
790                            // Check 1: If preceded by ], this might be part of [text][ref]
791                            // Look for the pattern ...][ref] and check if there's a matching [ before
792                            let line_chars: Vec<char> = line.chars().collect();
793                            if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
794                                // Look backwards for a [ that would make this [text][ref]
795                                let mut bracket_count = 1; // We already saw one ]
796                                let mut check_pos = col.saturating_sub(2);
797                                let mut found_opening = false;
798
799                                while check_pos > 0 && check_pos < line_chars.len() {
800                                    match line_chars.get(check_pos) {
801                                        Some(&']') => bracket_count += 1,
802                                        Some(&'[') => {
803                                            bracket_count -= 1;
804                                            if bracket_count == 0 {
805                                                // Check if this [ is escaped
806                                                if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
807                                                    found_opening = true;
808                                                }
809                                                break;
810                                            }
811                                        }
812                                        _ => {}
813                                    }
814                                    if check_pos == 0 {
815                                        break;
816                                    }
817                                    check_pos = check_pos.saturating_sub(1);
818                                }
819
820                                if found_opening {
821                                    // This is part of [text][ref], skip it
822                                    continue;
823                                }
824                            }
825
826                            // Check 2: If there's an escaped bracket pattern before this
827                            // e.g., \[text\][ref], the [ref] shouldn't be treated as a shortcut
828                            let before_text = &line[..col];
829                            if before_text.contains("\\]") {
830                                // Check if there's a \[ before the \]
831                                if let Some(escaped_close_pos) = before_text.rfind("\\]") {
832                                    let search_text = &before_text[..escaped_close_pos];
833                                    if search_text.contains("\\[") {
834                                        // This looks like \[...\][ref], skip it
835                                        continue;
836                                    }
837                                }
838                            }
839
840                            let match_len = full_match.end() - full_match.start();
841                            undefined.push((line_num, col, match_len, reference.to_string()));
842                            reported_refs.insert(reference_lower, true);
843                        }
844                    }
845                }
846            }
847        }
848
849        undefined
850    }
851}
852
853impl Rule for MD052ReferenceLinkImages {
854    fn name(&self) -> &'static str {
855        "MD052"
856    }
857
858    fn description(&self) -> &'static str {
859        "Reference links and images should use a reference that exists"
860    }
861
862    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
863        let content = ctx.content;
864        let mut warnings = Vec::new();
865
866        // OPTIMIZATION: Early exit if no brackets at all
867        if !content.contains('[') {
868            return Ok(warnings);
869        }
870
871        // Check if we're in MkDocs mode from the context
872        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
873
874        let references = self.extract_references(content, mkdocs_mode);
875
876        // Use optimized detection method with cached link/image data
877        for (line_num, col, match_len, reference) in
878            self.find_undefined_references(content, &references, ctx, mkdocs_mode)
879        {
880            let lines: Vec<&str> = content.lines().collect();
881            let line_content = lines.get(line_num).unwrap_or(&"");
882
883            // Calculate precise character range for the entire undefined reference
884            let (start_line, start_col, end_line, end_col) =
885                calculate_match_range(line_num + 1, line_content, col, match_len);
886
887            warnings.push(LintWarning {
888                rule_name: Some(self.name().to_string()),
889                line: start_line,
890                column: start_col,
891                end_line,
892                end_column: end_col,
893                message: format!("Reference '{reference}' not found"),
894                severity: Severity::Warning,
895                fix: None,
896            });
897        }
898
899        Ok(warnings)
900    }
901
902    /// Check if this rule should be skipped for performance
903    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
904        // Skip if content is empty or has no links/images
905        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
906    }
907
908    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
909        let content = ctx.content;
910        // No automatic fix available for undefined references
911        Ok(content.to_string())
912    }
913
914    fn as_any(&self) -> &dyn std::any::Any {
915        self
916    }
917
918    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
919    where
920        Self: Sized,
921    {
922        // Flavor is now accessed from LintContext during check
923        Box::new(MD052ReferenceLinkImages::new())
924    }
925}
926
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Runs MD052 over `content` under the given flavor and returns its warnings.
    fn lint_with(content: &str, flavor: crate::config::MarkdownFlavor) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, flavor);
        MD052ReferenceLinkImages::new().check(&ctx).unwrap()
    }

    /// Runs MD052 over `content` under the standard Markdown flavor.
    fn lint(content: &str) -> Vec<LintWarning> {
        lint_with(content, crate::config::MarkdownFlavor::Standard)
    }

    /// A full reference link whose label is defined produces no warning.
    #[test]
    fn test_valid_reference_link() {
        let warnings = lint("[text][ref]\n\n[ref]: https://example.com");

        assert!(warnings.is_empty());
    }

    /// A full reference link with an undefined label is reported.
    #[test]
    fn test_undefined_reference_link() {
        let warnings = lint("[text][undefined]");

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("Reference 'undefined' not found"));
    }

    /// A reference image whose label is defined produces no warning.
    #[test]
    fn test_valid_reference_image() {
        let warnings = lint("![alt][img]\n\n[img]: image.jpg");

        assert!(warnings.is_empty());
    }

    /// A reference image with an undefined label is reported.
    #[test]
    fn test_undefined_reference_image() {
        let warnings = lint("![alt][missing]");

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("Reference 'missing' not found"));
    }

    /// Labels are matched against definitions without regard to case.
    #[test]
    fn test_case_insensitive_references() {
        let warnings = lint("[Text][REF]\n\n[ref]: https://example.com");

        assert!(warnings.is_empty());
    }

    /// A shortcut reference with a matching definition is accepted.
    #[test]
    fn test_shortcut_reference_valid() {
        let warnings = lint("[ref]\n\n[ref]: https://example.com");

        assert!(warnings.is_empty());
    }

    /// A shortcut reference without a definition is reported.
    #[test]
    fn test_shortcut_reference_undefined() {
        let warnings = lint("[undefined]");

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("Reference 'undefined' not found"));
    }

    /// Inline links are not reference links and must not be reported.
    #[test]
    fn test_inline_links_ignored() {
        let warnings = lint("[text](https://example.com)");

        assert!(warnings.is_empty());
    }

    /// Inline images are not reference images and must not be reported.
    #[test]
    fn test_inline_images_ignored() {
        let warnings = lint("![alt](image.jpg)");

        assert!(warnings.is_empty());
    }

    /// Bracketed text inside a fenced code block is not reported.
    #[test]
    fn test_references_in_code_blocks_ignored() {
        let warnings = lint("```\n[undefined]\n```\n\n[ref]: https://example.com");

        assert!(warnings.is_empty());
    }

    /// Bracketed text inside an inline code span is not reported.
    #[test]
    fn test_references_in_inline_code_ignored() {
        let warnings = lint("`[undefined]`");

        // References inside inline code spans should be ignored
        assert!(warnings.is_empty());
    }

    /// Mixes code spans, a code block and plain text; only the plain-text
    /// references may be reported.
    #[test]
    fn test_comprehensive_inline_code_detection() {
        let document = r#"# Test

This `[inside]` should be ignored.
This [outside] should be flagged.
Reference links `[text][ref]` in code are ignored.
Regular reference [text][missing] should be flagged.
Images `![alt][img]` in code are ignored.
Regular image ![alt][badimg] should be flagged.

Multiple `[one]` and `[two]` in code ignored, but [three] is not.

```
[code block content] should be ignored
```

`Multiple [refs] in [same] code span` ignored."#;

        let warnings = lint(document);

        // Should only flag: outside, missing, badimg, three (4 total)
        assert_eq!(warnings.len(), 4);

        // True when at least one warning message mentions `needle`.
        let mentioned = |needle: &str| warnings.iter().any(|w| w.message.contains(needle));

        assert!(mentioned("outside"));
        assert!(mentioned("missing"));
        assert!(mentioned("badimg"));
        assert!(mentioned("three"));

        // Should NOT flag any references inside code spans
        assert!(!mentioned("inside"));
        assert!(!mentioned("one"));
        assert!(!mentioned("two"));
        assert!(!mentioned("refs"));
        assert!(!mentioned("same"));
    }

    /// Several distinct undefined labels are each reported, in document order.
    #[test]
    fn test_multiple_undefined_references() {
        let warnings = lint("[link1][ref1] [link2][ref2] [link3][ref3]");

        assert_eq!(warnings.len(), 3);
        assert!(warnings[0].message.contains("ref1"));
        assert!(warnings[1].message.contains("ref2"));
        assert!(warnings[2].message.contains("ref3"));
    }

    /// Only the undefined label is reported when a defined one sits beside it.
    #[test]
    fn test_mixed_valid_and_undefined() {
        let warnings = lint("[valid][ref] [invalid][missing]\n\n[ref]: https://example.com");

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("missing"));
    }

    /// A collapsed reference (`[text][]`) with no matching definition is reported.
    #[test]
    fn test_empty_reference() {
        let warnings = lint("[text][]\n\n[ref]: https://example.com");

        // Empty reference should use the link text as reference
        assert_eq!(warnings.len(), 1);
    }

    /// Backslash-escaped brackets do not form a reference.
    #[test]
    fn test_escaped_brackets_ignored() {
        let warnings = lint("\\[not a link\\]");

        assert!(warnings.is_empty());
    }

    /// Bracketed text that directly follows a list marker is not reported.
    #[test]
    fn test_list_items_ignored() {
        let warnings = lint("- [undefined]\n* [another]\n+ [third]");

        // List items that look like shortcut references should be ignored
        assert!(warnings.is_empty());
    }

    /// References under an "Output" heading are skipped until the next section.
    #[test]
    fn test_output_example_section_ignored() {
        let warnings = lint("## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]");

        // Only the reference outside the Output section should be flagged
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("missing"));
    }

    /// A definition that only appears inside a code block does not define the label.
    #[test]
    fn test_reference_definitions_in_code_blocks_ignored() {
        let warnings = lint("[link][ref]\n\n```\n[ref]: https://example.com\n```");

        // Reference defined in code block should not count
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("ref"));
    }

    /// Repeated uses of one undefined label yield a single warning.
    #[test]
    fn test_multiple_references_to_same_undefined() {
        let warnings = lint("[first][missing] [second][missing] [third][missing]");

        // Should only report once per unique reference
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("missing"));
    }

    /// Labels containing hyphens are matched against their definitions.
    #[test]
    fn test_reference_with_special_characters() {
        let warnings = lint("[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com");

        assert!(warnings.is_empty());
    }

    /// Issue #51: square brackets inside HTML attribute values are not references.
    #[test]
    fn test_issue_51_html_attribute_not_reference() {
        let document = r#"# Example

## Test

Want to fill out this form?

<form method="post">
    <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
</form>"#;

        let warnings = lint(document);

        assert!(
            warnings.is_empty(),
            "HTML attributes with square brackets should not be flagged as undefined references"
        );
    }

    /// `extract_references` collects every definition label, lower-cased.
    #[test]
    fn test_extract_references() {
        let rule = MD052ReferenceLinkImages::new();
        let definitions = rule.extract_references("[ref1]: url1\n[Ref2]: url2\n[REF3]: url3", false);

        assert_eq!(definitions.len(), 3);
        assert!(definitions.contains("ref1"));
        assert!(definitions.contains("ref2"));
        assert!(definitions.contains("ref3"));
    }

    /// Array literals and bracketed words inside inline code are not reported.
    #[test]
    fn test_inline_code_not_flagged() {
        let document = r#"# Test

Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.

Also, `[todo]` is not a reference link.

But this [reference] should be flagged.

And this `[inline code]` should not be flagged.
"#;

        let warnings = lint(document);

        // Should only flag [reference], not the ones in backticks
        assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
        assert!(warnings[0].message.contains("'reference'"));
    }

    /// References inside a fenced block with an info string are not reported.
    #[test]
    fn test_code_block_references_ignored() {
        let document = r#"# Test

```markdown
[undefined] reference in code block
![undefined] image in code block
```

[real-undefined] reference outside
"#;

        let warnings = lint(document);

        // Should only flag [real-undefined], not the ones in code block
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("'real-undefined'"));
    }

    /// Issue #20: bracketed text inside HTML comments must not be reported.
    #[test]
    fn test_html_comments_ignored() {
        // The exact case from issue #20
        let issue_case = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
<!--- set_env EDITOR 'python3 fake_editor.py' -->

```bash
$ python3 vote.py
3 votes for: 2
2 votes for: 3, 4
```"#;
        let warnings = lint(issue_case);
        assert!(warnings.is_empty(), "Should not flag [1:] inside HTML comments");

        // Various reference patterns inside single-line HTML comments
        let single_line = r#"<!-- This is [ref1] and [ref2][ref3] -->
Normal [text][undefined]
<!-- Another [comment][with] references -->"#;
        let warnings = lint(single_line);
        assert_eq!(
            warnings.len(),
            1,
            "Should only flag the undefined reference outside comments"
        );
        assert!(warnings[0].message.contains("undefined"));

        // Multi-line HTML comments
        let multi_line = r#"<!--
[ref1]
[ref2][ref3]
-->
[actual][undefined]"#;
        let warnings = lint(multi_line);
        assert_eq!(
            warnings.len(),
            1,
            "Should not flag references in multi-line HTML comments"
        );
        assert!(warnings[0].message.contains("undefined"));

        // Comments interleaved with a valid link and an undefined image
        let mixed = r#"<!-- Comment with [1:] pattern -->
Valid [link][ref]
<!-- More [refs][in][comments] -->
![image][missing]

[ref]: https://example.com"#;
        let warnings = lint(mixed);
        assert_eq!(warnings.len(), 1, "Should only flag missing image reference");
        assert!(warnings[0].message.contains("missing"));
    }

    /// Issue #24: arrays inside YAML or TOML frontmatter must not be reported.
    #[test]
    fn test_frontmatter_ignored() {
        // YAML frontmatter with arrays and references
        let yaml_document = r#"---
layout: post
title: "My Jekyll Post"
date: 2023-01-01
categories: blog
tags: ["test", "example"]
author: John Doe
---

# My Blog Post

This is the actual markdown content that should be linted.

[undefined] reference should be flagged.

## Section 1

Some content here."#;
        let warnings = lint(yaml_document);

        // Should only flag [undefined] in the content, not the ["test", "example"] array in frontmatter
        assert_eq!(
            warnings.len(),
            1,
            "Should only flag the undefined reference outside frontmatter"
        );
        assert!(warnings[0].message.contains("undefined"));

        // TOML frontmatter
        let toml_document = r#"+++
title = "My Post"
tags = ["example", "test"]
+++

# Content

[missing] reference should be flagged."#;
        let warnings = lint(toml_document);
        assert_eq!(
            warnings.len(),
            1,
            "Should only flag the undefined reference outside TOML frontmatter"
        );
        assert!(warnings[0].message.contains("missing"));
    }

    /// Issue #68: MkDocs snippet section markers are not references in MkDocs
    /// mode, but are still reported in standard mode.
    #[test]
    fn test_mkdocs_snippet_markers_not_flagged() {
        // Snippet section markers, both bare and wrapped in HTML comments
        let markers_only = r#"# Document with MkDocs Snippets

Some content here.

# -8<- [start:remote-content]

This is the remote content section.

# -8<- [end:remote-content]

More content here.

<!-- --8<-- [start:another-section] -->
Content in another section
<!-- --8<-- [end:another-section] -->"#;
        let warnings = lint_with(markers_only, crate::config::MarkdownFlavor::MkDocs);

        // Should not flag any snippet markers as undefined references
        assert!(
            warnings.is_empty(),
            "Should not flag MkDocs snippet markers as undefined references"
        );

        // The marker lines are skipped, but regular undefined references on
        // other lines are still caught
        let markers_and_refs = r#"# Document

# -8<- [start:section]
Content with [reference] inside snippet section
# -8<- [end:section]

Regular [undefined] reference outside snippet markers."#;
        let warnings = lint_with(markers_and_refs, crate::config::MarkdownFlavor::MkDocs);

        assert_eq!(
            warnings.len(),
            2,
            "Should flag undefined references but skip snippet marker lines"
        );
        // The ordinary references are reported; the start:/end: markers are not
        assert!(warnings[0].message.contains("reference"));
        assert!(warnings[1].message.contains("undefined"));

        // In standard mode the markers are reported as undefined
        let standard_markers = r#"# Document

# -8<- [start:section]
# -8<- [end:section]"#;
        let warnings = lint(standard_markers);

        assert_eq!(
            warnings.len(),
            2,
            "In standard mode, snippet markers should be flagged as undefined references"
        );
    }

    /// Pandoc/RMarkdown/Quarto citation syntax (`[@key]`) is not reported.
    #[test]
    fn test_pandoc_citations_not_flagged() {
        let document = r#"# Research Paper

We are using the **bookdown** package [@R-bookdown] in this sample book.
This was built on top of R Markdown and **knitr** [@xie2015].

Multiple citations [@citation1; @citation2; @citation3] are also supported.

Regular [undefined] reference should still be flagged.
"#;
        let warnings = lint(document);

        // Should only flag the undefined reference, not the citations
        assert_eq!(
            warnings.len(),
            1,
            "Should only flag the undefined reference, not Pandoc citations"
        );
        assert!(warnings[0].message.contains("undefined"));
    }

    /// Pandoc inline footnotes (`^[...]`) are not reported.
    #[test]
    fn test_pandoc_inline_footnotes_not_flagged() {
        let document = r#"# Math Document

You can use math in footnotes like this^[where we mention $p = \frac{a}{b}$].

Another footnote^[with some text and a [link](https://example.com)].

But this [reference] without ^ should be flagged.
"#;
        let warnings = lint(document);

        // Should only flag the reference without ^
        assert_eq!(
            warnings.len(),
            1,
            "Should only flag the regular reference, not inline footnotes"
        );
        assert!(warnings[0].message.contains("reference"));
    }

    /// Issue #60: GitHub alert markers such as `> [!NOTE]` are not references.
    #[test]
    fn test_github_alerts_not_flagged() {
        // One alert of each basic type, then a real undefined reference
        let all_alerts = r#"# Document with GitHub Alerts

> [!NOTE]
> This is a note alert.

> [!TIP]
> This is a tip alert.

> [!IMPORTANT]
> This is an important alert.

> [!WARNING]
> This is a warning alert.

> [!CAUTION]
> This is a caution alert.

Regular content with [undefined] reference."#;
        let warnings = lint(all_alerts);

        // Should only flag the undefined reference, not the GitHub alerts
        assert_eq!(
            warnings.len(),
            1,
            "Should only flag the undefined reference, not GitHub alerts"
        );
        assert!(warnings[0].message.contains("undefined"));
        assert_eq!(warnings[0].line, 18); // Line with [undefined]

        // An alert whose body mentions a label that is used again outside it.
        // The label `something` occurs twice; a single warning is expected.
        let alert_with_body = r#"> [!TIP]
> Here's a useful tip about [something].
> Multiple lines are allowed.

[something] is mentioned but not defined."#;
        let warnings = lint(alert_with_body);

        assert_eq!(warnings.len(), 1, "Should flag undefined reference");
        assert!(warnings[0].message.contains("something"));

        // An alert next to a properly defined reference
        let alert_with_definition = r#"> [!NOTE]
> See [reference] for more details.

[reference]: https://example.com"#;
        let warnings = lint(alert_with_definition);

        // Should not flag anything - [!NOTE] is GitHub alert and [reference] is defined
        assert!(warnings.is_empty(), "Should not flag GitHub alerts or defined references");
    }
}