rumdl_lib/rules/
md052_reference_links_images.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::{HTML_COMMENT_PATTERN, SHORTCUT_REF_REGEX};
5use crate::utils::skip_context::{is_in_math_context, is_in_table_cell};
6use regex::Regex;
7use std::collections::{HashMap, HashSet};
8use std::sync::LazyLock;
9
10// Pattern to match reference definitions [ref]: url
11// Note: \S* instead of \S+ to allow empty definitions like [ref]:
12// The capturing group handles nested brackets to support cases like [`union[t, none]`]:
13static REF_REGEX: LazyLock<Regex> =
14    LazyLock::new(|| Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap());
15
16// Pattern for list items to exclude from reference checks (standard regex is fine)
17static LIST_ITEM_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap());
18
19// Pattern for code blocks (standard regex is fine)
20static FENCED_CODE_START: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap());
21
22// Pattern for output example sections (standard regex is fine)
23static OUTPUT_EXAMPLE_START: LazyLock<Regex> =
24    LazyLock::new(|| Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap());
25
26// Pattern for GitHub alerts/callouts in blockquotes (e.g., > [!NOTE], > [!TIP], etc.)
27// Extended to include additional common alert types
28static GITHUB_ALERT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
29    Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]")
30        .unwrap()
31});
32
33// Pattern to detect URLs that may contain brackets (IPv6, API endpoints, etc.)
34// This pattern specifically looks for:
35// - IPv6 addresses: https://[::1] or https://[2001:db8::1]
36// - IPv6 with zone IDs: https://[fe80::1%eth0]
37// - IPv6 mixed notation: https://[::ffff:192.0.2.1]
38// - API paths with array notation: https://api.example.com/users[0]
39// But NOT markdown reference links that happen to follow URLs
40static URL_WITH_BRACKETS: LazyLock<Regex> =
41    LazyLock::new(|| Regex::new(r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])").unwrap());
42
43/// Rule MD052: Reference links and images should use a label that is defined.
44///
45/// See [docs/md052.md](../../docs/md052.md) for full documentation, configuration, and examples.
46///
47/// This rule is triggered when a reference link or image uses a reference that isn't defined.
48#[derive(Clone, Default)]
49pub struct MD052ReferenceLinkImages {}
50
51impl MD052ReferenceLinkImages {
52    pub fn new() -> Self {
53        Self {}
54    }
55
56    /// Strip surrounding backticks from a string
57    /// Used for MkDocs auto-reference detection where `module.Class` should be treated as module.Class
58    fn strip_backticks(s: &str) -> &str {
59        s.trim_start_matches('`').trim_end_matches('`')
60    }
61
62    /// Check if a string is a valid Python identifier
63    /// Used for MkDocs auto-reference detection where single-word backtick-wrapped identifiers
64    /// like `str`, `int`, etc. should be accepted as valid auto-references
65    fn is_valid_python_identifier(s: &str) -> bool {
66        if s.is_empty() {
67            return false;
68        }
69        let first_char = s.chars().next().unwrap();
70        if !first_char.is_ascii_alphabetic() && first_char != '_' {
71            return false;
72        }
73        s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
74    }
75
76    /// Check if text matches a known non-reference pattern that should be skipped.
77    ///
78    /// These are deterministic patterns from markdown extensions or code examples,
79    /// not heuristics. Returns true for:
80    /// - Markdown extensions: [^footnote], [@citation], [!alert], [TOC]
81    /// - Programming syntax: [T], [null], [i32], ["string"]
82    /// - Descriptive text: [default: value], [0-9]
83    fn is_known_non_reference_pattern(text: &str) -> bool {
84        // Skip numeric patterns (array indices, ranges)
85        if text.chars().all(|c| c.is_ascii_digit()) {
86            return true;
87        }
88
89        // Skip numeric ranges like [1:3], [0:10], etc.
90        if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
91            return true;
92        }
93
94        // Skip patterns that look like config sections [tool.something], [section.subsection]
95        // But not if they contain other non-alphanumeric chars like hyphens, underscores, or backticks
96        // Backticks indicate intentional code formatting in a reference name (e.g., [`module.Class`])
97        if text.contains('.')
98            && !text.contains(' ')
99            && !text.contains('-')
100            && !text.contains('_')
101            && !text.contains('`')
102        {
103            // Config sections typically have dots, no spaces, and only alphanumeric + dots
104            return true;
105        }
106
107        // Skip glob/wildcard patterns like [*], [...], [**]
108        if text == "*" || text == "..." || text == "**" {
109            return true;
110        }
111
112        // Skip patterns that look like file paths [dir/file], [src/utils]
113        if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
114            return true;
115        }
116
117        // Skip programming type annotations like [int, str], [Dict[str, Any]]
118        // These typically have commas and/or nested brackets
119        if text.contains(',') || text.contains('[') || text.contains(']') {
120            // Check if it looks like a type annotation pattern
121            return true;
122        }
123
124        // Note: We don't filter out patterns with backticks because backticks in reference names
125        // are valid markdown syntax, e.g., [`dataclasses.InitVar`] is a valid reference name
126
127        // Skip patterns that look like module/class paths ONLY if they don't have backticks
128        // Backticks indicate intentional code formatting in a reference name
129        // e.g., skip [dataclasses.initvar] but allow [`typing.ClassVar`]
130        if !text.contains('`')
131            && text.contains('.')
132            && !text.contains(' ')
133            && !text.contains('-')
134            && !text.contains('_')
135        {
136            return true;
137        }
138
139        // Note: We don't filter based on word count anymore because legitimate references
140        // can have many words, like "python language reference for import statements"
141        // Word count filtering was causing false positives where valid references were
142        // being incorrectly flagged as unused
143
144        // Skip patterns that are just punctuation or operators
145        if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
146            return true;
147        }
148
149        // Skip very short non-word patterns (likely operators or syntax)
150        if text.len() <= 2 && !text.chars().all(|c| c.is_alphabetic()) {
151            return true;
152        }
153
154        // Skip quoted patterns like ["E501"], ["ALL"], ["E", "F"]
155        if (text.starts_with('"') && text.ends_with('"'))
156            || (text.starts_with('\'') && text.ends_with('\''))
157            || text.contains('"')
158            || text.contains('\'')
159        {
160            return true;
161        }
162
163        // Skip descriptive patterns with colon like [default: the project root]
164        // But allow simple numeric ranges which are handled above
165        if text.contains(':') && text.contains(' ') {
166            return true;
167        }
168
169        // Skip alert/admonition patterns like [!WARN], [!NOTE], etc.
170        if text.starts_with('!') {
171            return true;
172        }
173
174        // Skip footnote syntax like [^1], [^note], etc.
175        // Footnotes start with ^ and are a common markdown extension
176        if text.starts_with('^') {
177            return true;
178        }
179
180        // Skip Pandoc/RMarkdown/Quarto citation syntax like [@citation-key]
181        // Citations in these formats start with @ inside brackets
182        if text.starts_with('@') {
183            return true;
184        }
185
186        // Skip table of contents markers like [TOC]
187        // Used by Python-Markdown and other processors
188        if text == "TOC" {
189            return true;
190        }
191
192        // Skip single uppercase letters (likely type parameters) like [T], [U], [K], [V]
193        if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
194            return true;
195        }
196
197        // Skip common programming type names, literals, and short identifiers
198        // that are likely not markdown references
199        let common_non_refs = [
200            // Programming types
201            "object",
202            "Object",
203            "any",
204            "Any",
205            "inv",
206            "void",
207            "bool",
208            "int",
209            "float",
210            "str",
211            "char",
212            "i8",
213            "i16",
214            "i32",
215            "i64",
216            "i128",
217            "isize",
218            "u8",
219            "u16",
220            "u32",
221            "u64",
222            "u128",
223            "usize",
224            "f32",
225            "f64",
226            // JavaScript/JSON literals (excluding "undefined" which is too ambiguous)
227            "null",
228            "true",
229            "false",
230            "NaN",
231            "Infinity",
232            // Common JavaScript output patterns
233            "object Object",
234        ];
235
236        if common_non_refs.contains(&text) {
237            return true;
238        }
239
240        false
241    }
242
243    /// Check if a position is inside any code span
244    fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
245        code_spans
246            .iter()
247            .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
248    }
249
250    /// Check if a byte position is within an HTML comment
251    fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
252        for m in HTML_COMMENT_PATTERN.find_iter(content) {
253            if m.start() <= byte_pos && byte_pos < m.end() {
254                return true;
255            }
256        }
257        false
258    }
259
260    /// Check if a byte position is within an HTML tag
261    fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
262        // Check HTML tags
263        for html_tag in ctx.html_tags().iter() {
264            if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
265                return true;
266            }
267        }
268        false
269    }
270
271    fn extract_references(&self, content: &str, mkdocs_mode: bool) -> HashSet<String> {
272        use crate::config::MarkdownFlavor;
273        use crate::utils::skip_context::is_mkdocs_snippet_line;
274
275        let mut references = HashSet::new();
276        let mut in_code_block = false;
277        let mut code_fence_marker = String::new();
278
279        for line in content.lines() {
280            // Skip lines that look like MkDocs snippet markers (only in MkDocs mode)
281            if is_mkdocs_snippet_line(
282                line,
283                if mkdocs_mode {
284                    MarkdownFlavor::MkDocs
285                } else {
286                    MarkdownFlavor::Standard
287                },
288            ) {
289                continue;
290            }
291            // Handle code block boundaries
292            if let Some(cap) = FENCED_CODE_START.captures(line) {
293                if let Some(fence) = cap.get(2) {
294                    // Get the fence marker (``` or ~~~) without the indentation
295                    let fence_str = fence.as_str();
296                    if !in_code_block {
297                        in_code_block = true;
298                        code_fence_marker = fence_str.to_string();
299                    } else if line.trim_start().starts_with(&code_fence_marker) {
300                        // Check if this could be a closing fence
301                        let trimmed = line.trim_start();
302                        // A closing fence should be just the fence characters, possibly with trailing whitespace
303                        if trimmed.starts_with(&code_fence_marker) {
304                            let after_fence = &trimmed[code_fence_marker.len()..];
305                            if after_fence.trim().is_empty() {
306                                in_code_block = false;
307                                code_fence_marker.clear();
308                            }
309                        }
310                    }
311                }
312                continue;
313            }
314
315            // Skip lines in code blocks
316            if in_code_block {
317                continue;
318            }
319
320            // Check for abbreviation syntax (*[ABBR]: Definition) and skip it
321            // Abbreviations are not reference links and should not be tracked
322            if line.trim_start().starts_with("*[") {
323                continue;
324            }
325
326            if let Some(cap) = REF_REGEX.captures(line) {
327                // Store references in lowercase for case-insensitive comparison
328                if let Some(reference) = cap.get(1) {
329                    references.insert(reference.as_str().to_lowercase());
330                }
331            }
332        }
333
334        references
335    }
336
337    fn find_undefined_references(
338        &self,
339        content: &str,
340        references: &HashSet<String>,
341        ctx: &crate::lint_context::LintContext,
342        mkdocs_mode: bool,
343    ) -> Vec<(usize, usize, usize, String)> {
344        let mut undefined = Vec::new();
345        let mut reported_refs = HashMap::new();
346        let mut in_code_block = false;
347        let mut code_fence_marker = String::new();
348        let mut in_example_section = false;
349
350        // Get code spans once for the entire function
351        let code_spans = ctx.code_spans();
352
353        // Use cached data for reference links and images
354        for link in &ctx.links {
355            if !link.is_reference {
356                continue; // Skip inline links
357            }
358
359            // Skip links inside Jinja templates
360            if ctx.is_in_jinja_range(link.byte_offset) {
361                continue;
362            }
363
364            // Skip links inside code spans
365            if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
366                continue;
367            }
368
369            // Skip links inside HTML comments
370            if Self::is_in_html_comment(content, link.byte_offset) {
371                continue;
372            }
373
374            // Skip links inside HTML tags
375            if Self::is_in_html_tag(ctx, link.byte_offset) {
376                continue;
377            }
378
379            // Skip links inside math contexts
380            if is_in_math_context(ctx, link.byte_offset) {
381                continue;
382            }
383
384            // Skip links inside table cells
385            if is_in_table_cell(ctx, link.line, link.start_col) {
386                continue;
387            }
388
389            // Skip links inside frontmatter
390            if ctx.line_info(link.line).is_some_and(|info| info.in_front_matter) {
391                continue;
392            }
393
394            if let Some(ref_id) = &link.reference_id {
395                let reference_lower = ref_id.to_lowercase();
396
397                // Skip known non-reference patterns (markdown extensions, code examples)
398                if Self::is_known_non_reference_pattern(ref_id) {
399                    continue;
400                }
401
402                // Skip MkDocs auto-references if in MkDocs mode
403                // Check both the reference_id and the link text for shorthand references
404                // Strip backticks since MkDocs resolves `module.Class` as module.Class
405                let stripped_ref = Self::strip_backticks(ref_id);
406                let stripped_text = Self::strip_backticks(&link.text);
407                if mkdocs_mode
408                    && (is_mkdocs_auto_reference(stripped_ref)
409                        || is_mkdocs_auto_reference(stripped_text)
410                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
411                        || (link.text.as_ref() != stripped_text && Self::is_valid_python_identifier(stripped_text)))
412                {
413                    continue;
414                }
415
416                // Check if reference is defined
417                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
418                    // Check if the line is in an example section or list item
419                    if let Some(line_info) = ctx.line_info(link.line) {
420                        if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
421                            in_example_section = true;
422                            continue;
423                        }
424
425                        if in_example_section {
426                            continue;
427                        }
428
429                        // Skip list items
430                        if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
431                            continue;
432                        }
433
434                        // Skip lines that are HTML content
435                        let trimmed = line_info.content(ctx.content).trim_start();
436                        if trimmed.starts_with('<') {
437                            continue;
438                        }
439                    }
440
441                    let match_len = link.byte_end - link.byte_offset;
442                    undefined.push((link.line - 1, link.start_col, match_len, ref_id.to_string()));
443                    reported_refs.insert(reference_lower, true);
444                }
445            }
446        }
447
448        // Use cached data for reference images
449        for image in &ctx.images {
450            if !image.is_reference {
451                continue; // Skip inline images
452            }
453
454            // Skip images inside Jinja templates
455            if ctx.is_in_jinja_range(image.byte_offset) {
456                continue;
457            }
458
459            // Skip images inside code spans
460            if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
461                continue;
462            }
463
464            // Skip images inside HTML comments
465            if Self::is_in_html_comment(content, image.byte_offset) {
466                continue;
467            }
468
469            // Skip images inside HTML tags
470            if Self::is_in_html_tag(ctx, image.byte_offset) {
471                continue;
472            }
473
474            // Skip images inside math contexts
475            if is_in_math_context(ctx, image.byte_offset) {
476                continue;
477            }
478
479            // Skip images inside table cells
480            if is_in_table_cell(ctx, image.line, image.start_col) {
481                continue;
482            }
483
484            // Skip images inside frontmatter
485            if ctx.line_info(image.line).is_some_and(|info| info.in_front_matter) {
486                continue;
487            }
488
489            if let Some(ref_id) = &image.reference_id {
490                let reference_lower = ref_id.to_lowercase();
491
492                // Skip known non-reference patterns (markdown extensions, code examples)
493                if Self::is_known_non_reference_pattern(ref_id) {
494                    continue;
495                }
496
497                // Skip MkDocs auto-references if in MkDocs mode
498                // Check both the reference_id and the alt text for shorthand references
499                // Strip backticks since MkDocs resolves `module.Class` as module.Class
500                let stripped_ref = Self::strip_backticks(ref_id);
501                let stripped_alt = Self::strip_backticks(&image.alt_text);
502                if mkdocs_mode
503                    && (is_mkdocs_auto_reference(stripped_ref)
504                        || is_mkdocs_auto_reference(stripped_alt)
505                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
506                        || (image.alt_text.as_ref() != stripped_alt && Self::is_valid_python_identifier(stripped_alt)))
507                {
508                    continue;
509                }
510
511                // Check if reference is defined
512                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
513                    // Check if the line is in an example section or list item
514                    if let Some(line_info) = ctx.line_info(image.line) {
515                        if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
516                            in_example_section = true;
517                            continue;
518                        }
519
520                        if in_example_section {
521                            continue;
522                        }
523
524                        // Skip list items
525                        if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
526                            continue;
527                        }
528
529                        // Skip lines that are HTML content
530                        let trimmed = line_info.content(ctx.content).trim_start();
531                        if trimmed.starts_with('<') {
532                            continue;
533                        }
534                    }
535
536                    let match_len = image.byte_end - image.byte_offset;
537                    undefined.push((image.line - 1, image.start_col, match_len, ref_id.to_string()));
538                    reported_refs.insert(reference_lower, true);
539                }
540            }
541        }
542
543        // Build a set of byte ranges that are already covered by parsed links/images
544        let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
545
546        // Add ranges from parsed links
547        for link in &ctx.links {
548            covered_ranges.push((link.byte_offset, link.byte_end));
549        }
550
551        // Add ranges from parsed images
552        for image in &ctx.images {
553            covered_ranges.push((image.byte_offset, image.byte_end));
554        }
555
556        // Sort ranges by start position
557        covered_ranges.sort_by_key(|&(start, _)| start);
558
559        // Handle shortcut references [text] which aren't captured in ctx.links
560        // Need to use regex for these
561        let lines: Vec<&str> = content.lines().collect();
562        in_example_section = false; // Reset for line-by-line processing
563
564        for (line_num, line) in lines.iter().enumerate() {
565            // Skip lines in frontmatter (convert 0-based to 1-based for line_info)
566            if ctx.line_info(line_num + 1).is_some_and(|info| info.in_front_matter) {
567                continue;
568            }
569
570            // Handle code blocks
571            if let Some(cap) = FENCED_CODE_START.captures(line) {
572                if let Some(fence) = cap.get(2) {
573                    // Get the fence marker (``` or ~~~) without the indentation
574                    let fence_str = fence.as_str();
575                    if !in_code_block {
576                        in_code_block = true;
577                        code_fence_marker = fence_str.to_string();
578                    } else if line.trim_start().starts_with(&code_fence_marker) {
579                        // Check if this could be a closing fence
580                        let trimmed = line.trim_start();
581                        // A closing fence should be just the fence characters, possibly with trailing whitespace
582                        if trimmed.starts_with(&code_fence_marker) {
583                            let after_fence = &trimmed[code_fence_marker.len()..];
584                            if after_fence.trim().is_empty() {
585                                in_code_block = false;
586                                code_fence_marker.clear();
587                            }
588                        }
589                    }
590                }
591                continue;
592            }
593
594            if in_code_block {
595                continue;
596            }
597
598            // Check for example sections
599            if OUTPUT_EXAMPLE_START.is_match(line) {
600                in_example_section = true;
601                continue;
602            }
603
604            if in_example_section {
605                // Check if we're exiting the example section (another heading)
606                if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
607                    in_example_section = false;
608                } else {
609                    continue;
610                }
611            }
612
613            // Skip list items
614            if LIST_ITEM_REGEX.is_match(line) {
615                continue;
616            }
617
618            // Skip lines that are HTML content
619            let trimmed_line = line.trim_start();
620            if trimmed_line.starts_with('<') {
621                continue;
622            }
623
624            // Skip GitHub alerts/callouts (e.g., > [!TIP])
625            if GITHUB_ALERT_REGEX.is_match(line) {
626                continue;
627            }
628
629            // Skip abbreviation definitions (*[ABBR]: Definition)
630            // These are not reference links and should not be checked
631            if trimmed_line.starts_with("*[") {
632                continue;
633            }
634
635            // Collect positions of brackets that are part of URLs (IPv6, etc.)
636            // so we can exclude them from reference checking
637            let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
638            for mat in URL_WITH_BRACKETS.find_iter(line) {
639                // Find all bracket pairs within this URL match
640                let url_str = mat.as_str();
641                let url_start = mat.start();
642
643                // Find brackets within the URL (e.g., in https://[::1]:8080)
644                let mut idx = 0;
645                while idx < url_str.len() {
646                    if let Some(bracket_start) = url_str[idx..].find('[') {
647                        let bracket_start_abs = url_start + idx + bracket_start;
648                        if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
649                            let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
650                            url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
651                            idx += bracket_start + bracket_end + 2;
652                        } else {
653                            break;
654                        }
655                    } else {
656                        break;
657                    }
658                }
659            }
660
661            // Check shortcut references: [reference]
662            if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
663                for cap in captures {
664                    if let Some(ref_match) = cap.get(1) {
665                        // Check if this bracket is part of a URL (IPv6, etc.)
666                        let bracket_start = cap.get(0).unwrap().start();
667                        let bracket_end = cap.get(0).unwrap().end();
668
669                        // Skip if this bracket pair is within any URL bracket range
670                        let is_in_url = url_bracket_ranges
671                            .iter()
672                            .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
673
674                        if is_in_url {
675                            continue;
676                        }
677
678                        // Skip Pandoc/RMarkdown inline footnotes: ^[text]
679                        // Check if there's a ^ immediately before the opening bracket
680                        if bracket_start > 0 {
681                            // bracket_start is a byte offset, so we need to check the byte before
682                            if let Some(byte) = line.as_bytes().get(bracket_start.saturating_sub(1))
683                                && *byte == b'^'
684                            {
685                                continue; // This is an inline footnote, skip it
686                            }
687                        }
688
689                        let reference = ref_match.as_str();
690                        let reference_lower = reference.to_lowercase();
691
692                        // Skip known non-reference patterns (markdown extensions, code examples)
693                        if Self::is_known_non_reference_pattern(reference) {
694                            continue;
695                        }
696
697                        // Skip GitHub alerts (including extended types)
698                        if let Some(alert_type) = reference.strip_prefix('!')
699                            && matches!(
700                                alert_type,
701                                "NOTE"
702                                    | "TIP"
703                                    | "WARNING"
704                                    | "IMPORTANT"
705                                    | "CAUTION"
706                                    | "INFO"
707                                    | "SUCCESS"
708                                    | "FAILURE"
709                                    | "DANGER"
710                                    | "BUG"
711                                    | "EXAMPLE"
712                                    | "QUOTE"
713                            )
714                        {
715                            continue;
716                        }
717
718                        // Skip MkDocs snippet section markers like [start:section] or [end:section]
719                        // when they appear as part of snippet syntax (e.g., # -8<- [start:section])
720                        if mkdocs_mode
721                            && (reference.starts_with("start:") || reference.starts_with("end:"))
722                            && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
723                                || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
724                        {
725                            continue;
726                        }
727
728                        // Skip MkDocs auto-references if in MkDocs mode
729                        // Strip backticks since MkDocs resolves `module.Class` as module.Class
730                        let stripped_ref = Self::strip_backticks(reference);
731                        if mkdocs_mode
732                            && (is_mkdocs_auto_reference(stripped_ref)
733                                || (reference != stripped_ref && Self::is_valid_python_identifier(stripped_ref)))
734                        {
735                            continue;
736                        }
737
738                        if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
739                            let full_match = cap.get(0).unwrap();
740                            let col = full_match.start();
741
742                            // Skip if inside code span
743                            let code_spans = ctx.code_spans();
744                            if Self::is_in_code_span(line_num + 1, col, &code_spans) {
745                                continue;
746                            }
747
748                            // Check if this position is within a covered range
749                            let line_start_byte = ctx.line_offsets[line_num];
750                            let byte_pos = line_start_byte + col;
751
752                            // Skip if inside Jinja template
753                            if ctx.is_in_jinja_range(byte_pos) {
754                                continue;
755                            }
756
757                            // Skip if inside code block
758                            if crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block(
759                                &ctx.code_blocks,
760                                byte_pos,
761                            ) {
762                                continue;
763                            }
764
765                            // Skip if inside HTML comment
766                            if Self::is_in_html_comment(content, byte_pos) {
767                                continue;
768                            }
769
770                            // Skip if inside HTML tag
771                            if Self::is_in_html_tag(ctx, byte_pos) {
772                                continue;
773                            }
774
775                            // Skip if inside math context
776                            if is_in_math_context(ctx, byte_pos) {
777                                continue;
778                            }
779
780                            // Skip if inside table cell
781                            if is_in_table_cell(ctx, line_num + 1, col) {
782                                continue;
783                            }
784
785                            let byte_end = byte_pos + (full_match.end() - full_match.start());
786
787                            // Check if this shortcut ref overlaps with any parsed link/image
788                            let mut is_covered = false;
789                            for &(range_start, range_end) in &covered_ranges {
790                                if range_start <= byte_pos && byte_end <= range_end {
791                                    // This shortcut ref is completely within a parsed link/image
792                                    is_covered = true;
793                                    break;
794                                }
795                                if range_start > byte_end {
796                                    // No need to check further (ranges are sorted)
797                                    break;
798                                }
799                            }
800
801                            if is_covered {
802                                continue;
803                            }
804
805                            // More sophisticated checks to avoid false positives
806
807                            // Check 1: If preceded by ], this might be part of [text][ref]
808                            // Look for the pattern ...][ref] and check if there's a matching [ before
809                            let line_chars: Vec<char> = line.chars().collect();
810                            if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
811                                // Look backwards for a [ that would make this [text][ref]
812                                let mut bracket_count = 1; // We already saw one ]
813                                let mut check_pos = col.saturating_sub(2);
814                                let mut found_opening = false;
815
816                                while check_pos > 0 && check_pos < line_chars.len() {
817                                    match line_chars.get(check_pos) {
818                                        Some(&']') => bracket_count += 1,
819                                        Some(&'[') => {
820                                            bracket_count -= 1;
821                                            if bracket_count == 0 {
822                                                // Check if this [ is escaped
823                                                if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
824                                                    found_opening = true;
825                                                }
826                                                break;
827                                            }
828                                        }
829                                        _ => {}
830                                    }
831                                    if check_pos == 0 {
832                                        break;
833                                    }
834                                    check_pos = check_pos.saturating_sub(1);
835                                }
836
837                                if found_opening {
838                                    // This is part of [text][ref], skip it
839                                    continue;
840                                }
841                            }
842
843                            // Check 2: If there's an escaped bracket pattern before this
844                            // e.g., \[text\][ref], the [ref] shouldn't be treated as a shortcut
845                            let before_text = &line[..col];
846                            if before_text.contains("\\]") {
847                                // Check if there's a \[ before the \]
848                                if let Some(escaped_close_pos) = before_text.rfind("\\]") {
849                                    let search_text = &before_text[..escaped_close_pos];
850                                    if search_text.contains("\\[") {
851                                        // This looks like \[...\][ref], skip it
852                                        continue;
853                                    }
854                                }
855                            }
856
857                            let match_len = full_match.end() - full_match.start();
858                            undefined.push((line_num, col, match_len, reference.to_string()));
859                            reported_refs.insert(reference_lower, true);
860                        }
861                    }
862                }
863            }
864        }
865
866        undefined
867    }
868}
869
870impl Rule for MD052ReferenceLinkImages {
871    fn name(&self) -> &'static str {
872        "MD052"
873    }
874
875    fn description(&self) -> &'static str {
876        "Reference links and images should use a reference that exists"
877    }
878
879    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
880        let content = ctx.content;
881        let mut warnings = Vec::new();
882
883        // OPTIMIZATION: Early exit if no brackets at all
884        if !content.contains('[') {
885            return Ok(warnings);
886        }
887
888        // Check if we're in MkDocs mode from the context
889        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
890
891        let references = self.extract_references(content, mkdocs_mode);
892
893        // Use optimized detection method with cached link/image data
894        for (line_num, col, match_len, reference) in
895            self.find_undefined_references(content, &references, ctx, mkdocs_mode)
896        {
897            let lines: Vec<&str> = content.lines().collect();
898            let line_content = lines.get(line_num).unwrap_or(&"");
899
900            // Calculate precise character range for the entire undefined reference
901            let (start_line, start_col, end_line, end_col) =
902                calculate_match_range(line_num + 1, line_content, col, match_len);
903
904            warnings.push(LintWarning {
905                rule_name: Some(self.name().to_string()),
906                line: start_line,
907                column: start_col,
908                end_line,
909                end_column: end_col,
910                message: format!("Reference '{reference}' not found"),
911                severity: Severity::Warning,
912                fix: None,
913            });
914        }
915
916        Ok(warnings)
917    }
918
919    /// Check if this rule should be skipped for performance
920    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
921        // Skip if content is empty or has no links/images
922        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
923    }
924
925    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
926        let content = ctx.content;
927        // No automatic fix available for undefined references
928        Ok(content.to_string())
929    }
930
931    fn as_any(&self) -> &dyn std::any::Any {
932        self
933    }
934
935    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
936    where
937        Self: Sized,
938    {
939        // Flavor is now accessed from LintContext during check
940        Box::new(MD052ReferenceLinkImages::new())
941    }
942}
943
944#[cfg(test)]
945mod tests {
946    use super::*;
947    use crate::lint_context::LintContext;
948
949    #[test]
950    fn test_valid_reference_link() {
951        let rule = MD052ReferenceLinkImages::new();
952        let content = "[text][ref]\n\n[ref]: https://example.com";
953        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
954        let result = rule.check(&ctx).unwrap();
955
956        assert_eq!(result.len(), 0);
957    }
958
959    #[test]
960    fn test_undefined_reference_link() {
961        let rule = MD052ReferenceLinkImages::new();
962        let content = "[text][undefined]";
963        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
964        let result = rule.check(&ctx).unwrap();
965
966        assert_eq!(result.len(), 1);
967        assert!(result[0].message.contains("Reference 'undefined' not found"));
968    }
969
970    #[test]
971    fn test_valid_reference_image() {
972        let rule = MD052ReferenceLinkImages::new();
973        let content = "![alt][img]\n\n[img]: image.jpg";
974        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
975        let result = rule.check(&ctx).unwrap();
976
977        assert_eq!(result.len(), 0);
978    }
979
980    #[test]
981    fn test_undefined_reference_image() {
982        let rule = MD052ReferenceLinkImages::new();
983        let content = "![alt][missing]";
984        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
985        let result = rule.check(&ctx).unwrap();
986
987        assert_eq!(result.len(), 1);
988        assert!(result[0].message.contains("Reference 'missing' not found"));
989    }
990
991    #[test]
992    fn test_case_insensitive_references() {
993        let rule = MD052ReferenceLinkImages::new();
994        let content = "[Text][REF]\n\n[ref]: https://example.com";
995        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
996        let result = rule.check(&ctx).unwrap();
997
998        assert_eq!(result.len(), 0);
999    }
1000
1001    #[test]
1002    fn test_shortcut_reference_valid() {
1003        let rule = MD052ReferenceLinkImages::new();
1004        let content = "[ref]\n\n[ref]: https://example.com";
1005        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1006        let result = rule.check(&ctx).unwrap();
1007
1008        assert_eq!(result.len(), 0);
1009    }
1010
1011    #[test]
1012    fn test_shortcut_reference_undefined() {
1013        let rule = MD052ReferenceLinkImages::new();
1014        let content = "[undefined]";
1015        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1016        let result = rule.check(&ctx).unwrap();
1017
1018        assert_eq!(result.len(), 1);
1019        assert!(result[0].message.contains("Reference 'undefined' not found"));
1020    }
1021
1022    #[test]
1023    fn test_inline_links_ignored() {
1024        let rule = MD052ReferenceLinkImages::new();
1025        let content = "[text](https://example.com)";
1026        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1027        let result = rule.check(&ctx).unwrap();
1028
1029        assert_eq!(result.len(), 0);
1030    }
1031
1032    #[test]
1033    fn test_inline_images_ignored() {
1034        let rule = MD052ReferenceLinkImages::new();
1035        let content = "![alt](image.jpg)";
1036        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1037        let result = rule.check(&ctx).unwrap();
1038
1039        assert_eq!(result.len(), 0);
1040    }
1041
1042    #[test]
1043    fn test_references_in_code_blocks_ignored() {
1044        let rule = MD052ReferenceLinkImages::new();
1045        let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
1046        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1047        let result = rule.check(&ctx).unwrap();
1048
1049        assert_eq!(result.len(), 0);
1050    }
1051
1052    #[test]
1053    fn test_references_in_inline_code_ignored() {
1054        let rule = MD052ReferenceLinkImages::new();
1055        let content = "`[undefined]`";
1056        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1057        let result = rule.check(&ctx).unwrap();
1058
1059        // References inside inline code spans should be ignored
1060        assert_eq!(result.len(), 0);
1061    }
1062
1063    #[test]
1064    fn test_comprehensive_inline_code_detection() {
1065        let rule = MD052ReferenceLinkImages::new();
1066        let content = r#"# Test
1067
1068This `[inside]` should be ignored.
1069This [outside] should be flagged.
1070Reference links `[text][ref]` in code are ignored.
1071Regular reference [text][missing] should be flagged.
1072Images `![alt][img]` in code are ignored.
1073Regular image ![alt][badimg] should be flagged.
1074
1075Multiple `[one]` and `[two]` in code ignored, but [three] is not.
1076
1077```
1078[code block content] should be ignored
1079```
1080
1081`Multiple [refs] in [same] code span` ignored."#;
1082
1083        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1084        let result = rule.check(&ctx).unwrap();
1085
1086        // Should only flag: outside, missing, badimg, three (4 total)
1087        assert_eq!(result.len(), 4);
1088
1089        let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
1090        assert!(messages.iter().any(|m| m.contains("outside")));
1091        assert!(messages.iter().any(|m| m.contains("missing")));
1092        assert!(messages.iter().any(|m| m.contains("badimg")));
1093        assert!(messages.iter().any(|m| m.contains("three")));
1094
1095        // Should NOT flag any references inside code spans
1096        assert!(!messages.iter().any(|m| m.contains("inside")));
1097        assert!(!messages.iter().any(|m| m.contains("one")));
1098        assert!(!messages.iter().any(|m| m.contains("two")));
1099        assert!(!messages.iter().any(|m| m.contains("refs")));
1100        assert!(!messages.iter().any(|m| m.contains("same")));
1101    }
1102
1103    #[test]
1104    fn test_multiple_undefined_references() {
1105        let rule = MD052ReferenceLinkImages::new();
1106        let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
1107        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1108        let result = rule.check(&ctx).unwrap();
1109
1110        assert_eq!(result.len(), 3);
1111        assert!(result[0].message.contains("ref1"));
1112        assert!(result[1].message.contains("ref2"));
1113        assert!(result[2].message.contains("ref3"));
1114    }
1115
1116    #[test]
1117    fn test_mixed_valid_and_undefined() {
1118        let rule = MD052ReferenceLinkImages::new();
1119        let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
1120        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1121        let result = rule.check(&ctx).unwrap();
1122
1123        assert_eq!(result.len(), 1);
1124        assert!(result[0].message.contains("missing"));
1125    }
1126
1127    #[test]
1128    fn test_empty_reference() {
1129        let rule = MD052ReferenceLinkImages::new();
1130        let content = "[text][]\n\n[ref]: https://example.com";
1131        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1132        let result = rule.check(&ctx).unwrap();
1133
1134        // Empty reference should use the link text as reference
1135        assert_eq!(result.len(), 1);
1136    }
1137
1138    #[test]
1139    fn test_escaped_brackets_ignored() {
1140        let rule = MD052ReferenceLinkImages::new();
1141        let content = "\\[not a link\\]";
1142        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1143        let result = rule.check(&ctx).unwrap();
1144
1145        assert_eq!(result.len(), 0);
1146    }
1147
1148    #[test]
1149    fn test_list_items_ignored() {
1150        let rule = MD052ReferenceLinkImages::new();
1151        let content = "- [undefined]\n* [another]\n+ [third]";
1152        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1153        let result = rule.check(&ctx).unwrap();
1154
1155        // List items that look like shortcut references should be ignored
1156        assert_eq!(result.len(), 0);
1157    }
1158
1159    #[test]
1160    fn test_output_example_section_ignored() {
1161        let rule = MD052ReferenceLinkImages::new();
1162        let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
1163        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1164        let result = rule.check(&ctx).unwrap();
1165
1166        // Only the reference outside the Output section should be flagged
1167        assert_eq!(result.len(), 1);
1168        assert!(result[0].message.contains("missing"));
1169    }
1170
1171    #[test]
1172    fn test_reference_definitions_in_code_blocks_ignored() {
1173        let rule = MD052ReferenceLinkImages::new();
1174        let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
1175        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1176        let result = rule.check(&ctx).unwrap();
1177
1178        // Reference defined in code block should not count
1179        assert_eq!(result.len(), 1);
1180        assert!(result[0].message.contains("ref"));
1181    }
1182
1183    #[test]
1184    fn test_multiple_references_to_same_undefined() {
1185        let rule = MD052ReferenceLinkImages::new();
1186        let content = "[first][missing] [second][missing] [third][missing]";
1187        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1188        let result = rule.check(&ctx).unwrap();
1189
1190        // Should only report once per unique reference
1191        assert_eq!(result.len(), 1);
1192        assert!(result[0].message.contains("missing"));
1193    }
1194
1195    #[test]
1196    fn test_reference_with_special_characters() {
1197        let rule = MD052ReferenceLinkImages::new();
1198        let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
1199        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1200        let result = rule.check(&ctx).unwrap();
1201
1202        assert_eq!(result.len(), 0);
1203    }
1204
1205    #[test]
1206    fn test_issue_51_html_attribute_not_reference() {
1207        // Test for issue #51 - HTML attributes with square brackets shouldn't be treated as references
1208        let rule = MD052ReferenceLinkImages::new();
1209        let content = r#"# Example
1210
1211## Test
1212
1213Want to fill out this form?
1214
1215<form method="post">
1216    <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
1217</form>"#;
1218        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1219        let result = rule.check(&ctx).unwrap();
1220
1221        assert_eq!(
1222            result.len(),
1223            0,
1224            "HTML attributes with square brackets should not be flagged as undefined references"
1225        );
1226    }
1227
1228    #[test]
1229    fn test_extract_references() {
1230        let rule = MD052ReferenceLinkImages::new();
1231        let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
1232        let refs = rule.extract_references(content, false);
1233
1234        assert_eq!(refs.len(), 3);
1235        assert!(refs.contains("ref1"));
1236        assert!(refs.contains("ref2"));
1237        assert!(refs.contains("ref3"));
1238    }
1239
1240    #[test]
1241    fn test_inline_code_not_flagged() {
1242        let rule = MD052ReferenceLinkImages::new();
1243
1244        // Test that arrays in inline code are not flagged as references
1245        let content = r#"# Test
1246
1247Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.
1248
1249Also, `[todo]` is not a reference link.
1250
1251But this [reference] should be flagged.
1252
1253And this `[inline code]` should not be flagged.
1254"#;
1255
1256        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1257        let warnings = rule.check(&ctx).unwrap();
1258
1259        // Should only flag [reference], not the ones in backticks
1260        assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
1261        assert!(warnings[0].message.contains("'reference'"));
1262    }
1263
1264    #[test]
1265    fn test_code_block_references_ignored() {
1266        let rule = MD052ReferenceLinkImages::new();
1267
1268        let content = r#"# Test
1269
1270```markdown
1271[undefined] reference in code block
1272![undefined] image in code block
1273```
1274
1275[real-undefined] reference outside
1276"#;
1277
1278        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1279        let warnings = rule.check(&ctx).unwrap();
1280
1281        // Should only flag [real-undefined], not the ones in code block
1282        assert_eq!(warnings.len(), 1);
1283        assert!(warnings[0].message.contains("'real-undefined'"));
1284    }
1285
1286    #[test]
1287    fn test_html_comments_ignored() {
1288        // Test for issue #20 - MD052 should not flag content inside HTML comments
1289        let rule = MD052ReferenceLinkImages::new();
1290
1291        // Test the exact case from issue #20
1292        let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
1293<!--- set_env EDITOR 'python3 fake_editor.py' -->
1294
1295```bash
1296$ python3 vote.py
12973 votes for: 2
12982 votes for: 3, 4
1299```"#;
1300        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1301        let result = rule.check(&ctx).unwrap();
1302        assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");
1303
1304        // Test various reference patterns inside HTML comments
1305        let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
1306Normal [text][undefined]
1307<!-- Another [comment][with] references -->"#;
1308        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1309        let result = rule.check(&ctx).unwrap();
1310        assert_eq!(
1311            result.len(),
1312            1,
1313            "Should only flag the undefined reference outside comments"
1314        );
1315        assert!(result[0].message.contains("undefined"));
1316
1317        // Test multi-line HTML comments
1318        let content = r#"<!--
1319[ref1]
1320[ref2][ref3]
1321-->
1322[actual][undefined]"#;
1323        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1324        let result = rule.check(&ctx).unwrap();
1325        assert_eq!(
1326            result.len(),
1327            1,
1328            "Should not flag references in multi-line HTML comments"
1329        );
1330        assert!(result[0].message.contains("undefined"));
1331
1332        // Test mixed scenarios
1333        let content = r#"<!-- Comment with [1:] pattern -->
1334Valid [link][ref]
1335<!-- More [refs][in][comments] -->
1336![image][missing]
1337
1338[ref]: https://example.com"#;
1339        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1340        let result = rule.check(&ctx).unwrap();
1341        assert_eq!(result.len(), 1, "Should only flag missing image reference");
1342        assert!(result[0].message.contains("missing"));
1343    }
1344
1345    #[test]
1346    fn test_frontmatter_ignored() {
1347        // Test for issue #24 - MD052 should not flag content inside frontmatter
1348        let rule = MD052ReferenceLinkImages::new();
1349
1350        // Test YAML frontmatter with arrays and references
1351        let content = r#"---
1352layout: post
1353title: "My Jekyll Post"
1354date: 2023-01-01
1355categories: blog
1356tags: ["test", "example"]
1357author: John Doe
1358---
1359
1360# My Blog Post
1361
1362This is the actual markdown content that should be linted.
1363
1364[undefined] reference should be flagged.
1365
1366## Section 1
1367
1368Some content here."#;
1369        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1370        let result = rule.check(&ctx).unwrap();
1371
1372        // Should only flag [undefined] in the content, not the ["test", "example"] array in frontmatter
1373        assert_eq!(
1374            result.len(),
1375            1,
1376            "Should only flag the undefined reference outside frontmatter"
1377        );
1378        assert!(result[0].message.contains("undefined"));
1379
1380        // Test TOML frontmatter
1381        let content = r#"+++
1382title = "My Post"
1383tags = ["example", "test"]
1384+++
1385
1386# Content
1387
1388[missing] reference should be flagged."#;
1389        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1390        let result = rule.check(&ctx).unwrap();
1391        assert_eq!(
1392            result.len(),
1393            1,
1394            "Should only flag the undefined reference outside TOML frontmatter"
1395        );
1396        assert!(result[0].message.contains("missing"));
1397    }
1398
1399    #[test]
1400    fn test_mkdocs_snippet_markers_not_flagged() {
1401        // Test for issue #68 - MkDocs snippet selection markers should not be flagged as undefined references
1402        let rule = MD052ReferenceLinkImages::new();
1403
1404        // Test snippet section markers
1405        let content = r#"# Document with MkDocs Snippets
1406
1407Some content here.
1408
1409# -8<- [start:remote-content]
1410
1411This is the remote content section.
1412
1413# -8<- [end:remote-content]
1414
1415More content here.
1416
1417<!-- --8<-- [start:another-section] -->
1418Content in another section
1419<!-- --8<-- [end:another-section] -->"#;
1420        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs);
1421        let result = rule.check(&ctx).unwrap();
1422
1423        // Should not flag any snippet markers as undefined references
1424        assert_eq!(
1425            result.len(),
1426            0,
1427            "Should not flag MkDocs snippet markers as undefined references"
1428        );
1429
1430        // Test that the snippet marker lines are properly skipped
1431        // but regular undefined references on other lines are still caught
1432        let content = r#"# Document
1433
1434# -8<- [start:section]
1435Content with [reference] inside snippet section
1436# -8<- [end:section]
1437
1438Regular [undefined] reference outside snippet markers."#;
1439        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs);
1440        let result = rule.check(&ctx).unwrap();
1441
1442        assert_eq!(
1443            result.len(),
1444            2,
1445            "Should flag undefined references but skip snippet marker lines"
1446        );
1447        // The references inside the content should be flagged, but not start: and end:
1448        assert!(result[0].message.contains("reference"));
1449        assert!(result[1].message.contains("undefined"));
1450
1451        // Test in standard mode - should flag the markers as undefined
1452        let content = r#"# Document
1453
1454# -8<- [start:section]
1455# -8<- [end:section]"#;
1456        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1457        let result = rule.check(&ctx).unwrap();
1458
1459        assert_eq!(
1460            result.len(),
1461            2,
1462            "In standard mode, snippet markers should be flagged as undefined references"
1463        );
1464    }
1465
1466    #[test]
1467    fn test_pandoc_citations_not_flagged() {
1468        // Test that Pandoc/RMarkdown/Quarto citation syntax is not flagged
1469        let rule = MD052ReferenceLinkImages::new();
1470
1471        let content = r#"# Research Paper
1472
1473We are using the **bookdown** package [@R-bookdown] in this sample book.
1474This was built on top of R Markdown and **knitr** [@xie2015].
1475
1476Multiple citations [@citation1; @citation2; @citation3] are also supported.
1477
1478Regular [undefined] reference should still be flagged.
1479"#;
1480        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1481        let result = rule.check(&ctx).unwrap();
1482
1483        // Should only flag the undefined reference, not the citations
1484        assert_eq!(
1485            result.len(),
1486            1,
1487            "Should only flag the undefined reference, not Pandoc citations"
1488        );
1489        assert!(result[0].message.contains("undefined"));
1490    }
1491
1492    #[test]
1493    fn test_pandoc_inline_footnotes_not_flagged() {
1494        // Test that Pandoc inline footnote syntax is not flagged
1495        let rule = MD052ReferenceLinkImages::new();
1496
1497        let content = r#"# Math Document
1498
1499You can use math in footnotes like this^[where we mention $p = \frac{a}{b}$].
1500
1501Another footnote^[with some text and a [link](https://example.com)].
1502
1503But this [reference] without ^ should be flagged.
1504"#;
1505        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1506        let result = rule.check(&ctx).unwrap();
1507
1508        // Should only flag the reference without ^
1509        assert_eq!(
1510            result.len(),
1511            1,
1512            "Should only flag the regular reference, not inline footnotes"
1513        );
1514        assert!(result[0].message.contains("reference"));
1515    }
1516
#[test]
fn test_github_alerts_not_flagged() {
    // Regression test for issue #60: GitHub alert markers such as `> [!NOTE]`
    // must not be reported as undefined references.
    let linter = MD052ReferenceLinkImages::new();

    // Scenario 1: several alert types, followed by one truly undefined reference.
    {
        let markdown = r#"# Document with GitHub Alerts

> [!NOTE]
> This is a note alert.

> [!TIP]
> This is a tip alert.

> [!IMPORTANT]
> This is an important alert.

> [!WARNING]
> This is a warning alert.

> [!CAUTION]
> This is a caution alert.

Regular content with [undefined] reference."#;
        let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
        let warnings = linter.check(&lint_ctx).unwrap();

        // Only the trailing `[undefined]` may be reported; none of the alert markers.
        assert_eq!(
            warnings.len(),
            1,
            "Should only flag the undefined reference, not GitHub alerts"
        );
        let flagged = &warnings[0];
        assert!(flagged.message.contains("undefined"));
        // The `[undefined]` reference sits on line 18 of the fixture.
        assert_eq!(flagged.line, 18);
    }

    // Scenario 2: an alert whose body mentions the same undefined reference
    // that also appears outside the blockquote.
    {
        let markdown = r#"> [!TIP]
> Here's a useful tip about [something].
> Multiple lines are allowed.

[something] is mentioned but not defined."#;
        let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
        let warnings = linter.check(&lint_ctx).unwrap();

        // `[something]` occurs twice, but exactly one warning is expected.
        // NOTE(review): the original author described this as flagging only the
        // occurrence outside the blockquote and as matching markdownlint's
        // approach; this test pins the count without showing why. Confirm
        // against the rule implementation.
        assert_eq!(warnings.len(), 1, "Should flag undefined reference");
        assert!(warnings[0].message.contains("something"));
    }

    // Scenario 3: an alert whose body uses a reference that IS defined.
    {
        let markdown = r#"> [!NOTE]
> See [reference] for more details.

[reference]: https://example.com"#;
        let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
        let warnings = linter.check(&lint_ctx).unwrap();

        // Nothing to report: `[!NOTE]` is an alert marker and `[reference]` has a definition.
        assert_eq!(warnings.len(), 0, "Should not flag GitHub alerts or defined references");
    }
}
1579}