Skip to main content

rumdl_lib/rules/
md052_reference_links_images.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::SHORTCUT_REF_REGEX;
5use crate::utils::skip_context::{is_in_math_context, is_in_table_cell};
6use regex::Regex;
7use std::collections::{HashMap, HashSet};
8use std::sync::LazyLock;
9
10mod md052_config;
11use md052_config::MD052Config;
12
13// Pattern to match reference definitions [ref]: url
14// Note: \S* instead of \S+ to allow empty definitions like [ref]:
15// The capturing group handles nested brackets to support cases like [`union[t, none]`]:
16static REF_REGEX: LazyLock<Regex> =
17    LazyLock::new(|| Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap());
18
19// Pattern for list items to exclude from reference checks (standard regex is fine)
20static LIST_ITEM_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap());
21
22// Pattern for output example sections (standard regex is fine)
23static OUTPUT_EXAMPLE_START: LazyLock<Regex> =
24    LazyLock::new(|| Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap());
25
26// Pattern for GitHub alerts/callouts in blockquotes (e.g., > [!NOTE], > [!TIP], etc.)
27// Extended to include additional common alert types
28static GITHUB_ALERT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
29    Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]")
30        .unwrap()
31});
32
33// Pattern to detect URLs that may contain brackets (IPv6, API endpoints, etc.)
34// This pattern specifically looks for:
35// - IPv6 addresses: https://[::1] or https://[2001:db8::1]
36// - IPv6 with zone IDs: https://[fe80::1%eth0]
37// - IPv6 mixed notation: https://[::ffff:192.0.2.1]
38// - API paths with array notation: https://api.example.com/users[0]
39// But NOT markdown reference links that happen to follow URLs
40static URL_WITH_BRACKETS: LazyLock<Regex> =
41    LazyLock::new(|| Regex::new(r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])").unwrap());
42
43/// Rule MD052: Reference links and images should use reference style
44///
45/// See [docs/md052.md](../../docs/md052.md) for full documentation, configuration, and examples.
46///
47/// This rule is triggered when a reference link or image uses a reference that isn't defined.
48///
49/// ## Configuration
50///
51/// - `shortcut-syntax`: Whether to check shortcut reference syntax `[text]` (default: false)
52///
53/// By default, only full (`[text][ref]`) and collapsed (`[text][]`) reference syntax is checked.
54/// Shortcut syntax is ambiguous because `[text]` could be a reference link OR just text in brackets.
55#[derive(Clone, Default)]
56pub struct MD052ReferenceLinkImages {
57    config: MD052Config,
58}
59
60impl MD052ReferenceLinkImages {
61    pub fn new() -> Self {
62        Self {
63            config: MD052Config::default(),
64        }
65    }
66
67    pub fn from_config_struct(config: MD052Config) -> Self {
68        Self { config }
69    }
70
/// Strip every leading and trailing backtick from a string.
///
/// Used for MkDocs auto-reference detection where `module.Class` should be treated as
/// module.Class. Backticks in the middle of the string are left untouched, and a string
/// made only of backticks becomes empty.
fn strip_backticks(s: &str) -> &str {
    s.trim_matches('`')
}
76
/// Check if a string is a valid (ASCII-only) Python identifier.
///
/// Used for MkDocs auto-reference detection where single-word backtick-wrapped identifiers
/// like `str`, `int`, etc. should be accepted as valid auto-references.
///
/// Returns `true` when `s` is non-empty, starts with an ASCII letter or `_`, and every
/// remaining character is an ASCII letter, ASCII digit, or `_`.
fn is_valid_python_identifier(s: &str) -> bool {
    let mut chars = s.chars();
    match chars.next() {
        // The first character may not be a digit; the rest may.
        Some(first) if first.is_ascii_alphabetic() || first == '_' => {
            chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
        }
        // Empty string or an invalid first character.
        _ => false,
    }
}
90
91    /// Check if text matches a known non-reference pattern that should be skipped.
92    ///
93    /// These are deterministic patterns from markdown extensions or code examples,
94    /// not heuristics. Returns true for:
95    /// - User-configured names via `ignore` config option
96    /// - Markdown extensions: [^footnote], [@citation], [!alert], [TOC]
97    /// - Programming syntax: [T], [null], [i32], ["string"]
98    /// - Descriptive text: [default: value], [0-9]
99    fn is_known_non_reference_pattern(&self, text: &str) -> bool {
100        // Check user-configured ignore list first (case-insensitive match)
101        // Reference IDs are normalized to lowercase during parsing,
102        // so we use case-insensitive comparison for user convenience
103        if self.config.ignore.iter().any(|p| p.eq_ignore_ascii_case(text)) {
104            return true;
105        }
106        // Skip numeric patterns (array indices, ranges)
107        if text.chars().all(|c| c.is_ascii_digit()) {
108            return true;
109        }
110
111        // Skip numeric ranges like [1:3], [0:10], etc.
112        if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
113            return true;
114        }
115
116        // Skip patterns that look like config sections [tool.something], [section.subsection]
117        // But not if they contain other non-alphanumeric chars like hyphens, underscores, or backticks
118        // Backticks indicate intentional code formatting in a reference name (e.g., [`module.Class`])
119        if text.contains('.')
120            && !text.contains(' ')
121            && !text.contains('-')
122            && !text.contains('_')
123            && !text.contains('`')
124        {
125            // Config sections typically have dots, no spaces, and only alphanumeric + dots
126            return true;
127        }
128
129        // Skip glob/wildcard patterns like [*], [...], [**]
130        if text == "*" || text == "..." || text == "**" {
131            return true;
132        }
133
134        // Skip patterns that look like file paths [dir/file], [src/utils]
135        if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
136            return true;
137        }
138
139        // Skip programming type annotations like [int, str], [Dict[str, Any]]
140        // These typically have commas and/or nested brackets
141        if text.contains(',') || text.contains('[') || text.contains(']') {
142            // Check if it looks like a type annotation pattern
143            return true;
144        }
145
146        // Note: We don't filter out patterns with backticks because backticks in reference names
147        // are valid markdown syntax, e.g., [`dataclasses.InitVar`] is a valid reference name
148
149        // Skip patterns that look like module/class paths ONLY if they don't have backticks
150        // Backticks indicate intentional code formatting in a reference name
151        // e.g., skip [dataclasses.initvar] but allow [`typing.ClassVar`]
152        if !text.contains('`')
153            && text.contains('.')
154            && !text.contains(' ')
155            && !text.contains('-')
156            && !text.contains('_')
157        {
158            return true;
159        }
160
161        // Note: We don't filter based on word count anymore because legitimate references
162        // can have many words, like "python language reference for import statements"
163        // Word count filtering was causing false positives where valid references were
164        // being incorrectly flagged as unused
165
166        // Skip patterns that are just punctuation or operators
167        if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
168            return true;
169        }
170
171        // Skip very short non-word patterns (likely operators or syntax)
172        if text.len() <= 2 && !text.chars().all(|c| c.is_alphabetic()) {
173            return true;
174        }
175
176        // Skip quoted patterns like ["E501"], ["ALL"], ["E", "F"]
177        if (text.starts_with('"') && text.ends_with('"'))
178            || (text.starts_with('\'') && text.ends_with('\''))
179            || text.contains('"')
180            || text.contains('\'')
181        {
182            return true;
183        }
184
185        // Skip descriptive patterns with colon like [default: the project root]
186        // But allow simple numeric ranges which are handled above
187        if text.contains(':') && text.contains(' ') {
188            return true;
189        }
190
191        // Skip alert/admonition patterns like [!WARN], [!NOTE], etc.
192        if text.starts_with('!') {
193            return true;
194        }
195
196        // Skip footnote syntax like [^1], [^note], etc.
197        // Footnotes start with ^ and are a common markdown extension
198        if text.starts_with('^') {
199            return true;
200        }
201
202        // Skip Pandoc/RMarkdown/Quarto citation syntax like [@citation-key]
203        // Citations in these formats start with @ inside brackets
204        if text.starts_with('@') {
205            return true;
206        }
207
208        // Skip table of contents markers like [TOC]
209        // Used by Python-Markdown and other processors
210        if text == "TOC" {
211            return true;
212        }
213
214        // Skip single uppercase letters (likely type parameters) like [T], [U], [K], [V]
215        if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
216            return true;
217        }
218
219        // Skip common programming type names, literals, and short identifiers
220        // that are likely not markdown references
221        let common_non_refs = [
222            // Programming types
223            "object",
224            "Object",
225            "any",
226            "Any",
227            "inv",
228            "void",
229            "bool",
230            "int",
231            "float",
232            "str",
233            "char",
234            "i8",
235            "i16",
236            "i32",
237            "i64",
238            "i128",
239            "isize",
240            "u8",
241            "u16",
242            "u32",
243            "u64",
244            "u128",
245            "usize",
246            "f32",
247            "f64",
248            // JavaScript/JSON literals (excluding "undefined" which is too ambiguous)
249            "null",
250            "true",
251            "false",
252            "NaN",
253            "Infinity",
254            // Common JavaScript output patterns
255            "object Object",
256        ];
257
258        if common_non_refs.contains(&text) {
259            return true;
260        }
261
262        false
263    }
264
265    /// Check if a position is inside any code span
266    fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
267        code_spans
268            .iter()
269            .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
270    }
271
272    /// Check if a byte position is within an HTML tag
273    fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
274        // Check HTML tags
275        for html_tag in ctx.html_tags().iter() {
276            if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
277                return true;
278            }
279        }
280        false
281    }
282
283    fn extract_references(&self, ctx: &crate::lint_context::LintContext) -> HashSet<String> {
284        use crate::utils::skip_context::is_mkdocs_snippet_line;
285
286        let mut references = HashSet::new();
287
288        for (line_num, line) in ctx.content.lines().enumerate() {
289            // Use LintContext's pre-computed code block info (1-indexed)
290            if let Some(line_info) = ctx.line_info(line_num + 1)
291                && line_info.in_code_block
292            {
293                continue;
294            }
295
296            // Skip lines that look like MkDocs snippet markers (only in MkDocs mode)
297            if is_mkdocs_snippet_line(line, ctx.flavor) {
298                continue;
299            }
300
301            // Check for abbreviation syntax (*[ABBR]: Definition) and skip it
302            // Abbreviations are not reference links and should not be tracked
303            if line.trim_start().starts_with("*[") {
304                continue;
305            }
306
307            if let Some(cap) = REF_REGEX.captures(line) {
308                // Store references in lowercase for case-insensitive comparison
309                if let Some(reference) = cap.get(1) {
310                    references.insert(reference.as_str().to_lowercase());
311                }
312            }
313        }
314
315        references
316    }
317
318    fn find_undefined_references(
319        &self,
320        references: &HashSet<String>,
321        ctx: &crate::lint_context::LintContext,
322        mkdocs_mode: bool,
323    ) -> Vec<(usize, usize, usize, String)> {
324        let mut undefined = Vec::new();
325        let mut reported_refs = HashMap::new();
326        let mut in_example_section = false;
327
328        // Get code spans once for the entire function
329        let code_spans = ctx.code_spans();
330
331        // Use cached data for reference links and images
332        for link in &ctx.links {
333            if !link.is_reference {
334                continue; // Skip inline links
335            }
336
337            // Skip links inside Jinja templates
338            if ctx.is_in_jinja_range(link.byte_offset) {
339                continue;
340            }
341
342            // Skip links inside code spans
343            if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
344                continue;
345            }
346
347            // Skip links inside HTML comments (uses pre-computed ranges)
348            if ctx.is_in_html_comment(link.byte_offset) {
349                continue;
350            }
351
352            // Skip links inside HTML tags
353            if Self::is_in_html_tag(ctx, link.byte_offset) {
354                continue;
355            }
356
357            // Skip links inside math contexts
358            if is_in_math_context(ctx, link.byte_offset) {
359                continue;
360            }
361
362            // Skip links inside table cells
363            if is_in_table_cell(ctx, link.line, link.start_col) {
364                continue;
365            }
366
367            // Skip links inside frontmatter
368            if ctx.line_info(link.line).is_some_and(|info| info.in_front_matter) {
369                continue;
370            }
371
372            // Skip Quarto/Pandoc citations ([@citation], @citation)
373            // Citations look like reference links but are bibliography references
374            if ctx.flavor == crate::config::MarkdownFlavor::Quarto && ctx.is_in_citation(link.byte_offset) {
375                continue;
376            }
377
378            // Skip links inside shortcodes ({{< ... >}} or {{% ... %}})
379            // Shortcodes may contain template syntax that looks like reference links
380            if ctx.is_in_shortcode(link.byte_offset) {
381                continue;
382            }
383
384            if let Some(ref_id) = &link.reference_id {
385                let reference_lower = ref_id.to_lowercase();
386
387                // Skip known non-reference patterns (markdown extensions, code examples)
388                if self.is_known_non_reference_pattern(ref_id) {
389                    continue;
390                }
391
392                // Skip MkDocs auto-references if in MkDocs mode
393                // Check both the reference_id and the link text for shorthand references
394                // Strip backticks since MkDocs resolves `module.Class` as module.Class
395                let stripped_ref = Self::strip_backticks(ref_id);
396                let stripped_text = Self::strip_backticks(&link.text);
397                if mkdocs_mode
398                    && (is_mkdocs_auto_reference(stripped_ref)
399                        || is_mkdocs_auto_reference(stripped_text)
400                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
401                        || (link.text.as_ref() != stripped_text && Self::is_valid_python_identifier(stripped_text)))
402                {
403                    continue;
404                }
405
406                // Check if reference is defined
407                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
408                    // Check if the line is in an example section or list item
409                    if let Some(line_info) = ctx.line_info(link.line) {
410                        if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
411                            in_example_section = true;
412                            continue;
413                        }
414
415                        if in_example_section {
416                            continue;
417                        }
418
419                        // Skip list items
420                        if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
421                            continue;
422                        }
423
424                        // Skip lines that are HTML content
425                        let trimmed = line_info.content(ctx.content).trim_start();
426                        if trimmed.starts_with('<') {
427                            continue;
428                        }
429                    }
430
431                    let match_len = link.byte_end - link.byte_offset;
432                    undefined.push((link.line - 1, link.start_col, match_len, ref_id.to_string()));
433                    reported_refs.insert(reference_lower, true);
434                }
435            }
436        }
437
438        // Use cached data for reference images
439        for image in &ctx.images {
440            if !image.is_reference {
441                continue; // Skip inline images
442            }
443
444            // Skip images inside Jinja templates
445            if ctx.is_in_jinja_range(image.byte_offset) {
446                continue;
447            }
448
449            // Skip images inside code spans
450            if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
451                continue;
452            }
453
454            // Skip images inside HTML comments (uses pre-computed ranges)
455            if ctx.is_in_html_comment(image.byte_offset) {
456                continue;
457            }
458
459            // Skip images inside HTML tags
460            if Self::is_in_html_tag(ctx, image.byte_offset) {
461                continue;
462            }
463
464            // Skip images inside math contexts
465            if is_in_math_context(ctx, image.byte_offset) {
466                continue;
467            }
468
469            // Skip images inside table cells
470            if is_in_table_cell(ctx, image.line, image.start_col) {
471                continue;
472            }
473
474            // Skip images inside frontmatter
475            if ctx.line_info(image.line).is_some_and(|info| info.in_front_matter) {
476                continue;
477            }
478
479            if let Some(ref_id) = &image.reference_id {
480                let reference_lower = ref_id.to_lowercase();
481
482                // Skip known non-reference patterns (markdown extensions, code examples)
483                if self.is_known_non_reference_pattern(ref_id) {
484                    continue;
485                }
486
487                // Skip MkDocs auto-references if in MkDocs mode
488                // Check both the reference_id and the alt text for shorthand references
489                // Strip backticks since MkDocs resolves `module.Class` as module.Class
490                let stripped_ref = Self::strip_backticks(ref_id);
491                let stripped_alt = Self::strip_backticks(&image.alt_text);
492                if mkdocs_mode
493                    && (is_mkdocs_auto_reference(stripped_ref)
494                        || is_mkdocs_auto_reference(stripped_alt)
495                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
496                        || (image.alt_text.as_ref() != stripped_alt && Self::is_valid_python_identifier(stripped_alt)))
497                {
498                    continue;
499                }
500
501                // Check if reference is defined
502                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
503                    // Check if the line is in an example section or list item
504                    if let Some(line_info) = ctx.line_info(image.line) {
505                        if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
506                            in_example_section = true;
507                            continue;
508                        }
509
510                        if in_example_section {
511                            continue;
512                        }
513
514                        // Skip list items
515                        if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
516                            continue;
517                        }
518
519                        // Skip lines that are HTML content
520                        let trimmed = line_info.content(ctx.content).trim_start();
521                        if trimmed.starts_with('<') {
522                            continue;
523                        }
524                    }
525
526                    let match_len = image.byte_end - image.byte_offset;
527                    undefined.push((image.line - 1, image.start_col, match_len, ref_id.to_string()));
528                    reported_refs.insert(reference_lower, true);
529                }
530            }
531        }
532
533        // Build a set of byte ranges that are already covered by parsed links/images
534        let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
535
536        // Add ranges from parsed links
537        for link in &ctx.links {
538            covered_ranges.push((link.byte_offset, link.byte_end));
539        }
540
541        // Add ranges from parsed images
542        for image in &ctx.images {
543            covered_ranges.push((image.byte_offset, image.byte_end));
544        }
545
546        // Sort ranges by start position
547        covered_ranges.sort_by_key(|&(start, _)| start);
548
549        // Handle shortcut references [text] which aren't captured in ctx.links
550        // Only check these if shortcut_syntax is enabled (default: false)
551        // Shortcut syntax is ambiguous because [text] could be a reference link
552        // OR just text in brackets (like spec notation in quotes)
553        if !self.config.shortcut_syntax {
554            return undefined;
555        }
556
557        // Need to use regex for shortcut references
558        let lines = ctx.raw_lines();
559        in_example_section = false; // Reset for line-by-line processing
560
561        for (line_num, line) in lines.iter().enumerate() {
562            // Skip lines in frontmatter or code blocks using LintContext's pre-computed info
563            if let Some(line_info) = ctx.line_info(line_num + 1)
564                && (line_info.in_front_matter || line_info.in_code_block)
565            {
566                continue;
567            }
568
569            // Check for example sections
570            if OUTPUT_EXAMPLE_START.is_match(line) {
571                in_example_section = true;
572                continue;
573            }
574
575            if in_example_section {
576                // Check if we're exiting the example section (another heading)
577                if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
578                    in_example_section = false;
579                } else {
580                    continue;
581                }
582            }
583
584            // Skip list items
585            if LIST_ITEM_REGEX.is_match(line) {
586                continue;
587            }
588
589            // Skip lines that are HTML content
590            let trimmed_line = line.trim_start();
591            if trimmed_line.starts_with('<') {
592                continue;
593            }
594
595            // Skip GitHub alerts/callouts (e.g., > [!TIP])
596            if GITHUB_ALERT_REGEX.is_match(line) {
597                continue;
598            }
599
600            // Skip abbreviation definitions (*[ABBR]: Definition)
601            // These are not reference links and should not be checked
602            if trimmed_line.starts_with("*[") {
603                continue;
604            }
605
606            // Collect positions of brackets that are part of URLs (IPv6, etc.)
607            // so we can exclude them from reference checking
608            let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
609            for mat in URL_WITH_BRACKETS.find_iter(line) {
610                // Find all bracket pairs within this URL match
611                let url_str = mat.as_str();
612                let url_start = mat.start();
613
614                // Find brackets within the URL (e.g., in https://[::1]:8080)
615                let mut idx = 0;
616                while idx < url_str.len() {
617                    if let Some(bracket_start) = url_str[idx..].find('[') {
618                        let bracket_start_abs = url_start + idx + bracket_start;
619                        if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
620                            let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
621                            url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
622                            idx += bracket_start + bracket_end + 2;
623                        } else {
624                            break;
625                        }
626                    } else {
627                        break;
628                    }
629                }
630            }
631
632            // Check shortcut references: [reference]
633            if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
634                for cap in captures {
635                    if let Some(ref_match) = cap.get(1) {
636                        // Check if this bracket is part of a URL (IPv6, etc.)
637                        let bracket_start = cap.get(0).unwrap().start();
638                        let bracket_end = cap.get(0).unwrap().end();
639
640                        // Skip if this bracket pair is within any URL bracket range
641                        let is_in_url = url_bracket_ranges
642                            .iter()
643                            .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
644
645                        if is_in_url {
646                            continue;
647                        }
648
649                        // Skip Pandoc/RMarkdown inline footnotes: ^[text]
650                        // Check if there's a ^ immediately before the opening bracket
651                        if bracket_start > 0 {
652                            // bracket_start is a byte offset, so we need to check the byte before
653                            if let Some(byte) = line.as_bytes().get(bracket_start.saturating_sub(1))
654                                && *byte == b'^'
655                            {
656                                continue; // This is an inline footnote, skip it
657                            }
658                        }
659
660                        let reference = ref_match.as_str();
661                        let reference_lower = reference.to_lowercase();
662
663                        // Skip known non-reference patterns (markdown extensions, code examples)
664                        if self.is_known_non_reference_pattern(reference) {
665                            continue;
666                        }
667
668                        // Skip GitHub alerts (including extended types)
669                        if let Some(alert_type) = reference.strip_prefix('!')
670                            && matches!(
671                                alert_type,
672                                "NOTE"
673                                    | "TIP"
674                                    | "WARNING"
675                                    | "IMPORTANT"
676                                    | "CAUTION"
677                                    | "INFO"
678                                    | "SUCCESS"
679                                    | "FAILURE"
680                                    | "DANGER"
681                                    | "BUG"
682                                    | "EXAMPLE"
683                                    | "QUOTE"
684                            )
685                        {
686                            continue;
687                        }
688
689                        // Skip MkDocs snippet section markers like [start:section] or [end:section]
690                        // when they appear as part of snippet syntax (e.g., # -8<- [start:section])
691                        if mkdocs_mode
692                            && (reference.starts_with("start:") || reference.starts_with("end:"))
693                            && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
694                                || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
695                        {
696                            continue;
697                        }
698
699                        // Skip MkDocs auto-references if in MkDocs mode
700                        // Strip backticks since MkDocs resolves `module.Class` as module.Class
701                        let stripped_ref = Self::strip_backticks(reference);
702                        if mkdocs_mode
703                            && (is_mkdocs_auto_reference(stripped_ref)
704                                || (reference != stripped_ref && Self::is_valid_python_identifier(stripped_ref)))
705                        {
706                            continue;
707                        }
708
709                        if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
710                            let full_match = cap.get(0).unwrap();
711                            let col = full_match.start();
712
713                            // Skip if inside code span
714                            let code_spans = ctx.code_spans();
715                            if Self::is_in_code_span(line_num + 1, col, &code_spans) {
716                                continue;
717                            }
718
719                            // Check if this position is within a covered range
720                            let line_start_byte = ctx.line_offsets[line_num];
721                            let byte_pos = line_start_byte + col;
722
723                            // Skip if inside Jinja template
724                            if ctx.is_in_jinja_range(byte_pos) {
725                                continue;
726                            }
727
728                            // Skip if inside code block
729                            if crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block(
730                                &ctx.code_blocks,
731                                byte_pos,
732                            ) {
733                                continue;
734                            }
735
736                            // Skip if inside HTML comment (uses pre-computed ranges)
737                            if ctx.is_in_html_comment(byte_pos) {
738                                continue;
739                            }
740
741                            // Skip if inside HTML tag
742                            if Self::is_in_html_tag(ctx, byte_pos) {
743                                continue;
744                            }
745
746                            // Skip if inside math context
747                            if is_in_math_context(ctx, byte_pos) {
748                                continue;
749                            }
750
751                            // Skip if inside table cell
752                            if is_in_table_cell(ctx, line_num + 1, col) {
753                                continue;
754                            }
755
756                            let byte_end = byte_pos + (full_match.end() - full_match.start());
757
758                            // Check if this shortcut ref overlaps with any parsed link/image
759                            let mut is_covered = false;
760                            for &(range_start, range_end) in &covered_ranges {
761                                if range_start <= byte_pos && byte_end <= range_end {
762                                    // This shortcut ref is completely within a parsed link/image
763                                    is_covered = true;
764                                    break;
765                                }
766                                if range_start > byte_end {
767                                    // No need to check further (ranges are sorted)
768                                    break;
769                                }
770                            }
771
772                            if is_covered {
773                                continue;
774                            }
775
776                            // More sophisticated checks to avoid false positives
777
778                            // Check 1: If preceded by ], this might be part of [text][ref]
779                            // Look for the pattern ...][ref] and check if there's a matching [ before
780                            let line_chars: Vec<char> = line.chars().collect();
781                            if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
782                                // Look backwards for a [ that would make this [text][ref]
783                                let mut bracket_count = 1; // We already saw one ]
784                                let mut check_pos = col.saturating_sub(2);
785                                let mut found_opening = false;
786
787                                while check_pos > 0 && check_pos < line_chars.len() {
788                                    match line_chars.get(check_pos) {
789                                        Some(&']') => bracket_count += 1,
790                                        Some(&'[') => {
791                                            bracket_count -= 1;
792                                            if bracket_count == 0 {
793                                                // Check if this [ is escaped
794                                                if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
795                                                    found_opening = true;
796                                                }
797                                                break;
798                                            }
799                                        }
800                                        _ => {}
801                                    }
802                                    if check_pos == 0 {
803                                        break;
804                                    }
805                                    check_pos = check_pos.saturating_sub(1);
806                                }
807
808                                if found_opening {
809                                    // This is part of [text][ref], skip it
810                                    continue;
811                                }
812                            }
813
814                            // Check 2: If there's an escaped bracket pattern before this
815                            // e.g., \[text\][ref], the [ref] shouldn't be treated as a shortcut
816                            let before_text = &line[..col];
817                            if before_text.contains("\\]") {
818                                // Check if there's a \[ before the \]
819                                if let Some(escaped_close_pos) = before_text.rfind("\\]") {
820                                    let search_text = &before_text[..escaped_close_pos];
821                                    if search_text.contains("\\[") {
822                                        // This looks like \[...\][ref], skip it
823                                        continue;
824                                    }
825                                }
826                            }
827
828                            let match_len = full_match.end() - full_match.start();
829                            undefined.push((line_num, col, match_len, reference.to_string()));
830                            reported_refs.insert(reference_lower, true);
831                        }
832                    }
833                }
834            }
835        }
836
837        undefined
838    }
839}
840
841impl Rule for MD052ReferenceLinkImages {
842    fn name(&self) -> &'static str {
843        "MD052"
844    }
845
846    fn description(&self) -> &'static str {
847        "Reference links and images should use a reference that exists"
848    }
849
850    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
851        let content = ctx.content;
852        let mut warnings = Vec::new();
853
854        // OPTIMIZATION: Early exit if no brackets at all
855        if !content.contains('[') {
856            return Ok(warnings);
857        }
858
859        // Check if we're in MkDocs mode from the context
860        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
861
862        let references = self.extract_references(ctx);
863
864        // Use optimized detection method with cached link/image data
865        let lines = ctx.raw_lines();
866        for (line_num, col, match_len, reference) in self.find_undefined_references(&references, ctx, mkdocs_mode) {
867            let line_content = lines.get(line_num).unwrap_or(&"");
868
869            // Calculate precise character range for the entire undefined reference
870            let (start_line, start_col, end_line, end_col) =
871                calculate_match_range(line_num + 1, line_content, col, match_len);
872
873            warnings.push(LintWarning {
874                rule_name: Some(self.name().to_string()),
875                line: start_line,
876                column: start_col,
877                end_line,
878                end_column: end_col,
879                message: format!("Reference '{reference}' not found"),
880                severity: Severity::Warning,
881                fix: None,
882            });
883        }
884
885        Ok(warnings)
886    }
887
888    /// Check if this rule should be skipped for performance
889    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
890        // Skip if content is empty or has no links/images
891        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
892    }
893
894    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
895        let content = ctx.content;
896        // No automatic fix available for undefined references
897        Ok(content.to_string())
898    }
899
900    fn as_any(&self) -> &dyn std::any::Any {
901        self
902    }
903
904    fn default_config_section(&self) -> Option<(String, toml::Value)> {
905        let json_value = serde_json::to_value(&self.config).ok()?;
906        Some((
907            self.name().to_string(),
908            crate::rule_config_serde::json_to_toml_value(&json_value)?,
909        ))
910    }
911
912    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
913    where
914        Self: Sized,
915    {
916        let rule_config = crate::rule_config_serde::load_rule_config::<MD052Config>(config);
917        Box::new(Self::from_config_struct(rule_config))
918    }
919}
920
921#[cfg(test)]
922mod tests {
923    use super::*;
924    use crate::lint_context::LintContext;
925
926    #[test]
927    fn test_valid_reference_link() {
928        let rule = MD052ReferenceLinkImages::new();
929        let content = "[text][ref]\n\n[ref]: https://example.com";
930        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
931        let result = rule.check(&ctx).unwrap();
932
933        assert_eq!(result.len(), 0);
934    }
935
936    #[test]
937    fn test_undefined_reference_link() {
938        let rule = MD052ReferenceLinkImages::new();
939        let content = "[text][undefined]";
940        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
941        let result = rule.check(&ctx).unwrap();
942
943        assert_eq!(result.len(), 1);
944        assert!(result[0].message.contains("Reference 'undefined' not found"));
945    }
946
947    #[test]
948    fn test_valid_reference_image() {
949        let rule = MD052ReferenceLinkImages::new();
950        let content = "![alt][img]\n\n[img]: image.jpg";
951        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
952        let result = rule.check(&ctx).unwrap();
953
954        assert_eq!(result.len(), 0);
955    }
956
957    #[test]
958    fn test_undefined_reference_image() {
959        let rule = MD052ReferenceLinkImages::new();
960        let content = "![alt][missing]";
961        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
962        let result = rule.check(&ctx).unwrap();
963
964        assert_eq!(result.len(), 1);
965        assert!(result[0].message.contains("Reference 'missing' not found"));
966    }
967
968    #[test]
969    fn test_case_insensitive_references() {
970        let rule = MD052ReferenceLinkImages::new();
971        let content = "[Text][REF]\n\n[ref]: https://example.com";
972        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
973        let result = rule.check(&ctx).unwrap();
974
975        assert_eq!(result.len(), 0);
976    }
977
978    #[test]
979    fn test_shortcut_reference_valid() {
980        let rule = MD052ReferenceLinkImages::new();
981        let content = "[ref]\n\n[ref]: https://example.com";
982        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
983        let result = rule.check(&ctx).unwrap();
984
985        assert_eq!(result.len(), 0);
986    }
987
988    #[test]
989    fn test_shortcut_reference_undefined_with_shortcut_syntax_enabled() {
990        // Shortcut syntax checking is disabled by default
991        // Enable it to test undefined shortcut references
992        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
993            shortcut_syntax: true,
994            ..Default::default()
995        });
996        let content = "[undefined]";
997        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
998        let result = rule.check(&ctx).unwrap();
999
1000        assert_eq!(result.len(), 1);
1001        assert!(result[0].message.contains("Reference 'undefined' not found"));
1002    }
1003
1004    #[test]
1005    fn test_shortcut_reference_not_checked_by_default() {
1006        // By default, shortcut references are NOT checked (matches markdownlint behavior)
1007        let rule = MD052ReferenceLinkImages::new();
1008        let content = "[undefined]";
1009        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1010        let result = rule.check(&ctx).unwrap();
1011
1012        // Should be 0 because shortcut_syntax is false by default
1013        assert_eq!(result.len(), 0);
1014    }
1015
1016    #[test]
1017    fn test_inline_links_ignored() {
1018        let rule = MD052ReferenceLinkImages::new();
1019        let content = "[text](https://example.com)";
1020        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1021        let result = rule.check(&ctx).unwrap();
1022
1023        assert_eq!(result.len(), 0);
1024    }
1025
1026    #[test]
1027    fn test_inline_images_ignored() {
1028        let rule = MD052ReferenceLinkImages::new();
1029        let content = "![alt](image.jpg)";
1030        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1031        let result = rule.check(&ctx).unwrap();
1032
1033        assert_eq!(result.len(), 0);
1034    }
1035
1036    #[test]
1037    fn test_references_in_code_blocks_ignored() {
1038        let rule = MD052ReferenceLinkImages::new();
1039        let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
1040        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1041        let result = rule.check(&ctx).unwrap();
1042
1043        assert_eq!(result.len(), 0);
1044    }
1045
1046    #[test]
1047    fn test_references_in_inline_code_ignored() {
1048        let rule = MD052ReferenceLinkImages::new();
1049        let content = "`[undefined]`";
1050        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1051        let result = rule.check(&ctx).unwrap();
1052
1053        // References inside inline code spans should be ignored
1054        assert_eq!(result.len(), 0);
1055    }
1056
1057    #[test]
1058    fn test_comprehensive_inline_code_detection() {
1059        // Enable shortcut_syntax to test comprehensive detection
1060        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1061            shortcut_syntax: true,
1062            ..Default::default()
1063        });
1064        let content = r#"# Test
1065
1066This `[inside]` should be ignored.
1067This [outside] should be flagged.
1068Reference links `[text][ref]` in code are ignored.
1069Regular reference [text][missing] should be flagged.
1070Images `![alt][img]` in code are ignored.
1071Regular image ![alt][badimg] should be flagged.
1072
1073Multiple `[one]` and `[two]` in code ignored, but [three] is not.
1074
1075```
1076[code block content] should be ignored
1077```
1078
1079`Multiple [refs] in [same] code span` ignored."#;
1080
1081        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1082        let result = rule.check(&ctx).unwrap();
1083
1084        // Should only flag: outside, missing, badimg, three (4 total)
1085        assert_eq!(result.len(), 4);
1086
1087        let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
1088        assert!(messages.iter().any(|m| m.contains("outside")));
1089        assert!(messages.iter().any(|m| m.contains("missing")));
1090        assert!(messages.iter().any(|m| m.contains("badimg")));
1091        assert!(messages.iter().any(|m| m.contains("three")));
1092
1093        // Should NOT flag any references inside code spans
1094        assert!(!messages.iter().any(|m| m.contains("inside")));
1095        assert!(!messages.iter().any(|m| m.contains("one")));
1096        assert!(!messages.iter().any(|m| m.contains("two")));
1097        assert!(!messages.iter().any(|m| m.contains("refs")));
1098        assert!(!messages.iter().any(|m| m.contains("same")));
1099    }
1100
1101    #[test]
1102    fn test_multiple_undefined_references() {
1103        let rule = MD052ReferenceLinkImages::new();
1104        let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
1105        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1106        let result = rule.check(&ctx).unwrap();
1107
1108        assert_eq!(result.len(), 3);
1109        assert!(result[0].message.contains("ref1"));
1110        assert!(result[1].message.contains("ref2"));
1111        assert!(result[2].message.contains("ref3"));
1112    }
1113
1114    #[test]
1115    fn test_mixed_valid_and_undefined() {
1116        let rule = MD052ReferenceLinkImages::new();
1117        let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
1118        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1119        let result = rule.check(&ctx).unwrap();
1120
1121        assert_eq!(result.len(), 1);
1122        assert!(result[0].message.contains("missing"));
1123    }
1124
1125    #[test]
1126    fn test_empty_reference() {
1127        let rule = MD052ReferenceLinkImages::new();
1128        let content = "[text][]\n\n[ref]: https://example.com";
1129        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1130        let result = rule.check(&ctx).unwrap();
1131
1132        // Empty reference should use the link text as reference
1133        assert_eq!(result.len(), 1);
1134    }
1135
1136    #[test]
1137    fn test_escaped_brackets_ignored() {
1138        let rule = MD052ReferenceLinkImages::new();
1139        let content = "\\[not a link\\]";
1140        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1141        let result = rule.check(&ctx).unwrap();
1142
1143        assert_eq!(result.len(), 0);
1144    }
1145
1146    #[test]
1147    fn test_list_items_ignored() {
1148        let rule = MD052ReferenceLinkImages::new();
1149        let content = "- [undefined]\n* [another]\n+ [third]";
1150        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1151        let result = rule.check(&ctx).unwrap();
1152
1153        // List items that look like shortcut references should be ignored
1154        assert_eq!(result.len(), 0);
1155    }
1156
1157    #[test]
1158    fn test_output_example_section_ignored() {
1159        // Enable shortcut_syntax to test example section handling
1160        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1161            shortcut_syntax: true,
1162            ..Default::default()
1163        });
1164        let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
1165        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1166        let result = rule.check(&ctx).unwrap();
1167
1168        // Only the reference outside the Output section should be flagged
1169        assert_eq!(result.len(), 1);
1170        assert!(result[0].message.contains("missing"));
1171    }
1172
1173    #[test]
1174    fn test_reference_definitions_in_code_blocks_ignored() {
1175        let rule = MD052ReferenceLinkImages::new();
1176        let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
1177        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1178        let result = rule.check(&ctx).unwrap();
1179
1180        // Reference defined in code block should not count
1181        assert_eq!(result.len(), 1);
1182        assert!(result[0].message.contains("ref"));
1183    }
1184
1185    #[test]
1186    fn test_multiple_references_to_same_undefined() {
1187        let rule = MD052ReferenceLinkImages::new();
1188        let content = "[first][missing] [second][missing] [third][missing]";
1189        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1190        let result = rule.check(&ctx).unwrap();
1191
1192        // Should only report once per unique reference
1193        assert_eq!(result.len(), 1);
1194        assert!(result[0].message.contains("missing"));
1195    }
1196
1197    #[test]
1198    fn test_reference_with_special_characters() {
1199        let rule = MD052ReferenceLinkImages::new();
1200        let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
1201        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1202        let result = rule.check(&ctx).unwrap();
1203
1204        assert_eq!(result.len(), 0);
1205    }
1206
1207    #[test]
1208    fn test_issue_51_html_attribute_not_reference() {
1209        // Test for issue #51 - HTML attributes with square brackets shouldn't be treated as references
1210        let rule = MD052ReferenceLinkImages::new();
1211        let content = r#"# Example
1212
1213## Test
1214
1215Want to fill out this form?
1216
1217<form method="post">
1218    <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
1219</form>"#;
1220        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1221        let result = rule.check(&ctx).unwrap();
1222
1223        assert_eq!(
1224            result.len(),
1225            0,
1226            "HTML attributes with square brackets should not be flagged as undefined references"
1227        );
1228    }
1229
1230    #[test]
1231    fn test_extract_references() {
1232        let rule = MD052ReferenceLinkImages::new();
1233        let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
1234        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1235        let refs = rule.extract_references(&ctx);
1236
1237        assert_eq!(refs.len(), 3);
1238        assert!(refs.contains("ref1"));
1239        assert!(refs.contains("ref2"));
1240        assert!(refs.contains("ref3"));
1241    }
1242
1243    #[test]
1244    fn test_inline_code_not_flagged() {
1245        // Enable shortcut_syntax to test inline code detection
1246        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1247            shortcut_syntax: true,
1248            ..Default::default()
1249        });
1250
1251        // Test that arrays in inline code are not flagged as references
1252        let content = r#"# Test
1253
1254Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.
1255
1256Also, `[todo]` is not a reference link.
1257
1258But this [reference] should be flagged.
1259
1260And this `[inline code]` should not be flagged.
1261"#;
1262
1263        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1264        let warnings = rule.check(&ctx).unwrap();
1265
1266        // Should only flag [reference], not the ones in backticks
1267        assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
1268        assert!(warnings[0].message.contains("'reference'"));
1269    }
1270
1271    #[test]
1272    fn test_code_block_references_ignored() {
1273        // Enable shortcut_syntax to test code block handling
1274        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1275            shortcut_syntax: true,
1276            ..Default::default()
1277        });
1278
1279        let content = r#"# Test
1280
1281```markdown
1282[undefined] reference in code block
1283![undefined] image in code block
1284```
1285
1286[real-undefined] reference outside
1287"#;
1288
1289        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1290        let warnings = rule.check(&ctx).unwrap();
1291
1292        // Should only flag [real-undefined], not the ones in code block
1293        assert_eq!(warnings.len(), 1);
1294        assert!(warnings[0].message.contains("'real-undefined'"));
1295    }
1296
1297    #[test]
1298    fn test_html_comments_ignored() {
1299        // Test for issue #20 - MD052 should not flag content inside HTML comments
1300        let rule = MD052ReferenceLinkImages::new();
1301
1302        // Test the exact case from issue #20
1303        let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
1304<!--- set_env EDITOR 'python3 fake_editor.py' -->
1305
1306```bash
1307$ python3 vote.py
13083 votes for: 2
13092 votes for: 3, 4
1310```"#;
1311        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1312        let result = rule.check(&ctx).unwrap();
1313        assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");
1314
1315        // Test various reference patterns inside HTML comments
1316        let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
1317Normal [text][undefined]
1318<!-- Another [comment][with] references -->"#;
1319        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1320        let result = rule.check(&ctx).unwrap();
1321        assert_eq!(
1322            result.len(),
1323            1,
1324            "Should only flag the undefined reference outside comments"
1325        );
1326        assert!(result[0].message.contains("undefined"));
1327
1328        // Test multi-line HTML comments
1329        let content = r#"<!--
1330[ref1]
1331[ref2][ref3]
1332-->
1333[actual][undefined]"#;
1334        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1335        let result = rule.check(&ctx).unwrap();
1336        assert_eq!(
1337            result.len(),
1338            1,
1339            "Should not flag references in multi-line HTML comments"
1340        );
1341        assert!(result[0].message.contains("undefined"));
1342
1343        // Test mixed scenarios
1344        let content = r#"<!-- Comment with [1:] pattern -->
1345Valid [link][ref]
1346<!-- More [refs][in][comments] -->
1347![image][missing]
1348
1349[ref]: https://example.com"#;
1350        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1351        let result = rule.check(&ctx).unwrap();
1352        assert_eq!(result.len(), 1, "Should only flag missing image reference");
1353        assert!(result[0].message.contains("missing"));
1354    }
1355
1356    #[test]
1357    fn test_frontmatter_ignored() {
1358        // Test for issue #24 - MD052 should not flag content inside frontmatter
1359        // Enable shortcut_syntax to test frontmatter handling
1360        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1361            shortcut_syntax: true,
1362            ..Default::default()
1363        });
1364
1365        // Test YAML frontmatter with arrays and references
1366        let content = r#"---
1367layout: post
1368title: "My Jekyll Post"
1369date: 2023-01-01
1370categories: blog
1371tags: ["test", "example"]
1372author: John Doe
1373---
1374
1375# My Blog Post
1376
1377This is the actual markdown content that should be linted.
1378
1379[undefined] reference should be flagged.
1380
1381## Section 1
1382
1383Some content here."#;
1384        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1385        let result = rule.check(&ctx).unwrap();
1386
1387        // Should only flag [undefined] in the content, not the ["test", "example"] array in frontmatter
1388        assert_eq!(
1389            result.len(),
1390            1,
1391            "Should only flag the undefined reference outside frontmatter"
1392        );
1393        assert!(result[0].message.contains("undefined"));
1394
1395        // Test TOML frontmatter
1396        let content = r#"+++
1397title = "My Post"
1398tags = ["example", "test"]
1399+++
1400
1401# Content
1402
1403[missing] reference should be flagged."#;
1404        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1405        let result = rule.check(&ctx).unwrap();
1406        assert_eq!(
1407            result.len(),
1408            1,
1409            "Should only flag the undefined reference outside TOML frontmatter"
1410        );
1411        assert!(result[0].message.contains("missing"));
1412    }
1413
1414    #[test]
1415    fn test_mkdocs_snippet_markers_not_flagged() {
1416        // Test for issue #68 - MkDocs snippet selection markers should not be flagged as undefined references
1417        // Enable shortcut_syntax to test snippet marker handling
1418        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1419            shortcut_syntax: true,
1420            ..Default::default()
1421        });
1422
1423        // Test snippet section markers
1424        let content = r#"# Document with MkDocs Snippets
1425
1426Some content here.
1427
1428# -8<- [start:remote-content]
1429
1430This is the remote content section.
1431
1432# -8<- [end:remote-content]
1433
1434More content here.
1435
1436<!-- --8<-- [start:another-section] -->
1437Content in another section
1438<!-- --8<-- [end:another-section] -->"#;
1439        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
1440        let result = rule.check(&ctx).unwrap();
1441
1442        // Should not flag any snippet markers as undefined references
1443        assert_eq!(
1444            result.len(),
1445            0,
1446            "Should not flag MkDocs snippet markers as undefined references"
1447        );
1448
1449        // Test that the snippet marker lines are properly skipped
1450        // but regular undefined references on other lines are still caught
1451        let content = r#"# Document
1452
1453# -8<- [start:section]
1454Content with [reference] inside snippet section
1455# -8<- [end:section]
1456
1457Regular [undefined] reference outside snippet markers."#;
1458        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
1459        let result = rule.check(&ctx).unwrap();
1460
1461        assert_eq!(
1462            result.len(),
1463            2,
1464            "Should flag undefined references but skip snippet marker lines"
1465        );
1466        // The references inside the content should be flagged, but not start: and end:
1467        assert!(result[0].message.contains("reference"));
1468        assert!(result[1].message.contains("undefined"));
1469
1470        // Test in standard mode - should flag the markers as undefined
1471        let content = r#"# Document
1472
1473# -8<- [start:section]
1474# -8<- [end:section]"#;
1475        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1476        let result = rule.check(&ctx).unwrap();
1477
1478        assert_eq!(
1479            result.len(),
1480            2,
1481            "In standard mode, snippet markers should be flagged as undefined references"
1482        );
1483    }
1484
1485    #[test]
1486    fn test_pandoc_citations_not_flagged() {
1487        // Test that Pandoc/RMarkdown/Quarto citation syntax is not flagged
1488        // Enable shortcut_syntax to test citation handling
1489        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1490            shortcut_syntax: true,
1491            ..Default::default()
1492        });
1493
1494        let content = r#"# Research Paper
1495
1496We are using the **bookdown** package [@R-bookdown] in this sample book.
1497This was built on top of R Markdown and **knitr** [@xie2015].
1498
1499Multiple citations [@citation1; @citation2; @citation3] are also supported.
1500
1501Regular [undefined] reference should still be flagged.
1502"#;
1503        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1504        let result = rule.check(&ctx).unwrap();
1505
1506        // Should only flag the undefined reference, not the citations
1507        assert_eq!(
1508            result.len(),
1509            1,
1510            "Should only flag the undefined reference, not Pandoc citations"
1511        );
1512        assert!(result[0].message.contains("undefined"));
1513    }
1514
1515    #[test]
1516    fn test_pandoc_inline_footnotes_not_flagged() {
1517        // Test that Pandoc inline footnote syntax is not flagged
1518        // Enable shortcut_syntax to test inline footnote handling
1519        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1520            shortcut_syntax: true,
1521            ..Default::default()
1522        });
1523
1524        let content = r#"# Math Document
1525
1526You can use math in footnotes like this^[where we mention $p = \frac{a}{b}$].
1527
1528Another footnote^[with some text and a [link](https://example.com)].
1529
1530But this [reference] without ^ should be flagged.
1531"#;
1532        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1533        let result = rule.check(&ctx).unwrap();
1534
1535        // Should only flag the reference without ^
1536        assert_eq!(
1537            result.len(),
1538            1,
1539            "Should only flag the regular reference, not inline footnotes"
1540        );
1541        assert!(result[0].message.contains("reference"));
1542    }
1543
1544    #[test]
1545    fn test_github_alerts_not_flagged() {
1546        // Test for issue #60 - GitHub alerts should not be flagged as undefined references
1547        // Enable shortcut_syntax to test GitHub alert handling
1548        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1549            shortcut_syntax: true,
1550            ..Default::default()
1551        });
1552
1553        // Test various GitHub alert types
1554        let content = r#"# Document with GitHub Alerts
1555
1556> [!NOTE]
1557> This is a note alert.
1558
1559> [!TIP]
1560> This is a tip alert.
1561
1562> [!IMPORTANT]
1563> This is an important alert.
1564
1565> [!WARNING]
1566> This is a warning alert.
1567
1568> [!CAUTION]
1569> This is a caution alert.
1570
1571Regular content with [undefined] reference."#;
1572        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1573        let result = rule.check(&ctx).unwrap();
1574
1575        // Should only flag the undefined reference, not the GitHub alerts
1576        assert_eq!(
1577            result.len(),
1578            1,
1579            "Should only flag the undefined reference, not GitHub alerts"
1580        );
1581        assert!(result[0].message.contains("undefined"));
1582        assert_eq!(result[0].line, 18); // Line with [undefined]
1583
1584        // Test GitHub alerts with additional content
1585        let content = r#"> [!TIP]
1586> Here's a useful tip about [something].
1587> Multiple lines are allowed.
1588
1589[something] is mentioned but not defined."#;
1590        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1591        let result = rule.check(&ctx).unwrap();
1592
1593        // Should flag only the [something] outside blockquotes
1594        // The test shows we're only catching one, which might be correct behavior
1595        // matching markdownlint's approach
1596        assert_eq!(result.len(), 1, "Should flag undefined reference");
1597        assert!(result[0].message.contains("something"));
1598
1599        // Test GitHub alerts with proper references
1600        let content = r#"> [!NOTE]
1601> See [reference] for more details.
1602
1603[reference]: https://example.com"#;
1604        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1605        let result = rule.check(&ctx).unwrap();
1606
1607        // Should not flag anything - [!NOTE] is GitHub alert and [reference] is defined
1608        assert_eq!(result.len(), 0, "Should not flag GitHub alerts or defined references");
1609    }
1610
1611    #[test]
1612    fn test_ignore_config() {
1613        // Test that user-configured ignore list is respected
1614        let config = MD052Config {
1615            shortcut_syntax: true,
1616            ignore: vec!["Vec".to_string(), "HashMap".to_string(), "Option".to_string()],
1617        };
1618        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1619
1620        let content = r#"# Document with Custom Types
1621
1622Use [Vec] for dynamic arrays.
1623Use [HashMap] for key-value storage.
1624Use [Option] for nullable values.
1625Use [Result] for error handling.
1626"#;
1627        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1628        let result = rule.check(&ctx).unwrap();
1629
1630        // Should only flag [Result] because it's not in ignore
1631        assert_eq!(result.len(), 1, "Should only flag names not in ignore");
1632        assert!(result[0].message.contains("Result"));
1633    }
1634
1635    #[test]
1636    fn test_ignore_case_insensitive() {
1637        // Test that ignore list is case-insensitive
1638        let config = MD052Config {
1639            shortcut_syntax: true,
1640            ignore: vec!["Vec".to_string()],
1641        };
1642        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1643
1644        let content = r#"# Case Insensitivity Test
1645
1646[Vec] should be ignored.
1647[vec] should also be ignored (different case, same match).
1648[VEC] should also be ignored (different case, same match).
1649[undefined] should be flagged (not in ignore list).
1650"#;
1651        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1652        let result = rule.check(&ctx).unwrap();
1653
1654        // Should only flag [undefined] because ignore is case-insensitive
1655        assert_eq!(result.len(), 1, "Should only flag non-ignored reference");
1656        assert!(result[0].message.contains("undefined"));
1657    }
1658
1659    #[test]
1660    fn test_ignore_empty_by_default() {
1661        // Test that empty ignore list doesn't affect existing behavior
1662        let rule = MD052ReferenceLinkImages::new();
1663
1664        let content = "[text][undefined]";
1665        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1666        let result = rule.check(&ctx).unwrap();
1667
1668        // Should still flag undefined references
1669        assert_eq!(result.len(), 1);
1670        assert!(result[0].message.contains("undefined"));
1671    }
1672
1673    #[test]
1674    fn test_ignore_with_reference_links() {
1675        // Test ignore list with full reference link syntax [text][ref]
1676        let config = MD052Config {
1677            shortcut_syntax: false,
1678            ignore: vec!["CustomType".to_string()],
1679        };
1680        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1681
1682        let content = r#"# Test
1683
1684See [documentation][CustomType] for details.
1685See [other docs][MissingRef] for more.
1686"#;
1687        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1688        let result = rule.check(&ctx).unwrap();
1689
1690        // Debug: print warnings if test fails
1691        for (i, w) in result.iter().enumerate() {
1692            eprintln!("Warning {}: {}", i, w.message);
1693        }
1694
1695        // Should flag [MissingRef] but not [CustomType]
1696        // Note: reference IDs are lowercased in the message
1697        assert_eq!(result.len(), 1, "Expected 1 warning, got {}", result.len());
1698        assert!(
1699            result[0].message.contains("missingref"),
1700            "Expected 'missingref' in message: {}",
1701            result[0].message
1702        );
1703    }
1704
1705    #[test]
1706    fn test_ignore_multiple() {
1707        // Test multiple ignored names work correctly
1708        let config = MD052Config {
1709            shortcut_syntax: true,
1710            ignore: vec![
1711                "i32".to_string(),
1712                "u64".to_string(),
1713                "String".to_string(),
1714                "Arc".to_string(),
1715                "Mutex".to_string(),
1716            ],
1717        };
1718        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1719
1720        let content = r#"# Types
1721
1722[i32] [u64] [String] [Arc] [Mutex] [Box]
1723"#;
1724        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1725        let result = rule.check(&ctx).unwrap();
1726
1727        // Note: i32 and u64 are already in the hardcoded list, so they'd be skipped anyway
1728        // String is NOT in the hardcoded list, so we test that the user config works
1729        // [Box] should be flagged (not in ignore)
1730        assert_eq!(result.len(), 1);
1731        assert!(result[0].message.contains("Box"));
1732    }
1733
1734    #[test]
1735    fn test_nested_code_fences_reference_extraction() {
1736        // Verify that extract_references uses LintContext's pre-computed in_code_block
1737        // so nested fences are handled correctly.
1738        // A 4-backtick fence wrapping a 3-backtick fence should treat the inner
1739        // ``` as content, not a code block boundary.
1740        let rule = MD052ReferenceLinkImages::new();
1741
1742        let content = "````\n```\n[ref-inside]: https://example.com\n```\n````\n\n[Use this link][ref-inside]";
1743        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1744        let result = rule.check(&ctx).unwrap();
1745
1746        // The reference definition is inside a code block (the outer ````),
1747        // so it should NOT be recognized as a definition.
1748        // Therefore [ref-inside] should be flagged as undefined.
1749        assert_eq!(
1750            result.len(),
1751            1,
1752            "Reference defined inside nested code fence should not count as a definition"
1753        );
1754        assert!(result[0].message.contains("ref-inside"));
1755    }
1756}