// rumdl_lib/rules/md052_reference_links_images.rs

1use crate::rule::{FixCapability, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::SHORTCUT_REF_REGEX;
5use crate::utils::skip_context::{is_in_math_context, is_in_table_cell};
6use regex::Regex;
7use std::collections::{HashMap, HashSet};
8use std::sync::LazyLock;
9
10mod md052_config;
11use md052_config::MD052Config;
12
13// Pattern to match reference definitions [ref]: url
14// Note: \S* instead of \S+ to allow empty definitions like [ref]:
15// The capturing group handles nested brackets to support cases like [`union[t, none]`]:
16static REF_REGEX: LazyLock<Regex> =
17    LazyLock::new(|| Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap());
18
19// Pattern for list items to exclude from reference checks (standard regex is fine)
20static LIST_ITEM_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap());
21
22// Pattern for output example sections (standard regex is fine)
23static OUTPUT_EXAMPLE_START: LazyLock<Regex> =
24    LazyLock::new(|| Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap());
25
26// Pattern for GitHub alerts/callouts in blockquotes (e.g., > [!NOTE], > [!TIP], etc.)
27// Extended to include additional common alert types
28static GITHUB_ALERT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
29    Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]")
30        .unwrap()
31});
32
33// Pattern to detect URLs that may contain brackets (IPv6, API endpoints, etc.)
34// This pattern specifically looks for:
35// - IPv6 addresses: https://[::1] or https://[2001:db8::1]
36// - IPv6 with zone IDs: https://[fe80::1%eth0]
37// - IPv6 mixed notation: https://[::ffff:192.0.2.1]
38// - API paths with array notation: https://api.example.com/users[0]
39// But NOT markdown reference links that happen to follow URLs
40static URL_WITH_BRACKETS: LazyLock<Regex> =
41    LazyLock::new(|| Regex::new(r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])").unwrap());
42
43/// Rule MD052: Reference links and images should use reference style
44///
45/// See [docs/md052.md](../../docs/md052.md) for full documentation, configuration, and examples.
46///
47/// This rule is triggered when a reference link or image uses a reference that isn't defined.
48///
49/// ## Configuration
50///
51/// - `shortcut-syntax`: Whether to check shortcut reference syntax `[text]` (default: false)
52///
53/// By default, only full (`[text][ref]`) and collapsed (`[text][]`) reference syntax is checked.
54/// Shortcut syntax is ambiguous because `[text]` could be a reference link OR just text in brackets.
55#[derive(Clone, Default)]
56pub struct MD052ReferenceLinkImages {
57    config: MD052Config,
58}
59
60impl MD052ReferenceLinkImages {
61    pub fn new() -> Self {
62        Self {
63            config: MD052Config::default(),
64        }
65    }
66
67    pub fn from_config_struct(config: MD052Config) -> Self {
68        Self { config }
69    }
70
71    /// Strip surrounding backticks from a string
72    /// Used for MkDocs auto-reference detection where `module.Class` should be treated as module.Class
73    fn strip_backticks(s: &str) -> &str {
74        s.trim_start_matches('`').trim_end_matches('`')
75    }
76
77    /// Check if a string is a valid Python identifier
78    /// Used for MkDocs auto-reference detection where single-word backtick-wrapped identifiers
79    /// like `str`, `int`, etc. should be accepted as valid auto-references
80    fn is_valid_python_identifier(s: &str) -> bool {
81        if s.is_empty() {
82            return false;
83        }
84        let first_char = s.chars().next().unwrap();
85        if !first_char.is_ascii_alphabetic() && first_char != '_' {
86            return false;
87        }
88        s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
89    }
90
91    /// Check if text matches a known non-reference pattern that should be skipped.
92    ///
93    /// These are deterministic patterns from markdown extensions or code examples,
94    /// not heuristics. Returns true for:
95    /// - User-configured names via `ignore` config option
96    /// - Markdown extensions: [^footnote], [@citation], [!alert], [TOC]
97    /// - Programming syntax: [T], [null], [i32], ["string"]
98    /// - Descriptive text: [default: value], [0-9]
99    fn is_known_non_reference_pattern(&self, text: &str) -> bool {
100        // Check user-configured ignore list first (case-insensitive match)
101        // Reference IDs are normalized to lowercase during parsing,
102        // so we use case-insensitive comparison for user convenience
103        if self.config.ignore.iter().any(|p| p.eq_ignore_ascii_case(text)) {
104            return true;
105        }
106        // Skip numeric patterns (array indices, ranges)
107        if text.chars().all(|c| c.is_ascii_digit()) {
108            return true;
109        }
110
111        // Skip numeric ranges like [1:3], [0:10], etc.
112        if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
113            return true;
114        }
115
116        // Skip patterns that look like config sections [tool.something], [section.subsection]
117        // But not if they contain other non-alphanumeric chars like hyphens, underscores, or backticks
118        // Backticks indicate intentional code formatting in a reference name (e.g., [`module.Class`])
119        if text.contains('.')
120            && !text.contains(' ')
121            && !text.contains('-')
122            && !text.contains('_')
123            && !text.contains('`')
124        {
125            // Config sections typically have dots, no spaces, and only alphanumeric + dots
126            return true;
127        }
128
129        // Skip glob/wildcard patterns like [*], [...], [**]
130        if text == "*" || text == "..." || text == "**" {
131            return true;
132        }
133
134        // Skip patterns that look like file paths [dir/file], [src/utils]
135        if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
136            return true;
137        }
138
139        // Skip programming type annotations like [int, str], [Dict[str, Any]]
140        // These typically have commas and/or nested brackets
141        if text.contains(',') || text.contains('[') || text.contains(']') {
142            // Check if it looks like a type annotation pattern
143            return true;
144        }
145
146        // Note: We don't filter out patterns with backticks because backticks in reference names
147        // are valid markdown syntax, e.g., [`dataclasses.InitVar`] is a valid reference name
148
149        // Skip patterns that look like module/class paths ONLY if they don't have backticks
150        // Backticks indicate intentional code formatting in a reference name
151        // e.g., skip [dataclasses.initvar] but allow [`typing.ClassVar`]
152        if !text.contains('`')
153            && text.contains('.')
154            && !text.contains(' ')
155            && !text.contains('-')
156            && !text.contains('_')
157        {
158            return true;
159        }
160
161        // Note: We don't filter based on word count anymore because legitimate references
162        // can have many words, like "python language reference for import statements"
163        // Word count filtering was causing false positives where valid references were
164        // being incorrectly flagged as unused
165
166        // Skip patterns that are just punctuation or operators
167        if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
168            return true;
169        }
170
171        // Skip very short non-word patterns (likely operators or syntax)
172        if text.len() <= 2 && !text.chars().all(|c| c.is_alphabetic()) {
173            return true;
174        }
175
176        // Skip quoted patterns like ["E501"], ["ALL"], ["E", "F"]
177        if (text.starts_with('"') && text.ends_with('"'))
178            || (text.starts_with('\'') && text.ends_with('\''))
179            || text.contains('"')
180            || text.contains('\'')
181        {
182            return true;
183        }
184
185        // Skip descriptive patterns with colon like [default: the project root]
186        // But allow simple numeric ranges which are handled above
187        if text.contains(':') && text.contains(' ') {
188            return true;
189        }
190
191        // Skip alert/admonition patterns like [!WARN], [!NOTE], etc.
192        if text.starts_with('!') {
193            return true;
194        }
195
196        // Skip footnote syntax like [^1], [^note], etc.
197        // Footnotes start with ^ and are a common markdown extension
198        if text.starts_with('^') {
199            return true;
200        }
201
202        // Skip Pandoc/RMarkdown/Quarto citation syntax like [@citation-key]
203        // Citations in these formats start with @ inside brackets
204        if text.starts_with('@') {
205            return true;
206        }
207
208        // Skip table of contents markers like [TOC]
209        // Used by Python-Markdown and other processors
210        if text == "TOC" {
211            return true;
212        }
213
214        // Skip single uppercase letters (likely type parameters) like [T], [U], [K], [V]
215        if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
216            return true;
217        }
218
219        // Skip common programming type names, literals, and short identifiers
220        // that are likely not markdown references
221        let common_non_refs = [
222            // Programming types
223            "object",
224            "Object",
225            "any",
226            "Any",
227            "inv",
228            "void",
229            "bool",
230            "int",
231            "float",
232            "str",
233            "char",
234            "i8",
235            "i16",
236            "i32",
237            "i64",
238            "i128",
239            "isize",
240            "u8",
241            "u16",
242            "u32",
243            "u64",
244            "u128",
245            "usize",
246            "f32",
247            "f64",
248            // JavaScript/JSON literals (excluding "undefined" which is too ambiguous)
249            "null",
250            "true",
251            "false",
252            "NaN",
253            "Infinity",
254            // Common JavaScript output patterns
255            "object Object",
256        ];
257
258        if common_non_refs.contains(&text) {
259            return true;
260        }
261
262        false
263    }
264
265    /// Check if a byte position is inside any code span. O(log n) via binary search.
266    fn is_in_code_span(byte_pos: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
267        let idx = code_spans.partition_point(|span| span.byte_offset <= byte_pos);
268        idx > 0 && byte_pos < code_spans[idx - 1].byte_end
269    }
270
271    /// Check if a byte position is within an HTML tag. O(log n) via binary search.
272    fn is_in_html_tag(html_tags: &[crate::lint_context::HtmlTag], byte_pos: usize) -> bool {
273        let idx = html_tags.partition_point(|tag| tag.byte_offset <= byte_pos);
274        idx > 0 && byte_pos < html_tags[idx - 1].byte_end
275    }
276
277    fn extract_references(&self, ctx: &crate::lint_context::LintContext) -> HashSet<String> {
278        use crate::utils::skip_context::is_mkdocs_snippet_line;
279
280        let mut references = HashSet::new();
281
282        for (line_num, line) in ctx.content.lines().enumerate() {
283            // Use LintContext's pre-computed code block info (1-indexed)
284            if let Some(line_info) = ctx.line_info(line_num + 1)
285                && line_info.in_code_block
286            {
287                continue;
288            }
289
290            // Skip lines that look like MkDocs snippet markers (only in MkDocs mode)
291            if is_mkdocs_snippet_line(line, ctx.flavor) {
292                continue;
293            }
294
295            // Check for abbreviation syntax (*[ABBR]: Definition) and skip it
296            // Abbreviations are not reference links and should not be tracked
297            if line.trim_start().starts_with("*[") {
298                continue;
299            }
300
301            if let Some(cap) = REF_REGEX.captures(line) {
302                // Store references in lowercase for case-insensitive comparison
303                if let Some(reference) = cap.get(1) {
304                    references.insert(reference.as_str().to_lowercase());
305                }
306            }
307        }
308
309        references
310    }
311
312    fn find_undefined_references(
313        &self,
314        references: &HashSet<String>,
315        ctx: &crate::lint_context::LintContext,
316        mkdocs_mode: bool,
317    ) -> Vec<(usize, usize, usize, String)> {
318        let mut undefined = Vec::new();
319        let mut reported_refs = HashMap::new();
320        let mut in_example_section = false;
321
322        // Get code spans and HTML tags once for the entire function
323        let code_spans = ctx.code_spans();
324        let html_tags = ctx.html_tags();
325
326        // Use cached data for reference links and images
327        for link in &ctx.links {
328            if !link.is_reference {
329                continue; // Skip inline links
330            }
331
332            // Skip links inside Jinja templates
333            if ctx.is_in_jinja_range(link.byte_offset) {
334                continue;
335            }
336
337            // Skip links inside code spans
338            if Self::is_in_code_span(link.byte_offset, &code_spans) {
339                continue;
340            }
341
342            // Skip links inside HTML comments (uses pre-computed ranges)
343            if ctx.is_in_html_comment(link.byte_offset) || ctx.is_in_mdx_comment(link.byte_offset) {
344                continue;
345            }
346
347            // Skip links inside HTML tags
348            if Self::is_in_html_tag(&html_tags, link.byte_offset) {
349                continue;
350            }
351
352            // Skip links inside math contexts
353            if is_in_math_context(ctx, link.byte_offset) {
354                continue;
355            }
356
357            // Skip links inside table cells
358            if is_in_table_cell(ctx, link.line, link.start_col) {
359                continue;
360            }
361
362            // Skip links inside frontmatter
363            if ctx.line_info(link.line).is_some_and(|info| info.in_front_matter) {
364                continue;
365            }
366
367            // Skip Quarto/Pandoc citations ([@citation], @citation)
368            // Citations look like reference links but are bibliography references
369            if ctx.flavor == crate::config::MarkdownFlavor::Quarto && ctx.is_in_citation(link.byte_offset) {
370                continue;
371            }
372
373            // Skip links inside shortcodes ({{< ... >}} or {{% ... %}})
374            // Shortcodes may contain template syntax that looks like reference links
375            if ctx.is_in_shortcode(link.byte_offset) {
376                continue;
377            }
378
379            if let Some(ref_id) = &link.reference_id {
380                let reference_lower = ref_id.to_lowercase();
381
382                // Skip known non-reference patterns (markdown extensions, code examples)
383                if self.is_known_non_reference_pattern(ref_id) {
384                    continue;
385                }
386
387                // Skip MkDocs auto-references if in MkDocs mode
388                // Check both the reference_id and the link text for shorthand references
389                // Strip backticks since MkDocs resolves `module.Class` as module.Class
390                let stripped_ref = Self::strip_backticks(ref_id);
391                let stripped_text = Self::strip_backticks(&link.text);
392                if mkdocs_mode
393                    && (is_mkdocs_auto_reference(stripped_ref)
394                        || is_mkdocs_auto_reference(stripped_text)
395                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
396                        || (link.text.as_ref() != stripped_text && Self::is_valid_python_identifier(stripped_text)))
397                {
398                    continue;
399                }
400
401                // Check if reference is defined
402                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
403                    // Check if the line is in an example section or list item
404                    if let Some(line_info) = ctx.line_info(link.line) {
405                        if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
406                            in_example_section = true;
407                            continue;
408                        }
409
410                        if in_example_section {
411                            continue;
412                        }
413
414                        // Skip list items
415                        if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
416                            continue;
417                        }
418
419                        // Skip lines that are HTML content
420                        let trimmed = line_info.content(ctx.content).trim_start();
421                        if trimmed.starts_with('<') {
422                            continue;
423                        }
424                    }
425
426                    let match_len = link.byte_end - link.byte_offset;
427                    undefined.push((link.line - 1, link.start_col, match_len, ref_id.to_string()));
428                    reported_refs.insert(reference_lower, true);
429                }
430            }
431        }
432
433        // Use cached data for reference images
434        for image in &ctx.images {
435            if !image.is_reference {
436                continue; // Skip inline images
437            }
438
439            // Skip images inside Jinja templates
440            if ctx.is_in_jinja_range(image.byte_offset) {
441                continue;
442            }
443
444            // Skip images inside code spans
445            if Self::is_in_code_span(image.byte_offset, &code_spans) {
446                continue;
447            }
448
449            // Skip images inside HTML comments (uses pre-computed ranges)
450            if ctx.is_in_html_comment(image.byte_offset) || ctx.is_in_mdx_comment(image.byte_offset) {
451                continue;
452            }
453
454            // Skip images inside HTML tags
455            if Self::is_in_html_tag(&html_tags, image.byte_offset) {
456                continue;
457            }
458
459            // Skip images inside math contexts
460            if is_in_math_context(ctx, image.byte_offset) {
461                continue;
462            }
463
464            // Skip images inside table cells
465            if is_in_table_cell(ctx, image.line, image.start_col) {
466                continue;
467            }
468
469            // Skip images inside frontmatter
470            if ctx.line_info(image.line).is_some_and(|info| info.in_front_matter) {
471                continue;
472            }
473
474            if let Some(ref_id) = &image.reference_id {
475                let reference_lower = ref_id.to_lowercase();
476
477                // Skip known non-reference patterns (markdown extensions, code examples)
478                if self.is_known_non_reference_pattern(ref_id) {
479                    continue;
480                }
481
482                // Skip MkDocs auto-references if in MkDocs mode
483                // Check both the reference_id and the alt text for shorthand references
484                // Strip backticks since MkDocs resolves `module.Class` as module.Class
485                let stripped_ref = Self::strip_backticks(ref_id);
486                let stripped_alt = Self::strip_backticks(&image.alt_text);
487                if mkdocs_mode
488                    && (is_mkdocs_auto_reference(stripped_ref)
489                        || is_mkdocs_auto_reference(stripped_alt)
490                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
491                        || (image.alt_text.as_ref() != stripped_alt && Self::is_valid_python_identifier(stripped_alt)))
492                {
493                    continue;
494                }
495
496                // Check if reference is defined
497                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
498                    // Check if the line is in an example section or list item
499                    if let Some(line_info) = ctx.line_info(image.line) {
500                        if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
501                            in_example_section = true;
502                            continue;
503                        }
504
505                        if in_example_section {
506                            continue;
507                        }
508
509                        // Skip list items
510                        if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
511                            continue;
512                        }
513
514                        // Skip lines that are HTML content
515                        let trimmed = line_info.content(ctx.content).trim_start();
516                        if trimmed.starts_with('<') {
517                            continue;
518                        }
519                    }
520
521                    let match_len = image.byte_end - image.byte_offset;
522                    undefined.push((image.line - 1, image.start_col, match_len, ref_id.to_string()));
523                    reported_refs.insert(reference_lower, true);
524                }
525            }
526        }
527
528        // Build a set of byte ranges that are already covered by parsed links/images
529        let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
530
531        // Add ranges from parsed links
532        for link in &ctx.links {
533            covered_ranges.push((link.byte_offset, link.byte_end));
534        }
535
536        // Add ranges from parsed images
537        for image in &ctx.images {
538            covered_ranges.push((image.byte_offset, image.byte_end));
539        }
540
541        // Sort ranges by start position
542        covered_ranges.sort_by_key(|&(start, _)| start);
543
544        // Handle shortcut references [text] which aren't captured in ctx.links
545        // Only check these if shortcut_syntax is enabled (default: false)
546        // Shortcut syntax is ambiguous because [text] could be a reference link
547        // OR just text in brackets (like spec notation in quotes)
548        if !self.config.shortcut_syntax {
549            return undefined;
550        }
551
552        // Need to use regex for shortcut references
553        let lines = ctx.raw_lines();
554        in_example_section = false; // Reset for line-by-line processing
555
556        for (line_num, line) in lines.iter().enumerate() {
557            // Skip lines in frontmatter or code blocks using LintContext's pre-computed info
558            if let Some(line_info) = ctx.line_info(line_num + 1)
559                && (line_info.in_front_matter || line_info.in_code_block)
560            {
561                continue;
562            }
563
564            // Check for example sections
565            if OUTPUT_EXAMPLE_START.is_match(line) {
566                in_example_section = true;
567                continue;
568            }
569
570            if in_example_section {
571                // Check if we're exiting the example section (another heading)
572                if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
573                    in_example_section = false;
574                } else {
575                    continue;
576                }
577            }
578
579            // Skip list items
580            if LIST_ITEM_REGEX.is_match(line) {
581                continue;
582            }
583
584            // Skip lines that are HTML content
585            let trimmed_line = line.trim_start();
586            if trimmed_line.starts_with('<') {
587                continue;
588            }
589
590            // Skip GitHub alerts/callouts (e.g., > [!TIP])
591            if GITHUB_ALERT_REGEX.is_match(line) {
592                continue;
593            }
594
595            // Skip abbreviation definitions (*[ABBR]: Definition)
596            // These are not reference links and should not be checked
597            if trimmed_line.starts_with("*[") {
598                continue;
599            }
600
601            // Collect positions of brackets that are part of URLs (IPv6, etc.)
602            // so we can exclude them from reference checking
603            let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
604            for mat in URL_WITH_BRACKETS.find_iter(line) {
605                // Find all bracket pairs within this URL match
606                let url_str = mat.as_str();
607                let url_start = mat.start();
608
609                // Find brackets within the URL (e.g., in https://[::1]:8080)
610                let mut idx = 0;
611                while idx < url_str.len() {
612                    if let Some(bracket_start) = url_str[idx..].find('[') {
613                        let bracket_start_abs = url_start + idx + bracket_start;
614                        if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
615                            let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
616                            url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
617                            idx += bracket_start + bracket_end + 2;
618                        } else {
619                            break;
620                        }
621                    } else {
622                        break;
623                    }
624                }
625            }
626
627            // Check shortcut references: [reference]
628            if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
629                for cap in captures {
630                    if let Some(ref_match) = cap.get(1) {
631                        // Check if this bracket is part of a URL (IPv6, etc.)
632                        let bracket_start = cap.get(0).unwrap().start();
633                        let bracket_end = cap.get(0).unwrap().end();
634
635                        // Skip if this bracket pair is within any URL bracket range
636                        let is_in_url = url_bracket_ranges
637                            .iter()
638                            .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
639
640                        if is_in_url {
641                            continue;
642                        }
643
644                        // Skip Pandoc/RMarkdown inline footnotes: ^[text]
645                        // Check if there's a ^ immediately before the opening bracket
646                        if bracket_start > 0 {
647                            // bracket_start is a byte offset, so we need to check the byte before
648                            if let Some(byte) = line.as_bytes().get(bracket_start.saturating_sub(1))
649                                && *byte == b'^'
650                            {
651                                continue; // This is an inline footnote, skip it
652                            }
653                        }
654
655                        let reference = ref_match.as_str();
656                        let reference_lower = reference.to_lowercase();
657
658                        // Skip known non-reference patterns (markdown extensions, code examples)
659                        if self.is_known_non_reference_pattern(reference) {
660                            continue;
661                        }
662
663                        // Skip GitHub alerts (including extended types)
664                        if let Some(alert_type) = reference.strip_prefix('!')
665                            && matches!(
666                                alert_type,
667                                "NOTE"
668                                    | "TIP"
669                                    | "WARNING"
670                                    | "IMPORTANT"
671                                    | "CAUTION"
672                                    | "INFO"
673                                    | "SUCCESS"
674                                    | "FAILURE"
675                                    | "DANGER"
676                                    | "BUG"
677                                    | "EXAMPLE"
678                                    | "QUOTE"
679                            )
680                        {
681                            continue;
682                        }
683
684                        // Skip MkDocs snippet section markers like [start:section] or [end:section]
685                        // when they appear as part of snippet syntax (e.g., # -8<- [start:section])
686                        if mkdocs_mode
687                            && (reference.starts_with("start:") || reference.starts_with("end:"))
688                            && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
689                                || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
690                        {
691                            continue;
692                        }
693
694                        // Skip MkDocs auto-references if in MkDocs mode
695                        // Strip backticks since MkDocs resolves `module.Class` as module.Class
696                        let stripped_ref = Self::strip_backticks(reference);
697                        if mkdocs_mode
698                            && (is_mkdocs_auto_reference(stripped_ref)
699                                || (reference != stripped_ref && Self::is_valid_python_identifier(stripped_ref)))
700                        {
701                            continue;
702                        }
703
704                        if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
705                            let full_match = cap.get(0).unwrap();
706                            let col = full_match.start();
707                            let line_start_byte = ctx.line_offsets[line_num];
708                            let byte_pos = line_start_byte + col;
709
710                            // Skip if inside code span
711                            let code_spans = ctx.code_spans();
712                            if Self::is_in_code_span(byte_pos, &code_spans) {
713                                continue;
714                            }
715
716                            // Skip if inside Jinja template
717                            if ctx.is_in_jinja_range(byte_pos) {
718                                continue;
719                            }
720
721                            // Skip if inside code block
722                            if crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block(
723                                &ctx.code_blocks,
724                                byte_pos,
725                            ) {
726                                continue;
727                            }
728
729                            // Skip if inside HTML comment (uses pre-computed ranges)
730                            if ctx.is_in_html_comment(byte_pos) || ctx.is_in_mdx_comment(byte_pos) {
731                                continue;
732                            }
733
734                            // Skip if inside HTML tag
735                            if Self::is_in_html_tag(&html_tags, byte_pos) {
736                                continue;
737                            }
738
739                            // Skip if inside math context
740                            if is_in_math_context(ctx, byte_pos) {
741                                continue;
742                            }
743
744                            // Skip if inside table cell
745                            if is_in_table_cell(ctx, line_num + 1, col) {
746                                continue;
747                            }
748
749                            let byte_end = byte_pos + (full_match.end() - full_match.start());
750
751                            // Check if this shortcut ref overlaps with any parsed link/image
752                            let mut is_covered = false;
753                            for &(range_start, range_end) in &covered_ranges {
754                                if range_start <= byte_pos && byte_end <= range_end {
755                                    // This shortcut ref is completely within a parsed link/image
756                                    is_covered = true;
757                                    break;
758                                }
759                                if range_start > byte_end {
760                                    // No need to check further (ranges are sorted)
761                                    break;
762                                }
763                            }
764
765                            if is_covered {
766                                continue;
767                            }
768
769                            // More sophisticated checks to avoid false positives
770
771                            // Check 1: If preceded by ], this might be part of [text][ref]
772                            // Look for the pattern ...][ref] and check if there's a matching [ before
773                            let line_chars: Vec<char> = line.chars().collect();
774                            if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
775                                // Look backwards for a [ that would make this [text][ref]
776                                let mut bracket_count = 1; // We already saw one ]
777                                let mut check_pos = col.saturating_sub(2);
778                                let mut found_opening = false;
779
780                                while check_pos > 0 && check_pos < line_chars.len() {
781                                    match line_chars.get(check_pos) {
782                                        Some(&']') => bracket_count += 1,
783                                        Some(&'[') => {
784                                            bracket_count -= 1;
785                                            if bracket_count == 0 {
786                                                // Check if this [ is escaped
787                                                if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
788                                                    found_opening = true;
789                                                }
790                                                break;
791                                            }
792                                        }
793                                        _ => {}
794                                    }
795                                    if check_pos == 0 {
796                                        break;
797                                    }
798                                    check_pos = check_pos.saturating_sub(1);
799                                }
800
801                                if found_opening {
802                                    // This is part of [text][ref], skip it
803                                    continue;
804                                }
805                            }
806
807                            // Check 2: If there's an escaped bracket pattern before this
808                            // e.g., \[text\][ref], the [ref] shouldn't be treated as a shortcut
809                            let before_text = &line[..col];
810                            if before_text.contains("\\]") {
811                                // Check if there's a \[ before the \]
812                                if let Some(escaped_close_pos) = before_text.rfind("\\]") {
813                                    let search_text = &before_text[..escaped_close_pos];
814                                    if search_text.contains("\\[") {
815                                        // This looks like \[...\][ref], skip it
816                                        continue;
817                                    }
818                                }
819                            }
820
821                            let match_len = full_match.end() - full_match.start();
822                            undefined.push((line_num, col, match_len, reference.to_string()));
823                            reported_refs.insert(reference_lower, true);
824                        }
825                    }
826                }
827            }
828        }
829
830        undefined
831    }
832}
833
834impl Rule for MD052ReferenceLinkImages {
835    fn name(&self) -> &'static str {
836        "MD052"
837    }
838
839    fn description(&self) -> &'static str {
840        "Reference links and images should use a reference that exists"
841    }
842
843    fn category(&self) -> RuleCategory {
844        RuleCategory::Link
845    }
846
847    fn fix_capability(&self) -> FixCapability {
848        FixCapability::Unfixable
849    }
850
851    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
852        let content = ctx.content;
853        let mut warnings = Vec::new();
854
855        // OPTIMIZATION: Early exit if no brackets at all
856        if !content.contains('[') {
857            return Ok(warnings);
858        }
859
860        // Check if we're in MkDocs mode from the context
861        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
862
863        let references = self.extract_references(ctx);
864
865        // Use optimized detection method with cached link/image data
866        let lines = ctx.raw_lines();
867        for (line_num, col, match_len, reference) in self.find_undefined_references(&references, ctx, mkdocs_mode) {
868            let line_content = lines.get(line_num).unwrap_or(&"");
869
870            // Calculate precise character range for the entire undefined reference
871            let (start_line, start_col, end_line, end_col) =
872                calculate_match_range(line_num + 1, line_content, col, match_len);
873
874            warnings.push(LintWarning {
875                rule_name: Some(self.name().to_string()),
876                line: start_line,
877                column: start_col,
878                end_line,
879                end_column: end_col,
880                message: format!("Reference '{reference}' not found"),
881                severity: Severity::Warning,
882                fix: None,
883            });
884        }
885
886        Ok(warnings)
887    }
888
889    /// Check if this rule should be skipped for performance
890    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
891        // Skip if content is empty or has no links/images
892        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
893    }
894
895    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
896        let content = ctx.content;
897        // No automatic fix available for undefined references
898        Ok(content.to_string())
899    }
900
901    fn as_any(&self) -> &dyn std::any::Any {
902        self
903    }
904
905    fn default_config_section(&self) -> Option<(String, toml::Value)> {
906        let json_value = serde_json::to_value(&self.config).ok()?;
907        Some((
908            self.name().to_string(),
909            crate::rule_config_serde::json_to_toml_value(&json_value)?,
910        ))
911    }
912
913    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
914    where
915        Self: Sized,
916    {
917        let rule_config = crate::rule_config_serde::load_rule_config::<MD052Config>(config);
918        Box::new(Self::from_config_struct(rule_config))
919    }
920}
921
922#[cfg(test)]
923mod tests {
924    use super::*;
925    use crate::lint_context::LintContext;
926
927    #[test]
928    fn test_valid_reference_link() {
929        let rule = MD052ReferenceLinkImages::new();
930        let content = "[text][ref]\n\n[ref]: https://example.com";
931        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
932        let result = rule.check(&ctx).unwrap();
933
934        assert_eq!(result.len(), 0);
935    }
936
937    #[test]
938    fn test_undefined_reference_link() {
939        let rule = MD052ReferenceLinkImages::new();
940        let content = "[text][undefined]";
941        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
942        let result = rule.check(&ctx).unwrap();
943
944        assert_eq!(result.len(), 1);
945        assert!(result[0].message.contains("Reference 'undefined' not found"));
946    }
947
948    #[test]
949    fn test_valid_reference_image() {
950        let rule = MD052ReferenceLinkImages::new();
951        let content = "![alt][img]\n\n[img]: image.jpg";
952        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
953        let result = rule.check(&ctx).unwrap();
954
955        assert_eq!(result.len(), 0);
956    }
957
958    #[test]
959    fn test_undefined_reference_image() {
960        let rule = MD052ReferenceLinkImages::new();
961        let content = "![alt][missing]";
962        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
963        let result = rule.check(&ctx).unwrap();
964
965        assert_eq!(result.len(), 1);
966        assert!(result[0].message.contains("Reference 'missing' not found"));
967    }
968
969    #[test]
970    fn test_case_insensitive_references() {
971        let rule = MD052ReferenceLinkImages::new();
972        let content = "[Text][REF]\n\n[ref]: https://example.com";
973        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
974        let result = rule.check(&ctx).unwrap();
975
976        assert_eq!(result.len(), 0);
977    }
978
979    #[test]
980    fn test_shortcut_reference_valid() {
981        let rule = MD052ReferenceLinkImages::new();
982        let content = "[ref]\n\n[ref]: https://example.com";
983        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
984        let result = rule.check(&ctx).unwrap();
985
986        assert_eq!(result.len(), 0);
987    }
988
989    #[test]
990    fn test_shortcut_reference_undefined_with_shortcut_syntax_enabled() {
991        // Shortcut syntax checking is disabled by default
992        // Enable it to test undefined shortcut references
993        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
994            shortcut_syntax: true,
995            ..Default::default()
996        });
997        let content = "[undefined]";
998        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
999        let result = rule.check(&ctx).unwrap();
1000
1001        assert_eq!(result.len(), 1);
1002        assert!(result[0].message.contains("Reference 'undefined' not found"));
1003    }
1004
1005    #[test]
1006    fn test_shortcut_reference_not_checked_by_default() {
1007        // By default, shortcut references are NOT checked (matches markdownlint behavior)
1008        let rule = MD052ReferenceLinkImages::new();
1009        let content = "[undefined]";
1010        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1011        let result = rule.check(&ctx).unwrap();
1012
1013        // Should be 0 because shortcut_syntax is false by default
1014        assert_eq!(result.len(), 0);
1015    }
1016
1017    #[test]
1018    fn test_inline_links_ignored() {
1019        let rule = MD052ReferenceLinkImages::new();
1020        let content = "[text](https://example.com)";
1021        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1022        let result = rule.check(&ctx).unwrap();
1023
1024        assert_eq!(result.len(), 0);
1025    }
1026
1027    #[test]
1028    fn test_inline_images_ignored() {
1029        let rule = MD052ReferenceLinkImages::new();
1030        let content = "![alt](image.jpg)";
1031        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1032        let result = rule.check(&ctx).unwrap();
1033
1034        assert_eq!(result.len(), 0);
1035    }
1036
1037    #[test]
1038    fn test_references_in_code_blocks_ignored() {
1039        let rule = MD052ReferenceLinkImages::new();
1040        let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
1041        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1042        let result = rule.check(&ctx).unwrap();
1043
1044        assert_eq!(result.len(), 0);
1045    }
1046
1047    #[test]
1048    fn test_references_in_inline_code_ignored() {
1049        let rule = MD052ReferenceLinkImages::new();
1050        let content = "`[undefined]`";
1051        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1052        let result = rule.check(&ctx).unwrap();
1053
1054        // References inside inline code spans should be ignored
1055        assert_eq!(result.len(), 0);
1056    }
1057
1058    #[test]
1059    fn test_comprehensive_inline_code_detection() {
1060        // Enable shortcut_syntax to test comprehensive detection
1061        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1062            shortcut_syntax: true,
1063            ..Default::default()
1064        });
1065        let content = r#"# Test
1066
1067This `[inside]` should be ignored.
1068This [outside] should be flagged.
1069Reference links `[text][ref]` in code are ignored.
1070Regular reference [text][missing] should be flagged.
1071Images `![alt][img]` in code are ignored.
1072Regular image ![alt][badimg] should be flagged.
1073
1074Multiple `[one]` and `[two]` in code ignored, but [three] is not.
1075
1076```
1077[code block content] should be ignored
1078```
1079
1080`Multiple [refs] in [same] code span` ignored."#;
1081
1082        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1083        let result = rule.check(&ctx).unwrap();
1084
1085        // Should only flag: outside, missing, badimg, three (4 total)
1086        assert_eq!(result.len(), 4);
1087
1088        let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
1089        assert!(messages.iter().any(|m| m.contains("outside")));
1090        assert!(messages.iter().any(|m| m.contains("missing")));
1091        assert!(messages.iter().any(|m| m.contains("badimg")));
1092        assert!(messages.iter().any(|m| m.contains("three")));
1093
1094        // Should NOT flag any references inside code spans
1095        assert!(!messages.iter().any(|m| m.contains("inside")));
1096        assert!(!messages.iter().any(|m| m.contains("one")));
1097        assert!(!messages.iter().any(|m| m.contains("two")));
1098        assert!(!messages.iter().any(|m| m.contains("refs")));
1099        assert!(!messages.iter().any(|m| m.contains("same")));
1100    }
1101
1102    #[test]
1103    fn test_multiple_undefined_references() {
1104        let rule = MD052ReferenceLinkImages::new();
1105        let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
1106        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1107        let result = rule.check(&ctx).unwrap();
1108
1109        assert_eq!(result.len(), 3);
1110        assert!(result[0].message.contains("ref1"));
1111        assert!(result[1].message.contains("ref2"));
1112        assert!(result[2].message.contains("ref3"));
1113    }
1114
1115    #[test]
1116    fn test_mixed_valid_and_undefined() {
1117        let rule = MD052ReferenceLinkImages::new();
1118        let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
1119        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1120        let result = rule.check(&ctx).unwrap();
1121
1122        assert_eq!(result.len(), 1);
1123        assert!(result[0].message.contains("missing"));
1124    }
1125
1126    #[test]
1127    fn test_empty_reference() {
1128        let rule = MD052ReferenceLinkImages::new();
1129        let content = "[text][]\n\n[ref]: https://example.com";
1130        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1131        let result = rule.check(&ctx).unwrap();
1132
1133        // Empty reference should use the link text as reference
1134        assert_eq!(result.len(), 1);
1135    }
1136
1137    #[test]
1138    fn test_escaped_brackets_ignored() {
1139        let rule = MD052ReferenceLinkImages::new();
1140        let content = "\\[not a link\\]";
1141        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1142        let result = rule.check(&ctx).unwrap();
1143
1144        assert_eq!(result.len(), 0);
1145    }
1146
1147    #[test]
1148    fn test_list_items_ignored() {
1149        let rule = MD052ReferenceLinkImages::new();
1150        let content = "- [undefined]\n* [another]\n+ [third]";
1151        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1152        let result = rule.check(&ctx).unwrap();
1153
1154        // List items that look like shortcut references should be ignored
1155        assert_eq!(result.len(), 0);
1156    }
1157
1158    #[test]
1159    fn test_output_example_section_ignored() {
1160        // Enable shortcut_syntax to test example section handling
1161        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1162            shortcut_syntax: true,
1163            ..Default::default()
1164        });
1165        let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
1166        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1167        let result = rule.check(&ctx).unwrap();
1168
1169        // Only the reference outside the Output section should be flagged
1170        assert_eq!(result.len(), 1);
1171        assert!(result[0].message.contains("missing"));
1172    }
1173
1174    #[test]
1175    fn test_reference_definitions_in_code_blocks_ignored() {
1176        let rule = MD052ReferenceLinkImages::new();
1177        let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
1178        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1179        let result = rule.check(&ctx).unwrap();
1180
1181        // Reference defined in code block should not count
1182        assert_eq!(result.len(), 1);
1183        assert!(result[0].message.contains("ref"));
1184    }
1185
1186    #[test]
1187    fn test_multiple_references_to_same_undefined() {
1188        let rule = MD052ReferenceLinkImages::new();
1189        let content = "[first][missing] [second][missing] [third][missing]";
1190        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1191        let result = rule.check(&ctx).unwrap();
1192
1193        // Should only report once per unique reference
1194        assert_eq!(result.len(), 1);
1195        assert!(result[0].message.contains("missing"));
1196    }
1197
1198    #[test]
1199    fn test_reference_with_special_characters() {
1200        let rule = MD052ReferenceLinkImages::new();
1201        let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
1202        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1203        let result = rule.check(&ctx).unwrap();
1204
1205        assert_eq!(result.len(), 0);
1206    }
1207
1208    #[test]
1209    fn test_issue_51_html_attribute_not_reference() {
1210        // Test for issue #51 - HTML attributes with square brackets shouldn't be treated as references
1211        let rule = MD052ReferenceLinkImages::new();
1212        let content = r#"# Example
1213
1214## Test
1215
1216Want to fill out this form?
1217
1218<form method="post">
1219    <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
1220</form>"#;
1221        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1222        let result = rule.check(&ctx).unwrap();
1223
1224        assert_eq!(
1225            result.len(),
1226            0,
1227            "HTML attributes with square brackets should not be flagged as undefined references"
1228        );
1229    }
1230
1231    #[test]
1232    fn test_extract_references() {
1233        let rule = MD052ReferenceLinkImages::new();
1234        let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
1235        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1236        let refs = rule.extract_references(&ctx);
1237
1238        assert_eq!(refs.len(), 3);
1239        assert!(refs.contains("ref1"));
1240        assert!(refs.contains("ref2"));
1241        assert!(refs.contains("ref3"));
1242    }
1243
1244    #[test]
1245    fn test_inline_code_not_flagged() {
1246        // Enable shortcut_syntax to test inline code detection
1247        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1248            shortcut_syntax: true,
1249            ..Default::default()
1250        });
1251
1252        // Test that arrays in inline code are not flagged as references
1253        let content = r#"# Test
1254
1255Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.
1256
1257Also, `[todo]` is not a reference link.
1258
1259But this [reference] should be flagged.
1260
1261And this `[inline code]` should not be flagged.
1262"#;
1263
1264        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1265        let warnings = rule.check(&ctx).unwrap();
1266
1267        // Should only flag [reference], not the ones in backticks
1268        assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
1269        assert!(warnings[0].message.contains("'reference'"));
1270    }
1271
1272    #[test]
1273    fn test_code_block_references_ignored() {
1274        // Enable shortcut_syntax to test code block handling
1275        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1276            shortcut_syntax: true,
1277            ..Default::default()
1278        });
1279
1280        let content = r#"# Test
1281
1282```markdown
1283[undefined] reference in code block
1284![undefined] image in code block
1285```
1286
1287[real-undefined] reference outside
1288"#;
1289
1290        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1291        let warnings = rule.check(&ctx).unwrap();
1292
1293        // Should only flag [real-undefined], not the ones in code block
1294        assert_eq!(warnings.len(), 1);
1295        assert!(warnings[0].message.contains("'real-undefined'"));
1296    }
1297
1298    #[test]
1299    fn test_html_comments_ignored() {
1300        // Test for issue #20 - MD052 should not flag content inside HTML comments
1301        let rule = MD052ReferenceLinkImages::new();
1302
1303        // Test the exact case from issue #20
1304        let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
1305<!--- set_env EDITOR 'python3 fake_editor.py' -->
1306
1307```bash
1308$ python3 vote.py
13093 votes for: 2
13102 votes for: 3, 4
1311```"#;
1312        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1313        let result = rule.check(&ctx).unwrap();
1314        assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");
1315
1316        // Test various reference patterns inside HTML comments
1317        let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
1318Normal [text][undefined]
1319<!-- Another [comment][with] references -->"#;
1320        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1321        let result = rule.check(&ctx).unwrap();
1322        assert_eq!(
1323            result.len(),
1324            1,
1325            "Should only flag the undefined reference outside comments"
1326        );
1327        assert!(result[0].message.contains("undefined"));
1328
1329        // Test multi-line HTML comments
1330        let content = r#"<!--
1331[ref1]
1332[ref2][ref3]
1333-->
1334[actual][undefined]"#;
1335        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1336        let result = rule.check(&ctx).unwrap();
1337        assert_eq!(
1338            result.len(),
1339            1,
1340            "Should not flag references in multi-line HTML comments"
1341        );
1342        assert!(result[0].message.contains("undefined"));
1343
1344        // Test mixed scenarios
1345        let content = r#"<!-- Comment with [1:] pattern -->
1346Valid [link][ref]
1347<!-- More [refs][in][comments] -->
1348![image][missing]
1349
1350[ref]: https://example.com"#;
1351        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1352        let result = rule.check(&ctx).unwrap();
1353        assert_eq!(result.len(), 1, "Should only flag missing image reference");
1354        assert!(result[0].message.contains("missing"));
1355    }
1356
1357    #[test]
1358    fn test_frontmatter_ignored() {
1359        // Test for issue #24 - MD052 should not flag content inside frontmatter
1360        // Enable shortcut_syntax to test frontmatter handling
1361        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1362            shortcut_syntax: true,
1363            ..Default::default()
1364        });
1365
1366        // Test YAML frontmatter with arrays and references
1367        let content = r#"---
1368layout: post
1369title: "My Jekyll Post"
1370date: 2023-01-01
1371categories: blog
1372tags: ["test", "example"]
1373author: John Doe
1374---
1375
1376# My Blog Post
1377
1378This is the actual markdown content that should be linted.
1379
1380[undefined] reference should be flagged.
1381
1382## Section 1
1383
1384Some content here."#;
1385        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1386        let result = rule.check(&ctx).unwrap();
1387
1388        // Should only flag [undefined] in the content, not the ["test", "example"] array in frontmatter
1389        assert_eq!(
1390            result.len(),
1391            1,
1392            "Should only flag the undefined reference outside frontmatter"
1393        );
1394        assert!(result[0].message.contains("undefined"));
1395
1396        // Test TOML frontmatter
1397        let content = r#"+++
1398title = "My Post"
1399tags = ["example", "test"]
1400+++
1401
1402# Content
1403
1404[missing] reference should be flagged."#;
1405        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1406        let result = rule.check(&ctx).unwrap();
1407        assert_eq!(
1408            result.len(),
1409            1,
1410            "Should only flag the undefined reference outside TOML frontmatter"
1411        );
1412        assert!(result[0].message.contains("missing"));
1413    }
1414
1415    #[test]
1416    fn test_mkdocs_snippet_markers_not_flagged() {
1417        // Test for issue #68 - MkDocs snippet selection markers should not be flagged as undefined references
1418        // Enable shortcut_syntax to test snippet marker handling
1419        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1420            shortcut_syntax: true,
1421            ..Default::default()
1422        });
1423
1424        // Test snippet section markers
1425        let content = r#"# Document with MkDocs Snippets
1426
1427Some content here.
1428
1429# -8<- [start:remote-content]
1430
1431This is the remote content section.
1432
1433# -8<- [end:remote-content]
1434
1435More content here.
1436
1437<!-- --8<-- [start:another-section] -->
1438Content in another section
1439<!-- --8<-- [end:another-section] -->"#;
1440        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
1441        let result = rule.check(&ctx).unwrap();
1442
1443        // Should not flag any snippet markers as undefined references
1444        assert_eq!(
1445            result.len(),
1446            0,
1447            "Should not flag MkDocs snippet markers as undefined references"
1448        );
1449
1450        // Test that the snippet marker lines are properly skipped
1451        // but regular undefined references on other lines are still caught
1452        let content = r#"# Document
1453
1454# -8<- [start:section]
1455Content with [reference] inside snippet section
1456# -8<- [end:section]
1457
1458Regular [undefined] reference outside snippet markers."#;
1459        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
1460        let result = rule.check(&ctx).unwrap();
1461
1462        assert_eq!(
1463            result.len(),
1464            2,
1465            "Should flag undefined references but skip snippet marker lines"
1466        );
1467        // The references inside the content should be flagged, but not start: and end:
1468        assert!(result[0].message.contains("reference"));
1469        assert!(result[1].message.contains("undefined"));
1470
1471        // Test in standard mode - should flag the markers as undefined
1472        let content = r#"# Document
1473
1474# -8<- [start:section]
1475# -8<- [end:section]"#;
1476        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1477        let result = rule.check(&ctx).unwrap();
1478
1479        assert_eq!(
1480            result.len(),
1481            2,
1482            "In standard mode, snippet markers should be flagged as undefined references"
1483        );
1484    }
1485
#[test]
fn test_pandoc_citations_not_flagged() {
    // Pandoc / RMarkdown / Quarto citations (`[@key]`, including the
    // semicolon-separated multi-key form) must not be reported, while an
    // ordinary undefined shortcut reference in the same document still is.
    let markdown = r#"# Research Paper

We are using the **bookdown** package [@R-bookdown] in this sample book.
This was built on top of R Markdown and **knitr** [@xie2015].

Multiple citations [@citation1; @citation2; @citation3] are also supported.

Regular [undefined] reference should still be flagged.
"#;

    // shortcut_syntax is enabled so that the citation handling is exercised.
    let config = MD052Config {
        shortcut_syntax: true,
        ..Default::default()
    };
    let rule = MD052ReferenceLinkImages::from_config_struct(config);

    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let warnings = rule.check(&lint_ctx).unwrap();

    // Exactly one warning: the `[undefined]` reference, none of the citations.
    assert_eq!(
        warnings.len(),
        1,
        "Should only flag the undefined reference, not Pandoc citations"
    );
    assert!(warnings[0].message.contains("undefined"));
}
1515
#[test]
fn test_pandoc_inline_footnotes_not_flagged() {
    // Pandoc inline footnotes (`^[...]`) must not be reported, even when the
    // footnote body contains math or an inline link. A bracketed word that is
    // not preceded by `^` is still expected to produce a warning.
    let markdown = r#"# Math Document

You can use math in footnotes like this^[where we mention $p = \frac{a}{b}$].

Another footnote^[with some text and a [link](https://example.com)].

But this [reference] without ^ should be flagged.
"#;

    // shortcut_syntax is enabled so that the inline footnote handling is exercised.
    let config = MD052Config {
        shortcut_syntax: true,
        ..Default::default()
    };
    let rule = MD052ReferenceLinkImages::from_config_struct(config);

    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let warnings = rule.check(&lint_ctx).unwrap();

    // Exactly one warning: the plain `[reference]` without the caret.
    assert_eq!(
        warnings.len(),
        1,
        "Should only flag the regular reference, not inline footnotes"
    );
    assert!(warnings[0].message.contains("reference"));
}
1544
#[test]
fn test_github_alerts_not_flagged() {
    // Regression test for issue #60: GitHub alert markers such as `> [!NOTE]`
    // must not be reported as undefined references.
    // shortcut_syntax is enabled so that the alert handling is exercised.
    let config = MD052Config {
        shortcut_syntax: true,
        ..Default::default()
    };
    let rule = MD052ReferenceLinkImages::from_config_struct(config);

    // Scenario 1: several alert kinds followed by one genuine undefined reference.
    let alerts_doc = r#"# Document with GitHub Alerts

> [!NOTE]
> This is a note alert.

> [!TIP]
> This is a tip alert.

> [!IMPORTANT]
> This is an important alert.

> [!WARNING]
> This is a warning alert.

> [!CAUTION]
> This is a caution alert.

Regular content with [undefined] reference."#;
    let alerts_ctx = LintContext::new(alerts_doc, crate::config::MarkdownFlavor::Standard, None);
    let alerts_warnings = rule.check(&alerts_ctx).unwrap();

    // Only `[undefined]` is reported, and it sits on line 18 of the fixture.
    assert_eq!(
        alerts_warnings.len(),
        1,
        "Should only flag the undefined reference, not GitHub alerts"
    );
    assert!(alerts_warnings[0].message.contains("undefined"));
    assert_eq!(alerts_warnings[0].line, 18);

    // Scenario 2: an alert whose body mentions `[something]`, which also
    // appears again outside the blockquote.
    let tip_doc = r#"> [!TIP]
> Here's a useful tip about [something].
> Multiple lines are allowed.

[something] is mentioned but not defined."#;
    let tip_ctx = LintContext::new(tip_doc, crate::config::MarkdownFlavor::Standard, None);
    let tip_warnings = rule.check(&tip_ctx).unwrap();

    // A single warning mentioning `something` is expected. Per the original
    // note this is meant to be the occurrence outside the blockquote, which
    // may correspond to markdownlint's behaviour.
    assert_eq!(tip_warnings.len(), 1, "Should flag undefined reference");
    assert!(tip_warnings[0].message.contains("something"));

    // Scenario 3: an alert whose body uses a reference that is defined below.
    let defined_doc = r#"> [!NOTE]
> See [reference] for more details.

[reference]: https://example.com"#;
    let defined_ctx = LintContext::new(defined_doc, crate::config::MarkdownFlavor::Standard, None);
    let defined_warnings = rule.check(&defined_ctx).unwrap();

    // Nothing to report: `[!NOTE]` is an alert marker and `[reference]` has a definition.
    assert_eq!(
        defined_warnings.len(),
        0,
        "Should not flag GitHub alerts or defined references"
    );
}
1611
#[test]
fn test_ignore_config() {
    // Names listed in the user-configured `ignore` option must be skipped;
    // any other undefined shortcut reference is still reported.
    let markdown = r#"# Document with Custom Types

Use [Vec] for dynamic arrays.
Use [HashMap] for key-value storage.
Use [Option] for nullable values.
Use [Result] for error handling.
"#;

    let ignored_names = ["Vec", "HashMap", "Option"];
    let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
        shortcut_syntax: true,
        ignore: ignored_names.iter().map(|name| name.to_string()).collect(),
    });

    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let warnings = rule.check(&lint_ctx).unwrap();

    // `[Result]` is the only bracketed name that is absent from the ignore list.
    assert_eq!(warnings.len(), 1, "Should only flag names not in ignore");
    assert!(warnings[0].message.contains("Result"));
}
1635
#[test]
fn test_ignore_case_insensitive() {
    // A single ignore entry ("Vec") is expected to cover every casing of the
    // same name that appears in the document.
    let markdown = r#"# Case Insensitivity Test

[Vec] should be ignored.
[vec] should also be ignored (different case, same match).
[VEC] should also be ignored (different case, same match).
[undefined] should be flagged (not in ignore list).
"#;

    let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
        shortcut_syntax: true,
        ignore: vec![String::from("Vec")],
    });

    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let warnings = rule.check(&lint_ctx).unwrap();

    // Only `[undefined]` remains once all three casings of "Vec" are ignored.
    assert_eq!(warnings.len(), 1, "Should only flag non-ignored reference");
    assert!(warnings[0].message.contains("undefined"));
}
1659
#[test]
fn test_ignore_empty_by_default() {
    // A rule built with `new()` (no ignore entries configured in this test)
    // must keep reporting an undefined full reference link.
    let markdown = "[text][undefined]";
    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);

    let rule = MD052ReferenceLinkImages::new();
    let warnings = rule.check(&lint_ctx).unwrap();

    // The undefined label is still flagged.
    assert_eq!(warnings.len(), 1);
    assert!(warnings[0].message.contains("undefined"));
}
1673
#[test]
fn test_ignore_with_reference_links() {
    // The ignore list must also apply to the full reference form `[text][ref]`,
    // here with shortcut_syntax left disabled.
    let markdown = r#"# Test

See [documentation][CustomType] for details.
See [other docs][MissingRef] for more.
"#;

    let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
        shortcut_syntax: false,
        ignore: vec![String::from("CustomType")],
    });

    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let warnings = rule.check(&lint_ctx).unwrap();

    // Dump every warning to stderr so a failing run shows what was reported.
    warnings
        .iter()
        .enumerate()
        .for_each(|(idx, warning)| eprintln!("Warning {}: {}", idx, warning.message));

    // `[MissingRef]` is reported and `[CustomType]` is not. The assertion
    // looks for the lowercased id ("missingref") in the message text.
    assert_eq!(warnings.len(), 1, "Expected 1 warning, got {}", warnings.len());
    assert!(
        warnings[0].message.contains("missingref"),
        "Expected 'missingref' in message: {}",
        warnings[0].message
    );
}
1705
#[test]
fn test_ignore_multiple() {
    // Several ignore entries configured at once must all be honoured.
    let markdown = r#"# Types

[i32] [u64] [String] [Arc] [Mutex] [Box]
"#;

    let ignored_names = ["i32", "u64", "String", "Arc", "Mutex"];
    let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
        shortcut_syntax: true,
        ignore: ignored_names.iter().map(|name| name.to_string()).collect(),
    });

    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let warnings = rule.check(&lint_ctx).unwrap();

    // NOTE(review): per the original comment, `i32` and `u64` are also covered
    // by a hard-coded skip list (not visible here) while `String` is not, so
    // `String` is the entry that actually proves the user config is applied.
    // `[Box]` is absent from the ignore list and must be the only warning.
    assert_eq!(warnings.len(), 1);
    assert!(warnings[0].message.contains("Box"));
}
1734
#[test]
fn test_nested_code_fences_reference_extraction() {
    // A 4-backtick fence wraps a 3-backtick fence, so the inner ``` lines are
    // plain content of the outer block rather than fence boundaries. The
    // `[ref-inside]: ...` line therefore sits inside a code block. Per the
    // original note, this checks that reference extraction relies on the
    // in_code_block information pre-computed by LintContext.
    let markdown = "````\n```\n[ref-inside]: https://example.com\n```\n````\n\n[Use this link][ref-inside]";
    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);

    let rule = MD052ReferenceLinkImages::new();
    let warnings = rule.check(&lint_ctx).unwrap();

    // A definition inside the outer code block must not count as a definition,
    // so the link that uses `ref-inside` is reported as undefined.
    assert_eq!(
        warnings.len(),
        1,
        "Reference defined inside nested code fence should not count as a definition"
    );
    assert!(warnings[0].message.contains("ref-inside"));
}
1757}