Skip to main content

rumdl_lib/rules/
md052_reference_links_images.rs

1use crate::rule::{FixCapability, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::SHORTCUT_REF_REGEX;
5use crate::utils::skip_context::{is_in_math_context, is_in_table_cell};
6use regex::Regex;
7use std::collections::{HashMap, HashSet};
8use std::sync::LazyLock;
9
10mod md052_config;
11use md052_config::MD052Config;
12
// Pattern to match reference definitions [ref]: url
// Note: everything after the colon is matched by `\s*.*`, which may be empty,
// so bare definitions like [ref]: are still recognized
// The capturing group (group 1) is the label. It accepts backslash-escaped characters and
// one level of nested brackets to support cases like [`union[t, none]`]:
static REF_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap());
18
// Pattern for list items to exclude from reference checks (standard regex is fine)
// Matches an optionally indented bullet marker (-, * or +) followed by whitespace and an
// optional task checkbox ([x], [X] or [ ]); ordered list markers are not matched
static LIST_ITEM_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap());
21
// Pattern for output example sections (standard regex is fine)
// Matches a heading line (one or more `#` at column 0) whose entire text is exactly
// "Output", "Example", "Output Style" or "Output Format" (case-sensitive)
static OUTPUT_EXAMPLE_START: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap());
25
// Pattern for GitHub alerts/callouts in blockquotes (e.g., > [!NOTE], > [!TIP], etc.)
// Extended to include additional common alert types
// The alert type must be upper-case and must directly follow the `>` marker
// (only whitespace may come in between)
static GITHUB_ALERT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]")
        .unwrap()
});
32
33// Pattern to detect URLs that may contain brackets (IPv6, API endpoints, etc.)
34// This pattern specifically looks for:
35// - IPv6 addresses: https://[::1] or https://[2001:db8::1]
36// - IPv6 with zone IDs: https://[fe80::1%eth0]
37// - IPv6 mixed notation: https://[::ffff:192.0.2.1]
38// - API paths with array notation: https://api.example.com/users[0]
39// But NOT markdown reference links that happen to follow URLs
40static URL_WITH_BRACKETS: LazyLock<Regex> =
41    LazyLock::new(|| Regex::new(r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])").unwrap());
42
/// Rule MD052: Reference links and images should use a reference that is defined
///
/// See [docs/md052.md](../../docs/md052.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when a reference link or image uses a reference that isn't defined.
///
/// ## Configuration
///
/// - `shortcut-syntax`: Whether to check shortcut reference syntax `[text]` (default: false)
/// - `ignore`: Reference names that are never reported (compared ASCII case-insensitively)
///
/// By default, only full (`[text][ref]`) and collapsed (`[text][]`) reference syntax is checked.
/// Shortcut syntax is ambiguous because `[text]` could be a reference link OR just text in brackets.
#[derive(Clone, Default)]
pub struct MD052ReferenceLinkImages {
    // Rule options; this file reads `ignore` and `shortcut_syntax` from it.
    config: MD052Config,
}
59
60impl MD052ReferenceLinkImages {
61    pub fn new() -> Self {
62        Self {
63            config: MD052Config::default(),
64        }
65    }
66
    /// Create the rule from an explicit [`MD052Config`] instead of the default configuration.
    pub fn from_config_struct(config: MD052Config) -> Self {
        Self { config }
    }
70
71    /// Strip surrounding backticks from a string
72    /// Used for MkDocs auto-reference detection where `module.Class` should be treated as module.Class
73    fn strip_backticks(s: &str) -> &str {
74        s.trim_start_matches('`').trim_end_matches('`')
75    }
76
77    /// Check if a string is a valid Python identifier
78    /// Used for MkDocs auto-reference detection where single-word backtick-wrapped identifiers
79    /// like `str`, `int`, etc. should be accepted as valid auto-references
80    fn is_valid_python_identifier(s: &str) -> bool {
81        if s.is_empty() {
82            return false;
83        }
84        let first_char = s.chars().next().unwrap();
85        if !first_char.is_ascii_alphabetic() && first_char != '_' {
86            return false;
87        }
88        s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
89    }
90
91    /// Check if text matches a known non-reference pattern that should be skipped.
92    ///
93    /// These are deterministic patterns from markdown extensions or code examples,
94    /// not heuristics. Returns true for:
95    /// - User-configured names via `ignore` config option
96    /// - Markdown extensions: [^footnote], [@citation], [!alert], [TOC]
97    /// - Programming syntax: [T], [null], [i32], ["string"]
98    /// - Descriptive text: [default: value], [0-9]
99    fn is_known_non_reference_pattern(&self, text: &str) -> bool {
100        // Check user-configured ignore list first (case-insensitive match)
101        // Reference IDs are normalized to lowercase during parsing,
102        // so we use case-insensitive comparison for user convenience
103        if self.config.ignore.iter().any(|p| p.eq_ignore_ascii_case(text)) {
104            return true;
105        }
106        // Skip numeric patterns (array indices, ranges)
107        if text.chars().all(|c| c.is_ascii_digit()) {
108            return true;
109        }
110
111        // Skip numeric ranges like [1:3], [0:10], etc.
112        if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
113            return true;
114        }
115
116        // Skip patterns that look like config sections [tool.something], [section.subsection]
117        // But not if they contain other non-alphanumeric chars like hyphens, underscores, or backticks
118        // Backticks indicate intentional code formatting in a reference name (e.g., [`module.Class`])
119        if text.contains('.')
120            && !text.contains(' ')
121            && !text.contains('-')
122            && !text.contains('_')
123            && !text.contains('`')
124        {
125            // Config sections typically have dots, no spaces, and only alphanumeric + dots
126            return true;
127        }
128
129        // Skip glob/wildcard patterns like [*], [...], [**]
130        if text == "*" || text == "..." || text == "**" {
131            return true;
132        }
133
134        // Skip patterns that look like file paths [dir/file], [src/utils]
135        if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
136            return true;
137        }
138
139        // Skip programming type annotations like [int, str], [Dict[str, Any]]
140        // These typically have commas and/or nested brackets
141        if text.contains(',') || text.contains('[') || text.contains(']') {
142            // Check if it looks like a type annotation pattern
143            return true;
144        }
145
146        // Note: We don't filter out patterns with backticks because backticks in reference names
147        // are valid markdown syntax, e.g., [`dataclasses.InitVar`] is a valid reference name
148
149        // Skip patterns that look like module/class paths ONLY if they don't have backticks
150        // Backticks indicate intentional code formatting in a reference name
151        // e.g., skip [dataclasses.initvar] but allow [`typing.ClassVar`]
152        if !text.contains('`')
153            && text.contains('.')
154            && !text.contains(' ')
155            && !text.contains('-')
156            && !text.contains('_')
157        {
158            return true;
159        }
160
161        // Note: We don't filter based on word count anymore because legitimate references
162        // can have many words, like "python language reference for import statements"
163        // Word count filtering was causing false positives where valid references were
164        // being incorrectly flagged as unused
165
166        // Skip patterns that are just punctuation or operators
167        if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
168            return true;
169        }
170
171        // Skip very short non-word patterns (likely operators or syntax)
172        if text.len() <= 2 && !text.chars().all(char::is_alphabetic) {
173            return true;
174        }
175
176        // Skip quoted patterns like ["E501"], ["ALL"], ["E", "F"]
177        if (text.starts_with('"') && text.ends_with('"'))
178            || (text.starts_with('\'') && text.ends_with('\''))
179            || text.contains('"')
180            || text.contains('\'')
181        {
182            return true;
183        }
184
185        // Skip descriptive patterns with colon like [default: the project root]
186        // But allow simple numeric ranges which are handled above
187        if text.contains(':') && text.contains(' ') {
188            return true;
189        }
190
191        // Skip alert/admonition patterns like [!WARN], [!NOTE], etc.
192        if text.starts_with('!') {
193            return true;
194        }
195
196        // Skip footnote syntax like [^1], [^note], etc.
197        // Footnotes start with ^ and are a common markdown extension
198        if text.starts_with('^') {
199            return true;
200        }
201
202        // Skip Pandoc/RMarkdown/Quarto citation syntax like [@citation-key]
203        // Citations in these formats start with @ inside brackets
204        if text.starts_with('@') {
205            return true;
206        }
207
208        // Skip table of contents markers like [TOC]
209        // Used by Python-Markdown and other processors
210        if text == "TOC" {
211            return true;
212        }
213
214        // Skip single uppercase letters (likely type parameters) like [T], [U], [K], [V]
215        if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
216            return true;
217        }
218
219        // Skip common programming type names, literals, and short identifiers
220        // that are likely not markdown references
221        let common_non_refs = [
222            // Programming types
223            "object",
224            "Object",
225            "any",
226            "Any",
227            "inv",
228            "void",
229            "bool",
230            "int",
231            "float",
232            "str",
233            "char",
234            "i8",
235            "i16",
236            "i32",
237            "i64",
238            "i128",
239            "isize",
240            "u8",
241            "u16",
242            "u32",
243            "u64",
244            "u128",
245            "usize",
246            "f32",
247            "f64",
248            // JavaScript/JSON literals (excluding "undefined" which is too ambiguous)
249            "null",
250            "true",
251            "false",
252            "NaN",
253            "Infinity",
254            // Common JavaScript output patterns
255            "object Object",
256        ];
257
258        if common_non_refs.contains(&text) {
259            return true;
260        }
261
262        false
263    }
264
265    /// Check if a byte position is inside any code span. O(log n) via binary search.
266    fn is_in_code_span(byte_pos: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
267        let idx = code_spans.partition_point(|span| span.byte_offset <= byte_pos);
268        idx > 0 && byte_pos < code_spans[idx - 1].byte_end
269    }
270
271    /// Check if a byte position is within an HTML tag. O(log n) via binary search.
272    fn is_in_html_tag(html_tags: &[crate::lint_context::HtmlTag], byte_pos: usize) -> bool {
273        let idx = html_tags.partition_point(|tag| tag.byte_offset <= byte_pos);
274        idx > 0 && byte_pos < html_tags[idx - 1].byte_end
275    }
276
277    fn extract_references(&self, ctx: &crate::lint_context::LintContext) -> HashSet<String> {
278        use crate::utils::skip_context::is_mkdocs_snippet_line;
279
280        let mut references = HashSet::new();
281
282        for (line_num, line) in ctx.content.lines().enumerate() {
283            // Use LintContext's pre-computed code block info (1-indexed)
284            if let Some(line_info) = ctx.line_info(line_num + 1)
285                && line_info.in_code_block
286            {
287                continue;
288            }
289
290            // Skip lines that look like MkDocs snippet markers (only in MkDocs mode)
291            if is_mkdocs_snippet_line(line, ctx.flavor) {
292                continue;
293            }
294
295            // Check for abbreviation syntax (*[ABBR]: Definition) and skip it
296            // Abbreviations are not reference links and should not be tracked
297            if line.trim_start().starts_with("*[") {
298                continue;
299            }
300
301            if let Some(cap) = REF_REGEX.captures(line) {
302                // Store references in lowercase for case-insensitive comparison
303                if let Some(reference) = cap.get(1) {
304                    references.insert(reference.as_str().to_lowercase());
305                }
306            }
307        }
308
309        references
310    }
311
312    fn find_undefined_references(
313        &self,
314        references: &HashSet<String>,
315        ctx: &crate::lint_context::LintContext,
316        mkdocs_mode: bool,
317    ) -> Vec<(usize, usize, usize, String)> {
318        let mut undefined = Vec::new();
319        let mut reported_refs = HashMap::new();
320        let mut in_example_section = false;
321
322        // Get code spans and HTML tags once for the entire function
323        let code_spans = ctx.code_spans();
324        let html_tags = ctx.html_tags();
325
326        // Use cached data for reference links and images
327        for link in &ctx.links {
328            if !link.is_reference {
329                continue; // Skip inline links
330            }
331
332            // Skip links inside Jinja templates
333            if ctx.is_in_jinja_range(link.byte_offset) {
334                continue;
335            }
336
337            // Skip links inside code spans
338            if Self::is_in_code_span(link.byte_offset, &code_spans) {
339                continue;
340            }
341
342            // Skip links inside HTML comments (uses pre-computed ranges)
343            if ctx.is_in_html_comment(link.byte_offset) || ctx.is_in_mdx_comment(link.byte_offset) {
344                continue;
345            }
346
347            // Skip links inside HTML tags
348            if Self::is_in_html_tag(&html_tags, link.byte_offset) {
349                continue;
350            }
351
352            // Skip links inside math contexts
353            if is_in_math_context(ctx, link.byte_offset) {
354                continue;
355            }
356
357            // Skip links inside table cells
358            if is_in_table_cell(ctx, link.line, link.start_col) {
359                continue;
360            }
361
362            // Skip links inside frontmatter
363            if ctx.line_info(link.line).is_some_and(|info| info.in_front_matter) {
364                continue;
365            }
366
367            // Skip Pandoc/Quarto citations ([@citation], @citation)
368            // Citations look like reference links but are bibliography references
369            if ctx.flavor.is_pandoc_compatible() && ctx.is_in_citation(link.byte_offset) {
370                continue;
371            }
372
373            // Skip links inside shortcodes ({{< ... >}} or {{% ... %}})
374            // Shortcodes may contain template syntax that looks like reference links
375            if ctx.is_in_shortcode(link.byte_offset) {
376                continue;
377            }
378
379            if let Some(ref_id) = &link.reference_id {
380                let reference_lower = ref_id.to_lowercase();
381
382                // Skip known non-reference patterns (markdown extensions, code examples)
383                if self.is_known_non_reference_pattern(ref_id) {
384                    continue;
385                }
386
387                // Skip MkDocs auto-references if in MkDocs mode
388                // Check both the reference_id and the link text for shorthand references
389                // Strip backticks since MkDocs resolves `module.Class` as module.Class
390                let stripped_ref = Self::strip_backticks(ref_id);
391                let stripped_text = Self::strip_backticks(&link.text);
392                if mkdocs_mode
393                    && (is_mkdocs_auto_reference(stripped_ref)
394                        || is_mkdocs_auto_reference(stripped_text)
395                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
396                        || (link.text.as_ref() != stripped_text && Self::is_valid_python_identifier(stripped_text)))
397                {
398                    continue;
399                }
400
401                // Check if reference is defined
402                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
403                    // Check if the line is in an example section or list item
404                    if let Some(line_info) = ctx.line_info(link.line) {
405                        if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
406                            in_example_section = true;
407                            continue;
408                        }
409
410                        if in_example_section {
411                            continue;
412                        }
413
414                        // Skip list items
415                        if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
416                            continue;
417                        }
418
419                        // Skip lines that are HTML content
420                        let trimmed = line_info.content(ctx.content).trim_start();
421                        if trimmed.starts_with('<') {
422                            continue;
423                        }
424                    }
425
426                    let match_len = link.byte_end - link.byte_offset;
427                    undefined.push((link.line - 1, link.start_col, match_len, ref_id.to_string()));
428                    reported_refs.insert(reference_lower, true);
429                }
430            }
431        }
432
433        // Use cached data for reference images
434        for image in &ctx.images {
435            if !image.is_reference {
436                continue; // Skip inline images
437            }
438
439            // Skip images inside Jinja templates
440            if ctx.is_in_jinja_range(image.byte_offset) {
441                continue;
442            }
443
444            // Skip images inside code spans
445            if Self::is_in_code_span(image.byte_offset, &code_spans) {
446                continue;
447            }
448
449            // Skip images inside HTML comments (uses pre-computed ranges)
450            if ctx.is_in_html_comment(image.byte_offset) || ctx.is_in_mdx_comment(image.byte_offset) {
451                continue;
452            }
453
454            // Skip images inside HTML tags
455            if Self::is_in_html_tag(&html_tags, image.byte_offset) {
456                continue;
457            }
458
459            // Skip images inside math contexts
460            if is_in_math_context(ctx, image.byte_offset) {
461                continue;
462            }
463
464            // Skip images inside table cells
465            if is_in_table_cell(ctx, image.line, image.start_col) {
466                continue;
467            }
468
469            // Skip images inside frontmatter
470            if ctx.line_info(image.line).is_some_and(|info| info.in_front_matter) {
471                continue;
472            }
473
474            if let Some(ref_id) = &image.reference_id {
475                let reference_lower = ref_id.to_lowercase();
476
477                // Skip known non-reference patterns (markdown extensions, code examples)
478                if self.is_known_non_reference_pattern(ref_id) {
479                    continue;
480                }
481
482                // Skip MkDocs auto-references if in MkDocs mode
483                // Check both the reference_id and the alt text for shorthand references
484                // Strip backticks since MkDocs resolves `module.Class` as module.Class
485                let stripped_ref = Self::strip_backticks(ref_id);
486                let stripped_alt = Self::strip_backticks(&image.alt_text);
487                if mkdocs_mode
488                    && (is_mkdocs_auto_reference(stripped_ref)
489                        || is_mkdocs_auto_reference(stripped_alt)
490                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
491                        || (image.alt_text.as_ref() != stripped_alt && Self::is_valid_python_identifier(stripped_alt)))
492                {
493                    continue;
494                }
495
496                // Check if reference is defined
497                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
498                    // Check if the line is in an example section or list item
499                    if let Some(line_info) = ctx.line_info(image.line) {
500                        if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
501                            in_example_section = true;
502                            continue;
503                        }
504
505                        if in_example_section {
506                            continue;
507                        }
508
509                        // Skip list items
510                        if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
511                            continue;
512                        }
513
514                        // Skip lines that are HTML content
515                        let trimmed = line_info.content(ctx.content).trim_start();
516                        if trimmed.starts_with('<') {
517                            continue;
518                        }
519                    }
520
521                    let match_len = image.byte_end - image.byte_offset;
522                    undefined.push((image.line - 1, image.start_col, match_len, ref_id.to_string()));
523                    reported_refs.insert(reference_lower, true);
524                }
525            }
526        }
527
528        // Build a set of byte ranges that are already covered by parsed links/images
529        let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
530
531        // Add ranges from parsed links
532        for link in &ctx.links {
533            covered_ranges.push((link.byte_offset, link.byte_end));
534        }
535
536        // Add ranges from parsed images
537        for image in &ctx.images {
538            covered_ranges.push((image.byte_offset, image.byte_end));
539        }
540
541        // Sort ranges by start position
542        covered_ranges.sort_by_key(|&(start, _)| start);
543
544        // Handle shortcut references [text] which aren't captured in ctx.links
545        // Only check these if shortcut_syntax is enabled (default: false)
546        // Shortcut syntax is ambiguous because [text] could be a reference link
547        // OR just text in brackets (like spec notation in quotes)
548        if !self.config.shortcut_syntax {
549            return undefined;
550        }
551
552        // Need to use regex for shortcut references
553        let lines = ctx.raw_lines();
554        in_example_section = false; // Reset for line-by-line processing
555
556        for (line_num, line) in lines.iter().enumerate() {
557            // Skip lines in frontmatter or code blocks using LintContext's pre-computed info
558            if let Some(line_info) = ctx.line_info(line_num + 1)
559                && (line_info.in_front_matter || line_info.in_code_block)
560            {
561                continue;
562            }
563
564            // Check for example sections
565            if OUTPUT_EXAMPLE_START.is_match(line) {
566                in_example_section = true;
567                continue;
568            }
569
570            if in_example_section {
571                // Check if we're exiting the example section (another heading)
572                if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
573                    in_example_section = false;
574                } else {
575                    continue;
576                }
577            }
578
579            // Skip list items
580            if LIST_ITEM_REGEX.is_match(line) {
581                continue;
582            }
583
584            // Skip lines that are HTML content
585            let trimmed_line = line.trim_start();
586            if trimmed_line.starts_with('<') {
587                continue;
588            }
589
590            // Skip GitHub alerts/callouts (e.g., > [!TIP])
591            if GITHUB_ALERT_REGEX.is_match(line) {
592                continue;
593            }
594
595            // Skip abbreviation definitions (*[ABBR]: Definition)
596            // These are not reference links and should not be checked
597            if trimmed_line.starts_with("*[") {
598                continue;
599            }
600
601            // Collect positions of brackets that are part of URLs (IPv6, etc.)
602            // so we can exclude them from reference checking
603            let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
604            for mat in URL_WITH_BRACKETS.find_iter(line) {
605                // Find all bracket pairs within this URL match
606                let url_str = mat.as_str();
607                let url_start = mat.start();
608
609                // Find brackets within the URL (e.g., in https://[::1]:8080)
610                let mut idx = 0;
611                while idx < url_str.len() {
612                    if let Some(bracket_start) = url_str[idx..].find('[') {
613                        let bracket_start_abs = url_start + idx + bracket_start;
614                        if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
615                            let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
616                            url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
617                            idx += bracket_start + bracket_end + 2;
618                        } else {
619                            break;
620                        }
621                    } else {
622                        break;
623                    }
624                }
625            }
626
627            // Check shortcut references: [reference]
628            if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
629                for cap in captures {
630                    if let Some(ref_match) = cap.get(1) {
631                        // Check if this bracket is part of a URL (IPv6, etc.)
632                        let bracket_start = cap.get(0).unwrap().start();
633                        let bracket_end = cap.get(0).unwrap().end();
634
635                        // Skip if this bracket pair is within any URL bracket range
636                        let is_in_url = url_bracket_ranges
637                            .iter()
638                            .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
639
640                        if is_in_url {
641                            continue;
642                        }
643
644                        // Skip Pandoc/RMarkdown inline footnotes: ^[text]
645                        // Check if there's a ^ immediately before the opening bracket
646                        if bracket_start > 0 {
647                            // bracket_start is a byte offset, so we need to check the byte before
648                            if let Some(byte) = line.as_bytes().get(bracket_start.saturating_sub(1))
649                                && *byte == b'^'
650                            {
651                                continue; // This is an inline footnote, skip it
652                            }
653                        }
654
655                        let reference = ref_match.as_str();
656                        let reference_lower = reference.to_lowercase();
657
658                        // Skip known non-reference patterns (markdown extensions, code examples)
659                        if self.is_known_non_reference_pattern(reference) {
660                            continue;
661                        }
662
663                        // Skip GitHub alerts (including extended types)
664                        if let Some(alert_type) = reference.strip_prefix('!')
665                            && matches!(
666                                alert_type,
667                                "NOTE"
668                                    | "TIP"
669                                    | "WARNING"
670                                    | "IMPORTANT"
671                                    | "CAUTION"
672                                    | "INFO"
673                                    | "SUCCESS"
674                                    | "FAILURE"
675                                    | "DANGER"
676                                    | "BUG"
677                                    | "EXAMPLE"
678                                    | "QUOTE"
679                            )
680                        {
681                            continue;
682                        }
683
684                        // Skip MkDocs snippet section markers like [start:section] or [end:section]
685                        // when they appear as part of snippet syntax (e.g., # -8<- [start:section])
686                        if mkdocs_mode
687                            && (reference.starts_with("start:") || reference.starts_with("end:"))
688                            && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
689                                || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
690                        {
691                            continue;
692                        }
693
694                        // Skip MkDocs auto-references if in MkDocs mode
695                        // Strip backticks since MkDocs resolves `module.Class` as module.Class
696                        let stripped_ref = Self::strip_backticks(reference);
697                        if mkdocs_mode
698                            && (is_mkdocs_auto_reference(stripped_ref)
699                                || (reference != stripped_ref && Self::is_valid_python_identifier(stripped_ref)))
700                        {
701                            continue;
702                        }
703
704                        // Pandoc-flavor implicit header references: `[Section name]` resolves
705                        // to a heading whose Pandoc slug matches the bracketed text. These are
706                        // not undefined references — Pandoc renders them as anchor links.
707                        if ctx.flavor.is_pandoc_compatible() && ctx.matches_implicit_header_reference(reference) {
708                            continue;
709                        }
710
711                        if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
712                            let full_match = cap.get(0).unwrap();
713                            let col = full_match.start();
714                            let line_start_byte = ctx.line_offsets[line_num];
715                            let byte_pos = line_start_byte + col;
716
717                            // Skip if inside code span
718                            let code_spans = ctx.code_spans();
719                            if Self::is_in_code_span(byte_pos, &code_spans) {
720                                continue;
721                            }
722
723                            // Skip if inside Jinja template
724                            if ctx.is_in_jinja_range(byte_pos) {
725                                continue;
726                            }
727
728                            // Skip if inside code block
729                            if crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block(
730                                &ctx.code_blocks,
731                                byte_pos,
732                            ) {
733                                continue;
734                            }
735
736                            // Skip if inside HTML comment (uses pre-computed ranges)
737                            if ctx.is_in_html_comment(byte_pos) || ctx.is_in_mdx_comment(byte_pos) {
738                                continue;
739                            }
740
741                            // Skip if inside HTML tag
742                            if Self::is_in_html_tag(&html_tags, byte_pos) {
743                                continue;
744                            }
745
746                            // Skip if inside math context
747                            if is_in_math_context(ctx, byte_pos) {
748                                continue;
749                            }
750
751                            // Skip if inside table cell
752                            if is_in_table_cell(ctx, line_num + 1, col) {
753                                continue;
754                            }
755
756                            let byte_end = byte_pos + (full_match.end() - full_match.start());
757
758                            // Check if this shortcut ref overlaps with any parsed link/image
759                            let mut is_covered = false;
760                            for &(range_start, range_end) in &covered_ranges {
761                                if range_start <= byte_pos && byte_end <= range_end {
762                                    // This shortcut ref is completely within a parsed link/image
763                                    is_covered = true;
764                                    break;
765                                }
766                                if range_start > byte_end {
767                                    // No need to check further (ranges are sorted)
768                                    break;
769                                }
770                            }
771
772                            if is_covered {
773                                continue;
774                            }
775
776                            // More sophisticated checks to avoid false positives
777
778                            // Check 1: If preceded by ], this might be part of [text][ref]
779                            // Look for the pattern ...][ref] and check if there's a matching [ before
780                            let line_chars: Vec<char> = line.chars().collect();
781                            if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
782                                // Look backwards for a [ that would make this [text][ref]
783                                let mut bracket_count = 1; // We already saw one ]
784                                let mut check_pos = col.saturating_sub(2);
785                                let mut found_opening = false;
786
787                                while check_pos > 0 && check_pos < line_chars.len() {
788                                    match line_chars.get(check_pos) {
789                                        Some(&']') => bracket_count += 1,
790                                        Some(&'[') => {
791                                            bracket_count -= 1;
792                                            if bracket_count == 0 {
793                                                // Check if this [ is escaped
794                                                if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
795                                                    found_opening = true;
796                                                }
797                                                break;
798                                            }
799                                        }
800                                        _ => {}
801                                    }
802                                    if check_pos == 0 {
803                                        break;
804                                    }
805                                    check_pos = check_pos.saturating_sub(1);
806                                }
807
808                                if found_opening {
809                                    // This is part of [text][ref], skip it
810                                    continue;
811                                }
812                            }
813
814                            // Check 2: If there's an escaped bracket pattern before this
815                            // e.g., \[text\][ref], the [ref] shouldn't be treated as a shortcut
816                            let before_text = &line[..col];
817                            if before_text.contains("\\]") {
818                                // Check if there's a \[ before the \]
819                                if let Some(escaped_close_pos) = before_text.rfind("\\]") {
820                                    let search_text = &before_text[..escaped_close_pos];
821                                    if search_text.contains("\\[") {
822                                        // This looks like \[...\][ref], skip it
823                                        continue;
824                                    }
825                                }
826                            }
827
828                            let match_len = full_match.end() - full_match.start();
829                            undefined.push((line_num, col, match_len, reference.to_string()));
830                            reported_refs.insert(reference_lower, true);
831                        }
832                    }
833                }
834            }
835        }
836
837        undefined
838    }
839}
840
841impl Rule for MD052ReferenceLinkImages {
842    fn name(&self) -> &'static str {
843        "MD052"
844    }
845
846    fn description(&self) -> &'static str {
847        "Reference links and images should use a reference that exists"
848    }
849
850    fn category(&self) -> RuleCategory {
851        RuleCategory::Link
852    }
853
854    fn fix_capability(&self) -> FixCapability {
855        FixCapability::Unfixable
856    }
857
858    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
859        let content = ctx.content;
860        let mut warnings = Vec::new();
861
862        // OPTIMIZATION: Early exit if no brackets at all
863        if !content.contains('[') {
864            return Ok(warnings);
865        }
866
867        // Check if we're in MkDocs mode from the context
868        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
869
870        let references = self.extract_references(ctx);
871
872        // Use optimized detection method with cached link/image data
873        let lines = ctx.raw_lines();
874        for (line_num, col, match_len, reference) in self.find_undefined_references(&references, ctx, mkdocs_mode) {
875            let line_content = lines.get(line_num).unwrap_or(&"");
876
877            // Calculate precise character range for the entire undefined reference
878            let (start_line, start_col, end_line, end_col) =
879                calculate_match_range(line_num + 1, line_content, col, match_len);
880
881            warnings.push(LintWarning {
882                rule_name: Some(self.name().to_string()),
883                line: start_line,
884                column: start_col,
885                end_line,
886                end_column: end_col,
887                message: format!("Reference '{reference}' not found"),
888                severity: Severity::Warning,
889                fix: None,
890            });
891        }
892
893        Ok(warnings)
894    }
895
896    /// Check if this rule should be skipped for performance
897    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
898        // Skip if content is empty or has no links/images
899        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
900    }
901
902    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
903        let content = ctx.content;
904        // No automatic fix available for undefined references
905        Ok(content.to_string())
906    }
907
908    fn as_any(&self) -> &dyn std::any::Any {
909        self
910    }
911
912    fn default_config_section(&self) -> Option<(String, toml::Value)> {
913        let json_value = serde_json::to_value(&self.config).ok()?;
914        Some((
915            self.name().to_string(),
916            crate::rule_config_serde::json_to_toml_value(&json_value)?,
917        ))
918    }
919
920    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
921    where
922        Self: Sized,
923    {
924        let rule_config = crate::rule_config_serde::load_rule_config::<MD052Config>(config);
925        Box::new(Self::from_config_struct(rule_config))
926    }
927}
928
929#[cfg(test)]
930mod tests {
931    use super::*;
932    use crate::lint_context::LintContext;
933
934    #[test]
935    fn test_valid_reference_link() {
936        let rule = MD052ReferenceLinkImages::new();
937        let content = "[text][ref]\n\n[ref]: https://example.com";
938        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
939        let result = rule.check(&ctx).unwrap();
940
941        assert_eq!(result.len(), 0);
942    }
943
944    #[test]
945    fn test_undefined_reference_link() {
946        let rule = MD052ReferenceLinkImages::new();
947        let content = "[text][undefined]";
948        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
949        let result = rule.check(&ctx).unwrap();
950
951        assert_eq!(result.len(), 1);
952        assert!(result[0].message.contains("Reference 'undefined' not found"));
953    }
954
955    #[test]
956    fn test_valid_reference_image() {
957        let rule = MD052ReferenceLinkImages::new();
958        let content = "![alt][img]\n\n[img]: image.jpg";
959        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
960        let result = rule.check(&ctx).unwrap();
961
962        assert_eq!(result.len(), 0);
963    }
964
965    #[test]
966    fn test_undefined_reference_image() {
967        let rule = MD052ReferenceLinkImages::new();
968        let content = "![alt][missing]";
969        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
970        let result = rule.check(&ctx).unwrap();
971
972        assert_eq!(result.len(), 1);
973        assert!(result[0].message.contains("Reference 'missing' not found"));
974    }
975
976    #[test]
977    fn test_case_insensitive_references() {
978        let rule = MD052ReferenceLinkImages::new();
979        let content = "[Text][REF]\n\n[ref]: https://example.com";
980        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
981        let result = rule.check(&ctx).unwrap();
982
983        assert_eq!(result.len(), 0);
984    }
985
986    #[test]
987    fn test_shortcut_reference_valid() {
988        let rule = MD052ReferenceLinkImages::new();
989        let content = "[ref]\n\n[ref]: https://example.com";
990        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
991        let result = rule.check(&ctx).unwrap();
992
993        assert_eq!(result.len(), 0);
994    }
995
996    #[test]
997    fn test_shortcut_reference_undefined_with_shortcut_syntax_enabled() {
998        // Shortcut syntax checking is disabled by default
999        // Enable it to test undefined shortcut references
1000        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1001            shortcut_syntax: true,
1002            ..Default::default()
1003        });
1004        let content = "[undefined]";
1005        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1006        let result = rule.check(&ctx).unwrap();
1007
1008        assert_eq!(result.len(), 1);
1009        assert!(result[0].message.contains("Reference 'undefined' not found"));
1010    }
1011
1012    #[test]
1013    fn test_shortcut_reference_not_checked_by_default() {
1014        // By default, shortcut references are NOT checked (matches markdownlint behavior)
1015        let rule = MD052ReferenceLinkImages::new();
1016        let content = "[undefined]";
1017        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1018        let result = rule.check(&ctx).unwrap();
1019
1020        // Should be 0 because shortcut_syntax is false by default
1021        assert_eq!(result.len(), 0);
1022    }
1023
1024    #[test]
1025    fn test_inline_links_ignored() {
1026        let rule = MD052ReferenceLinkImages::new();
1027        let content = "[text](https://example.com)";
1028        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1029        let result = rule.check(&ctx).unwrap();
1030
1031        assert_eq!(result.len(), 0);
1032    }
1033
1034    #[test]
1035    fn test_inline_images_ignored() {
1036        let rule = MD052ReferenceLinkImages::new();
1037        let content = "![alt](image.jpg)";
1038        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1039        let result = rule.check(&ctx).unwrap();
1040
1041        assert_eq!(result.len(), 0);
1042    }
1043
1044    #[test]
1045    fn test_references_in_code_blocks_ignored() {
1046        let rule = MD052ReferenceLinkImages::new();
1047        let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
1048        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1049        let result = rule.check(&ctx).unwrap();
1050
1051        assert_eq!(result.len(), 0);
1052    }
1053
1054    #[test]
1055    fn test_references_in_inline_code_ignored() {
1056        let rule = MD052ReferenceLinkImages::new();
1057        let content = "`[undefined]`";
1058        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1059        let result = rule.check(&ctx).unwrap();
1060
1061        // References inside inline code spans should be ignored
1062        assert_eq!(result.len(), 0);
1063    }
1064
1065    #[test]
1066    fn test_comprehensive_inline_code_detection() {
1067        // Enable shortcut_syntax to test comprehensive detection
1068        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1069            shortcut_syntax: true,
1070            ..Default::default()
1071        });
1072        let content = r#"# Test
1073
1074This `[inside]` should be ignored.
1075This [outside] should be flagged.
1076Reference links `[text][ref]` in code are ignored.
1077Regular reference [text][missing] should be flagged.
1078Images `![alt][img]` in code are ignored.
1079Regular image ![alt][badimg] should be flagged.
1080
1081Multiple `[one]` and `[two]` in code ignored, but [three] is not.
1082
1083```
1084[code block content] should be ignored
1085```
1086
1087`Multiple [refs] in [same] code span` ignored."#;
1088
1089        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1090        let result = rule.check(&ctx).unwrap();
1091
1092        // Should only flag: outside, missing, badimg, three (4 total)
1093        assert_eq!(result.len(), 4);
1094
1095        let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
1096        assert!(messages.iter().any(|m| m.contains("outside")));
1097        assert!(messages.iter().any(|m| m.contains("missing")));
1098        assert!(messages.iter().any(|m| m.contains("badimg")));
1099        assert!(messages.iter().any(|m| m.contains("three")));
1100
1101        // Should NOT flag any references inside code spans
1102        assert!(!messages.iter().any(|m| m.contains("inside")));
1103        assert!(!messages.iter().any(|m| m.contains("one")));
1104        assert!(!messages.iter().any(|m| m.contains("two")));
1105        assert!(!messages.iter().any(|m| m.contains("refs")));
1106        assert!(!messages.iter().any(|m| m.contains("same")));
1107    }
1108
1109    #[test]
1110    fn test_multiple_undefined_references() {
1111        let rule = MD052ReferenceLinkImages::new();
1112        let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
1113        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1114        let result = rule.check(&ctx).unwrap();
1115
1116        assert_eq!(result.len(), 3);
1117        assert!(result[0].message.contains("ref1"));
1118        assert!(result[1].message.contains("ref2"));
1119        assert!(result[2].message.contains("ref3"));
1120    }
1121
1122    #[test]
1123    fn test_mixed_valid_and_undefined() {
1124        let rule = MD052ReferenceLinkImages::new();
1125        let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
1126        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1127        let result = rule.check(&ctx).unwrap();
1128
1129        assert_eq!(result.len(), 1);
1130        assert!(result[0].message.contains("missing"));
1131    }
1132
1133    #[test]
1134    fn test_empty_reference() {
1135        let rule = MD052ReferenceLinkImages::new();
1136        let content = "[text][]\n\n[ref]: https://example.com";
1137        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1138        let result = rule.check(&ctx).unwrap();
1139
1140        // Empty reference should use the link text as reference
1141        assert_eq!(result.len(), 1);
1142    }
1143
1144    #[test]
1145    fn test_escaped_brackets_ignored() {
1146        let rule = MD052ReferenceLinkImages::new();
1147        let content = "\\[not a link\\]";
1148        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1149        let result = rule.check(&ctx).unwrap();
1150
1151        assert_eq!(result.len(), 0);
1152    }
1153
1154    #[test]
1155    fn test_list_items_ignored() {
1156        let rule = MD052ReferenceLinkImages::new();
1157        let content = "- [undefined]\n* [another]\n+ [third]";
1158        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1159        let result = rule.check(&ctx).unwrap();
1160
1161        // List items that look like shortcut references should be ignored
1162        assert_eq!(result.len(), 0);
1163    }
1164
1165    #[test]
1166    fn test_output_example_section_ignored() {
1167        // Enable shortcut_syntax to test example section handling
1168        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1169            shortcut_syntax: true,
1170            ..Default::default()
1171        });
1172        let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
1173        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1174        let result = rule.check(&ctx).unwrap();
1175
1176        // Only the reference outside the Output section should be flagged
1177        assert_eq!(result.len(), 1);
1178        assert!(result[0].message.contains("missing"));
1179    }
1180
1181    #[test]
1182    fn test_reference_definitions_in_code_blocks_ignored() {
1183        let rule = MD052ReferenceLinkImages::new();
1184        let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
1185        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1186        let result = rule.check(&ctx).unwrap();
1187
1188        // Reference defined in code block should not count
1189        assert_eq!(result.len(), 1);
1190        assert!(result[0].message.contains("ref"));
1191    }
1192
1193    #[test]
1194    fn test_multiple_references_to_same_undefined() {
1195        let rule = MD052ReferenceLinkImages::new();
1196        let content = "[first][missing] [second][missing] [third][missing]";
1197        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1198        let result = rule.check(&ctx).unwrap();
1199
1200        // Should only report once per unique reference
1201        assert_eq!(result.len(), 1);
1202        assert!(result[0].message.contains("missing"));
1203    }
1204
1205    #[test]
1206    fn test_reference_with_special_characters() {
1207        let rule = MD052ReferenceLinkImages::new();
1208        let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
1209        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1210        let result = rule.check(&ctx).unwrap();
1211
1212        assert_eq!(result.len(), 0);
1213    }
1214
1215    #[test]
1216    fn test_issue_51_html_attribute_not_reference() {
1217        // Test for issue #51 - HTML attributes with square brackets shouldn't be treated as references
1218        let rule = MD052ReferenceLinkImages::new();
1219        let content = r#"# Example
1220
1221## Test
1222
1223Want to fill out this form?
1224
1225<form method="post">
1226    <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
1227</form>"#;
1228        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1229        let result = rule.check(&ctx).unwrap();
1230
1231        assert_eq!(
1232            result.len(),
1233            0,
1234            "HTML attributes with square brackets should not be flagged as undefined references"
1235        );
1236    }
1237
1238    #[test]
1239    fn test_extract_references() {
1240        let rule = MD052ReferenceLinkImages::new();
1241        let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
1242        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1243        let refs = rule.extract_references(&ctx);
1244
1245        assert_eq!(refs.len(), 3);
1246        assert!(refs.contains("ref1"));
1247        assert!(refs.contains("ref2"));
1248        assert!(refs.contains("ref3"));
1249    }
1250
1251    #[test]
1252    fn test_inline_code_not_flagged() {
1253        // Enable shortcut_syntax to test inline code detection
1254        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1255            shortcut_syntax: true,
1256            ..Default::default()
1257        });
1258
1259        // Test that arrays in inline code are not flagged as references
1260        let content = r#"# Test
1261
1262Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.
1263
1264Also, `[todo]` is not a reference link.
1265
1266But this [reference] should be flagged.
1267
1268And this `[inline code]` should not be flagged.
1269"#;
1270
1271        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1272        let warnings = rule.check(&ctx).unwrap();
1273
1274        // Should only flag [reference], not the ones in backticks
1275        assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
1276        assert!(warnings[0].message.contains("'reference'"));
1277    }
1278
1279    #[test]
1280    fn test_code_block_references_ignored() {
1281        // Enable shortcut_syntax to test code block handling
1282        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1283            shortcut_syntax: true,
1284            ..Default::default()
1285        });
1286
1287        let content = r#"# Test
1288
1289```markdown
1290[undefined] reference in code block
1291![undefined] image in code block
1292```
1293
1294[real-undefined] reference outside
1295"#;
1296
1297        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1298        let warnings = rule.check(&ctx).unwrap();
1299
1300        // Should only flag [real-undefined], not the ones in code block
1301        assert_eq!(warnings.len(), 1);
1302        assert!(warnings[0].message.contains("'real-undefined'"));
1303    }
1304
1305    #[test]
1306    fn test_html_comments_ignored() {
1307        // Test for issue #20 - MD052 should not flag content inside HTML comments
1308        let rule = MD052ReferenceLinkImages::new();
1309
1310        // Test the exact case from issue #20
1311        let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
1312<!--- set_env EDITOR 'python3 fake_editor.py' -->
1313
1314```bash
1315$ python3 vote.py
13163 votes for: 2
13172 votes for: 3, 4
1318```"#;
1319        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1320        let result = rule.check(&ctx).unwrap();
1321        assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");
1322
1323        // Test various reference patterns inside HTML comments
1324        let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
1325Normal [text][undefined]
1326<!-- Another [comment][with] references -->"#;
1327        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1328        let result = rule.check(&ctx).unwrap();
1329        assert_eq!(
1330            result.len(),
1331            1,
1332            "Should only flag the undefined reference outside comments"
1333        );
1334        assert!(result[0].message.contains("undefined"));
1335
1336        // Test multi-line HTML comments
1337        let content = r#"<!--
1338[ref1]
1339[ref2][ref3]
1340-->
1341[actual][undefined]"#;
1342        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1343        let result = rule.check(&ctx).unwrap();
1344        assert_eq!(
1345            result.len(),
1346            1,
1347            "Should not flag references in multi-line HTML comments"
1348        );
1349        assert!(result[0].message.contains("undefined"));
1350
1351        // Test mixed scenarios
1352        let content = r#"<!-- Comment with [1:] pattern -->
1353Valid [link][ref]
1354<!-- More [refs][in][comments] -->
1355![image][missing]
1356
1357[ref]: https://example.com"#;
1358        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1359        let result = rule.check(&ctx).unwrap();
1360        assert_eq!(result.len(), 1, "Should only flag missing image reference");
1361        assert!(result[0].message.contains("missing"));
1362    }
1363
1364    #[test]
1365    fn test_frontmatter_ignored() {
1366        // Test for issue #24 - MD052 should not flag content inside frontmatter
1367        // Enable shortcut_syntax to test frontmatter handling
1368        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1369            shortcut_syntax: true,
1370            ..Default::default()
1371        });
1372
1373        // Test YAML frontmatter with arrays and references
1374        let content = r#"---
1375layout: post
1376title: "My Jekyll Post"
1377date: 2023-01-01
1378categories: blog
1379tags: ["test", "example"]
1380author: John Doe
1381---
1382
1383# My Blog Post
1384
1385This is the actual markdown content that should be linted.
1386
1387[undefined] reference should be flagged.
1388
1389## Section 1
1390
1391Some content here."#;
1392        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1393        let result = rule.check(&ctx).unwrap();
1394
1395        // Should only flag [undefined] in the content, not the ["test", "example"] array in frontmatter
1396        assert_eq!(
1397            result.len(),
1398            1,
1399            "Should only flag the undefined reference outside frontmatter"
1400        );
1401        assert!(result[0].message.contains("undefined"));
1402
1403        // Test TOML frontmatter
1404        let content = r#"+++
1405title = "My Post"
1406tags = ["example", "test"]
1407+++
1408
1409# Content
1410
1411[missing] reference should be flagged."#;
1412        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1413        let result = rule.check(&ctx).unwrap();
1414        assert_eq!(
1415            result.len(),
1416            1,
1417            "Should only flag the undefined reference outside TOML frontmatter"
1418        );
1419        assert!(result[0].message.contains("missing"));
1420    }
1421
1422    #[test]
1423    fn test_mkdocs_snippet_markers_not_flagged() {
1424        // Test for issue #68 - MkDocs snippet selection markers should not be flagged as undefined references
1425        // Enable shortcut_syntax to test snippet marker handling
1426        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1427            shortcut_syntax: true,
1428            ..Default::default()
1429        });
1430
1431        // Test snippet section markers
1432        let content = r#"# Document with MkDocs Snippets
1433
1434Some content here.
1435
1436# -8<- [start:remote-content]
1437
1438This is the remote content section.
1439
1440# -8<- [end:remote-content]
1441
1442More content here.
1443
1444<!-- --8<-- [start:another-section] -->
1445Content in another section
1446<!-- --8<-- [end:another-section] -->"#;
1447        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
1448        let result = rule.check(&ctx).unwrap();
1449
1450        // Should not flag any snippet markers as undefined references
1451        assert_eq!(
1452            result.len(),
1453            0,
1454            "Should not flag MkDocs snippet markers as undefined references"
1455        );
1456
1457        // Test that the snippet marker lines are properly skipped
1458        // but regular undefined references on other lines are still caught
1459        let content = r#"# Document
1460
1461# -8<- [start:section]
1462Content with [reference] inside snippet section
1463# -8<- [end:section]
1464
1465Regular [undefined] reference outside snippet markers."#;
1466        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
1467        let result = rule.check(&ctx).unwrap();
1468
1469        assert_eq!(
1470            result.len(),
1471            2,
1472            "Should flag undefined references but skip snippet marker lines"
1473        );
1474        // The references inside the content should be flagged, but not start: and end:
1475        assert!(result[0].message.contains("reference"));
1476        assert!(result[1].message.contains("undefined"));
1477
1478        // Test in standard mode - should flag the markers as undefined
1479        let content = r#"# Document
1480
1481# -8<- [start:section]
1482# -8<- [end:section]"#;
1483        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1484        let result = rule.check(&ctx).unwrap();
1485
1486        assert_eq!(
1487            result.len(),
1488            2,
1489            "In standard mode, snippet markers should be flagged as undefined references"
1490        );
1491    }
1492
#[test]
fn test_pandoc_citations_not_flagged() {
    // Pandoc/RMarkdown/Quarto citations such as `[@key]` must not be reported.
    // Shortcut reference checking is switched on so the bracketed citations
    // actually pass through the shortcut-syntax handling.
    use crate::config::MarkdownFlavor;

    let config = MD052Config {
        shortcut_syntax: true,
        ..Default::default()
    };
    let rule = MD052ReferenceLinkImages::from_config_struct(config);

    let markdown = r#"# Research Paper

We are using the **bookdown** package [@R-bookdown] in this sample book.
This was built on top of R Markdown and **knitr** [@xie2015].

Multiple citations [@citation1; @citation2; @citation3] are also supported.

Regular [undefined] reference should still be flagged.
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);
    let warnings = rule.check(&lint_ctx).unwrap();

    // One warning only: the plain [undefined] reference, none of the citations.
    assert_eq!(
        warnings.len(),
        1,
        "Should only flag the undefined reference, not Pandoc citations"
    );
    let only = &warnings[0];
    assert!(only.message.contains("undefined"));
}
1522
#[test]
fn test_pandoc_inline_footnotes_not_flagged() {
    // Pandoc inline footnotes (`^[...]`) must not be reported as undefined
    // references. Shortcut reference checking is enabled so the footnote
    // brackets are exercised by the shortcut-syntax handling.
    use crate::config::MarkdownFlavor;

    let config = MD052Config {
        shortcut_syntax: true,
        ..Default::default()
    };
    let rule = MD052ReferenceLinkImages::from_config_struct(config);

    let markdown = r#"# Math Document

You can use math in footnotes like this^[where we mention $p = \frac{a}{b}$].

Another footnote^[with some text and a [link](https://example.com)].

But this [reference] without ^ should be flagged.
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);
    let warnings = rule.check(&lint_ctx).unwrap();

    // The only warning expected is for the bracket that lacks the leading `^`.
    assert_eq!(
        warnings.len(),
        1,
        "Should only flag the regular reference, not inline footnotes"
    );
    let only = &warnings[0];
    assert!(only.message.contains("reference"));
}
1551
#[test]
fn test_github_alerts_not_flagged() {
    // Regression coverage for issue #60: GitHub alert markers such as
    // `> [!NOTE]` must not be reported as undefined references.
    // Shortcut reference checking is enabled so the alert brackets are
    // exercised by the shortcut-syntax handling.
    use crate::config::MarkdownFlavor;

    let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
        shortcut_syntax: true,
        ..Default::default()
    });

    // Lints one document under the Standard flavor and hands back the warnings.
    let lint = |markdown: &str| {
        let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);
        rule.check(&lint_ctx).unwrap()
    };

    // Scenario 1: several alert types followed by one genuinely undefined reference.
    let all_alert_kinds = r#"# Document with GitHub Alerts

> [!NOTE]
> This is a note alert.

> [!TIP]
> This is a tip alert.

> [!IMPORTANT]
> This is an important alert.

> [!WARNING]
> This is a warning alert.

> [!CAUTION]
> This is a caution alert.

Regular content with [undefined] reference."#;
    let warnings = lint(all_alert_kinds);

    // Only the trailing [undefined] is reported, and it sits on line 18.
    assert_eq!(
        warnings.len(),
        1,
        "Should only flag the undefined reference, not GitHub alerts"
    );
    let only = &warnings[0];
    assert!(only.message.contains("undefined"));
    assert_eq!(only.line, 18); // Line with [undefined]

    // Scenario 2: an alert whose body mentions a reference that is also used
    // outside the blockquote and never defined.
    let alert_with_body = r#"> [!TIP]
> Here's a useful tip about [something].
> Multiple lines are allowed.

[something] is mentioned but not defined."#;
    let warnings = lint(alert_with_body);

    // A single warning is produced for [something]; the original author notes
    // this may mirror markdownlint's behavior.
    assert_eq!(warnings.len(), 1, "Should flag undefined reference");
    assert!(warnings[0].message.contains("something"));

    // Scenario 3: an alert whose body uses a reference that IS defined.
    let alert_with_defined_ref = r#"> [!NOTE]
> See [reference] for more details.

[reference]: https://example.com"#;
    let warnings = lint(alert_with_defined_ref);

    // Nothing to report: [!NOTE] is an alert marker and [reference] has a definition.
    assert_eq!(warnings.len(), 0, "Should not flag GitHub alerts or defined references");
}
1618
#[test]
fn test_ignore_config() {
    // Names listed in the user-configured `ignore` option must be skipped.
    use crate::config::MarkdownFlavor;

    let ignored_names = ["Vec", "HashMap", "Option"];
    let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
        shortcut_syntax: true,
        ignore: ignored_names.iter().map(|name| name.to_string()).collect(),
    });

    let markdown = r#"# Document with Custom Types

Use [Vec] for dynamic arrays.
Use [HashMap] for key-value storage.
Use [Option] for nullable values.
Use [Result] for error handling.
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);
    let warnings = rule.check(&lint_ctx).unwrap();

    // [Result] is the one bracketed name that is absent from the ignore list.
    assert_eq!(warnings.len(), 1, "Should only flag names not in ignore");
    let only = &warnings[0];
    assert!(only.message.contains("Result"));
}
1642
#[test]
fn test_ignore_case_insensitive() {
    // An `ignore` entry must match bracketed names regardless of letter case.
    use crate::config::MarkdownFlavor;

    let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
        shortcut_syntax: true,
        ignore: vec![String::from("Vec")],
    });

    let markdown = r#"# Case Insensitivity Test

[Vec] should be ignored.
[vec] should also be ignored (different case, same match).
[VEC] should also be ignored (different case, same match).
[undefined] should be flagged (not in ignore list).
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);
    let warnings = rule.check(&lint_ctx).unwrap();

    // All three spellings of "Vec" are skipped, leaving only [undefined].
    assert_eq!(warnings.len(), 1, "Should only flag non-ignored reference");
    let only = &warnings[0];
    assert!(only.message.contains("undefined"));
}
1666
#[test]
fn test_ignore_empty_by_default() {
    // A rule built with `new()` (no ignore entries configured here) must keep
    // reporting undefined full reference links.
    use crate::config::MarkdownFlavor;

    let rule = MD052ReferenceLinkImages::new();
    let lint_ctx = LintContext::new("[text][undefined]", MarkdownFlavor::Standard, None);
    let warnings = rule.check(&lint_ctx).unwrap();

    // The single reference link is still flagged.
    assert_eq!(warnings.len(), 1);
    let only = &warnings[0];
    assert!(only.message.contains("undefined"));
}
1680
#[test]
fn test_ignore_with_reference_links() {
    // The ignore list must also apply to full reference link syntax: [text][ref].
    // Shortcut syntax is left disabled so only the full form is exercised.
    let config = MD052Config {
        shortcut_syntax: false,
        ignore: vec!["CustomType".to_string()],
    };
    let rule = MD052ReferenceLinkImages::from_config_struct(config);

    let content = r#"# Test

See [documentation][CustomType] for details.
See [other docs][MissingRef] for more.
"#;
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
    let result = rule.check(&ctx).unwrap();

    // Should flag [MissingRef] but not [CustomType].
    // The full warning list is embedded in the failure message, so no separate
    // debug printing is needed to diagnose a mismatch.
    assert_eq!(result.len(), 1, "Expected exactly 1 warning: {result:?}");
    // Note: reference IDs are lowercased in the message
    assert!(
        result[0].message.contains("missingref"),
        "Expected 'missingref' in message: {}",
        result[0].message
    );
}
1712
#[test]
fn test_ignore_multiple() {
    // Several ignore entries configured together must all be honored.
    use crate::config::MarkdownFlavor;

    let ignored_names = ["i32", "u64", "String", "Arc", "Mutex"];
    let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
        shortcut_syntax: true,
        ignore: ignored_names.iter().map(|name| name.to_string()).collect(),
    });

    let markdown = r#"# Types

[i32] [u64] [String] [Arc] [Mutex] [Box]
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);
    let warnings = rule.check(&lint_ctx).unwrap();

    // Per the original author's note: i32 and u64 are already covered by the
    // hardcoded skip list, whereas String is not, so String is what proves the
    // user-supplied list is applied. [Box] is not ignored and must be flagged.
    assert_eq!(warnings.len(), 1);
    let only = &warnings[0];
    assert!(only.message.contains("Box"));
}
1741
#[test]
fn test_nested_code_fences_reference_extraction() {
    // Checks that reference extraction relies on LintContext's pre-computed
    // code-block state, so nested fences are handled correctly: a 4-backtick
    // fence wrapping a 3-backtick fence keeps the inner ``` as plain content
    // rather than treating it as a code block boundary.
    use crate::config::MarkdownFlavor;

    let rule = MD052ReferenceLinkImages::new();

    let markdown = "````\n```\n[ref-inside]: https://example.com\n```\n````\n\n[Use this link][ref-inside]";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);
    let warnings = rule.check(&lint_ctx).unwrap();

    // The definition sits inside the outer ```` block, so it does not count as
    // a definition and the later [ref-inside] usage is reported as undefined.
    assert_eq!(
        warnings.len(),
        1,
        "Reference defined inside nested code fence should not count as a definition"
    );
    let only = &warnings[0];
    assert!(only.message.contains("ref-inside"));
}
1764
#[test]
fn test_pandoc_flavor_skips_citations() {
    // `[@key]` is a Pandoc bibliography citation, not an undefined reference
    // link, so MD052 must stay silent under the Pandoc flavor (the same skip
    // that exists for Quarto, per the original note).
    use crate::config::MarkdownFlavor;

    let rule = MD052ReferenceLinkImages::new();
    let lint_ctx = LintContext::new("See [@smith2020] for details.\n", MarkdownFlavor::Pandoc, None);
    let result = rule.check(&lint_ctx).unwrap();

    assert!(
        result.is_empty(),
        "MD052 should skip Pandoc citations under Pandoc flavor: {result:?}"
    );
}
1778
#[test]
fn md052_pandoc_skips_implicit_header_refs_with_shortcut_syntax() {
    // An implicit header reference is a bracketed `[Section name]` whose text
    // matches the Pandoc slug of a heading. According to the original note,
    // pulldown-cmark drops these as broken links before they reach
    // `ctx.links`, so they are only seen by MD052's shortcut-syntax regex
    // path. `shortcut_syntax = true` is therefore required to reach the
    // SHORTCUT_REF_REGEX scan where the Pandoc implicit-header-ref guard lives.
    use crate::config::MarkdownFlavor;

    let config = MD052Config {
        shortcut_syntax: true,
        ..Default::default()
    };
    let rule = MD052ReferenceLinkImages::from_config_struct(config);
    let markdown = "# My Section\n\nSee [My Section] for details.\n";

    // Lints the same document under whichever flavor is requested.
    let lint_as = |flavor: MarkdownFlavor| {
        let lint_ctx = LintContext::new(markdown, flavor, None);
        rule.check(&lint_ctx).unwrap()
    };

    // Standard flavor has no implicit-header-ref resolution, so the shortcut
    // check reports the bracketed text as undefined.
    let std_result = lint_as(MarkdownFlavor::Standard);
    assert_eq!(
        std_result.len(),
        1,
        "Standard flavor with shortcut_syntax should flag [My Section]: {std_result:?}"
    );

    // Pandoc flavor resolves the bracket against the heading, so nothing is reported.
    let pandoc_result = lint_as(MarkdownFlavor::Pandoc);
    assert!(
        pandoc_result.is_empty(),
        "Pandoc flavor should accept [My Section] as an implicit header ref: {pandoc_result:?}"
    );
}
1812}