Skip to main content

rumdl_lib/rules/
md052_reference_links_images.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::SHORTCUT_REF_REGEX;
5use crate::utils::skip_context::{is_in_math_context, is_in_table_cell};
6use regex::Regex;
7use std::collections::{HashMap, HashSet};
8use std::sync::LazyLock;
9
10mod md052_config;
11use md052_config::MD052Config;
12
13// Pattern to match reference definitions [ref]: url
14// Note: \S* instead of \S+ to allow empty definitions like [ref]:
15// The capturing group handles nested brackets to support cases like [`union[t, none]`]:
16static REF_REGEX: LazyLock<Regex> =
17    LazyLock::new(|| Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap());
18
19// Pattern for list items to exclude from reference checks (standard regex is fine)
20static LIST_ITEM_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap());
21
22// Pattern for output example sections (standard regex is fine)
23static OUTPUT_EXAMPLE_START: LazyLock<Regex> =
24    LazyLock::new(|| Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap());
25
26// Pattern for GitHub alerts/callouts in blockquotes (e.g., > [!NOTE], > [!TIP], etc.)
27// Extended to include additional common alert types
28static GITHUB_ALERT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
29    Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]")
30        .unwrap()
31});
32
33// Pattern to detect URLs that may contain brackets (IPv6, API endpoints, etc.)
34// This pattern specifically looks for:
35// - IPv6 addresses: https://[::1] or https://[2001:db8::1]
36// - IPv6 with zone IDs: https://[fe80::1%eth0]
37// - IPv6 mixed notation: https://[::ffff:192.0.2.1]
38// - API paths with array notation: https://api.example.com/users[0]
39// But NOT markdown reference links that happen to follow URLs
40static URL_WITH_BRACKETS: LazyLock<Regex> =
41    LazyLock::new(|| Regex::new(r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])").unwrap());
42
/// Rule MD052: Reference links and images should use a label that is defined
///
/// See [docs/md052.md](../../docs/md052.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when a reference link or image uses a reference that isn't defined.
///
/// ## Configuration
///
/// - `shortcut-syntax`: Whether to check shortcut reference syntax `[text]` (default: false)
/// - `ignore`: Reference names that are never reported (compared ASCII case-insensitively)
///
/// By default, only full (`[text][ref]`) and collapsed (`[text][]`) reference syntax is checked.
/// Shortcut syntax is ambiguous because `[text]` could be a reference link OR just text in brackets.
#[derive(Clone, Default)]
pub struct MD052ReferenceLinkImages {
    /// Rule settings; the rule reads its `ignore` and `shortcut_syntax` fields.
    config: MD052Config,
}
59
60impl MD052ReferenceLinkImages {
61    pub fn new() -> Self {
62        Self {
63            config: MD052Config::default(),
64        }
65    }
66
    /// Create the rule from an already-built configuration.
    ///
    /// The given `config` is stored as-is; no validation is performed here.
    pub fn from_config_struct(config: MD052Config) -> Self {
        Self { config }
    }
70
71    /// Strip surrounding backticks from a string
72    /// Used for MkDocs auto-reference detection where `module.Class` should be treated as module.Class
73    fn strip_backticks(s: &str) -> &str {
74        s.trim_start_matches('`').trim_end_matches('`')
75    }
76
77    /// Check if a string is a valid Python identifier
78    /// Used for MkDocs auto-reference detection where single-word backtick-wrapped identifiers
79    /// like `str`, `int`, etc. should be accepted as valid auto-references
80    fn is_valid_python_identifier(s: &str) -> bool {
81        if s.is_empty() {
82            return false;
83        }
84        let first_char = s.chars().next().unwrap();
85        if !first_char.is_ascii_alphabetic() && first_char != '_' {
86            return false;
87        }
88        s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
89    }
90
91    /// Check if text matches a known non-reference pattern that should be skipped.
92    ///
93    /// These are deterministic patterns from markdown extensions or code examples,
94    /// not heuristics. Returns true for:
95    /// - User-configured names via `ignore` config option
96    /// - Markdown extensions: [^footnote], [@citation], [!alert], [TOC]
97    /// - Programming syntax: [T], [null], [i32], ["string"]
98    /// - Descriptive text: [default: value], [0-9]
99    fn is_known_non_reference_pattern(&self, text: &str) -> bool {
100        // Check user-configured ignore list first (case-insensitive match)
101        // Reference IDs are normalized to lowercase during parsing,
102        // so we use case-insensitive comparison for user convenience
103        if self.config.ignore.iter().any(|p| p.eq_ignore_ascii_case(text)) {
104            return true;
105        }
106        // Skip numeric patterns (array indices, ranges)
107        if text.chars().all(|c| c.is_ascii_digit()) {
108            return true;
109        }
110
111        // Skip numeric ranges like [1:3], [0:10], etc.
112        if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
113            return true;
114        }
115
116        // Skip patterns that look like config sections [tool.something], [section.subsection]
117        // But not if they contain other non-alphanumeric chars like hyphens, underscores, or backticks
118        // Backticks indicate intentional code formatting in a reference name (e.g., [`module.Class`])
119        if text.contains('.')
120            && !text.contains(' ')
121            && !text.contains('-')
122            && !text.contains('_')
123            && !text.contains('`')
124        {
125            // Config sections typically have dots, no spaces, and only alphanumeric + dots
126            return true;
127        }
128
129        // Skip glob/wildcard patterns like [*], [...], [**]
130        if text == "*" || text == "..." || text == "**" {
131            return true;
132        }
133
134        // Skip patterns that look like file paths [dir/file], [src/utils]
135        if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
136            return true;
137        }
138
139        // Skip programming type annotations like [int, str], [Dict[str, Any]]
140        // These typically have commas and/or nested brackets
141        if text.contains(',') || text.contains('[') || text.contains(']') {
142            // Check if it looks like a type annotation pattern
143            return true;
144        }
145
146        // Note: We don't filter out patterns with backticks because backticks in reference names
147        // are valid markdown syntax, e.g., [`dataclasses.InitVar`] is a valid reference name
148
149        // Skip patterns that look like module/class paths ONLY if they don't have backticks
150        // Backticks indicate intentional code formatting in a reference name
151        // e.g., skip [dataclasses.initvar] but allow [`typing.ClassVar`]
152        if !text.contains('`')
153            && text.contains('.')
154            && !text.contains(' ')
155            && !text.contains('-')
156            && !text.contains('_')
157        {
158            return true;
159        }
160
161        // Note: We don't filter based on word count anymore because legitimate references
162        // can have many words, like "python language reference for import statements"
163        // Word count filtering was causing false positives where valid references were
164        // being incorrectly flagged as unused
165
166        // Skip patterns that are just punctuation or operators
167        if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
168            return true;
169        }
170
171        // Skip very short non-word patterns (likely operators or syntax)
172        if text.len() <= 2 && !text.chars().all(|c| c.is_alphabetic()) {
173            return true;
174        }
175
176        // Skip quoted patterns like ["E501"], ["ALL"], ["E", "F"]
177        if (text.starts_with('"') && text.ends_with('"'))
178            || (text.starts_with('\'') && text.ends_with('\''))
179            || text.contains('"')
180            || text.contains('\'')
181        {
182            return true;
183        }
184
185        // Skip descriptive patterns with colon like [default: the project root]
186        // But allow simple numeric ranges which are handled above
187        if text.contains(':') && text.contains(' ') {
188            return true;
189        }
190
191        // Skip alert/admonition patterns like [!WARN], [!NOTE], etc.
192        if text.starts_with('!') {
193            return true;
194        }
195
196        // Skip footnote syntax like [^1], [^note], etc.
197        // Footnotes start with ^ and are a common markdown extension
198        if text.starts_with('^') {
199            return true;
200        }
201
202        // Skip Pandoc/RMarkdown/Quarto citation syntax like [@citation-key]
203        // Citations in these formats start with @ inside brackets
204        if text.starts_with('@') {
205            return true;
206        }
207
208        // Skip table of contents markers like [TOC]
209        // Used by Python-Markdown and other processors
210        if text == "TOC" {
211            return true;
212        }
213
214        // Skip single uppercase letters (likely type parameters) like [T], [U], [K], [V]
215        if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
216            return true;
217        }
218
219        // Skip common programming type names, literals, and short identifiers
220        // that are likely not markdown references
221        let common_non_refs = [
222            // Programming types
223            "object",
224            "Object",
225            "any",
226            "Any",
227            "inv",
228            "void",
229            "bool",
230            "int",
231            "float",
232            "str",
233            "char",
234            "i8",
235            "i16",
236            "i32",
237            "i64",
238            "i128",
239            "isize",
240            "u8",
241            "u16",
242            "u32",
243            "u64",
244            "u128",
245            "usize",
246            "f32",
247            "f64",
248            // JavaScript/JSON literals (excluding "undefined" which is too ambiguous)
249            "null",
250            "true",
251            "false",
252            "NaN",
253            "Infinity",
254            // Common JavaScript output patterns
255            "object Object",
256        ];
257
258        if common_non_refs.contains(&text) {
259            return true;
260        }
261
262        false
263    }
264
265    /// Check if a byte position is inside any code span
266    fn is_in_code_span(byte_pos: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
267        code_spans
268            .iter()
269            .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
270    }
271
272    /// Check if a byte position is within an HTML tag
273    fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
274        // Check HTML tags
275        for html_tag in ctx.html_tags().iter() {
276            if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
277                return true;
278            }
279        }
280        false
281    }
282
283    fn extract_references(&self, ctx: &crate::lint_context::LintContext) -> HashSet<String> {
284        use crate::utils::skip_context::is_mkdocs_snippet_line;
285
286        let mut references = HashSet::new();
287
288        for (line_num, line) in ctx.content.lines().enumerate() {
289            // Use LintContext's pre-computed code block info (1-indexed)
290            if let Some(line_info) = ctx.line_info(line_num + 1)
291                && line_info.in_code_block
292            {
293                continue;
294            }
295
296            // Skip lines that look like MkDocs snippet markers (only in MkDocs mode)
297            if is_mkdocs_snippet_line(line, ctx.flavor) {
298                continue;
299            }
300
301            // Check for abbreviation syntax (*[ABBR]: Definition) and skip it
302            // Abbreviations are not reference links and should not be tracked
303            if line.trim_start().starts_with("*[") {
304                continue;
305            }
306
307            if let Some(cap) = REF_REGEX.captures(line) {
308                // Store references in lowercase for case-insensitive comparison
309                if let Some(reference) = cap.get(1) {
310                    references.insert(reference.as_str().to_lowercase());
311                }
312            }
313        }
314
315        references
316    }
317
318    fn find_undefined_references(
319        &self,
320        references: &HashSet<String>,
321        ctx: &crate::lint_context::LintContext,
322        mkdocs_mode: bool,
323    ) -> Vec<(usize, usize, usize, String)> {
324        let mut undefined = Vec::new();
325        let mut reported_refs = HashMap::new();
326        let mut in_example_section = false;
327
328        // Get code spans once for the entire function
329        let code_spans = ctx.code_spans();
330
331        // Use cached data for reference links and images
332        for link in &ctx.links {
333            if !link.is_reference {
334                continue; // Skip inline links
335            }
336
337            // Skip links inside Jinja templates
338            if ctx.is_in_jinja_range(link.byte_offset) {
339                continue;
340            }
341
342            // Skip links inside code spans
343            if Self::is_in_code_span(link.byte_offset, &code_spans) {
344                continue;
345            }
346
347            // Skip links inside HTML comments (uses pre-computed ranges)
348            if ctx.is_in_html_comment(link.byte_offset) {
349                continue;
350            }
351
352            // Skip links inside HTML tags
353            if Self::is_in_html_tag(ctx, link.byte_offset) {
354                continue;
355            }
356
357            // Skip links inside math contexts
358            if is_in_math_context(ctx, link.byte_offset) {
359                continue;
360            }
361
362            // Skip links inside table cells
363            if is_in_table_cell(ctx, link.line, link.start_col) {
364                continue;
365            }
366
367            // Skip links inside frontmatter
368            if ctx.line_info(link.line).is_some_and(|info| info.in_front_matter) {
369                continue;
370            }
371
372            // Skip Quarto/Pandoc citations ([@citation], @citation)
373            // Citations look like reference links but are bibliography references
374            if ctx.flavor == crate::config::MarkdownFlavor::Quarto && ctx.is_in_citation(link.byte_offset) {
375                continue;
376            }
377
378            // Skip links inside shortcodes ({{< ... >}} or {{% ... %}})
379            // Shortcodes may contain template syntax that looks like reference links
380            if ctx.is_in_shortcode(link.byte_offset) {
381                continue;
382            }
383
384            if let Some(ref_id) = &link.reference_id {
385                let reference_lower = ref_id.to_lowercase();
386
387                // Skip known non-reference patterns (markdown extensions, code examples)
388                if self.is_known_non_reference_pattern(ref_id) {
389                    continue;
390                }
391
392                // Skip MkDocs auto-references if in MkDocs mode
393                // Check both the reference_id and the link text for shorthand references
394                // Strip backticks since MkDocs resolves `module.Class` as module.Class
395                let stripped_ref = Self::strip_backticks(ref_id);
396                let stripped_text = Self::strip_backticks(&link.text);
397                if mkdocs_mode
398                    && (is_mkdocs_auto_reference(stripped_ref)
399                        || is_mkdocs_auto_reference(stripped_text)
400                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
401                        || (link.text.as_ref() != stripped_text && Self::is_valid_python_identifier(stripped_text)))
402                {
403                    continue;
404                }
405
406                // Check if reference is defined
407                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
408                    // Check if the line is in an example section or list item
409                    if let Some(line_info) = ctx.line_info(link.line) {
410                        if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
411                            in_example_section = true;
412                            continue;
413                        }
414
415                        if in_example_section {
416                            continue;
417                        }
418
419                        // Skip list items
420                        if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
421                            continue;
422                        }
423
424                        // Skip lines that are HTML content
425                        let trimmed = line_info.content(ctx.content).trim_start();
426                        if trimmed.starts_with('<') {
427                            continue;
428                        }
429                    }
430
431                    let match_len = link.byte_end - link.byte_offset;
432                    undefined.push((link.line - 1, link.start_col, match_len, ref_id.to_string()));
433                    reported_refs.insert(reference_lower, true);
434                }
435            }
436        }
437
438        // Use cached data for reference images
439        for image in &ctx.images {
440            if !image.is_reference {
441                continue; // Skip inline images
442            }
443
444            // Skip images inside Jinja templates
445            if ctx.is_in_jinja_range(image.byte_offset) {
446                continue;
447            }
448
449            // Skip images inside code spans
450            if Self::is_in_code_span(image.byte_offset, &code_spans) {
451                continue;
452            }
453
454            // Skip images inside HTML comments (uses pre-computed ranges)
455            if ctx.is_in_html_comment(image.byte_offset) {
456                continue;
457            }
458
459            // Skip images inside HTML tags
460            if Self::is_in_html_tag(ctx, image.byte_offset) {
461                continue;
462            }
463
464            // Skip images inside math contexts
465            if is_in_math_context(ctx, image.byte_offset) {
466                continue;
467            }
468
469            // Skip images inside table cells
470            if is_in_table_cell(ctx, image.line, image.start_col) {
471                continue;
472            }
473
474            // Skip images inside frontmatter
475            if ctx.line_info(image.line).is_some_and(|info| info.in_front_matter) {
476                continue;
477            }
478
479            if let Some(ref_id) = &image.reference_id {
480                let reference_lower = ref_id.to_lowercase();
481
482                // Skip known non-reference patterns (markdown extensions, code examples)
483                if self.is_known_non_reference_pattern(ref_id) {
484                    continue;
485                }
486
487                // Skip MkDocs auto-references if in MkDocs mode
488                // Check both the reference_id and the alt text for shorthand references
489                // Strip backticks since MkDocs resolves `module.Class` as module.Class
490                let stripped_ref = Self::strip_backticks(ref_id);
491                let stripped_alt = Self::strip_backticks(&image.alt_text);
492                if mkdocs_mode
493                    && (is_mkdocs_auto_reference(stripped_ref)
494                        || is_mkdocs_auto_reference(stripped_alt)
495                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
496                        || (image.alt_text.as_ref() != stripped_alt && Self::is_valid_python_identifier(stripped_alt)))
497                {
498                    continue;
499                }
500
501                // Check if reference is defined
502                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
503                    // Check if the line is in an example section or list item
504                    if let Some(line_info) = ctx.line_info(image.line) {
505                        if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
506                            in_example_section = true;
507                            continue;
508                        }
509
510                        if in_example_section {
511                            continue;
512                        }
513
514                        // Skip list items
515                        if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
516                            continue;
517                        }
518
519                        // Skip lines that are HTML content
520                        let trimmed = line_info.content(ctx.content).trim_start();
521                        if trimmed.starts_with('<') {
522                            continue;
523                        }
524                    }
525
526                    let match_len = image.byte_end - image.byte_offset;
527                    undefined.push((image.line - 1, image.start_col, match_len, ref_id.to_string()));
528                    reported_refs.insert(reference_lower, true);
529                }
530            }
531        }
532
533        // Build a set of byte ranges that are already covered by parsed links/images
534        let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
535
536        // Add ranges from parsed links
537        for link in &ctx.links {
538            covered_ranges.push((link.byte_offset, link.byte_end));
539        }
540
541        // Add ranges from parsed images
542        for image in &ctx.images {
543            covered_ranges.push((image.byte_offset, image.byte_end));
544        }
545
546        // Sort ranges by start position
547        covered_ranges.sort_by_key(|&(start, _)| start);
548
549        // Handle shortcut references [text] which aren't captured in ctx.links
550        // Only check these if shortcut_syntax is enabled (default: false)
551        // Shortcut syntax is ambiguous because [text] could be a reference link
552        // OR just text in brackets (like spec notation in quotes)
553        if !self.config.shortcut_syntax {
554            return undefined;
555        }
556
557        // Need to use regex for shortcut references
558        let lines = ctx.raw_lines();
559        in_example_section = false; // Reset for line-by-line processing
560
561        for (line_num, line) in lines.iter().enumerate() {
562            // Skip lines in frontmatter or code blocks using LintContext's pre-computed info
563            if let Some(line_info) = ctx.line_info(line_num + 1)
564                && (line_info.in_front_matter || line_info.in_code_block)
565            {
566                continue;
567            }
568
569            // Check for example sections
570            if OUTPUT_EXAMPLE_START.is_match(line) {
571                in_example_section = true;
572                continue;
573            }
574
575            if in_example_section {
576                // Check if we're exiting the example section (another heading)
577                if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
578                    in_example_section = false;
579                } else {
580                    continue;
581                }
582            }
583
584            // Skip list items
585            if LIST_ITEM_REGEX.is_match(line) {
586                continue;
587            }
588
589            // Skip lines that are HTML content
590            let trimmed_line = line.trim_start();
591            if trimmed_line.starts_with('<') {
592                continue;
593            }
594
595            // Skip GitHub alerts/callouts (e.g., > [!TIP])
596            if GITHUB_ALERT_REGEX.is_match(line) {
597                continue;
598            }
599
600            // Skip abbreviation definitions (*[ABBR]: Definition)
601            // These are not reference links and should not be checked
602            if trimmed_line.starts_with("*[") {
603                continue;
604            }
605
606            // Collect positions of brackets that are part of URLs (IPv6, etc.)
607            // so we can exclude them from reference checking
608            let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
609            for mat in URL_WITH_BRACKETS.find_iter(line) {
610                // Find all bracket pairs within this URL match
611                let url_str = mat.as_str();
612                let url_start = mat.start();
613
614                // Find brackets within the URL (e.g., in https://[::1]:8080)
615                let mut idx = 0;
616                while idx < url_str.len() {
617                    if let Some(bracket_start) = url_str[idx..].find('[') {
618                        let bracket_start_abs = url_start + idx + bracket_start;
619                        if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
620                            let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
621                            url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
622                            idx += bracket_start + bracket_end + 2;
623                        } else {
624                            break;
625                        }
626                    } else {
627                        break;
628                    }
629                }
630            }
631
632            // Check shortcut references: [reference]
633            if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
634                for cap in captures {
635                    if let Some(ref_match) = cap.get(1) {
636                        // Check if this bracket is part of a URL (IPv6, etc.)
637                        let bracket_start = cap.get(0).unwrap().start();
638                        let bracket_end = cap.get(0).unwrap().end();
639
640                        // Skip if this bracket pair is within any URL bracket range
641                        let is_in_url = url_bracket_ranges
642                            .iter()
643                            .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
644
645                        if is_in_url {
646                            continue;
647                        }
648
649                        // Skip Pandoc/RMarkdown inline footnotes: ^[text]
650                        // Check if there's a ^ immediately before the opening bracket
651                        if bracket_start > 0 {
652                            // bracket_start is a byte offset, so we need to check the byte before
653                            if let Some(byte) = line.as_bytes().get(bracket_start.saturating_sub(1))
654                                && *byte == b'^'
655                            {
656                                continue; // This is an inline footnote, skip it
657                            }
658                        }
659
660                        let reference = ref_match.as_str();
661                        let reference_lower = reference.to_lowercase();
662
663                        // Skip known non-reference patterns (markdown extensions, code examples)
664                        if self.is_known_non_reference_pattern(reference) {
665                            continue;
666                        }
667
668                        // Skip GitHub alerts (including extended types)
669                        if let Some(alert_type) = reference.strip_prefix('!')
670                            && matches!(
671                                alert_type,
672                                "NOTE"
673                                    | "TIP"
674                                    | "WARNING"
675                                    | "IMPORTANT"
676                                    | "CAUTION"
677                                    | "INFO"
678                                    | "SUCCESS"
679                                    | "FAILURE"
680                                    | "DANGER"
681                                    | "BUG"
682                                    | "EXAMPLE"
683                                    | "QUOTE"
684                            )
685                        {
686                            continue;
687                        }
688
689                        // Skip MkDocs snippet section markers like [start:section] or [end:section]
690                        // when they appear as part of snippet syntax (e.g., # -8<- [start:section])
691                        if mkdocs_mode
692                            && (reference.starts_with("start:") || reference.starts_with("end:"))
693                            && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
694                                || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
695                        {
696                            continue;
697                        }
698
699                        // Skip MkDocs auto-references if in MkDocs mode
700                        // Strip backticks since MkDocs resolves `module.Class` as module.Class
701                        let stripped_ref = Self::strip_backticks(reference);
702                        if mkdocs_mode
703                            && (is_mkdocs_auto_reference(stripped_ref)
704                                || (reference != stripped_ref && Self::is_valid_python_identifier(stripped_ref)))
705                        {
706                            continue;
707                        }
708
709                        if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
710                            let full_match = cap.get(0).unwrap();
711                            let col = full_match.start();
712                            let line_start_byte = ctx.line_offsets[line_num];
713                            let byte_pos = line_start_byte + col;
714
715                            // Skip if inside code span
716                            let code_spans = ctx.code_spans();
717                            if Self::is_in_code_span(byte_pos, &code_spans) {
718                                continue;
719                            }
720
721                            // Skip if inside Jinja template
722                            if ctx.is_in_jinja_range(byte_pos) {
723                                continue;
724                            }
725
726                            // Skip if inside code block
727                            if crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block(
728                                &ctx.code_blocks,
729                                byte_pos,
730                            ) {
731                                continue;
732                            }
733
734                            // Skip if inside HTML comment (uses pre-computed ranges)
735                            if ctx.is_in_html_comment(byte_pos) {
736                                continue;
737                            }
738
739                            // Skip if inside HTML tag
740                            if Self::is_in_html_tag(ctx, byte_pos) {
741                                continue;
742                            }
743
744                            // Skip if inside math context
745                            if is_in_math_context(ctx, byte_pos) {
746                                continue;
747                            }
748
749                            // Skip if inside table cell
750                            if is_in_table_cell(ctx, line_num + 1, col) {
751                                continue;
752                            }
753
754                            let byte_end = byte_pos + (full_match.end() - full_match.start());
755
756                            // Check if this shortcut ref overlaps with any parsed link/image
757                            let mut is_covered = false;
758                            for &(range_start, range_end) in &covered_ranges {
759                                if range_start <= byte_pos && byte_end <= range_end {
760                                    // This shortcut ref is completely within a parsed link/image
761                                    is_covered = true;
762                                    break;
763                                }
764                                if range_start > byte_end {
765                                    // No need to check further (ranges are sorted)
766                                    break;
767                                }
768                            }
769
770                            if is_covered {
771                                continue;
772                            }
773
774                            // More sophisticated checks to avoid false positives
775
776                            // Check 1: If preceded by ], this might be part of [text][ref]
777                            // Look for the pattern ...][ref] and check if there's a matching [ before
778                            let line_chars: Vec<char> = line.chars().collect();
779                            if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
780                                // Look backwards for a [ that would make this [text][ref]
781                                let mut bracket_count = 1; // We already saw one ]
782                                let mut check_pos = col.saturating_sub(2);
783                                let mut found_opening = false;
784
785                                while check_pos > 0 && check_pos < line_chars.len() {
786                                    match line_chars.get(check_pos) {
787                                        Some(&']') => bracket_count += 1,
788                                        Some(&'[') => {
789                                            bracket_count -= 1;
790                                            if bracket_count == 0 {
791                                                // Check if this [ is escaped
792                                                if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
793                                                    found_opening = true;
794                                                }
795                                                break;
796                                            }
797                                        }
798                                        _ => {}
799                                    }
800                                    if check_pos == 0 {
801                                        break;
802                                    }
803                                    check_pos = check_pos.saturating_sub(1);
804                                }
805
806                                if found_opening {
807                                    // This is part of [text][ref], skip it
808                                    continue;
809                                }
810                            }
811
812                            // Check 2: If there's an escaped bracket pattern before this
813                            // e.g., \[text\][ref], the [ref] shouldn't be treated as a shortcut
814                            let before_text = &line[..col];
815                            if before_text.contains("\\]") {
816                                // Check if there's a \[ before the \]
817                                if let Some(escaped_close_pos) = before_text.rfind("\\]") {
818                                    let search_text = &before_text[..escaped_close_pos];
819                                    if search_text.contains("\\[") {
820                                        // This looks like \[...\][ref], skip it
821                                        continue;
822                                    }
823                                }
824                            }
825
826                            let match_len = full_match.end() - full_match.start();
827                            undefined.push((line_num, col, match_len, reference.to_string()));
828                            reported_refs.insert(reference_lower, true);
829                        }
830                    }
831                }
832            }
833        }
834
835        undefined
836    }
837}
838
839impl Rule for MD052ReferenceLinkImages {
840    fn name(&self) -> &'static str {
841        "MD052"
842    }
843
844    fn description(&self) -> &'static str {
845        "Reference links and images should use a reference that exists"
846    }
847
848    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
849        let content = ctx.content;
850        let mut warnings = Vec::new();
851
852        // OPTIMIZATION: Early exit if no brackets at all
853        if !content.contains('[') {
854            return Ok(warnings);
855        }
856
857        // Check if we're in MkDocs mode from the context
858        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
859
860        let references = self.extract_references(ctx);
861
862        // Use optimized detection method with cached link/image data
863        let lines = ctx.raw_lines();
864        for (line_num, col, match_len, reference) in self.find_undefined_references(&references, ctx, mkdocs_mode) {
865            let line_content = lines.get(line_num).unwrap_or(&"");
866
867            // Calculate precise character range for the entire undefined reference
868            let (start_line, start_col, end_line, end_col) =
869                calculate_match_range(line_num + 1, line_content, col, match_len);
870
871            warnings.push(LintWarning {
872                rule_name: Some(self.name().to_string()),
873                line: start_line,
874                column: start_col,
875                end_line,
876                end_column: end_col,
877                message: format!("Reference '{reference}' not found"),
878                severity: Severity::Warning,
879                fix: None,
880            });
881        }
882
883        Ok(warnings)
884    }
885
886    /// Check if this rule should be skipped for performance
887    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
888        // Skip if content is empty or has no links/images
889        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
890    }
891
892    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
893        let content = ctx.content;
894        // No automatic fix available for undefined references
895        Ok(content.to_string())
896    }
897
898    fn as_any(&self) -> &dyn std::any::Any {
899        self
900    }
901
902    fn default_config_section(&self) -> Option<(String, toml::Value)> {
903        let json_value = serde_json::to_value(&self.config).ok()?;
904        Some((
905            self.name().to_string(),
906            crate::rule_config_serde::json_to_toml_value(&json_value)?,
907        ))
908    }
909
910    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
911    where
912        Self: Sized,
913    {
914        let rule_config = crate::rule_config_serde::load_rule_config::<MD052Config>(config);
915        Box::new(Self::from_config_struct(rule_config))
916    }
917}
918
919#[cfg(test)]
920mod tests {
921    use super::*;
922    use crate::lint_context::LintContext;
923
924    #[test]
925    fn test_valid_reference_link() {
926        let rule = MD052ReferenceLinkImages::new();
927        let content = "[text][ref]\n\n[ref]: https://example.com";
928        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
929        let result = rule.check(&ctx).unwrap();
930
931        assert_eq!(result.len(), 0);
932    }
933
934    #[test]
935    fn test_undefined_reference_link() {
936        let rule = MD052ReferenceLinkImages::new();
937        let content = "[text][undefined]";
938        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
939        let result = rule.check(&ctx).unwrap();
940
941        assert_eq!(result.len(), 1);
942        assert!(result[0].message.contains("Reference 'undefined' not found"));
943    }
944
945    #[test]
946    fn test_valid_reference_image() {
947        let rule = MD052ReferenceLinkImages::new();
948        let content = "![alt][img]\n\n[img]: image.jpg";
949        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
950        let result = rule.check(&ctx).unwrap();
951
952        assert_eq!(result.len(), 0);
953    }
954
955    #[test]
956    fn test_undefined_reference_image() {
957        let rule = MD052ReferenceLinkImages::new();
958        let content = "![alt][missing]";
959        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
960        let result = rule.check(&ctx).unwrap();
961
962        assert_eq!(result.len(), 1);
963        assert!(result[0].message.contains("Reference 'missing' not found"));
964    }
965
966    #[test]
967    fn test_case_insensitive_references() {
968        let rule = MD052ReferenceLinkImages::new();
969        let content = "[Text][REF]\n\n[ref]: https://example.com";
970        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
971        let result = rule.check(&ctx).unwrap();
972
973        assert_eq!(result.len(), 0);
974    }
975
976    #[test]
977    fn test_shortcut_reference_valid() {
978        let rule = MD052ReferenceLinkImages::new();
979        let content = "[ref]\n\n[ref]: https://example.com";
980        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
981        let result = rule.check(&ctx).unwrap();
982
983        assert_eq!(result.len(), 0);
984    }
985
986    #[test]
987    fn test_shortcut_reference_undefined_with_shortcut_syntax_enabled() {
988        // Shortcut syntax checking is disabled by default
989        // Enable it to test undefined shortcut references
990        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
991            shortcut_syntax: true,
992            ..Default::default()
993        });
994        let content = "[undefined]";
995        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
996        let result = rule.check(&ctx).unwrap();
997
998        assert_eq!(result.len(), 1);
999        assert!(result[0].message.contains("Reference 'undefined' not found"));
1000    }
1001
1002    #[test]
1003    fn test_shortcut_reference_not_checked_by_default() {
1004        // By default, shortcut references are NOT checked (matches markdownlint behavior)
1005        let rule = MD052ReferenceLinkImages::new();
1006        let content = "[undefined]";
1007        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1008        let result = rule.check(&ctx).unwrap();
1009
1010        // Should be 0 because shortcut_syntax is false by default
1011        assert_eq!(result.len(), 0);
1012    }
1013
1014    #[test]
1015    fn test_inline_links_ignored() {
1016        let rule = MD052ReferenceLinkImages::new();
1017        let content = "[text](https://example.com)";
1018        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1019        let result = rule.check(&ctx).unwrap();
1020
1021        assert_eq!(result.len(), 0);
1022    }
1023
1024    #[test]
1025    fn test_inline_images_ignored() {
1026        let rule = MD052ReferenceLinkImages::new();
1027        let content = "![alt](image.jpg)";
1028        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1029        let result = rule.check(&ctx).unwrap();
1030
1031        assert_eq!(result.len(), 0);
1032    }
1033
1034    #[test]
1035    fn test_references_in_code_blocks_ignored() {
1036        let rule = MD052ReferenceLinkImages::new();
1037        let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
1038        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1039        let result = rule.check(&ctx).unwrap();
1040
1041        assert_eq!(result.len(), 0);
1042    }
1043
1044    #[test]
1045    fn test_references_in_inline_code_ignored() {
1046        let rule = MD052ReferenceLinkImages::new();
1047        let content = "`[undefined]`";
1048        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1049        let result = rule.check(&ctx).unwrap();
1050
1051        // References inside inline code spans should be ignored
1052        assert_eq!(result.len(), 0);
1053    }
1054
1055    #[test]
1056    fn test_comprehensive_inline_code_detection() {
1057        // Enable shortcut_syntax to test comprehensive detection
1058        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1059            shortcut_syntax: true,
1060            ..Default::default()
1061        });
1062        let content = r#"# Test
1063
1064This `[inside]` should be ignored.
1065This [outside] should be flagged.
1066Reference links `[text][ref]` in code are ignored.
1067Regular reference [text][missing] should be flagged.
1068Images `![alt][img]` in code are ignored.
1069Regular image ![alt][badimg] should be flagged.
1070
1071Multiple `[one]` and `[two]` in code ignored, but [three] is not.
1072
1073```
1074[code block content] should be ignored
1075```
1076
1077`Multiple [refs] in [same] code span` ignored."#;
1078
1079        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1080        let result = rule.check(&ctx).unwrap();
1081
1082        // Should only flag: outside, missing, badimg, three (4 total)
1083        assert_eq!(result.len(), 4);
1084
1085        let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
1086        assert!(messages.iter().any(|m| m.contains("outside")));
1087        assert!(messages.iter().any(|m| m.contains("missing")));
1088        assert!(messages.iter().any(|m| m.contains("badimg")));
1089        assert!(messages.iter().any(|m| m.contains("three")));
1090
1091        // Should NOT flag any references inside code spans
1092        assert!(!messages.iter().any(|m| m.contains("inside")));
1093        assert!(!messages.iter().any(|m| m.contains("one")));
1094        assert!(!messages.iter().any(|m| m.contains("two")));
1095        assert!(!messages.iter().any(|m| m.contains("refs")));
1096        assert!(!messages.iter().any(|m| m.contains("same")));
1097    }
1098
1099    #[test]
1100    fn test_multiple_undefined_references() {
1101        let rule = MD052ReferenceLinkImages::new();
1102        let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
1103        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1104        let result = rule.check(&ctx).unwrap();
1105
1106        assert_eq!(result.len(), 3);
1107        assert!(result[0].message.contains("ref1"));
1108        assert!(result[1].message.contains("ref2"));
1109        assert!(result[2].message.contains("ref3"));
1110    }
1111
1112    #[test]
1113    fn test_mixed_valid_and_undefined() {
1114        let rule = MD052ReferenceLinkImages::new();
1115        let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
1116        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1117        let result = rule.check(&ctx).unwrap();
1118
1119        assert_eq!(result.len(), 1);
1120        assert!(result[0].message.contains("missing"));
1121    }
1122
1123    #[test]
1124    fn test_empty_reference() {
1125        let rule = MD052ReferenceLinkImages::new();
1126        let content = "[text][]\n\n[ref]: https://example.com";
1127        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1128        let result = rule.check(&ctx).unwrap();
1129
1130        // Empty reference should use the link text as reference
1131        assert_eq!(result.len(), 1);
1132    }
1133
1134    #[test]
1135    fn test_escaped_brackets_ignored() {
1136        let rule = MD052ReferenceLinkImages::new();
1137        let content = "\\[not a link\\]";
1138        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1139        let result = rule.check(&ctx).unwrap();
1140
1141        assert_eq!(result.len(), 0);
1142    }
1143
1144    #[test]
1145    fn test_list_items_ignored() {
1146        let rule = MD052ReferenceLinkImages::new();
1147        let content = "- [undefined]\n* [another]\n+ [third]";
1148        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1149        let result = rule.check(&ctx).unwrap();
1150
1151        // List items that look like shortcut references should be ignored
1152        assert_eq!(result.len(), 0);
1153    }
1154
1155    #[test]
1156    fn test_output_example_section_ignored() {
1157        // Enable shortcut_syntax to test example section handling
1158        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1159            shortcut_syntax: true,
1160            ..Default::default()
1161        });
1162        let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
1163        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1164        let result = rule.check(&ctx).unwrap();
1165
1166        // Only the reference outside the Output section should be flagged
1167        assert_eq!(result.len(), 1);
1168        assert!(result[0].message.contains("missing"));
1169    }
1170
1171    #[test]
1172    fn test_reference_definitions_in_code_blocks_ignored() {
1173        let rule = MD052ReferenceLinkImages::new();
1174        let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
1175        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1176        let result = rule.check(&ctx).unwrap();
1177
1178        // Reference defined in code block should not count
1179        assert_eq!(result.len(), 1);
1180        assert!(result[0].message.contains("ref"));
1181    }
1182
1183    #[test]
1184    fn test_multiple_references_to_same_undefined() {
1185        let rule = MD052ReferenceLinkImages::new();
1186        let content = "[first][missing] [second][missing] [third][missing]";
1187        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1188        let result = rule.check(&ctx).unwrap();
1189
1190        // Should only report once per unique reference
1191        assert_eq!(result.len(), 1);
1192        assert!(result[0].message.contains("missing"));
1193    }
1194
1195    #[test]
1196    fn test_reference_with_special_characters() {
1197        let rule = MD052ReferenceLinkImages::new();
1198        let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
1199        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1200        let result = rule.check(&ctx).unwrap();
1201
1202        assert_eq!(result.len(), 0);
1203    }
1204
1205    #[test]
1206    fn test_issue_51_html_attribute_not_reference() {
1207        // Test for issue #51 - HTML attributes with square brackets shouldn't be treated as references
1208        let rule = MD052ReferenceLinkImages::new();
1209        let content = r#"# Example
1210
1211## Test
1212
1213Want to fill out this form?
1214
1215<form method="post">
1216    <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
1217</form>"#;
1218        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1219        let result = rule.check(&ctx).unwrap();
1220
1221        assert_eq!(
1222            result.len(),
1223            0,
1224            "HTML attributes with square brackets should not be flagged as undefined references"
1225        );
1226    }
1227
1228    #[test]
1229    fn test_extract_references() {
1230        let rule = MD052ReferenceLinkImages::new();
1231        let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
1232        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1233        let refs = rule.extract_references(&ctx);
1234
1235        assert_eq!(refs.len(), 3);
1236        assert!(refs.contains("ref1"));
1237        assert!(refs.contains("ref2"));
1238        assert!(refs.contains("ref3"));
1239    }
1240
1241    #[test]
1242    fn test_inline_code_not_flagged() {
1243        // Enable shortcut_syntax to test inline code detection
1244        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1245            shortcut_syntax: true,
1246            ..Default::default()
1247        });
1248
1249        // Test that arrays in inline code are not flagged as references
1250        let content = r#"# Test
1251
1252Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.
1253
1254Also, `[todo]` is not a reference link.
1255
1256But this [reference] should be flagged.
1257
1258And this `[inline code]` should not be flagged.
1259"#;
1260
1261        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1262        let warnings = rule.check(&ctx).unwrap();
1263
1264        // Should only flag [reference], not the ones in backticks
1265        assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
1266        assert!(warnings[0].message.contains("'reference'"));
1267    }
1268
1269    #[test]
1270    fn test_code_block_references_ignored() {
1271        // Enable shortcut_syntax to test code block handling
1272        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1273            shortcut_syntax: true,
1274            ..Default::default()
1275        });
1276
1277        let content = r#"# Test
1278
1279```markdown
1280[undefined] reference in code block
1281![undefined] image in code block
1282```
1283
1284[real-undefined] reference outside
1285"#;
1286
1287        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1288        let warnings = rule.check(&ctx).unwrap();
1289
1290        // Should only flag [real-undefined], not the ones in code block
1291        assert_eq!(warnings.len(), 1);
1292        assert!(warnings[0].message.contains("'real-undefined'"));
1293    }
1294
1295    #[test]
1296    fn test_html_comments_ignored() {
1297        // Test for issue #20 - MD052 should not flag content inside HTML comments
1298        let rule = MD052ReferenceLinkImages::new();
1299
1300        // Test the exact case from issue #20
1301        let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
1302<!--- set_env EDITOR 'python3 fake_editor.py' -->
1303
1304```bash
1305$ python3 vote.py
13063 votes for: 2
13072 votes for: 3, 4
1308```"#;
1309        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1310        let result = rule.check(&ctx).unwrap();
1311        assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");
1312
1313        // Test various reference patterns inside HTML comments
1314        let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
1315Normal [text][undefined]
1316<!-- Another [comment][with] references -->"#;
1317        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1318        let result = rule.check(&ctx).unwrap();
1319        assert_eq!(
1320            result.len(),
1321            1,
1322            "Should only flag the undefined reference outside comments"
1323        );
1324        assert!(result[0].message.contains("undefined"));
1325
1326        // Test multi-line HTML comments
1327        let content = r#"<!--
1328[ref1]
1329[ref2][ref3]
1330-->
1331[actual][undefined]"#;
1332        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1333        let result = rule.check(&ctx).unwrap();
1334        assert_eq!(
1335            result.len(),
1336            1,
1337            "Should not flag references in multi-line HTML comments"
1338        );
1339        assert!(result[0].message.contains("undefined"));
1340
1341        // Test mixed scenarios
1342        let content = r#"<!-- Comment with [1:] pattern -->
1343Valid [link][ref]
1344<!-- More [refs][in][comments] -->
1345![image][missing]
1346
1347[ref]: https://example.com"#;
1348        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1349        let result = rule.check(&ctx).unwrap();
1350        assert_eq!(result.len(), 1, "Should only flag missing image reference");
1351        assert!(result[0].message.contains("missing"));
1352    }
1353
1354    #[test]
1355    fn test_frontmatter_ignored() {
1356        // Test for issue #24 - MD052 should not flag content inside frontmatter
1357        // Enable shortcut_syntax to test frontmatter handling
1358        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1359            shortcut_syntax: true,
1360            ..Default::default()
1361        });
1362
1363        // Test YAML frontmatter with arrays and references
1364        let content = r#"---
1365layout: post
1366title: "My Jekyll Post"
1367date: 2023-01-01
1368categories: blog
1369tags: ["test", "example"]
1370author: John Doe
1371---
1372
1373# My Blog Post
1374
1375This is the actual markdown content that should be linted.
1376
1377[undefined] reference should be flagged.
1378
1379## Section 1
1380
1381Some content here."#;
1382        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1383        let result = rule.check(&ctx).unwrap();
1384
1385        // Should only flag [undefined] in the content, not the ["test", "example"] array in frontmatter
1386        assert_eq!(
1387            result.len(),
1388            1,
1389            "Should only flag the undefined reference outside frontmatter"
1390        );
1391        assert!(result[0].message.contains("undefined"));
1392
1393        // Test TOML frontmatter
1394        let content = r#"+++
1395title = "My Post"
1396tags = ["example", "test"]
1397+++
1398
1399# Content
1400
1401[missing] reference should be flagged."#;
1402        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1403        let result = rule.check(&ctx).unwrap();
1404        assert_eq!(
1405            result.len(),
1406            1,
1407            "Should only flag the undefined reference outside TOML frontmatter"
1408        );
1409        assert!(result[0].message.contains("missing"));
1410    }
1411
1412    #[test]
1413    fn test_mkdocs_snippet_markers_not_flagged() {
1414        // Test for issue #68 - MkDocs snippet selection markers should not be flagged as undefined references
1415        // Enable shortcut_syntax to test snippet marker handling
1416        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1417            shortcut_syntax: true,
1418            ..Default::default()
1419        });
1420
1421        // Test snippet section markers
1422        let content = r#"# Document with MkDocs Snippets
1423
1424Some content here.
1425
1426# -8<- [start:remote-content]
1427
1428This is the remote content section.
1429
1430# -8<- [end:remote-content]
1431
1432More content here.
1433
1434<!-- --8<-- [start:another-section] -->
1435Content in another section
1436<!-- --8<-- [end:another-section] -->"#;
1437        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
1438        let result = rule.check(&ctx).unwrap();
1439
1440        // Should not flag any snippet markers as undefined references
1441        assert_eq!(
1442            result.len(),
1443            0,
1444            "Should not flag MkDocs snippet markers as undefined references"
1445        );
1446
1447        // Test that the snippet marker lines are properly skipped
1448        // but regular undefined references on other lines are still caught
1449        let content = r#"# Document
1450
1451# -8<- [start:section]
1452Content with [reference] inside snippet section
1453# -8<- [end:section]
1454
1455Regular [undefined] reference outside snippet markers."#;
1456        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
1457        let result = rule.check(&ctx).unwrap();
1458
1459        assert_eq!(
1460            result.len(),
1461            2,
1462            "Should flag undefined references but skip snippet marker lines"
1463        );
1464        // The references inside the content should be flagged, but not start: and end:
1465        assert!(result[0].message.contains("reference"));
1466        assert!(result[1].message.contains("undefined"));
1467
1468        // Test in standard mode - should flag the markers as undefined
1469        let content = r#"# Document
1470
1471# -8<- [start:section]
1472# -8<- [end:section]"#;
1473        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1474        let result = rule.check(&ctx).unwrap();
1475
1476        assert_eq!(
1477            result.len(),
1478            2,
1479            "In standard mode, snippet markers should be flagged as undefined references"
1480        );
1481    }
1482
1483    #[test]
1484    fn test_pandoc_citations_not_flagged() {
1485        // Test that Pandoc/RMarkdown/Quarto citation syntax is not flagged
1486        // Enable shortcut_syntax to test citation handling
1487        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1488            shortcut_syntax: true,
1489            ..Default::default()
1490        });
1491
1492        let content = r#"# Research Paper
1493
1494We are using the **bookdown** package [@R-bookdown] in this sample book.
1495This was built on top of R Markdown and **knitr** [@xie2015].
1496
1497Multiple citations [@citation1; @citation2; @citation3] are also supported.
1498
1499Regular [undefined] reference should still be flagged.
1500"#;
1501        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1502        let result = rule.check(&ctx).unwrap();
1503
1504        // Should only flag the undefined reference, not the citations
1505        assert_eq!(
1506            result.len(),
1507            1,
1508            "Should only flag the undefined reference, not Pandoc citations"
1509        );
1510        assert!(result[0].message.contains("undefined"));
1511    }
1512
1513    #[test]
1514    fn test_pandoc_inline_footnotes_not_flagged() {
1515        // Test that Pandoc inline footnote syntax is not flagged
1516        // Enable shortcut_syntax to test inline footnote handling
1517        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1518            shortcut_syntax: true,
1519            ..Default::default()
1520        });
1521
1522        let content = r#"# Math Document
1523
1524You can use math in footnotes like this^[where we mention $p = \frac{a}{b}$].
1525
1526Another footnote^[with some text and a [link](https://example.com)].
1527
1528But this [reference] without ^ should be flagged.
1529"#;
1530        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1531        let result = rule.check(&ctx).unwrap();
1532
1533        // Should only flag the reference without ^
1534        assert_eq!(
1535            result.len(),
1536            1,
1537            "Should only flag the regular reference, not inline footnotes"
1538        );
1539        assert!(result[0].message.contains("reference"));
1540    }
1541
1542    #[test]
1543    fn test_github_alerts_not_flagged() {
1544        // Test for issue #60 - GitHub alerts should not be flagged as undefined references
1545        // Enable shortcut_syntax to test GitHub alert handling
1546        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1547            shortcut_syntax: true,
1548            ..Default::default()
1549        });
1550
1551        // Test various GitHub alert types
1552        let content = r#"# Document with GitHub Alerts
1553
1554> [!NOTE]
1555> This is a note alert.
1556
1557> [!TIP]
1558> This is a tip alert.
1559
1560> [!IMPORTANT]
1561> This is an important alert.
1562
1563> [!WARNING]
1564> This is a warning alert.
1565
1566> [!CAUTION]
1567> This is a caution alert.
1568
1569Regular content with [undefined] reference."#;
1570        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1571        let result = rule.check(&ctx).unwrap();
1572
1573        // Should only flag the undefined reference, not the GitHub alerts
1574        assert_eq!(
1575            result.len(),
1576            1,
1577            "Should only flag the undefined reference, not GitHub alerts"
1578        );
1579        assert!(result[0].message.contains("undefined"));
1580        assert_eq!(result[0].line, 18); // Line with [undefined]
1581
1582        // Test GitHub alerts with additional content
1583        let content = r#"> [!TIP]
1584> Here's a useful tip about [something].
1585> Multiple lines are allowed.
1586
1587[something] is mentioned but not defined."#;
1588        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1589        let result = rule.check(&ctx).unwrap();
1590
1591        // Should flag only the [something] outside blockquotes
1592        // The test shows we're only catching one, which might be correct behavior
1593        // matching markdownlint's approach
1594        assert_eq!(result.len(), 1, "Should flag undefined reference");
1595        assert!(result[0].message.contains("something"));
1596
1597        // Test GitHub alerts with proper references
1598        let content = r#"> [!NOTE]
1599> See [reference] for more details.
1600
1601[reference]: https://example.com"#;
1602        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1603        let result = rule.check(&ctx).unwrap();
1604
1605        // Should not flag anything - [!NOTE] is GitHub alert and [reference] is defined
1606        assert_eq!(result.len(), 0, "Should not flag GitHub alerts or defined references");
1607    }
1608
1609    #[test]
1610    fn test_ignore_config() {
1611        // Test that user-configured ignore list is respected
1612        let config = MD052Config {
1613            shortcut_syntax: true,
1614            ignore: vec!["Vec".to_string(), "HashMap".to_string(), "Option".to_string()],
1615        };
1616        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1617
1618        let content = r#"# Document with Custom Types
1619
1620Use [Vec] for dynamic arrays.
1621Use [HashMap] for key-value storage.
1622Use [Option] for nullable values.
1623Use [Result] for error handling.
1624"#;
1625        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1626        let result = rule.check(&ctx).unwrap();
1627
1628        // Should only flag [Result] because it's not in ignore
1629        assert_eq!(result.len(), 1, "Should only flag names not in ignore");
1630        assert!(result[0].message.contains("Result"));
1631    }
1632
1633    #[test]
1634    fn test_ignore_case_insensitive() {
1635        // Test that ignore list is case-insensitive
1636        let config = MD052Config {
1637            shortcut_syntax: true,
1638            ignore: vec!["Vec".to_string()],
1639        };
1640        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1641
1642        let content = r#"# Case Insensitivity Test
1643
1644[Vec] should be ignored.
1645[vec] should also be ignored (different case, same match).
1646[VEC] should also be ignored (different case, same match).
1647[undefined] should be flagged (not in ignore list).
1648"#;
1649        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1650        let result = rule.check(&ctx).unwrap();
1651
1652        // Should only flag [undefined] because ignore is case-insensitive
1653        assert_eq!(result.len(), 1, "Should only flag non-ignored reference");
1654        assert!(result[0].message.contains("undefined"));
1655    }
1656
1657    #[test]
1658    fn test_ignore_empty_by_default() {
1659        // Test that empty ignore list doesn't affect existing behavior
1660        let rule = MD052ReferenceLinkImages::new();
1661
1662        let content = "[text][undefined]";
1663        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1664        let result = rule.check(&ctx).unwrap();
1665
1666        // Should still flag undefined references
1667        assert_eq!(result.len(), 1);
1668        assert!(result[0].message.contains("undefined"));
1669    }
1670
1671    #[test]
1672    fn test_ignore_with_reference_links() {
1673        // Test ignore list with full reference link syntax [text][ref]
1674        let config = MD052Config {
1675            shortcut_syntax: false,
1676            ignore: vec!["CustomType".to_string()],
1677        };
1678        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1679
1680        let content = r#"# Test
1681
1682See [documentation][CustomType] for details.
1683See [other docs][MissingRef] for more.
1684"#;
1685        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1686        let result = rule.check(&ctx).unwrap();
1687
1688        // Debug: print warnings if test fails
1689        for (i, w) in result.iter().enumerate() {
1690            eprintln!("Warning {}: {}", i, w.message);
1691        }
1692
1693        // Should flag [MissingRef] but not [CustomType]
1694        // Note: reference IDs are lowercased in the message
1695        assert_eq!(result.len(), 1, "Expected 1 warning, got {}", result.len());
1696        assert!(
1697            result[0].message.contains("missingref"),
1698            "Expected 'missingref' in message: {}",
1699            result[0].message
1700        );
1701    }
1702
1703    #[test]
1704    fn test_ignore_multiple() {
1705        // Test multiple ignored names work correctly
1706        let config = MD052Config {
1707            shortcut_syntax: true,
1708            ignore: vec![
1709                "i32".to_string(),
1710                "u64".to_string(),
1711                "String".to_string(),
1712                "Arc".to_string(),
1713                "Mutex".to_string(),
1714            ],
1715        };
1716        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1717
1718        let content = r#"# Types
1719
1720[i32] [u64] [String] [Arc] [Mutex] [Box]
1721"#;
1722        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1723        let result = rule.check(&ctx).unwrap();
1724
1725        // Note: i32 and u64 are already in the hardcoded list, so they'd be skipped anyway
1726        // String is NOT in the hardcoded list, so we test that the user config works
1727        // [Box] should be flagged (not in ignore)
1728        assert_eq!(result.len(), 1);
1729        assert!(result[0].message.contains("Box"));
1730    }
1731
1732    #[test]
1733    fn test_nested_code_fences_reference_extraction() {
1734        // Verify that extract_references uses LintContext's pre-computed in_code_block
1735        // so nested fences are handled correctly.
1736        // A 4-backtick fence wrapping a 3-backtick fence should treat the inner
1737        // ``` as content, not a code block boundary.
1738        let rule = MD052ReferenceLinkImages::new();
1739
1740        let content = "````\n```\n[ref-inside]: https://example.com\n```\n````\n\n[Use this link][ref-inside]";
1741        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1742        let result = rule.check(&ctx).unwrap();
1743
1744        // The reference definition is inside a code block (the outer ````),
1745        // so it should NOT be recognized as a definition.
1746        // Therefore [ref-inside] should be flagged as undefined.
1747        assert_eq!(
1748            result.len(),
1749            1,
1750            "Reference defined inside nested code fence should not count as a definition"
1751        );
1752        assert!(result[0].message.contains("ref-inside"));
1753    }
1754}