rumdl_lib/rules/
md052_reference_links_images.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::{HTML_COMMENT_PATTERN, SHORTCUT_REF_REGEX};
5use crate::utils::skip_context::{is_in_math_context, is_in_table_cell};
6use regex::Regex;
7use std::collections::{HashMap, HashSet};
8use std::sync::LazyLock;
9
10mod md052_config;
11use md052_config::MD052Config;
12
// Pattern to match reference definitions [ref]: url
// Note: the trailing `\s*.*` accepts anything (including nothing) after the colon,
// so empty definitions like [ref]: still match.
// Capture group 1 is the label. It accepts backslash escapes and one level of nested
// brackets to support cases like [`union[t, none]`]:
static REF_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap());
18
// Pattern for list items to exclude from reference checks (standard regex is fine)
// Matches an optionally indented `-`, `*` or `+` bullet followed by whitespace, plus an
// optional task-list checkbox (`[ ]`, `[x]`, `[X]`). Numbered lists are not matched.
static LIST_ITEM_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap());
21
// Pattern for fenced code block delimiters (standard regex is fine)
// Group 1 is the leading indentation; group 2 is the fence itself: a run of three or
// more backticks or three or more tildes. Anything after the fence is left unmatched.
static FENCED_CODE_START: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap());
24
// Pattern for output example sections (standard regex is fine)
// Matches a heading line whose entire title is exactly one of "Output", "Example",
// "Output Style" or "Output Format" (case-sensitive, trailing whitespace allowed).
static OUTPUT_EXAMPLE_START: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap());
28
// Pattern for GitHub alerts/callouts in blockquotes (e.g., > [!NOTE], > [!TIP], etc.)
// Extended to include additional common alert types
// The alert keyword is matched case-sensitively (upper case only) and must directly
// follow the blockquote marker, apart from optional whitespace.
static GITHUB_ALERT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]")
        .unwrap()
});
35
// Pattern to detect URLs that may contain brackets (IPv6, API endpoints, etc.)
// This pattern specifically looks for:
// - IPv6 addresses: https://[::1] or https://[2001:db8::1]
// - IPv6 with zone IDs: https://[fe80::1%eth0]
// - IPv6 mixed notation: https://[::ffff:192.0.2.1]
// - API paths with array notation: https://api.example.com/users[0]
// But NOT markdown reference links that happen to follow URLs
//
// The first alternative is a bracketed host made only of hex digits, ':', '.' and '%'.
// The second is a bracket-free host, a '/', and a path ending in a numeric index `[N]`.
// NOTE(review): because the bracketed class only admits hex digits, a zone ID containing
// other letters (such as the `t` and `h` in `%eth0` above) does not actually match the
// first alternative - confirm whether the example or the character class is intended.
static URL_WITH_BRACKETS: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])").unwrap());
45
/// Rule MD052: Reference links and images should use a label that is defined
///
/// See [docs/md052.md](../../docs/md052.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when a reference link or image uses a reference that isn't defined.
///
/// ## Configuration
///
/// - `shortcut-syntax`: Whether to check shortcut reference syntax `[text]` (default: false)
/// - `ignore`: Reference names that are never reported (compared ASCII case-insensitively)
///
/// By default, only full (`[text][ref]`) and collapsed (`[text][]`) reference syntax is checked.
/// Shortcut syntax is ambiguous because `[text]` could be a reference link OR just text in brackets.
#[derive(Clone, Default)]
pub struct MD052ReferenceLinkImages {
    // Rule settings; the methods in this file read its `shortcut_syntax` and `ignore` fields.
    config: MD052Config,
}
62
63impl MD052ReferenceLinkImages {
64    pub fn new() -> Self {
65        Self {
66            config: MD052Config::default(),
67        }
68    }
69
    /// Create the rule from an already-built [`MD052Config`], taking ownership of it.
    pub fn from_config_struct(config: MD052Config) -> Self {
        Self { config }
    }
73
74    /// Strip surrounding backticks from a string
75    /// Used for MkDocs auto-reference detection where `module.Class` should be treated as module.Class
76    fn strip_backticks(s: &str) -> &str {
77        s.trim_start_matches('`').trim_end_matches('`')
78    }
79
80    /// Check if a string is a valid Python identifier
81    /// Used for MkDocs auto-reference detection where single-word backtick-wrapped identifiers
82    /// like `str`, `int`, etc. should be accepted as valid auto-references
83    fn is_valid_python_identifier(s: &str) -> bool {
84        if s.is_empty() {
85            return false;
86        }
87        let first_char = s.chars().next().unwrap();
88        if !first_char.is_ascii_alphabetic() && first_char != '_' {
89            return false;
90        }
91        s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
92    }
93
94    /// Check if text matches a known non-reference pattern that should be skipped.
95    ///
96    /// These are deterministic patterns from markdown extensions or code examples,
97    /// not heuristics. Returns true for:
98    /// - User-configured names via `ignore` config option
99    /// - Markdown extensions: [^footnote], [@citation], [!alert], [TOC]
100    /// - Programming syntax: [T], [null], [i32], ["string"]
101    /// - Descriptive text: [default: value], [0-9]
102    fn is_known_non_reference_pattern(&self, text: &str) -> bool {
103        // Check user-configured ignore list first (case-insensitive match)
104        // Reference IDs are normalized to lowercase during parsing,
105        // so we use case-insensitive comparison for user convenience
106        if self.config.ignore.iter().any(|p| p.eq_ignore_ascii_case(text)) {
107            return true;
108        }
109        // Skip numeric patterns (array indices, ranges)
110        if text.chars().all(|c| c.is_ascii_digit()) {
111            return true;
112        }
113
114        // Skip numeric ranges like [1:3], [0:10], etc.
115        if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
116            return true;
117        }
118
119        // Skip patterns that look like config sections [tool.something], [section.subsection]
120        // But not if they contain other non-alphanumeric chars like hyphens, underscores, or backticks
121        // Backticks indicate intentional code formatting in a reference name (e.g., [`module.Class`])
122        if text.contains('.')
123            && !text.contains(' ')
124            && !text.contains('-')
125            && !text.contains('_')
126            && !text.contains('`')
127        {
128            // Config sections typically have dots, no spaces, and only alphanumeric + dots
129            return true;
130        }
131
132        // Skip glob/wildcard patterns like [*], [...], [**]
133        if text == "*" || text == "..." || text == "**" {
134            return true;
135        }
136
137        // Skip patterns that look like file paths [dir/file], [src/utils]
138        if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
139            return true;
140        }
141
142        // Skip programming type annotations like [int, str], [Dict[str, Any]]
143        // These typically have commas and/or nested brackets
144        if text.contains(',') || text.contains('[') || text.contains(']') {
145            // Check if it looks like a type annotation pattern
146            return true;
147        }
148
149        // Note: We don't filter out patterns with backticks because backticks in reference names
150        // are valid markdown syntax, e.g., [`dataclasses.InitVar`] is a valid reference name
151
152        // Skip patterns that look like module/class paths ONLY if they don't have backticks
153        // Backticks indicate intentional code formatting in a reference name
154        // e.g., skip [dataclasses.initvar] but allow [`typing.ClassVar`]
155        if !text.contains('`')
156            && text.contains('.')
157            && !text.contains(' ')
158            && !text.contains('-')
159            && !text.contains('_')
160        {
161            return true;
162        }
163
164        // Note: We don't filter based on word count anymore because legitimate references
165        // can have many words, like "python language reference for import statements"
166        // Word count filtering was causing false positives where valid references were
167        // being incorrectly flagged as unused
168
169        // Skip patterns that are just punctuation or operators
170        if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
171            return true;
172        }
173
174        // Skip very short non-word patterns (likely operators or syntax)
175        if text.len() <= 2 && !text.chars().all(|c| c.is_alphabetic()) {
176            return true;
177        }
178
179        // Skip quoted patterns like ["E501"], ["ALL"], ["E", "F"]
180        if (text.starts_with('"') && text.ends_with('"'))
181            || (text.starts_with('\'') && text.ends_with('\''))
182            || text.contains('"')
183            || text.contains('\'')
184        {
185            return true;
186        }
187
188        // Skip descriptive patterns with colon like [default: the project root]
189        // But allow simple numeric ranges which are handled above
190        if text.contains(':') && text.contains(' ') {
191            return true;
192        }
193
194        // Skip alert/admonition patterns like [!WARN], [!NOTE], etc.
195        if text.starts_with('!') {
196            return true;
197        }
198
199        // Skip footnote syntax like [^1], [^note], etc.
200        // Footnotes start with ^ and are a common markdown extension
201        if text.starts_with('^') {
202            return true;
203        }
204
205        // Skip Pandoc/RMarkdown/Quarto citation syntax like [@citation-key]
206        // Citations in these formats start with @ inside brackets
207        if text.starts_with('@') {
208            return true;
209        }
210
211        // Skip table of contents markers like [TOC]
212        // Used by Python-Markdown and other processors
213        if text == "TOC" {
214            return true;
215        }
216
217        // Skip single uppercase letters (likely type parameters) like [T], [U], [K], [V]
218        if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
219            return true;
220        }
221
222        // Skip common programming type names, literals, and short identifiers
223        // that are likely not markdown references
224        let common_non_refs = [
225            // Programming types
226            "object",
227            "Object",
228            "any",
229            "Any",
230            "inv",
231            "void",
232            "bool",
233            "int",
234            "float",
235            "str",
236            "char",
237            "i8",
238            "i16",
239            "i32",
240            "i64",
241            "i128",
242            "isize",
243            "u8",
244            "u16",
245            "u32",
246            "u64",
247            "u128",
248            "usize",
249            "f32",
250            "f64",
251            // JavaScript/JSON literals (excluding "undefined" which is too ambiguous)
252            "null",
253            "true",
254            "false",
255            "NaN",
256            "Infinity",
257            // Common JavaScript output patterns
258            "object Object",
259        ];
260
261        if common_non_refs.contains(&text) {
262            return true;
263        }
264
265        false
266    }
267
268    /// Check if a position is inside any code span
269    fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
270        code_spans
271            .iter()
272            .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
273    }
274
275    /// Check if a byte position is within an HTML comment
276    fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
277        for m in HTML_COMMENT_PATTERN.find_iter(content) {
278            if m.start() <= byte_pos && byte_pos < m.end() {
279                return true;
280            }
281        }
282        false
283    }
284
285    /// Check if a byte position is within an HTML tag
286    fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
287        // Check HTML tags
288        for html_tag in ctx.html_tags().iter() {
289            if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
290                return true;
291            }
292        }
293        false
294    }
295
296    fn extract_references(&self, content: &str, mkdocs_mode: bool) -> HashSet<String> {
297        use crate::config::MarkdownFlavor;
298        use crate::utils::skip_context::is_mkdocs_snippet_line;
299
300        let mut references = HashSet::new();
301        let mut in_code_block = false;
302        let mut code_fence_marker = String::new();
303
304        for line in content.lines() {
305            // Skip lines that look like MkDocs snippet markers (only in MkDocs mode)
306            if is_mkdocs_snippet_line(
307                line,
308                if mkdocs_mode {
309                    MarkdownFlavor::MkDocs
310                } else {
311                    MarkdownFlavor::Standard
312                },
313            ) {
314                continue;
315            }
316            // Handle code block boundaries
317            if let Some(cap) = FENCED_CODE_START.captures(line) {
318                if let Some(fence) = cap.get(2) {
319                    // Get the fence marker (``` or ~~~) without the indentation
320                    let fence_str = fence.as_str();
321                    if !in_code_block {
322                        in_code_block = true;
323                        code_fence_marker = fence_str.to_string();
324                    } else if line.trim_start().starts_with(&code_fence_marker) {
325                        // Check if this could be a closing fence
326                        let trimmed = line.trim_start();
327                        // A closing fence should be just the fence characters, possibly with trailing whitespace
328                        if trimmed.starts_with(&code_fence_marker) {
329                            let after_fence = &trimmed[code_fence_marker.len()..];
330                            if after_fence.trim().is_empty() {
331                                in_code_block = false;
332                                code_fence_marker.clear();
333                            }
334                        }
335                    }
336                }
337                continue;
338            }
339
340            // Skip lines in code blocks
341            if in_code_block {
342                continue;
343            }
344
345            // Check for abbreviation syntax (*[ABBR]: Definition) and skip it
346            // Abbreviations are not reference links and should not be tracked
347            if line.trim_start().starts_with("*[") {
348                continue;
349            }
350
351            if let Some(cap) = REF_REGEX.captures(line) {
352                // Store references in lowercase for case-insensitive comparison
353                if let Some(reference) = cap.get(1) {
354                    references.insert(reference.as_str().to_lowercase());
355                }
356            }
357        }
358
359        references
360    }
361
362    fn find_undefined_references(
363        &self,
364        content: &str,
365        references: &HashSet<String>,
366        ctx: &crate::lint_context::LintContext,
367        mkdocs_mode: bool,
368    ) -> Vec<(usize, usize, usize, String)> {
369        let mut undefined = Vec::new();
370        let mut reported_refs = HashMap::new();
371        let mut in_code_block = false;
372        let mut code_fence_marker = String::new();
373        let mut in_example_section = false;
374
375        // Get code spans once for the entire function
376        let code_spans = ctx.code_spans();
377
378        // Use cached data for reference links and images
379        for link in &ctx.links {
380            if !link.is_reference {
381                continue; // Skip inline links
382            }
383
384            // Skip links inside Jinja templates
385            if ctx.is_in_jinja_range(link.byte_offset) {
386                continue;
387            }
388
389            // Skip links inside code spans
390            if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
391                continue;
392            }
393
394            // Skip links inside HTML comments
395            if Self::is_in_html_comment(content, link.byte_offset) {
396                continue;
397            }
398
399            // Skip links inside HTML tags
400            if Self::is_in_html_tag(ctx, link.byte_offset) {
401                continue;
402            }
403
404            // Skip links inside math contexts
405            if is_in_math_context(ctx, link.byte_offset) {
406                continue;
407            }
408
409            // Skip links inside table cells
410            if is_in_table_cell(ctx, link.line, link.start_col) {
411                continue;
412            }
413
414            // Skip links inside frontmatter
415            if ctx.line_info(link.line).is_some_and(|info| info.in_front_matter) {
416                continue;
417            }
418
419            // Skip Quarto/Pandoc citations ([@citation], @citation)
420            // Citations look like reference links but are bibliography references
421            if ctx.flavor == crate::config::MarkdownFlavor::Quarto && ctx.is_in_citation(link.byte_offset) {
422                continue;
423            }
424
425            // Skip links inside shortcodes ({{< ... >}} or {{% ... %}})
426            // Shortcodes may contain template syntax that looks like reference links
427            if ctx.is_in_shortcode(link.byte_offset) {
428                continue;
429            }
430
431            if let Some(ref_id) = &link.reference_id {
432                let reference_lower = ref_id.to_lowercase();
433
434                // Skip known non-reference patterns (markdown extensions, code examples)
435                if self.is_known_non_reference_pattern(ref_id) {
436                    continue;
437                }
438
439                // Skip MkDocs auto-references if in MkDocs mode
440                // Check both the reference_id and the link text for shorthand references
441                // Strip backticks since MkDocs resolves `module.Class` as module.Class
442                let stripped_ref = Self::strip_backticks(ref_id);
443                let stripped_text = Self::strip_backticks(&link.text);
444                if mkdocs_mode
445                    && (is_mkdocs_auto_reference(stripped_ref)
446                        || is_mkdocs_auto_reference(stripped_text)
447                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
448                        || (link.text.as_ref() != stripped_text && Self::is_valid_python_identifier(stripped_text)))
449                {
450                    continue;
451                }
452
453                // Check if reference is defined
454                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
455                    // Check if the line is in an example section or list item
456                    if let Some(line_info) = ctx.line_info(link.line) {
457                        if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
458                            in_example_section = true;
459                            continue;
460                        }
461
462                        if in_example_section {
463                            continue;
464                        }
465
466                        // Skip list items
467                        if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
468                            continue;
469                        }
470
471                        // Skip lines that are HTML content
472                        let trimmed = line_info.content(ctx.content).trim_start();
473                        if trimmed.starts_with('<') {
474                            continue;
475                        }
476                    }
477
478                    let match_len = link.byte_end - link.byte_offset;
479                    undefined.push((link.line - 1, link.start_col, match_len, ref_id.to_string()));
480                    reported_refs.insert(reference_lower, true);
481                }
482            }
483        }
484
485        // Use cached data for reference images
486        for image in &ctx.images {
487            if !image.is_reference {
488                continue; // Skip inline images
489            }
490
491            // Skip images inside Jinja templates
492            if ctx.is_in_jinja_range(image.byte_offset) {
493                continue;
494            }
495
496            // Skip images inside code spans
497            if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
498                continue;
499            }
500
501            // Skip images inside HTML comments
502            if Self::is_in_html_comment(content, image.byte_offset) {
503                continue;
504            }
505
506            // Skip images inside HTML tags
507            if Self::is_in_html_tag(ctx, image.byte_offset) {
508                continue;
509            }
510
511            // Skip images inside math contexts
512            if is_in_math_context(ctx, image.byte_offset) {
513                continue;
514            }
515
516            // Skip images inside table cells
517            if is_in_table_cell(ctx, image.line, image.start_col) {
518                continue;
519            }
520
521            // Skip images inside frontmatter
522            if ctx.line_info(image.line).is_some_and(|info| info.in_front_matter) {
523                continue;
524            }
525
526            if let Some(ref_id) = &image.reference_id {
527                let reference_lower = ref_id.to_lowercase();
528
529                // Skip known non-reference patterns (markdown extensions, code examples)
530                if self.is_known_non_reference_pattern(ref_id) {
531                    continue;
532                }
533
534                // Skip MkDocs auto-references if in MkDocs mode
535                // Check both the reference_id and the alt text for shorthand references
536                // Strip backticks since MkDocs resolves `module.Class` as module.Class
537                let stripped_ref = Self::strip_backticks(ref_id);
538                let stripped_alt = Self::strip_backticks(&image.alt_text);
539                if mkdocs_mode
540                    && (is_mkdocs_auto_reference(stripped_ref)
541                        || is_mkdocs_auto_reference(stripped_alt)
542                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
543                        || (image.alt_text.as_ref() != stripped_alt && Self::is_valid_python_identifier(stripped_alt)))
544                {
545                    continue;
546                }
547
548                // Check if reference is defined
549                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
550                    // Check if the line is in an example section or list item
551                    if let Some(line_info) = ctx.line_info(image.line) {
552                        if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
553                            in_example_section = true;
554                            continue;
555                        }
556
557                        if in_example_section {
558                            continue;
559                        }
560
561                        // Skip list items
562                        if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
563                            continue;
564                        }
565
566                        // Skip lines that are HTML content
567                        let trimmed = line_info.content(ctx.content).trim_start();
568                        if trimmed.starts_with('<') {
569                            continue;
570                        }
571                    }
572
573                    let match_len = image.byte_end - image.byte_offset;
574                    undefined.push((image.line - 1, image.start_col, match_len, ref_id.to_string()));
575                    reported_refs.insert(reference_lower, true);
576                }
577            }
578        }
579
580        // Build a set of byte ranges that are already covered by parsed links/images
581        let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
582
583        // Add ranges from parsed links
584        for link in &ctx.links {
585            covered_ranges.push((link.byte_offset, link.byte_end));
586        }
587
588        // Add ranges from parsed images
589        for image in &ctx.images {
590            covered_ranges.push((image.byte_offset, image.byte_end));
591        }
592
593        // Sort ranges by start position
594        covered_ranges.sort_by_key(|&(start, _)| start);
595
596        // Handle shortcut references [text] which aren't captured in ctx.links
597        // Only check these if shortcut_syntax is enabled (default: false)
598        // Shortcut syntax is ambiguous because [text] could be a reference link
599        // OR just text in brackets (like spec notation in quotes)
600        if !self.config.shortcut_syntax {
601            return undefined;
602        }
603
604        // Need to use regex for shortcut references
605        let lines: Vec<&str> = content.lines().collect();
606        in_example_section = false; // Reset for line-by-line processing
607
608        for (line_num, line) in lines.iter().enumerate() {
609            // Skip lines in frontmatter (convert 0-based to 1-based for line_info)
610            if ctx.line_info(line_num + 1).is_some_and(|info| info.in_front_matter) {
611                continue;
612            }
613
614            // Handle code blocks
615            if let Some(cap) = FENCED_CODE_START.captures(line) {
616                if let Some(fence) = cap.get(2) {
617                    // Get the fence marker (``` or ~~~) without the indentation
618                    let fence_str = fence.as_str();
619                    if !in_code_block {
620                        in_code_block = true;
621                        code_fence_marker = fence_str.to_string();
622                    } else if line.trim_start().starts_with(&code_fence_marker) {
623                        // Check if this could be a closing fence
624                        let trimmed = line.trim_start();
625                        // A closing fence should be just the fence characters, possibly with trailing whitespace
626                        if trimmed.starts_with(&code_fence_marker) {
627                            let after_fence = &trimmed[code_fence_marker.len()..];
628                            if after_fence.trim().is_empty() {
629                                in_code_block = false;
630                                code_fence_marker.clear();
631                            }
632                        }
633                    }
634                }
635                continue;
636            }
637
638            if in_code_block {
639                continue;
640            }
641
642            // Check for example sections
643            if OUTPUT_EXAMPLE_START.is_match(line) {
644                in_example_section = true;
645                continue;
646            }
647
648            if in_example_section {
649                // Check if we're exiting the example section (another heading)
650                if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
651                    in_example_section = false;
652                } else {
653                    continue;
654                }
655            }
656
657            // Skip list items
658            if LIST_ITEM_REGEX.is_match(line) {
659                continue;
660            }
661
662            // Skip lines that are HTML content
663            let trimmed_line = line.trim_start();
664            if trimmed_line.starts_with('<') {
665                continue;
666            }
667
668            // Skip GitHub alerts/callouts (e.g., > [!TIP])
669            if GITHUB_ALERT_REGEX.is_match(line) {
670                continue;
671            }
672
673            // Skip abbreviation definitions (*[ABBR]: Definition)
674            // These are not reference links and should not be checked
675            if trimmed_line.starts_with("*[") {
676                continue;
677            }
678
679            // Collect positions of brackets that are part of URLs (IPv6, etc.)
680            // so we can exclude them from reference checking
681            let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
682            for mat in URL_WITH_BRACKETS.find_iter(line) {
683                // Find all bracket pairs within this URL match
684                let url_str = mat.as_str();
685                let url_start = mat.start();
686
687                // Find brackets within the URL (e.g., in https://[::1]:8080)
688                let mut idx = 0;
689                while idx < url_str.len() {
690                    if let Some(bracket_start) = url_str[idx..].find('[') {
691                        let bracket_start_abs = url_start + idx + bracket_start;
692                        if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
693                            let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
694                            url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
695                            idx += bracket_start + bracket_end + 2;
696                        } else {
697                            break;
698                        }
699                    } else {
700                        break;
701                    }
702                }
703            }
704
705            // Check shortcut references: [reference]
706            if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
707                for cap in captures {
708                    if let Some(ref_match) = cap.get(1) {
709                        // Check if this bracket is part of a URL (IPv6, etc.)
710                        let bracket_start = cap.get(0).unwrap().start();
711                        let bracket_end = cap.get(0).unwrap().end();
712
713                        // Skip if this bracket pair is within any URL bracket range
714                        let is_in_url = url_bracket_ranges
715                            .iter()
716                            .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
717
718                        if is_in_url {
719                            continue;
720                        }
721
722                        // Skip Pandoc/RMarkdown inline footnotes: ^[text]
723                        // Check if there's a ^ immediately before the opening bracket
724                        if bracket_start > 0 {
725                            // bracket_start is a byte offset, so we need to check the byte before
726                            if let Some(byte) = line.as_bytes().get(bracket_start.saturating_sub(1))
727                                && *byte == b'^'
728                            {
729                                continue; // This is an inline footnote, skip it
730                            }
731                        }
732
733                        let reference = ref_match.as_str();
734                        let reference_lower = reference.to_lowercase();
735
736                        // Skip known non-reference patterns (markdown extensions, code examples)
737                        if self.is_known_non_reference_pattern(reference) {
738                            continue;
739                        }
740
741                        // Skip GitHub alerts (including extended types)
742                        if let Some(alert_type) = reference.strip_prefix('!')
743                            && matches!(
744                                alert_type,
745                                "NOTE"
746                                    | "TIP"
747                                    | "WARNING"
748                                    | "IMPORTANT"
749                                    | "CAUTION"
750                                    | "INFO"
751                                    | "SUCCESS"
752                                    | "FAILURE"
753                                    | "DANGER"
754                                    | "BUG"
755                                    | "EXAMPLE"
756                                    | "QUOTE"
757                            )
758                        {
759                            continue;
760                        }
761
762                        // Skip MkDocs snippet section markers like [start:section] or [end:section]
763                        // when they appear as part of snippet syntax (e.g., # -8<- [start:section])
764                        if mkdocs_mode
765                            && (reference.starts_with("start:") || reference.starts_with("end:"))
766                            && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
767                                || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
768                        {
769                            continue;
770                        }
771
772                        // Skip MkDocs auto-references if in MkDocs mode
773                        // Strip backticks since MkDocs resolves `module.Class` as module.Class
774                        let stripped_ref = Self::strip_backticks(reference);
775                        if mkdocs_mode
776                            && (is_mkdocs_auto_reference(stripped_ref)
777                                || (reference != stripped_ref && Self::is_valid_python_identifier(stripped_ref)))
778                        {
779                            continue;
780                        }
781
782                        if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
783                            let full_match = cap.get(0).unwrap();
784                            let col = full_match.start();
785
786                            // Skip if inside code span
787                            let code_spans = ctx.code_spans();
788                            if Self::is_in_code_span(line_num + 1, col, &code_spans) {
789                                continue;
790                            }
791
792                            // Check if this position is within a covered range
793                            let line_start_byte = ctx.line_offsets[line_num];
794                            let byte_pos = line_start_byte + col;
795
796                            // Skip if inside Jinja template
797                            if ctx.is_in_jinja_range(byte_pos) {
798                                continue;
799                            }
800
801                            // Skip if inside code block
802                            if crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block(
803                                &ctx.code_blocks,
804                                byte_pos,
805                            ) {
806                                continue;
807                            }
808
809                            // Skip if inside HTML comment
810                            if Self::is_in_html_comment(content, byte_pos) {
811                                continue;
812                            }
813
814                            // Skip if inside HTML tag
815                            if Self::is_in_html_tag(ctx, byte_pos) {
816                                continue;
817                            }
818
819                            // Skip if inside math context
820                            if is_in_math_context(ctx, byte_pos) {
821                                continue;
822                            }
823
824                            // Skip if inside table cell
825                            if is_in_table_cell(ctx, line_num + 1, col) {
826                                continue;
827                            }
828
829                            let byte_end = byte_pos + (full_match.end() - full_match.start());
830
831                            // Check if this shortcut ref overlaps with any parsed link/image
832                            let mut is_covered = false;
833                            for &(range_start, range_end) in &covered_ranges {
834                                if range_start <= byte_pos && byte_end <= range_end {
835                                    // This shortcut ref is completely within a parsed link/image
836                                    is_covered = true;
837                                    break;
838                                }
839                                if range_start > byte_end {
840                                    // No need to check further (ranges are sorted)
841                                    break;
842                                }
843                            }
844
845                            if is_covered {
846                                continue;
847                            }
848
849                            // More sophisticated checks to avoid false positives
850
851                            // Check 1: If preceded by ], this might be part of [text][ref]
852                            // Look for the pattern ...][ref] and check if there's a matching [ before
853                            let line_chars: Vec<char> = line.chars().collect();
854                            if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
855                                // Look backwards for a [ that would make this [text][ref]
856                                let mut bracket_count = 1; // We already saw one ]
857                                let mut check_pos = col.saturating_sub(2);
858                                let mut found_opening = false;
859
860                                while check_pos > 0 && check_pos < line_chars.len() {
861                                    match line_chars.get(check_pos) {
862                                        Some(&']') => bracket_count += 1,
863                                        Some(&'[') => {
864                                            bracket_count -= 1;
865                                            if bracket_count == 0 {
866                                                // Check if this [ is escaped
867                                                if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
868                                                    found_opening = true;
869                                                }
870                                                break;
871                                            }
872                                        }
873                                        _ => {}
874                                    }
875                                    if check_pos == 0 {
876                                        break;
877                                    }
878                                    check_pos = check_pos.saturating_sub(1);
879                                }
880
881                                if found_opening {
882                                    // This is part of [text][ref], skip it
883                                    continue;
884                                }
885                            }
886
887                            // Check 2: If there's an escaped bracket pattern before this
888                            // e.g., \[text\][ref], the [ref] shouldn't be treated as a shortcut
889                            let before_text = &line[..col];
890                            if before_text.contains("\\]") {
891                                // Check if there's a \[ before the \]
892                                if let Some(escaped_close_pos) = before_text.rfind("\\]") {
893                                    let search_text = &before_text[..escaped_close_pos];
894                                    if search_text.contains("\\[") {
895                                        // This looks like \[...\][ref], skip it
896                                        continue;
897                                    }
898                                }
899                            }
900
901                            let match_len = full_match.end() - full_match.start();
902                            undefined.push((line_num, col, match_len, reference.to_string()));
903                            reported_refs.insert(reference_lower, true);
904                        }
905                    }
906                }
907            }
908        }
909
910        undefined
911    }
912}
913
914impl Rule for MD052ReferenceLinkImages {
915    fn name(&self) -> &'static str {
916        "MD052"
917    }
918
919    fn description(&self) -> &'static str {
920        "Reference links and images should use a reference that exists"
921    }
922
923    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
924        let content = ctx.content;
925        let mut warnings = Vec::new();
926
927        // OPTIMIZATION: Early exit if no brackets at all
928        if !content.contains('[') {
929            return Ok(warnings);
930        }
931
932        // Check if we're in MkDocs mode from the context
933        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
934
935        let references = self.extract_references(content, mkdocs_mode);
936
937        // Use optimized detection method with cached link/image data
938        for (line_num, col, match_len, reference) in
939            self.find_undefined_references(content, &references, ctx, mkdocs_mode)
940        {
941            let lines: Vec<&str> = content.lines().collect();
942            let line_content = lines.get(line_num).unwrap_or(&"");
943
944            // Calculate precise character range for the entire undefined reference
945            let (start_line, start_col, end_line, end_col) =
946                calculate_match_range(line_num + 1, line_content, col, match_len);
947
948            warnings.push(LintWarning {
949                rule_name: Some(self.name().to_string()),
950                line: start_line,
951                column: start_col,
952                end_line,
953                end_column: end_col,
954                message: format!("Reference '{reference}' not found"),
955                severity: Severity::Warning,
956                fix: None,
957            });
958        }
959
960        Ok(warnings)
961    }
962
963    /// Check if this rule should be skipped for performance
964    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
965        // Skip if content is empty or has no links/images
966        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
967    }
968
969    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
970        let content = ctx.content;
971        // No automatic fix available for undefined references
972        Ok(content.to_string())
973    }
974
975    fn as_any(&self) -> &dyn std::any::Any {
976        self
977    }
978
979    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
980    where
981        Self: Sized,
982    {
983        let rule_config = crate::rule_config_serde::load_rule_config::<MD052Config>(config);
984        Box::new(Self::from_config_struct(rule_config))
985    }
986}
987
988#[cfg(test)]
989mod tests {
990    use super::*;
991    use crate::lint_context::LintContext;
992
993    #[test]
994    fn test_valid_reference_link() {
995        let rule = MD052ReferenceLinkImages::new();
996        let content = "[text][ref]\n\n[ref]: https://example.com";
997        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
998        let result = rule.check(&ctx).unwrap();
999
1000        assert_eq!(result.len(), 0);
1001    }
1002
1003    #[test]
1004    fn test_undefined_reference_link() {
1005        let rule = MD052ReferenceLinkImages::new();
1006        let content = "[text][undefined]";
1007        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1008        let result = rule.check(&ctx).unwrap();
1009
1010        assert_eq!(result.len(), 1);
1011        assert!(result[0].message.contains("Reference 'undefined' not found"));
1012    }
1013
1014    #[test]
1015    fn test_valid_reference_image() {
1016        let rule = MD052ReferenceLinkImages::new();
1017        let content = "![alt][img]\n\n[img]: image.jpg";
1018        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1019        let result = rule.check(&ctx).unwrap();
1020
1021        assert_eq!(result.len(), 0);
1022    }
1023
1024    #[test]
1025    fn test_undefined_reference_image() {
1026        let rule = MD052ReferenceLinkImages::new();
1027        let content = "![alt][missing]";
1028        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1029        let result = rule.check(&ctx).unwrap();
1030
1031        assert_eq!(result.len(), 1);
1032        assert!(result[0].message.contains("Reference 'missing' not found"));
1033    }
1034
1035    #[test]
1036    fn test_case_insensitive_references() {
1037        let rule = MD052ReferenceLinkImages::new();
1038        let content = "[Text][REF]\n\n[ref]: https://example.com";
1039        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1040        let result = rule.check(&ctx).unwrap();
1041
1042        assert_eq!(result.len(), 0);
1043    }
1044
1045    #[test]
1046    fn test_shortcut_reference_valid() {
1047        let rule = MD052ReferenceLinkImages::new();
1048        let content = "[ref]\n\n[ref]: https://example.com";
1049        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1050        let result = rule.check(&ctx).unwrap();
1051
1052        assert_eq!(result.len(), 0);
1053    }
1054
1055    #[test]
1056    fn test_shortcut_reference_undefined_with_shortcut_syntax_enabled() {
1057        // Shortcut syntax checking is disabled by default
1058        // Enable it to test undefined shortcut references
1059        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1060            shortcut_syntax: true,
1061            ..Default::default()
1062        });
1063        let content = "[undefined]";
1064        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1065        let result = rule.check(&ctx).unwrap();
1066
1067        assert_eq!(result.len(), 1);
1068        assert!(result[0].message.contains("Reference 'undefined' not found"));
1069    }
1070
1071    #[test]
1072    fn test_shortcut_reference_not_checked_by_default() {
1073        // By default, shortcut references are NOT checked (matches markdownlint behavior)
1074        let rule = MD052ReferenceLinkImages::new();
1075        let content = "[undefined]";
1076        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1077        let result = rule.check(&ctx).unwrap();
1078
1079        // Should be 0 because shortcut_syntax is false by default
1080        assert_eq!(result.len(), 0);
1081    }
1082
1083    #[test]
1084    fn test_inline_links_ignored() {
1085        let rule = MD052ReferenceLinkImages::new();
1086        let content = "[text](https://example.com)";
1087        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1088        let result = rule.check(&ctx).unwrap();
1089
1090        assert_eq!(result.len(), 0);
1091    }
1092
1093    #[test]
1094    fn test_inline_images_ignored() {
1095        let rule = MD052ReferenceLinkImages::new();
1096        let content = "![alt](image.jpg)";
1097        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1098        let result = rule.check(&ctx).unwrap();
1099
1100        assert_eq!(result.len(), 0);
1101    }
1102
1103    #[test]
1104    fn test_references_in_code_blocks_ignored() {
1105        let rule = MD052ReferenceLinkImages::new();
1106        let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
1107        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1108        let result = rule.check(&ctx).unwrap();
1109
1110        assert_eq!(result.len(), 0);
1111    }
1112
1113    #[test]
1114    fn test_references_in_inline_code_ignored() {
1115        let rule = MD052ReferenceLinkImages::new();
1116        let content = "`[undefined]`";
1117        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1118        let result = rule.check(&ctx).unwrap();
1119
1120        // References inside inline code spans should be ignored
1121        assert_eq!(result.len(), 0);
1122    }
1123
1124    #[test]
1125    fn test_comprehensive_inline_code_detection() {
1126        // Enable shortcut_syntax to test comprehensive detection
1127        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1128            shortcut_syntax: true,
1129            ..Default::default()
1130        });
1131        let content = r#"# Test
1132
1133This `[inside]` should be ignored.
1134This [outside] should be flagged.
1135Reference links `[text][ref]` in code are ignored.
1136Regular reference [text][missing] should be flagged.
1137Images `![alt][img]` in code are ignored.
1138Regular image ![alt][badimg] should be flagged.
1139
1140Multiple `[one]` and `[two]` in code ignored, but [three] is not.
1141
1142```
1143[code block content] should be ignored
1144```
1145
1146`Multiple [refs] in [same] code span` ignored."#;
1147
1148        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1149        let result = rule.check(&ctx).unwrap();
1150
1151        // Should only flag: outside, missing, badimg, three (4 total)
1152        assert_eq!(result.len(), 4);
1153
1154        let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
1155        assert!(messages.iter().any(|m| m.contains("outside")));
1156        assert!(messages.iter().any(|m| m.contains("missing")));
1157        assert!(messages.iter().any(|m| m.contains("badimg")));
1158        assert!(messages.iter().any(|m| m.contains("three")));
1159
1160        // Should NOT flag any references inside code spans
1161        assert!(!messages.iter().any(|m| m.contains("inside")));
1162        assert!(!messages.iter().any(|m| m.contains("one")));
1163        assert!(!messages.iter().any(|m| m.contains("two")));
1164        assert!(!messages.iter().any(|m| m.contains("refs")));
1165        assert!(!messages.iter().any(|m| m.contains("same")));
1166    }
1167
1168    #[test]
1169    fn test_multiple_undefined_references() {
1170        let rule = MD052ReferenceLinkImages::new();
1171        let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
1172        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1173        let result = rule.check(&ctx).unwrap();
1174
1175        assert_eq!(result.len(), 3);
1176        assert!(result[0].message.contains("ref1"));
1177        assert!(result[1].message.contains("ref2"));
1178        assert!(result[2].message.contains("ref3"));
1179    }
1180
1181    #[test]
1182    fn test_mixed_valid_and_undefined() {
1183        let rule = MD052ReferenceLinkImages::new();
1184        let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
1185        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1186        let result = rule.check(&ctx).unwrap();
1187
1188        assert_eq!(result.len(), 1);
1189        assert!(result[0].message.contains("missing"));
1190    }
1191
1192    #[test]
1193    fn test_empty_reference() {
1194        let rule = MD052ReferenceLinkImages::new();
1195        let content = "[text][]\n\n[ref]: https://example.com";
1196        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1197        let result = rule.check(&ctx).unwrap();
1198
1199        // Empty reference should use the link text as reference
1200        assert_eq!(result.len(), 1);
1201    }
1202
1203    #[test]
1204    fn test_escaped_brackets_ignored() {
1205        let rule = MD052ReferenceLinkImages::new();
1206        let content = "\\[not a link\\]";
1207        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1208        let result = rule.check(&ctx).unwrap();
1209
1210        assert_eq!(result.len(), 0);
1211    }
1212
1213    #[test]
1214    fn test_list_items_ignored() {
1215        let rule = MD052ReferenceLinkImages::new();
1216        let content = "- [undefined]\n* [another]\n+ [third]";
1217        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1218        let result = rule.check(&ctx).unwrap();
1219
1220        // List items that look like shortcut references should be ignored
1221        assert_eq!(result.len(), 0);
1222    }
1223
1224    #[test]
1225    fn test_output_example_section_ignored() {
1226        // Enable shortcut_syntax to test example section handling
1227        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1228            shortcut_syntax: true,
1229            ..Default::default()
1230        });
1231        let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
1232        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1233        let result = rule.check(&ctx).unwrap();
1234
1235        // Only the reference outside the Output section should be flagged
1236        assert_eq!(result.len(), 1);
1237        assert!(result[0].message.contains("missing"));
1238    }
1239
1240    #[test]
1241    fn test_reference_definitions_in_code_blocks_ignored() {
1242        let rule = MD052ReferenceLinkImages::new();
1243        let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
1244        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1245        let result = rule.check(&ctx).unwrap();
1246
1247        // Reference defined in code block should not count
1248        assert_eq!(result.len(), 1);
1249        assert!(result[0].message.contains("ref"));
1250    }
1251
1252    #[test]
1253    fn test_multiple_references_to_same_undefined() {
1254        let rule = MD052ReferenceLinkImages::new();
1255        let content = "[first][missing] [second][missing] [third][missing]";
1256        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1257        let result = rule.check(&ctx).unwrap();
1258
1259        // Should only report once per unique reference
1260        assert_eq!(result.len(), 1);
1261        assert!(result[0].message.contains("missing"));
1262    }
1263
1264    #[test]
1265    fn test_reference_with_special_characters() {
1266        let rule = MD052ReferenceLinkImages::new();
1267        let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
1268        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1269        let result = rule.check(&ctx).unwrap();
1270
1271        assert_eq!(result.len(), 0);
1272    }
1273
1274    #[test]
1275    fn test_issue_51_html_attribute_not_reference() {
1276        // Test for issue #51 - HTML attributes with square brackets shouldn't be treated as references
1277        let rule = MD052ReferenceLinkImages::new();
1278        let content = r#"# Example
1279
1280## Test
1281
1282Want to fill out this form?
1283
1284<form method="post">
1285    <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
1286</form>"#;
1287        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1288        let result = rule.check(&ctx).unwrap();
1289
1290        assert_eq!(
1291            result.len(),
1292            0,
1293            "HTML attributes with square brackets should not be flagged as undefined references"
1294        );
1295    }
1296
1297    #[test]
1298    fn test_extract_references() {
1299        let rule = MD052ReferenceLinkImages::new();
1300        let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
1301        let refs = rule.extract_references(content, false);
1302
1303        assert_eq!(refs.len(), 3);
1304        assert!(refs.contains("ref1"));
1305        assert!(refs.contains("ref2"));
1306        assert!(refs.contains("ref3"));
1307    }
1308
1309    #[test]
1310    fn test_inline_code_not_flagged() {
1311        // Enable shortcut_syntax to test inline code detection
1312        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1313            shortcut_syntax: true,
1314            ..Default::default()
1315        });
1316
1317        // Test that arrays in inline code are not flagged as references
1318        let content = r#"# Test
1319
1320Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.
1321
1322Also, `[todo]` is not a reference link.
1323
1324But this [reference] should be flagged.
1325
1326And this `[inline code]` should not be flagged.
1327"#;
1328
1329        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1330        let warnings = rule.check(&ctx).unwrap();
1331
1332        // Should only flag [reference], not the ones in backticks
1333        assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
1334        assert!(warnings[0].message.contains("'reference'"));
1335    }
1336
1337    #[test]
1338    fn test_code_block_references_ignored() {
1339        // Enable shortcut_syntax to test code block handling
1340        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1341            shortcut_syntax: true,
1342            ..Default::default()
1343        });
1344
1345        let content = r#"# Test
1346
1347```markdown
1348[undefined] reference in code block
1349![undefined] image in code block
1350```
1351
1352[real-undefined] reference outside
1353"#;
1354
1355        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1356        let warnings = rule.check(&ctx).unwrap();
1357
1358        // Should only flag [real-undefined], not the ones in code block
1359        assert_eq!(warnings.len(), 1);
1360        assert!(warnings[0].message.contains("'real-undefined'"));
1361    }
1362
1363    #[test]
1364    fn test_html_comments_ignored() {
1365        // Test for issue #20 - MD052 should not flag content inside HTML comments
1366        let rule = MD052ReferenceLinkImages::new();
1367
1368        // Test the exact case from issue #20
1369        let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
1370<!--- set_env EDITOR 'python3 fake_editor.py' -->
1371
1372```bash
1373$ python3 vote.py
13743 votes for: 2
13752 votes for: 3, 4
1376```"#;
1377        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1378        let result = rule.check(&ctx).unwrap();
1379        assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");
1380
1381        // Test various reference patterns inside HTML comments
1382        let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
1383Normal [text][undefined]
1384<!-- Another [comment][with] references -->"#;
1385        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1386        let result = rule.check(&ctx).unwrap();
1387        assert_eq!(
1388            result.len(),
1389            1,
1390            "Should only flag the undefined reference outside comments"
1391        );
1392        assert!(result[0].message.contains("undefined"));
1393
1394        // Test multi-line HTML comments
1395        let content = r#"<!--
1396[ref1]
1397[ref2][ref3]
1398-->
1399[actual][undefined]"#;
1400        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1401        let result = rule.check(&ctx).unwrap();
1402        assert_eq!(
1403            result.len(),
1404            1,
1405            "Should not flag references in multi-line HTML comments"
1406        );
1407        assert!(result[0].message.contains("undefined"));
1408
1409        // Test mixed scenarios
1410        let content = r#"<!-- Comment with [1:] pattern -->
1411Valid [link][ref]
1412<!-- More [refs][in][comments] -->
1413![image][missing]
1414
1415[ref]: https://example.com"#;
1416        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1417        let result = rule.check(&ctx).unwrap();
1418        assert_eq!(result.len(), 1, "Should only flag missing image reference");
1419        assert!(result[0].message.contains("missing"));
1420    }
1421
1422    #[test]
1423    fn test_frontmatter_ignored() {
1424        // Test for issue #24 - MD052 should not flag content inside frontmatter
1425        // Enable shortcut_syntax to test frontmatter handling
1426        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1427            shortcut_syntax: true,
1428            ..Default::default()
1429        });
1430
1431        // Test YAML frontmatter with arrays and references
1432        let content = r#"---
1433layout: post
1434title: "My Jekyll Post"
1435date: 2023-01-01
1436categories: blog
1437tags: ["test", "example"]
1438author: John Doe
1439---
1440
1441# My Blog Post
1442
1443This is the actual markdown content that should be linted.
1444
1445[undefined] reference should be flagged.
1446
1447## Section 1
1448
1449Some content here."#;
1450        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1451        let result = rule.check(&ctx).unwrap();
1452
1453        // Should only flag [undefined] in the content, not the ["test", "example"] array in frontmatter
1454        assert_eq!(
1455            result.len(),
1456            1,
1457            "Should only flag the undefined reference outside frontmatter"
1458        );
1459        assert!(result[0].message.contains("undefined"));
1460
1461        // Test TOML frontmatter
1462        let content = r#"+++
1463title = "My Post"
1464tags = ["example", "test"]
1465+++
1466
1467# Content
1468
1469[missing] reference should be flagged."#;
1470        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1471        let result = rule.check(&ctx).unwrap();
1472        assert_eq!(
1473            result.len(),
1474            1,
1475            "Should only flag the undefined reference outside TOML frontmatter"
1476        );
1477        assert!(result[0].message.contains("missing"));
1478    }
1479
1480    #[test]
1481    fn test_mkdocs_snippet_markers_not_flagged() {
1482        // Test for issue #68 - MkDocs snippet selection markers should not be flagged as undefined references
1483        // Enable shortcut_syntax to test snippet marker handling
1484        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1485            shortcut_syntax: true,
1486            ..Default::default()
1487        });
1488
1489        // Test snippet section markers
1490        let content = r#"# Document with MkDocs Snippets
1491
1492Some content here.
1493
1494# -8<- [start:remote-content]
1495
1496This is the remote content section.
1497
1498# -8<- [end:remote-content]
1499
1500More content here.
1501
1502<!-- --8<-- [start:another-section] -->
1503Content in another section
1504<!-- --8<-- [end:another-section] -->"#;
1505        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
1506        let result = rule.check(&ctx).unwrap();
1507
1508        // Should not flag any snippet markers as undefined references
1509        assert_eq!(
1510            result.len(),
1511            0,
1512            "Should not flag MkDocs snippet markers as undefined references"
1513        );
1514
1515        // Test that the snippet marker lines are properly skipped
1516        // but regular undefined references on other lines are still caught
1517        let content = r#"# Document
1518
1519# -8<- [start:section]
1520Content with [reference] inside snippet section
1521# -8<- [end:section]
1522
1523Regular [undefined] reference outside snippet markers."#;
1524        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
1525        let result = rule.check(&ctx).unwrap();
1526
1527        assert_eq!(
1528            result.len(),
1529            2,
1530            "Should flag undefined references but skip snippet marker lines"
1531        );
1532        // The references inside the content should be flagged, but not start: and end:
1533        assert!(result[0].message.contains("reference"));
1534        assert!(result[1].message.contains("undefined"));
1535
1536        // Test in standard mode - should flag the markers as undefined
1537        let content = r#"# Document
1538
1539# -8<- [start:section]
1540# -8<- [end:section]"#;
1541        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1542        let result = rule.check(&ctx).unwrap();
1543
1544        assert_eq!(
1545            result.len(),
1546            2,
1547            "In standard mode, snippet markers should be flagged as undefined references"
1548        );
1549    }
1550
1551    #[test]
1552    fn test_pandoc_citations_not_flagged() {
1553        // Test that Pandoc/RMarkdown/Quarto citation syntax is not flagged
1554        // Enable shortcut_syntax to test citation handling
1555        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1556            shortcut_syntax: true,
1557            ..Default::default()
1558        });
1559
1560        let content = r#"# Research Paper
1561
1562We are using the **bookdown** package [@R-bookdown] in this sample book.
1563This was built on top of R Markdown and **knitr** [@xie2015].
1564
1565Multiple citations [@citation1; @citation2; @citation3] are also supported.
1566
1567Regular [undefined] reference should still be flagged.
1568"#;
1569        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1570        let result = rule.check(&ctx).unwrap();
1571
1572        // Should only flag the undefined reference, not the citations
1573        assert_eq!(
1574            result.len(),
1575            1,
1576            "Should only flag the undefined reference, not Pandoc citations"
1577        );
1578        assert!(result[0].message.contains("undefined"));
1579    }
1580
1581    #[test]
1582    fn test_pandoc_inline_footnotes_not_flagged() {
1583        // Test that Pandoc inline footnote syntax is not flagged
1584        // Enable shortcut_syntax to test inline footnote handling
1585        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1586            shortcut_syntax: true,
1587            ..Default::default()
1588        });
1589
1590        let content = r#"# Math Document
1591
1592You can use math in footnotes like this^[where we mention $p = \frac{a}{b}$].
1593
1594Another footnote^[with some text and a [link](https://example.com)].
1595
1596But this [reference] without ^ should be flagged.
1597"#;
1598        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1599        let result = rule.check(&ctx).unwrap();
1600
1601        // Should only flag the reference without ^
1602        assert_eq!(
1603            result.len(),
1604            1,
1605            "Should only flag the regular reference, not inline footnotes"
1606        );
1607        assert!(result[0].message.contains("reference"));
1608    }
1609
1610    #[test]
1611    fn test_github_alerts_not_flagged() {
1612        // Test for issue #60 - GitHub alerts should not be flagged as undefined references
1613        // Enable shortcut_syntax to test GitHub alert handling
1614        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1615            shortcut_syntax: true,
1616            ..Default::default()
1617        });
1618
1619        // Test various GitHub alert types
1620        let content = r#"# Document with GitHub Alerts
1621
1622> [!NOTE]
1623> This is a note alert.
1624
1625> [!TIP]
1626> This is a tip alert.
1627
1628> [!IMPORTANT]
1629> This is an important alert.
1630
1631> [!WARNING]
1632> This is a warning alert.
1633
1634> [!CAUTION]
1635> This is a caution alert.
1636
1637Regular content with [undefined] reference."#;
1638        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1639        let result = rule.check(&ctx).unwrap();
1640
1641        // Should only flag the undefined reference, not the GitHub alerts
1642        assert_eq!(
1643            result.len(),
1644            1,
1645            "Should only flag the undefined reference, not GitHub alerts"
1646        );
1647        assert!(result[0].message.contains("undefined"));
1648        assert_eq!(result[0].line, 18); // Line with [undefined]
1649
1650        // Test GitHub alerts with additional content
1651        let content = r#"> [!TIP]
1652> Here's a useful tip about [something].
1653> Multiple lines are allowed.
1654
1655[something] is mentioned but not defined."#;
1656        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1657        let result = rule.check(&ctx).unwrap();
1658
1659        // Should flag only the [something] outside blockquotes
1660        // The test shows we're only catching one, which might be correct behavior
1661        // matching markdownlint's approach
1662        assert_eq!(result.len(), 1, "Should flag undefined reference");
1663        assert!(result[0].message.contains("something"));
1664
1665        // Test GitHub alerts with proper references
1666        let content = r#"> [!NOTE]
1667> See [reference] for more details.
1668
1669[reference]: https://example.com"#;
1670        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1671        let result = rule.check(&ctx).unwrap();
1672
1673        // Should not flag anything - [!NOTE] is GitHub alert and [reference] is defined
1674        assert_eq!(result.len(), 0, "Should not flag GitHub alerts or defined references");
1675    }
1676
1677    #[test]
1678    fn test_ignore_config() {
1679        // Test that user-configured ignore list is respected
1680        let config = MD052Config {
1681            shortcut_syntax: true,
1682            ignore: vec!["Vec".to_string(), "HashMap".to_string(), "Option".to_string()],
1683        };
1684        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1685
1686        let content = r#"# Document with Custom Types
1687
1688Use [Vec] for dynamic arrays.
1689Use [HashMap] for key-value storage.
1690Use [Option] for nullable values.
1691Use [Result] for error handling.
1692"#;
1693        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1694        let result = rule.check(&ctx).unwrap();
1695
1696        // Should only flag [Result] because it's not in ignore
1697        assert_eq!(result.len(), 1, "Should only flag names not in ignore");
1698        assert!(result[0].message.contains("Result"));
1699    }
1700
1701    #[test]
1702    fn test_ignore_case_insensitive() {
1703        // Test that ignore list is case-insensitive
1704        let config = MD052Config {
1705            shortcut_syntax: true,
1706            ignore: vec!["Vec".to_string()],
1707        };
1708        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1709
1710        let content = r#"# Case Insensitivity Test
1711
1712[Vec] should be ignored.
1713[vec] should also be ignored (different case, same match).
1714[VEC] should also be ignored (different case, same match).
1715[undefined] should be flagged (not in ignore list).
1716"#;
1717        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1718        let result = rule.check(&ctx).unwrap();
1719
1720        // Should only flag [undefined] because ignore is case-insensitive
1721        assert_eq!(result.len(), 1, "Should only flag non-ignored reference");
1722        assert!(result[0].message.contains("undefined"));
1723    }
1724
1725    #[test]
1726    fn test_ignore_empty_by_default() {
1727        // Test that empty ignore list doesn't affect existing behavior
1728        let rule = MD052ReferenceLinkImages::new();
1729
1730        let content = "[text][undefined]";
1731        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1732        let result = rule.check(&ctx).unwrap();
1733
1734        // Should still flag undefined references
1735        assert_eq!(result.len(), 1);
1736        assert!(result[0].message.contains("undefined"));
1737    }
1738
1739    #[test]
1740    fn test_ignore_with_reference_links() {
1741        // Test ignore list with full reference link syntax [text][ref]
1742        let config = MD052Config {
1743            shortcut_syntax: false,
1744            ignore: vec!["CustomType".to_string()],
1745        };
1746        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1747
1748        let content = r#"# Test
1749
1750See [documentation][CustomType] for details.
1751See [other docs][MissingRef] for more.
1752"#;
1753        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1754        let result = rule.check(&ctx).unwrap();
1755
1756        // Debug: print warnings if test fails
1757        for (i, w) in result.iter().enumerate() {
1758            eprintln!("Warning {}: {}", i, w.message);
1759        }
1760
1761        // Should flag [MissingRef] but not [CustomType]
1762        // Note: reference IDs are lowercased in the message
1763        assert_eq!(result.len(), 1, "Expected 1 warning, got {}", result.len());
1764        assert!(
1765            result[0].message.contains("missingref"),
1766            "Expected 'missingref' in message: {}",
1767            result[0].message
1768        );
1769    }
1770
1771    #[test]
1772    fn test_ignore_multiple() {
1773        // Test multiple ignored names work correctly
1774        let config = MD052Config {
1775            shortcut_syntax: true,
1776            ignore: vec![
1777                "i32".to_string(),
1778                "u64".to_string(),
1779                "String".to_string(),
1780                "Arc".to_string(),
1781                "Mutex".to_string(),
1782            ],
1783        };
1784        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1785
1786        let content = r#"# Types
1787
1788[i32] [u64] [String] [Arc] [Mutex] [Box]
1789"#;
1790        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1791        let result = rule.check(&ctx).unwrap();
1792
1793        // Note: i32 and u64 are already in the hardcoded list, so they'd be skipped anyway
1794        // String is NOT in the hardcoded list, so we test that the user config works
1795        // [Box] should be flagged (not in ignore)
1796        assert_eq!(result.len(), 1);
1797        assert!(result[0].message.contains("Box"));
1798    }
1799}