Skip to main content

rumdl_lib/rules/
md052_reference_links_images.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::SHORTCUT_REF_REGEX;
5use crate::utils::skip_context::{is_in_math_context, is_in_table_cell};
6use regex::Regex;
7use std::collections::{HashMap, HashSet};
8use std::sync::LazyLock;
9
10mod md052_config;
11use md052_config::MD052Config;
12
13// Pattern to match reference definitions [ref]: url
14// Note: \S* instead of \S+ to allow empty definitions like [ref]:
15// The capturing group handles nested brackets to support cases like [`union[t, none]`]:
16static REF_REGEX: LazyLock<Regex> =
17    LazyLock::new(|| Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap());
18
19// Pattern for list items to exclude from reference checks (standard regex is fine)
20static LIST_ITEM_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap());
21
22// Pattern for code blocks (standard regex is fine)
23static FENCED_CODE_START: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap());
24
25// Pattern for output example sections (standard regex is fine)
26static OUTPUT_EXAMPLE_START: LazyLock<Regex> =
27    LazyLock::new(|| Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap());
28
29// Pattern for GitHub alerts/callouts in blockquotes (e.g., > [!NOTE], > [!TIP], etc.)
30// Extended to include additional common alert types
31static GITHUB_ALERT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
32    Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]")
33        .unwrap()
34});
35
36// Pattern to detect URLs that may contain brackets (IPv6, API endpoints, etc.)
37// This pattern specifically looks for:
38// - IPv6 addresses: https://[::1] or https://[2001:db8::1]
39// - IPv6 with zone IDs: https://[fe80::1%eth0]
40// - IPv6 mixed notation: https://[::ffff:192.0.2.1]
41// - API paths with array notation: https://api.example.com/users[0]
42// But NOT markdown reference links that happen to follow URLs
43static URL_WITH_BRACKETS: LazyLock<Regex> =
44    LazyLock::new(|| Regex::new(r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])").unwrap());
45
46/// Rule MD052: Reference links and images should use reference style
47///
48/// See [docs/md052.md](../../docs/md052.md) for full documentation, configuration, and examples.
49///
50/// This rule is triggered when a reference link or image uses a reference that isn't defined.
51///
52/// ## Configuration
53///
54/// - `shortcut-syntax`: Whether to check shortcut reference syntax `[text]` (default: false)
55///
56/// By default, only full (`[text][ref]`) and collapsed (`[text][]`) reference syntax is checked.
57/// Shortcut syntax is ambiguous because `[text]` could be a reference link OR just text in brackets.
58#[derive(Clone, Default)]
59pub struct MD052ReferenceLinkImages {
60    config: MD052Config,
61}
62
63impl MD052ReferenceLinkImages {
64    pub fn new() -> Self {
65        Self {
66            config: MD052Config::default(),
67        }
68    }
69
70    pub fn from_config_struct(config: MD052Config) -> Self {
71        Self { config }
72    }
73
74    /// Strip surrounding backticks from a string
75    /// Used for MkDocs auto-reference detection where `module.Class` should be treated as module.Class
76    fn strip_backticks(s: &str) -> &str {
77        s.trim_start_matches('`').trim_end_matches('`')
78    }
79
80    /// Check if a string is a valid Python identifier
81    /// Used for MkDocs auto-reference detection where single-word backtick-wrapped identifiers
82    /// like `str`, `int`, etc. should be accepted as valid auto-references
83    fn is_valid_python_identifier(s: &str) -> bool {
84        if s.is_empty() {
85            return false;
86        }
87        let first_char = s.chars().next().unwrap();
88        if !first_char.is_ascii_alphabetic() && first_char != '_' {
89            return false;
90        }
91        s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
92    }
93
94    /// Check if text matches a known non-reference pattern that should be skipped.
95    ///
96    /// These are deterministic patterns from markdown extensions or code examples,
97    /// not heuristics. Returns true for:
98    /// - User-configured names via `ignore` config option
99    /// - Markdown extensions: [^footnote], [@citation], [!alert], [TOC]
100    /// - Programming syntax: [T], [null], [i32], ["string"]
101    /// - Descriptive text: [default: value], [0-9]
102    fn is_known_non_reference_pattern(&self, text: &str) -> bool {
103        // Check user-configured ignore list first (case-insensitive match)
104        // Reference IDs are normalized to lowercase during parsing,
105        // so we use case-insensitive comparison for user convenience
106        if self.config.ignore.iter().any(|p| p.eq_ignore_ascii_case(text)) {
107            return true;
108        }
109        // Skip numeric patterns (array indices, ranges)
110        if text.chars().all(|c| c.is_ascii_digit()) {
111            return true;
112        }
113
114        // Skip numeric ranges like [1:3], [0:10], etc.
115        if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
116            return true;
117        }
118
119        // Skip patterns that look like config sections [tool.something], [section.subsection]
120        // But not if they contain other non-alphanumeric chars like hyphens, underscores, or backticks
121        // Backticks indicate intentional code formatting in a reference name (e.g., [`module.Class`])
122        if text.contains('.')
123            && !text.contains(' ')
124            && !text.contains('-')
125            && !text.contains('_')
126            && !text.contains('`')
127        {
128            // Config sections typically have dots, no spaces, and only alphanumeric + dots
129            return true;
130        }
131
132        // Skip glob/wildcard patterns like [*], [...], [**]
133        if text == "*" || text == "..." || text == "**" {
134            return true;
135        }
136
137        // Skip patterns that look like file paths [dir/file], [src/utils]
138        if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
139            return true;
140        }
141
142        // Skip programming type annotations like [int, str], [Dict[str, Any]]
143        // These typically have commas and/or nested brackets
144        if text.contains(',') || text.contains('[') || text.contains(']') {
145            // Check if it looks like a type annotation pattern
146            return true;
147        }
148
149        // Note: We don't filter out patterns with backticks because backticks in reference names
150        // are valid markdown syntax, e.g., [`dataclasses.InitVar`] is a valid reference name
151
152        // Skip patterns that look like module/class paths ONLY if they don't have backticks
153        // Backticks indicate intentional code formatting in a reference name
154        // e.g., skip [dataclasses.initvar] but allow [`typing.ClassVar`]
155        if !text.contains('`')
156            && text.contains('.')
157            && !text.contains(' ')
158            && !text.contains('-')
159            && !text.contains('_')
160        {
161            return true;
162        }
163
164        // Note: We don't filter based on word count anymore because legitimate references
165        // can have many words, like "python language reference for import statements"
166        // Word count filtering was causing false positives where valid references were
167        // being incorrectly flagged as unused
168
169        // Skip patterns that are just punctuation or operators
170        if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
171            return true;
172        }
173
174        // Skip very short non-word patterns (likely operators or syntax)
175        if text.len() <= 2 && !text.chars().all(|c| c.is_alphabetic()) {
176            return true;
177        }
178
179        // Skip quoted patterns like ["E501"], ["ALL"], ["E", "F"]
180        if (text.starts_with('"') && text.ends_with('"'))
181            || (text.starts_with('\'') && text.ends_with('\''))
182            || text.contains('"')
183            || text.contains('\'')
184        {
185            return true;
186        }
187
188        // Skip descriptive patterns with colon like [default: the project root]
189        // But allow simple numeric ranges which are handled above
190        if text.contains(':') && text.contains(' ') {
191            return true;
192        }
193
194        // Skip alert/admonition patterns like [!WARN], [!NOTE], etc.
195        if text.starts_with('!') {
196            return true;
197        }
198
199        // Skip footnote syntax like [^1], [^note], etc.
200        // Footnotes start with ^ and are a common markdown extension
201        if text.starts_with('^') {
202            return true;
203        }
204
205        // Skip Pandoc/RMarkdown/Quarto citation syntax like [@citation-key]
206        // Citations in these formats start with @ inside brackets
207        if text.starts_with('@') {
208            return true;
209        }
210
211        // Skip table of contents markers like [TOC]
212        // Used by Python-Markdown and other processors
213        if text == "TOC" {
214            return true;
215        }
216
217        // Skip single uppercase letters (likely type parameters) like [T], [U], [K], [V]
218        if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
219            return true;
220        }
221
222        // Skip common programming type names, literals, and short identifiers
223        // that are likely not markdown references
224        let common_non_refs = [
225            // Programming types
226            "object",
227            "Object",
228            "any",
229            "Any",
230            "inv",
231            "void",
232            "bool",
233            "int",
234            "float",
235            "str",
236            "char",
237            "i8",
238            "i16",
239            "i32",
240            "i64",
241            "i128",
242            "isize",
243            "u8",
244            "u16",
245            "u32",
246            "u64",
247            "u128",
248            "usize",
249            "f32",
250            "f64",
251            // JavaScript/JSON literals (excluding "undefined" which is too ambiguous)
252            "null",
253            "true",
254            "false",
255            "NaN",
256            "Infinity",
257            // Common JavaScript output patterns
258            "object Object",
259        ];
260
261        if common_non_refs.contains(&text) {
262            return true;
263        }
264
265        false
266    }
267
268    /// Check if a position is inside any code span
269    fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
270        code_spans
271            .iter()
272            .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
273    }
274
275    /// Check if a byte position is within an HTML tag
276    fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
277        // Check HTML tags
278        for html_tag in ctx.html_tags().iter() {
279            if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
280                return true;
281            }
282        }
283        false
284    }
285
286    fn extract_references(&self, content: &str, mkdocs_mode: bool) -> HashSet<String> {
287        use crate::config::MarkdownFlavor;
288        use crate::utils::skip_context::is_mkdocs_snippet_line;
289
290        let mut references = HashSet::new();
291        let mut in_code_block = false;
292        let mut code_fence_marker = String::new();
293
294        for line in content.lines() {
295            // Skip lines that look like MkDocs snippet markers (only in MkDocs mode)
296            if is_mkdocs_snippet_line(
297                line,
298                if mkdocs_mode {
299                    MarkdownFlavor::MkDocs
300                } else {
301                    MarkdownFlavor::Standard
302                },
303            ) {
304                continue;
305            }
306            // Handle code block boundaries
307            if let Some(cap) = FENCED_CODE_START.captures(line) {
308                if let Some(fence) = cap.get(2) {
309                    // Get the fence marker (``` or ~~~) without the indentation
310                    let fence_str = fence.as_str();
311                    if !in_code_block {
312                        in_code_block = true;
313                        code_fence_marker = fence_str.to_string();
314                    } else if line.trim_start().starts_with(&code_fence_marker) {
315                        // Check if this could be a closing fence
316                        let trimmed = line.trim_start();
317                        // A closing fence should be just the fence characters, possibly with trailing whitespace
318                        if trimmed.starts_with(&code_fence_marker) {
319                            let after_fence = &trimmed[code_fence_marker.len()..];
320                            if after_fence.trim().is_empty() {
321                                in_code_block = false;
322                                code_fence_marker.clear();
323                            }
324                        }
325                    }
326                }
327                continue;
328            }
329
330            // Skip lines in code blocks
331            if in_code_block {
332                continue;
333            }
334
335            // Check for abbreviation syntax (*[ABBR]: Definition) and skip it
336            // Abbreviations are not reference links and should not be tracked
337            if line.trim_start().starts_with("*[") {
338                continue;
339            }
340
341            if let Some(cap) = REF_REGEX.captures(line) {
342                // Store references in lowercase for case-insensitive comparison
343                if let Some(reference) = cap.get(1) {
344                    references.insert(reference.as_str().to_lowercase());
345                }
346            }
347        }
348
349        references
350    }
351
352    fn find_undefined_references(
353        &self,
354        references: &HashSet<String>,
355        ctx: &crate::lint_context::LintContext,
356        mkdocs_mode: bool,
357    ) -> Vec<(usize, usize, usize, String)> {
358        let mut undefined = Vec::new();
359        let mut reported_refs = HashMap::new();
360        let mut in_code_block = false;
361        let mut code_fence_marker = String::new();
362        let mut in_example_section = false;
363
364        // Get code spans once for the entire function
365        let code_spans = ctx.code_spans();
366
367        // Use cached data for reference links and images
368        for link in &ctx.links {
369            if !link.is_reference {
370                continue; // Skip inline links
371            }
372
373            // Skip links inside Jinja templates
374            if ctx.is_in_jinja_range(link.byte_offset) {
375                continue;
376            }
377
378            // Skip links inside code spans
379            if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
380                continue;
381            }
382
383            // Skip links inside HTML comments (uses pre-computed ranges)
384            if ctx.is_in_html_comment(link.byte_offset) {
385                continue;
386            }
387
388            // Skip links inside HTML tags
389            if Self::is_in_html_tag(ctx, link.byte_offset) {
390                continue;
391            }
392
393            // Skip links inside math contexts
394            if is_in_math_context(ctx, link.byte_offset) {
395                continue;
396            }
397
398            // Skip links inside table cells
399            if is_in_table_cell(ctx, link.line, link.start_col) {
400                continue;
401            }
402
403            // Skip links inside frontmatter
404            if ctx.line_info(link.line).is_some_and(|info| info.in_front_matter) {
405                continue;
406            }
407
408            // Skip Quarto/Pandoc citations ([@citation], @citation)
409            // Citations look like reference links but are bibliography references
410            if ctx.flavor == crate::config::MarkdownFlavor::Quarto && ctx.is_in_citation(link.byte_offset) {
411                continue;
412            }
413
414            // Skip links inside shortcodes ({{< ... >}} or {{% ... %}})
415            // Shortcodes may contain template syntax that looks like reference links
416            if ctx.is_in_shortcode(link.byte_offset) {
417                continue;
418            }
419
420            if let Some(ref_id) = &link.reference_id {
421                let reference_lower = ref_id.to_lowercase();
422
423                // Skip known non-reference patterns (markdown extensions, code examples)
424                if self.is_known_non_reference_pattern(ref_id) {
425                    continue;
426                }
427
428                // Skip MkDocs auto-references if in MkDocs mode
429                // Check both the reference_id and the link text for shorthand references
430                // Strip backticks since MkDocs resolves `module.Class` as module.Class
431                let stripped_ref = Self::strip_backticks(ref_id);
432                let stripped_text = Self::strip_backticks(&link.text);
433                if mkdocs_mode
434                    && (is_mkdocs_auto_reference(stripped_ref)
435                        || is_mkdocs_auto_reference(stripped_text)
436                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
437                        || (link.text.as_ref() != stripped_text && Self::is_valid_python_identifier(stripped_text)))
438                {
439                    continue;
440                }
441
442                // Check if reference is defined
443                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
444                    // Check if the line is in an example section or list item
445                    if let Some(line_info) = ctx.line_info(link.line) {
446                        if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
447                            in_example_section = true;
448                            continue;
449                        }
450
451                        if in_example_section {
452                            continue;
453                        }
454
455                        // Skip list items
456                        if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
457                            continue;
458                        }
459
460                        // Skip lines that are HTML content
461                        let trimmed = line_info.content(ctx.content).trim_start();
462                        if trimmed.starts_with('<') {
463                            continue;
464                        }
465                    }
466
467                    let match_len = link.byte_end - link.byte_offset;
468                    undefined.push((link.line - 1, link.start_col, match_len, ref_id.to_string()));
469                    reported_refs.insert(reference_lower, true);
470                }
471            }
472        }
473
474        // Use cached data for reference images
475        for image in &ctx.images {
476            if !image.is_reference {
477                continue; // Skip inline images
478            }
479
480            // Skip images inside Jinja templates
481            if ctx.is_in_jinja_range(image.byte_offset) {
482                continue;
483            }
484
485            // Skip images inside code spans
486            if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
487                continue;
488            }
489
490            // Skip images inside HTML comments (uses pre-computed ranges)
491            if ctx.is_in_html_comment(image.byte_offset) {
492                continue;
493            }
494
495            // Skip images inside HTML tags
496            if Self::is_in_html_tag(ctx, image.byte_offset) {
497                continue;
498            }
499
500            // Skip images inside math contexts
501            if is_in_math_context(ctx, image.byte_offset) {
502                continue;
503            }
504
505            // Skip images inside table cells
506            if is_in_table_cell(ctx, image.line, image.start_col) {
507                continue;
508            }
509
510            // Skip images inside frontmatter
511            if ctx.line_info(image.line).is_some_and(|info| info.in_front_matter) {
512                continue;
513            }
514
515            if let Some(ref_id) = &image.reference_id {
516                let reference_lower = ref_id.to_lowercase();
517
518                // Skip known non-reference patterns (markdown extensions, code examples)
519                if self.is_known_non_reference_pattern(ref_id) {
520                    continue;
521                }
522
523                // Skip MkDocs auto-references if in MkDocs mode
524                // Check both the reference_id and the alt text for shorthand references
525                // Strip backticks since MkDocs resolves `module.Class` as module.Class
526                let stripped_ref = Self::strip_backticks(ref_id);
527                let stripped_alt = Self::strip_backticks(&image.alt_text);
528                if mkdocs_mode
529                    && (is_mkdocs_auto_reference(stripped_ref)
530                        || is_mkdocs_auto_reference(stripped_alt)
531                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
532                        || (image.alt_text.as_ref() != stripped_alt && Self::is_valid_python_identifier(stripped_alt)))
533                {
534                    continue;
535                }
536
537                // Check if reference is defined
538                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
539                    // Check if the line is in an example section or list item
540                    if let Some(line_info) = ctx.line_info(image.line) {
541                        if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
542                            in_example_section = true;
543                            continue;
544                        }
545
546                        if in_example_section {
547                            continue;
548                        }
549
550                        // Skip list items
551                        if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
552                            continue;
553                        }
554
555                        // Skip lines that are HTML content
556                        let trimmed = line_info.content(ctx.content).trim_start();
557                        if trimmed.starts_with('<') {
558                            continue;
559                        }
560                    }
561
562                    let match_len = image.byte_end - image.byte_offset;
563                    undefined.push((image.line - 1, image.start_col, match_len, ref_id.to_string()));
564                    reported_refs.insert(reference_lower, true);
565                }
566            }
567        }
568
569        // Build a set of byte ranges that are already covered by parsed links/images
570        let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
571
572        // Add ranges from parsed links
573        for link in &ctx.links {
574            covered_ranges.push((link.byte_offset, link.byte_end));
575        }
576
577        // Add ranges from parsed images
578        for image in &ctx.images {
579            covered_ranges.push((image.byte_offset, image.byte_end));
580        }
581
582        // Sort ranges by start position
583        covered_ranges.sort_by_key(|&(start, _)| start);
584
585        // Handle shortcut references [text] which aren't captured in ctx.links
586        // Only check these if shortcut_syntax is enabled (default: false)
587        // Shortcut syntax is ambiguous because [text] could be a reference link
588        // OR just text in brackets (like spec notation in quotes)
589        if !self.config.shortcut_syntax {
590            return undefined;
591        }
592
593        // Need to use regex for shortcut references
594        let lines = ctx.raw_lines();
595        in_example_section = false; // Reset for line-by-line processing
596
597        for (line_num, line) in lines.iter().enumerate() {
598            // Skip lines in frontmatter (convert 0-based to 1-based for line_info)
599            if ctx.line_info(line_num + 1).is_some_and(|info| info.in_front_matter) {
600                continue;
601            }
602
603            // Handle code blocks
604            if let Some(cap) = FENCED_CODE_START.captures(line) {
605                if let Some(fence) = cap.get(2) {
606                    // Get the fence marker (``` or ~~~) without the indentation
607                    let fence_str = fence.as_str();
608                    if !in_code_block {
609                        in_code_block = true;
610                        code_fence_marker = fence_str.to_string();
611                    } else if line.trim_start().starts_with(&code_fence_marker) {
612                        // Check if this could be a closing fence
613                        let trimmed = line.trim_start();
614                        // A closing fence should be just the fence characters, possibly with trailing whitespace
615                        if trimmed.starts_with(&code_fence_marker) {
616                            let after_fence = &trimmed[code_fence_marker.len()..];
617                            if after_fence.trim().is_empty() {
618                                in_code_block = false;
619                                code_fence_marker.clear();
620                            }
621                        }
622                    }
623                }
624                continue;
625            }
626
627            if in_code_block {
628                continue;
629            }
630
631            // Check for example sections
632            if OUTPUT_EXAMPLE_START.is_match(line) {
633                in_example_section = true;
634                continue;
635            }
636
637            if in_example_section {
638                // Check if we're exiting the example section (another heading)
639                if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
640                    in_example_section = false;
641                } else {
642                    continue;
643                }
644            }
645
646            // Skip list items
647            if LIST_ITEM_REGEX.is_match(line) {
648                continue;
649            }
650
651            // Skip lines that are HTML content
652            let trimmed_line = line.trim_start();
653            if trimmed_line.starts_with('<') {
654                continue;
655            }
656
657            // Skip GitHub alerts/callouts (e.g., > [!TIP])
658            if GITHUB_ALERT_REGEX.is_match(line) {
659                continue;
660            }
661
662            // Skip abbreviation definitions (*[ABBR]: Definition)
663            // These are not reference links and should not be checked
664            if trimmed_line.starts_with("*[") {
665                continue;
666            }
667
668            // Collect positions of brackets that are part of URLs (IPv6, etc.)
669            // so we can exclude them from reference checking
670            let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
671            for mat in URL_WITH_BRACKETS.find_iter(line) {
672                // Find all bracket pairs within this URL match
673                let url_str = mat.as_str();
674                let url_start = mat.start();
675
676                // Find brackets within the URL (e.g., in https://[::1]:8080)
677                let mut idx = 0;
678                while idx < url_str.len() {
679                    if let Some(bracket_start) = url_str[idx..].find('[') {
680                        let bracket_start_abs = url_start + idx + bracket_start;
681                        if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
682                            let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
683                            url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
684                            idx += bracket_start + bracket_end + 2;
685                        } else {
686                            break;
687                        }
688                    } else {
689                        break;
690                    }
691                }
692            }
693
694            // Check shortcut references: [reference]
695            if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
696                for cap in captures {
697                    if let Some(ref_match) = cap.get(1) {
698                        // Check if this bracket is part of a URL (IPv6, etc.)
699                        let bracket_start = cap.get(0).unwrap().start();
700                        let bracket_end = cap.get(0).unwrap().end();
701
702                        // Skip if this bracket pair is within any URL bracket range
703                        let is_in_url = url_bracket_ranges
704                            .iter()
705                            .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
706
707                        if is_in_url {
708                            continue;
709                        }
710
711                        // Skip Pandoc/RMarkdown inline footnotes: ^[text]
712                        // Check if there's a ^ immediately before the opening bracket
713                        if bracket_start > 0 {
714                            // bracket_start is a byte offset, so we need to check the byte before
715                            if let Some(byte) = line.as_bytes().get(bracket_start.saturating_sub(1))
716                                && *byte == b'^'
717                            {
718                                continue; // This is an inline footnote, skip it
719                            }
720                        }
721
722                        let reference = ref_match.as_str();
723                        let reference_lower = reference.to_lowercase();
724
725                        // Skip known non-reference patterns (markdown extensions, code examples)
726                        if self.is_known_non_reference_pattern(reference) {
727                            continue;
728                        }
729
730                        // Skip GitHub alerts (including extended types)
731                        if let Some(alert_type) = reference.strip_prefix('!')
732                            && matches!(
733                                alert_type,
734                                "NOTE"
735                                    | "TIP"
736                                    | "WARNING"
737                                    | "IMPORTANT"
738                                    | "CAUTION"
739                                    | "INFO"
740                                    | "SUCCESS"
741                                    | "FAILURE"
742                                    | "DANGER"
743                                    | "BUG"
744                                    | "EXAMPLE"
745                                    | "QUOTE"
746                            )
747                        {
748                            continue;
749                        }
750
751                        // Skip MkDocs snippet section markers like [start:section] or [end:section]
752                        // when they appear as part of snippet syntax (e.g., # -8<- [start:section])
753                        if mkdocs_mode
754                            && (reference.starts_with("start:") || reference.starts_with("end:"))
755                            && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
756                                || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
757                        {
758                            continue;
759                        }
760
761                        // Skip MkDocs auto-references if in MkDocs mode
762                        // Strip backticks since MkDocs resolves `module.Class` as module.Class
763                        let stripped_ref = Self::strip_backticks(reference);
764                        if mkdocs_mode
765                            && (is_mkdocs_auto_reference(stripped_ref)
766                                || (reference != stripped_ref && Self::is_valid_python_identifier(stripped_ref)))
767                        {
768                            continue;
769                        }
770
771                        if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
772                            let full_match = cap.get(0).unwrap();
773                            let col = full_match.start();
774
775                            // Skip if inside code span
776                            let code_spans = ctx.code_spans();
777                            if Self::is_in_code_span(line_num + 1, col, &code_spans) {
778                                continue;
779                            }
780
781                            // Check if this position is within a covered range
782                            let line_start_byte = ctx.line_offsets[line_num];
783                            let byte_pos = line_start_byte + col;
784
785                            // Skip if inside Jinja template
786                            if ctx.is_in_jinja_range(byte_pos) {
787                                continue;
788                            }
789
790                            // Skip if inside code block
791                            if crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block(
792                                &ctx.code_blocks,
793                                byte_pos,
794                            ) {
795                                continue;
796                            }
797
798                            // Skip if inside HTML comment (uses pre-computed ranges)
799                            if ctx.is_in_html_comment(byte_pos) {
800                                continue;
801                            }
802
803                            // Skip if inside HTML tag
804                            if Self::is_in_html_tag(ctx, byte_pos) {
805                                continue;
806                            }
807
808                            // Skip if inside math context
809                            if is_in_math_context(ctx, byte_pos) {
810                                continue;
811                            }
812
813                            // Skip if inside table cell
814                            if is_in_table_cell(ctx, line_num + 1, col) {
815                                continue;
816                            }
817
818                            let byte_end = byte_pos + (full_match.end() - full_match.start());
819
820                            // Check if this shortcut ref overlaps with any parsed link/image
821                            let mut is_covered = false;
822                            for &(range_start, range_end) in &covered_ranges {
823                                if range_start <= byte_pos && byte_end <= range_end {
824                                    // This shortcut ref is completely within a parsed link/image
825                                    is_covered = true;
826                                    break;
827                                }
828                                if range_start > byte_end {
829                                    // No need to check further (ranges are sorted)
830                                    break;
831                                }
832                            }
833
834                            if is_covered {
835                                continue;
836                            }
837
838                            // More sophisticated checks to avoid false positives
839
840                            // Check 1: If preceded by ], this might be part of [text][ref]
841                            // Look for the pattern ...][ref] and check if there's a matching [ before
842                            let line_chars: Vec<char> = line.chars().collect();
843                            if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
844                                // Look backwards for a [ that would make this [text][ref]
845                                let mut bracket_count = 1; // We already saw one ]
846                                let mut check_pos = col.saturating_sub(2);
847                                let mut found_opening = false;
848
849                                while check_pos > 0 && check_pos < line_chars.len() {
850                                    match line_chars.get(check_pos) {
851                                        Some(&']') => bracket_count += 1,
852                                        Some(&'[') => {
853                                            bracket_count -= 1;
854                                            if bracket_count == 0 {
855                                                // Check if this [ is escaped
856                                                if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
857                                                    found_opening = true;
858                                                }
859                                                break;
860                                            }
861                                        }
862                                        _ => {}
863                                    }
864                                    if check_pos == 0 {
865                                        break;
866                                    }
867                                    check_pos = check_pos.saturating_sub(1);
868                                }
869
870                                if found_opening {
871                                    // This is part of [text][ref], skip it
872                                    continue;
873                                }
874                            }
875
876                            // Check 2: If there's an escaped bracket pattern before this
877                            // e.g., \[text\][ref], the [ref] shouldn't be treated as a shortcut
878                            let before_text = &line[..col];
879                            if before_text.contains("\\]") {
880                                // Check if there's a \[ before the \]
881                                if let Some(escaped_close_pos) = before_text.rfind("\\]") {
882                                    let search_text = &before_text[..escaped_close_pos];
883                                    if search_text.contains("\\[") {
884                                        // This looks like \[...\][ref], skip it
885                                        continue;
886                                    }
887                                }
888                            }
889
890                            let match_len = full_match.end() - full_match.start();
891                            undefined.push((line_num, col, match_len, reference.to_string()));
892                            reported_refs.insert(reference_lower, true);
893                        }
894                    }
895                }
896            }
897        }
898
899        undefined
900    }
901}
902
903impl Rule for MD052ReferenceLinkImages {
904    fn name(&self) -> &'static str {
905        "MD052"
906    }
907
908    fn description(&self) -> &'static str {
909        "Reference links and images should use a reference that exists"
910    }
911
912    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
913        let content = ctx.content;
914        let mut warnings = Vec::new();
915
916        // OPTIMIZATION: Early exit if no brackets at all
917        if !content.contains('[') {
918            return Ok(warnings);
919        }
920
921        // Check if we're in MkDocs mode from the context
922        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
923
924        let references = self.extract_references(content, mkdocs_mode);
925
926        // Use optimized detection method with cached link/image data
927        let lines = ctx.raw_lines();
928        for (line_num, col, match_len, reference) in self.find_undefined_references(&references, ctx, mkdocs_mode) {
929            let line_content = lines.get(line_num).unwrap_or(&"");
930
931            // Calculate precise character range for the entire undefined reference
932            let (start_line, start_col, end_line, end_col) =
933                calculate_match_range(line_num + 1, line_content, col, match_len);
934
935            warnings.push(LintWarning {
936                rule_name: Some(self.name().to_string()),
937                line: start_line,
938                column: start_col,
939                end_line,
940                end_column: end_col,
941                message: format!("Reference '{reference}' not found"),
942                severity: Severity::Warning,
943                fix: None,
944            });
945        }
946
947        Ok(warnings)
948    }
949
950    /// Check if this rule should be skipped for performance
951    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
952        // Skip if content is empty or has no links/images
953        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
954    }
955
956    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
957        let content = ctx.content;
958        // No automatic fix available for undefined references
959        Ok(content.to_string())
960    }
961
962    fn as_any(&self) -> &dyn std::any::Any {
963        self
964    }
965
966    fn default_config_section(&self) -> Option<(String, toml::Value)> {
967        let json_value = serde_json::to_value(&self.config).ok()?;
968        Some((
969            self.name().to_string(),
970            crate::rule_config_serde::json_to_toml_value(&json_value)?,
971        ))
972    }
973
974    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
975    where
976        Self: Sized,
977    {
978        let rule_config = crate::rule_config_serde::load_rule_config::<MD052Config>(config);
979        Box::new(Self::from_config_struct(rule_config))
980    }
981}
982
983#[cfg(test)]
984mod tests {
985    use super::*;
986    use crate::lint_context::LintContext;
987
988    #[test]
989    fn test_valid_reference_link() {
990        let rule = MD052ReferenceLinkImages::new();
991        let content = "[text][ref]\n\n[ref]: https://example.com";
992        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
993        let result = rule.check(&ctx).unwrap();
994
995        assert_eq!(result.len(), 0);
996    }
997
998    #[test]
999    fn test_undefined_reference_link() {
1000        let rule = MD052ReferenceLinkImages::new();
1001        let content = "[text][undefined]";
1002        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1003        let result = rule.check(&ctx).unwrap();
1004
1005        assert_eq!(result.len(), 1);
1006        assert!(result[0].message.contains("Reference 'undefined' not found"));
1007    }
1008
1009    #[test]
1010    fn test_valid_reference_image() {
1011        let rule = MD052ReferenceLinkImages::new();
1012        let content = "![alt][img]\n\n[img]: image.jpg";
1013        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1014        let result = rule.check(&ctx).unwrap();
1015
1016        assert_eq!(result.len(), 0);
1017    }
1018
1019    #[test]
1020    fn test_undefined_reference_image() {
1021        let rule = MD052ReferenceLinkImages::new();
1022        let content = "![alt][missing]";
1023        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1024        let result = rule.check(&ctx).unwrap();
1025
1026        assert_eq!(result.len(), 1);
1027        assert!(result[0].message.contains("Reference 'missing' not found"));
1028    }
1029
1030    #[test]
1031    fn test_case_insensitive_references() {
1032        let rule = MD052ReferenceLinkImages::new();
1033        let content = "[Text][REF]\n\n[ref]: https://example.com";
1034        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1035        let result = rule.check(&ctx).unwrap();
1036
1037        assert_eq!(result.len(), 0);
1038    }
1039
1040    #[test]
1041    fn test_shortcut_reference_valid() {
1042        let rule = MD052ReferenceLinkImages::new();
1043        let content = "[ref]\n\n[ref]: https://example.com";
1044        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1045        let result = rule.check(&ctx).unwrap();
1046
1047        assert_eq!(result.len(), 0);
1048    }
1049
1050    #[test]
1051    fn test_shortcut_reference_undefined_with_shortcut_syntax_enabled() {
1052        // Shortcut syntax checking is disabled by default
1053        // Enable it to test undefined shortcut references
1054        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1055            shortcut_syntax: true,
1056            ..Default::default()
1057        });
1058        let content = "[undefined]";
1059        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1060        let result = rule.check(&ctx).unwrap();
1061
1062        assert_eq!(result.len(), 1);
1063        assert!(result[0].message.contains("Reference 'undefined' not found"));
1064    }
1065
1066    #[test]
1067    fn test_shortcut_reference_not_checked_by_default() {
1068        // By default, shortcut references are NOT checked (matches markdownlint behavior)
1069        let rule = MD052ReferenceLinkImages::new();
1070        let content = "[undefined]";
1071        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1072        let result = rule.check(&ctx).unwrap();
1073
1074        // Should be 0 because shortcut_syntax is false by default
1075        assert_eq!(result.len(), 0);
1076    }
1077
1078    #[test]
1079    fn test_inline_links_ignored() {
1080        let rule = MD052ReferenceLinkImages::new();
1081        let content = "[text](https://example.com)";
1082        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1083        let result = rule.check(&ctx).unwrap();
1084
1085        assert_eq!(result.len(), 0);
1086    }
1087
1088    #[test]
1089    fn test_inline_images_ignored() {
1090        let rule = MD052ReferenceLinkImages::new();
1091        let content = "![alt](image.jpg)";
1092        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1093        let result = rule.check(&ctx).unwrap();
1094
1095        assert_eq!(result.len(), 0);
1096    }
1097
1098    #[test]
1099    fn test_references_in_code_blocks_ignored() {
1100        let rule = MD052ReferenceLinkImages::new();
1101        let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
1102        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1103        let result = rule.check(&ctx).unwrap();
1104
1105        assert_eq!(result.len(), 0);
1106    }
1107
1108    #[test]
1109    fn test_references_in_inline_code_ignored() {
1110        let rule = MD052ReferenceLinkImages::new();
1111        let content = "`[undefined]`";
1112        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1113        let result = rule.check(&ctx).unwrap();
1114
1115        // References inside inline code spans should be ignored
1116        assert_eq!(result.len(), 0);
1117    }
1118
1119    #[test]
1120    fn test_comprehensive_inline_code_detection() {
1121        // Enable shortcut_syntax to test comprehensive detection
1122        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1123            shortcut_syntax: true,
1124            ..Default::default()
1125        });
1126        let content = r#"# Test
1127
1128This `[inside]` should be ignored.
1129This [outside] should be flagged.
1130Reference links `[text][ref]` in code are ignored.
1131Regular reference [text][missing] should be flagged.
1132Images `![alt][img]` in code are ignored.
1133Regular image ![alt][badimg] should be flagged.
1134
1135Multiple `[one]` and `[two]` in code ignored, but [three] is not.
1136
1137```
1138[code block content] should be ignored
1139```
1140
1141`Multiple [refs] in [same] code span` ignored."#;
1142
1143        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1144        let result = rule.check(&ctx).unwrap();
1145
1146        // Should only flag: outside, missing, badimg, three (4 total)
1147        assert_eq!(result.len(), 4);
1148
1149        let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
1150        assert!(messages.iter().any(|m| m.contains("outside")));
1151        assert!(messages.iter().any(|m| m.contains("missing")));
1152        assert!(messages.iter().any(|m| m.contains("badimg")));
1153        assert!(messages.iter().any(|m| m.contains("three")));
1154
1155        // Should NOT flag any references inside code spans
1156        assert!(!messages.iter().any(|m| m.contains("inside")));
1157        assert!(!messages.iter().any(|m| m.contains("one")));
1158        assert!(!messages.iter().any(|m| m.contains("two")));
1159        assert!(!messages.iter().any(|m| m.contains("refs")));
1160        assert!(!messages.iter().any(|m| m.contains("same")));
1161    }
1162
1163    #[test]
1164    fn test_multiple_undefined_references() {
1165        let rule = MD052ReferenceLinkImages::new();
1166        let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
1167        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1168        let result = rule.check(&ctx).unwrap();
1169
1170        assert_eq!(result.len(), 3);
1171        assert!(result[0].message.contains("ref1"));
1172        assert!(result[1].message.contains("ref2"));
1173        assert!(result[2].message.contains("ref3"));
1174    }
1175
1176    #[test]
1177    fn test_mixed_valid_and_undefined() {
1178        let rule = MD052ReferenceLinkImages::new();
1179        let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
1180        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1181        let result = rule.check(&ctx).unwrap();
1182
1183        assert_eq!(result.len(), 1);
1184        assert!(result[0].message.contains("missing"));
1185    }
1186
1187    #[test]
1188    fn test_empty_reference() {
1189        let rule = MD052ReferenceLinkImages::new();
1190        let content = "[text][]\n\n[ref]: https://example.com";
1191        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1192        let result = rule.check(&ctx).unwrap();
1193
1194        // Empty reference should use the link text as reference
1195        assert_eq!(result.len(), 1);
1196    }
1197
1198    #[test]
1199    fn test_escaped_brackets_ignored() {
1200        let rule = MD052ReferenceLinkImages::new();
1201        let content = "\\[not a link\\]";
1202        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1203        let result = rule.check(&ctx).unwrap();
1204
1205        assert_eq!(result.len(), 0);
1206    }
1207
1208    #[test]
1209    fn test_list_items_ignored() {
1210        let rule = MD052ReferenceLinkImages::new();
1211        let content = "- [undefined]\n* [another]\n+ [third]";
1212        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1213        let result = rule.check(&ctx).unwrap();
1214
1215        // List items that look like shortcut references should be ignored
1216        assert_eq!(result.len(), 0);
1217    }
1218
1219    #[test]
1220    fn test_output_example_section_ignored() {
1221        // Enable shortcut_syntax to test example section handling
1222        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1223            shortcut_syntax: true,
1224            ..Default::default()
1225        });
1226        let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
1227        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1228        let result = rule.check(&ctx).unwrap();
1229
1230        // Only the reference outside the Output section should be flagged
1231        assert_eq!(result.len(), 1);
1232        assert!(result[0].message.contains("missing"));
1233    }
1234
1235    #[test]
1236    fn test_reference_definitions_in_code_blocks_ignored() {
1237        let rule = MD052ReferenceLinkImages::new();
1238        let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
1239        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1240        let result = rule.check(&ctx).unwrap();
1241
1242        // Reference defined in code block should not count
1243        assert_eq!(result.len(), 1);
1244        assert!(result[0].message.contains("ref"));
1245    }
1246
1247    #[test]
1248    fn test_multiple_references_to_same_undefined() {
1249        let rule = MD052ReferenceLinkImages::new();
1250        let content = "[first][missing] [second][missing] [third][missing]";
1251        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1252        let result = rule.check(&ctx).unwrap();
1253
1254        // Should only report once per unique reference
1255        assert_eq!(result.len(), 1);
1256        assert!(result[0].message.contains("missing"));
1257    }
1258
1259    #[test]
1260    fn test_reference_with_special_characters() {
1261        let rule = MD052ReferenceLinkImages::new();
1262        let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
1263        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1264        let result = rule.check(&ctx).unwrap();
1265
1266        assert_eq!(result.len(), 0);
1267    }
1268
1269    #[test]
1270    fn test_issue_51_html_attribute_not_reference() {
1271        // Test for issue #51 - HTML attributes with square brackets shouldn't be treated as references
1272        let rule = MD052ReferenceLinkImages::new();
1273        let content = r#"# Example
1274
1275## Test
1276
1277Want to fill out this form?
1278
1279<form method="post">
1280    <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
1281</form>"#;
1282        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1283        let result = rule.check(&ctx).unwrap();
1284
1285        assert_eq!(
1286            result.len(),
1287            0,
1288            "HTML attributes with square brackets should not be flagged as undefined references"
1289        );
1290    }
1291
1292    #[test]
1293    fn test_extract_references() {
1294        let rule = MD052ReferenceLinkImages::new();
1295        let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
1296        let refs = rule.extract_references(content, false);
1297
1298        assert_eq!(refs.len(), 3);
1299        assert!(refs.contains("ref1"));
1300        assert!(refs.contains("ref2"));
1301        assert!(refs.contains("ref3"));
1302    }
1303
1304    #[test]
1305    fn test_inline_code_not_flagged() {
1306        // Enable shortcut_syntax to test inline code detection
1307        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1308            shortcut_syntax: true,
1309            ..Default::default()
1310        });
1311
1312        // Test that arrays in inline code are not flagged as references
1313        let content = r#"# Test
1314
1315Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.
1316
1317Also, `[todo]` is not a reference link.
1318
1319But this [reference] should be flagged.
1320
1321And this `[inline code]` should not be flagged.
1322"#;
1323
1324        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1325        let warnings = rule.check(&ctx).unwrap();
1326
1327        // Should only flag [reference], not the ones in backticks
1328        assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
1329        assert!(warnings[0].message.contains("'reference'"));
1330    }
1331
1332    #[test]
1333    fn test_code_block_references_ignored() {
1334        // Enable shortcut_syntax to test code block handling
1335        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1336            shortcut_syntax: true,
1337            ..Default::default()
1338        });
1339
1340        let content = r#"# Test
1341
1342```markdown
1343[undefined] reference in code block
1344![undefined] image in code block
1345```
1346
1347[real-undefined] reference outside
1348"#;
1349
1350        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1351        let warnings = rule.check(&ctx).unwrap();
1352
1353        // Should only flag [real-undefined], not the ones in code block
1354        assert_eq!(warnings.len(), 1);
1355        assert!(warnings[0].message.contains("'real-undefined'"));
1356    }
1357
1358    #[test]
1359    fn test_html_comments_ignored() {
1360        // Test for issue #20 - MD052 should not flag content inside HTML comments
1361        let rule = MD052ReferenceLinkImages::new();
1362
1363        // Test the exact case from issue #20
1364        let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
1365<!--- set_env EDITOR 'python3 fake_editor.py' -->
1366
1367```bash
1368$ python3 vote.py
13693 votes for: 2
13702 votes for: 3, 4
1371```"#;
1372        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1373        let result = rule.check(&ctx).unwrap();
1374        assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");
1375
1376        // Test various reference patterns inside HTML comments
1377        let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
1378Normal [text][undefined]
1379<!-- Another [comment][with] references -->"#;
1380        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1381        let result = rule.check(&ctx).unwrap();
1382        assert_eq!(
1383            result.len(),
1384            1,
1385            "Should only flag the undefined reference outside comments"
1386        );
1387        assert!(result[0].message.contains("undefined"));
1388
1389        // Test multi-line HTML comments
1390        let content = r#"<!--
1391[ref1]
1392[ref2][ref3]
1393-->
1394[actual][undefined]"#;
1395        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1396        let result = rule.check(&ctx).unwrap();
1397        assert_eq!(
1398            result.len(),
1399            1,
1400            "Should not flag references in multi-line HTML comments"
1401        );
1402        assert!(result[0].message.contains("undefined"));
1403
1404        // Test mixed scenarios
1405        let content = r#"<!-- Comment with [1:] pattern -->
1406Valid [link][ref]
1407<!-- More [refs][in][comments] -->
1408![image][missing]
1409
1410[ref]: https://example.com"#;
1411        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1412        let result = rule.check(&ctx).unwrap();
1413        assert_eq!(result.len(), 1, "Should only flag missing image reference");
1414        assert!(result[0].message.contains("missing"));
1415    }
1416
1417    #[test]
1418    fn test_frontmatter_ignored() {
1419        // Test for issue #24 - MD052 should not flag content inside frontmatter
1420        // Enable shortcut_syntax to test frontmatter handling
1421        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1422            shortcut_syntax: true,
1423            ..Default::default()
1424        });
1425
1426        // Test YAML frontmatter with arrays and references
1427        let content = r#"---
1428layout: post
1429title: "My Jekyll Post"
1430date: 2023-01-01
1431categories: blog
1432tags: ["test", "example"]
1433author: John Doe
1434---
1435
1436# My Blog Post
1437
1438This is the actual markdown content that should be linted.
1439
1440[undefined] reference should be flagged.
1441
1442## Section 1
1443
1444Some content here."#;
1445        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1446        let result = rule.check(&ctx).unwrap();
1447
1448        // Should only flag [undefined] in the content, not the ["test", "example"] array in frontmatter
1449        assert_eq!(
1450            result.len(),
1451            1,
1452            "Should only flag the undefined reference outside frontmatter"
1453        );
1454        assert!(result[0].message.contains("undefined"));
1455
1456        // Test TOML frontmatter
1457        let content = r#"+++
1458title = "My Post"
1459tags = ["example", "test"]
1460+++
1461
1462# Content
1463
1464[missing] reference should be flagged."#;
1465        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1466        let result = rule.check(&ctx).unwrap();
1467        assert_eq!(
1468            result.len(),
1469            1,
1470            "Should only flag the undefined reference outside TOML frontmatter"
1471        );
1472        assert!(result[0].message.contains("missing"));
1473    }
1474
1475    #[test]
1476    fn test_mkdocs_snippet_markers_not_flagged() {
1477        // Test for issue #68 - MkDocs snippet selection markers should not be flagged as undefined references
1478        // Enable shortcut_syntax to test snippet marker handling
1479        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1480            shortcut_syntax: true,
1481            ..Default::default()
1482        });
1483
1484        // Test snippet section markers
1485        let content = r#"# Document with MkDocs Snippets
1486
1487Some content here.
1488
1489# -8<- [start:remote-content]
1490
1491This is the remote content section.
1492
1493# -8<- [end:remote-content]
1494
1495More content here.
1496
1497<!-- --8<-- [start:another-section] -->
1498Content in another section
1499<!-- --8<-- [end:another-section] -->"#;
1500        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
1501        let result = rule.check(&ctx).unwrap();
1502
1503        // Should not flag any snippet markers as undefined references
1504        assert_eq!(
1505            result.len(),
1506            0,
1507            "Should not flag MkDocs snippet markers as undefined references"
1508        );
1509
1510        // Test that the snippet marker lines are properly skipped
1511        // but regular undefined references on other lines are still caught
1512        let content = r#"# Document
1513
1514# -8<- [start:section]
1515Content with [reference] inside snippet section
1516# -8<- [end:section]
1517
1518Regular [undefined] reference outside snippet markers."#;
1519        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
1520        let result = rule.check(&ctx).unwrap();
1521
1522        assert_eq!(
1523            result.len(),
1524            2,
1525            "Should flag undefined references but skip snippet marker lines"
1526        );
1527        // The references inside the content should be flagged, but not start: and end:
1528        assert!(result[0].message.contains("reference"));
1529        assert!(result[1].message.contains("undefined"));
1530
1531        // Test in standard mode - should flag the markers as undefined
1532        let content = r#"# Document
1533
1534# -8<- [start:section]
1535# -8<- [end:section]"#;
1536        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1537        let result = rule.check(&ctx).unwrap();
1538
1539        assert_eq!(
1540            result.len(),
1541            2,
1542            "In standard mode, snippet markers should be flagged as undefined references"
1543        );
1544    }
1545
1546    #[test]
1547    fn test_pandoc_citations_not_flagged() {
1548        // Test that Pandoc/RMarkdown/Quarto citation syntax is not flagged
1549        // Enable shortcut_syntax to test citation handling
1550        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1551            shortcut_syntax: true,
1552            ..Default::default()
1553        });
1554
1555        let content = r#"# Research Paper
1556
1557We are using the **bookdown** package [@R-bookdown] in this sample book.
1558This was built on top of R Markdown and **knitr** [@xie2015].
1559
1560Multiple citations [@citation1; @citation2; @citation3] are also supported.
1561
1562Regular [undefined] reference should still be flagged.
1563"#;
1564        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1565        let result = rule.check(&ctx).unwrap();
1566
1567        // Should only flag the undefined reference, not the citations
1568        assert_eq!(
1569            result.len(),
1570            1,
1571            "Should only flag the undefined reference, not Pandoc citations"
1572        );
1573        assert!(result[0].message.contains("undefined"));
1574    }
1575
1576    #[test]
1577    fn test_pandoc_inline_footnotes_not_flagged() {
1578        // Test that Pandoc inline footnote syntax is not flagged
1579        // Enable shortcut_syntax to test inline footnote handling
1580        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1581            shortcut_syntax: true,
1582            ..Default::default()
1583        });
1584
1585        let content = r#"# Math Document
1586
1587You can use math in footnotes like this^[where we mention $p = \frac{a}{b}$].
1588
1589Another footnote^[with some text and a [link](https://example.com)].
1590
1591But this [reference] without ^ should be flagged.
1592"#;
1593        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1594        let result = rule.check(&ctx).unwrap();
1595
1596        // Should only flag the reference without ^
1597        assert_eq!(
1598            result.len(),
1599            1,
1600            "Should only flag the regular reference, not inline footnotes"
1601        );
1602        assert!(result[0].message.contains("reference"));
1603    }
1604
1605    #[test]
1606    fn test_github_alerts_not_flagged() {
1607        // Test for issue #60 - GitHub alerts should not be flagged as undefined references
1608        // Enable shortcut_syntax to test GitHub alert handling
1609        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1610            shortcut_syntax: true,
1611            ..Default::default()
1612        });
1613
1614        // Test various GitHub alert types
1615        let content = r#"# Document with GitHub Alerts
1616
1617> [!NOTE]
1618> This is a note alert.
1619
1620> [!TIP]
1621> This is a tip alert.
1622
1623> [!IMPORTANT]
1624> This is an important alert.
1625
1626> [!WARNING]
1627> This is a warning alert.
1628
1629> [!CAUTION]
1630> This is a caution alert.
1631
1632Regular content with [undefined] reference."#;
1633        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1634        let result = rule.check(&ctx).unwrap();
1635
1636        // Should only flag the undefined reference, not the GitHub alerts
1637        assert_eq!(
1638            result.len(),
1639            1,
1640            "Should only flag the undefined reference, not GitHub alerts"
1641        );
1642        assert!(result[0].message.contains("undefined"));
1643        assert_eq!(result[0].line, 18); // Line with [undefined]
1644
1645        // Test GitHub alerts with additional content
1646        let content = r#"> [!TIP]
1647> Here's a useful tip about [something].
1648> Multiple lines are allowed.
1649
1650[something] is mentioned but not defined."#;
1651        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1652        let result = rule.check(&ctx).unwrap();
1653
1654        // Should flag only the [something] outside blockquotes
1655        // The test shows we're only catching one, which might be correct behavior
1656        // matching markdownlint's approach
1657        assert_eq!(result.len(), 1, "Should flag undefined reference");
1658        assert!(result[0].message.contains("something"));
1659
1660        // Test GitHub alerts with proper references
1661        let content = r#"> [!NOTE]
1662> See [reference] for more details.
1663
1664[reference]: https://example.com"#;
1665        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1666        let result = rule.check(&ctx).unwrap();
1667
1668        // Should not flag anything - [!NOTE] is GitHub alert and [reference] is defined
1669        assert_eq!(result.len(), 0, "Should not flag GitHub alerts or defined references");
1670    }
1671
1672    #[test]
1673    fn test_ignore_config() {
1674        // Test that user-configured ignore list is respected
1675        let config = MD052Config {
1676            shortcut_syntax: true,
1677            ignore: vec!["Vec".to_string(), "HashMap".to_string(), "Option".to_string()],
1678        };
1679        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1680
1681        let content = r#"# Document with Custom Types
1682
1683Use [Vec] for dynamic arrays.
1684Use [HashMap] for key-value storage.
1685Use [Option] for nullable values.
1686Use [Result] for error handling.
1687"#;
1688        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1689        let result = rule.check(&ctx).unwrap();
1690
1691        // Should only flag [Result] because it's not in ignore
1692        assert_eq!(result.len(), 1, "Should only flag names not in ignore");
1693        assert!(result[0].message.contains("Result"));
1694    }
1695
1696    #[test]
1697    fn test_ignore_case_insensitive() {
1698        // Test that ignore list is case-insensitive
1699        let config = MD052Config {
1700            shortcut_syntax: true,
1701            ignore: vec!["Vec".to_string()],
1702        };
1703        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1704
1705        let content = r#"# Case Insensitivity Test
1706
1707[Vec] should be ignored.
1708[vec] should also be ignored (different case, same match).
1709[VEC] should also be ignored (different case, same match).
1710[undefined] should be flagged (not in ignore list).
1711"#;
1712        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1713        let result = rule.check(&ctx).unwrap();
1714
1715        // Should only flag [undefined] because ignore is case-insensitive
1716        assert_eq!(result.len(), 1, "Should only flag non-ignored reference");
1717        assert!(result[0].message.contains("undefined"));
1718    }
1719
1720    #[test]
1721    fn test_ignore_empty_by_default() {
1722        // Test that empty ignore list doesn't affect existing behavior
1723        let rule = MD052ReferenceLinkImages::new();
1724
1725        let content = "[text][undefined]";
1726        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1727        let result = rule.check(&ctx).unwrap();
1728
1729        // Should still flag undefined references
1730        assert_eq!(result.len(), 1);
1731        assert!(result[0].message.contains("undefined"));
1732    }
1733
1734    #[test]
1735    fn test_ignore_with_reference_links() {
1736        // Test ignore list with full reference link syntax [text][ref]
1737        let config = MD052Config {
1738            shortcut_syntax: false,
1739            ignore: vec!["CustomType".to_string()],
1740        };
1741        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1742
1743        let content = r#"# Test
1744
1745See [documentation][CustomType] for details.
1746See [other docs][MissingRef] for more.
1747"#;
1748        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1749        let result = rule.check(&ctx).unwrap();
1750
1751        // Debug: print warnings if test fails
1752        for (i, w) in result.iter().enumerate() {
1753            eprintln!("Warning {}: {}", i, w.message);
1754        }
1755
1756        // Should flag [MissingRef] but not [CustomType]
1757        // Note: reference IDs are lowercased in the message
1758        assert_eq!(result.len(), 1, "Expected 1 warning, got {}", result.len());
1759        assert!(
1760            result[0].message.contains("missingref"),
1761            "Expected 'missingref' in message: {}",
1762            result[0].message
1763        );
1764    }
1765
1766    #[test]
1767    fn test_ignore_multiple() {
1768        // Test multiple ignored names work correctly
1769        let config = MD052Config {
1770            shortcut_syntax: true,
1771            ignore: vec![
1772                "i32".to_string(),
1773                "u64".to_string(),
1774                "String".to_string(),
1775                "Arc".to_string(),
1776                "Mutex".to_string(),
1777            ],
1778        };
1779        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1780
1781        let content = r#"# Types
1782
1783[i32] [u64] [String] [Arc] [Mutex] [Box]
1784"#;
1785        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1786        let result = rule.check(&ctx).unwrap();
1787
1788        // Note: i32 and u64 are already in the hardcoded list, so they'd be skipped anyway
1789        // String is NOT in the hardcoded list, so we test that the user config works
1790        // [Box] should be flagged (not in ignore)
1791        assert_eq!(result.len(), 1);
1792        assert!(result[0].message.contains("Box"));
1793    }
1794}