Skip to main content

rumdl_lib/rules/
md052_reference_links_images.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::SHORTCUT_REF_REGEX;
5use crate::utils::skip_context::{is_in_math_context, is_in_table_cell};
6use regex::Regex;
7use std::collections::{HashMap, HashSet};
8use std::sync::LazyLock;
9
10mod md052_config;
11use md052_config::MD052Config;
12
13// Pattern to match reference definitions [ref]: url
14// Note: \S* instead of \S+ to allow empty definitions like [ref]:
15// The capturing group handles nested brackets to support cases like [`union[t, none]`]:
16static REF_REGEX: LazyLock<Regex> =
17    LazyLock::new(|| Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap());
18
19// Pattern for list items to exclude from reference checks (standard regex is fine)
20static LIST_ITEM_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap());
21
22// Pattern for code blocks (standard regex is fine)
23static FENCED_CODE_START: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap());
24
25// Pattern for output example sections (standard regex is fine)
26static OUTPUT_EXAMPLE_START: LazyLock<Regex> =
27    LazyLock::new(|| Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap());
28
29// Pattern for GitHub alerts/callouts in blockquotes (e.g., > [!NOTE], > [!TIP], etc.)
30// Extended to include additional common alert types
31static GITHUB_ALERT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
32    Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]")
33        .unwrap()
34});
35
36// Pattern to detect URLs that may contain brackets (IPv6, API endpoints, etc.)
37// This pattern specifically looks for:
38// - IPv6 addresses: https://[::1] or https://[2001:db8::1]
39// - IPv6 with zone IDs: https://[fe80::1%eth0]
40// - IPv6 mixed notation: https://[::ffff:192.0.2.1]
41// - API paths with array notation: https://api.example.com/users[0]
42// But NOT markdown reference links that happen to follow URLs
43static URL_WITH_BRACKETS: LazyLock<Regex> =
44    LazyLock::new(|| Regex::new(r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])").unwrap());
45
46/// Rule MD052: Reference links and images should use reference style
47///
48/// See [docs/md052.md](../../docs/md052.md) for full documentation, configuration, and examples.
49///
50/// This rule is triggered when a reference link or image uses a reference that isn't defined.
51///
52/// ## Configuration
53///
54/// - `shortcut-syntax`: Whether to check shortcut reference syntax `[text]` (default: false)
55///
56/// By default, only full (`[text][ref]`) and collapsed (`[text][]`) reference syntax is checked.
57/// Shortcut syntax is ambiguous because `[text]` could be a reference link OR just text in brackets.
58#[derive(Clone, Default)]
59pub struct MD052ReferenceLinkImages {
60    config: MD052Config,
61}
62
63impl MD052ReferenceLinkImages {
64    pub fn new() -> Self {
65        Self {
66            config: MD052Config::default(),
67        }
68    }
69
70    pub fn from_config_struct(config: MD052Config) -> Self {
71        Self { config }
72    }
73
74    /// Strip surrounding backticks from a string
75    /// Used for MkDocs auto-reference detection where `module.Class` should be treated as module.Class
76    fn strip_backticks(s: &str) -> &str {
77        s.trim_start_matches('`').trim_end_matches('`')
78    }
79
80    /// Check if a string is a valid Python identifier
81    /// Used for MkDocs auto-reference detection where single-word backtick-wrapped identifiers
82    /// like `str`, `int`, etc. should be accepted as valid auto-references
83    fn is_valid_python_identifier(s: &str) -> bool {
84        if s.is_empty() {
85            return false;
86        }
87        let first_char = s.chars().next().unwrap();
88        if !first_char.is_ascii_alphabetic() && first_char != '_' {
89            return false;
90        }
91        s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
92    }
93
94    /// Check if text matches a known non-reference pattern that should be skipped.
95    ///
96    /// These are deterministic patterns from markdown extensions or code examples,
97    /// not heuristics. Returns true for:
98    /// - User-configured names via `ignore` config option
99    /// - Markdown extensions: [^footnote], [@citation], [!alert], [TOC]
100    /// - Programming syntax: [T], [null], [i32], ["string"]
101    /// - Descriptive text: [default: value], [0-9]
102    fn is_known_non_reference_pattern(&self, text: &str) -> bool {
103        // Check user-configured ignore list first (case-insensitive match)
104        // Reference IDs are normalized to lowercase during parsing,
105        // so we use case-insensitive comparison for user convenience
106        if self.config.ignore.iter().any(|p| p.eq_ignore_ascii_case(text)) {
107            return true;
108        }
109        // Skip numeric patterns (array indices, ranges)
110        if text.chars().all(|c| c.is_ascii_digit()) {
111            return true;
112        }
113
114        // Skip numeric ranges like [1:3], [0:10], etc.
115        if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
116            return true;
117        }
118
119        // Skip patterns that look like config sections [tool.something], [section.subsection]
120        // But not if they contain other non-alphanumeric chars like hyphens, underscores, or backticks
121        // Backticks indicate intentional code formatting in a reference name (e.g., [`module.Class`])
122        if text.contains('.')
123            && !text.contains(' ')
124            && !text.contains('-')
125            && !text.contains('_')
126            && !text.contains('`')
127        {
128            // Config sections typically have dots, no spaces, and only alphanumeric + dots
129            return true;
130        }
131
132        // Skip glob/wildcard patterns like [*], [...], [**]
133        if text == "*" || text == "..." || text == "**" {
134            return true;
135        }
136
137        // Skip patterns that look like file paths [dir/file], [src/utils]
138        if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
139            return true;
140        }
141
142        // Skip programming type annotations like [int, str], [Dict[str, Any]]
143        // These typically have commas and/or nested brackets
144        if text.contains(',') || text.contains('[') || text.contains(']') {
145            // Check if it looks like a type annotation pattern
146            return true;
147        }
148
149        // Note: We don't filter out patterns with backticks because backticks in reference names
150        // are valid markdown syntax, e.g., [`dataclasses.InitVar`] is a valid reference name
151
152        // Skip patterns that look like module/class paths ONLY if they don't have backticks
153        // Backticks indicate intentional code formatting in a reference name
154        // e.g., skip [dataclasses.initvar] but allow [`typing.ClassVar`]
155        if !text.contains('`')
156            && text.contains('.')
157            && !text.contains(' ')
158            && !text.contains('-')
159            && !text.contains('_')
160        {
161            return true;
162        }
163
164        // Note: We don't filter based on word count anymore because legitimate references
165        // can have many words, like "python language reference for import statements"
166        // Word count filtering was causing false positives where valid references were
167        // being incorrectly flagged as unused
168
169        // Skip patterns that are just punctuation or operators
170        if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
171            return true;
172        }
173
174        // Skip very short non-word patterns (likely operators or syntax)
175        if text.len() <= 2 && !text.chars().all(|c| c.is_alphabetic()) {
176            return true;
177        }
178
179        // Skip quoted patterns like ["E501"], ["ALL"], ["E", "F"]
180        if (text.starts_with('"') && text.ends_with('"'))
181            || (text.starts_with('\'') && text.ends_with('\''))
182            || text.contains('"')
183            || text.contains('\'')
184        {
185            return true;
186        }
187
188        // Skip descriptive patterns with colon like [default: the project root]
189        // But allow simple numeric ranges which are handled above
190        if text.contains(':') && text.contains(' ') {
191            return true;
192        }
193
194        // Skip alert/admonition patterns like [!WARN], [!NOTE], etc.
195        if text.starts_with('!') {
196            return true;
197        }
198
199        // Skip footnote syntax like [^1], [^note], etc.
200        // Footnotes start with ^ and are a common markdown extension
201        if text.starts_with('^') {
202            return true;
203        }
204
205        // Skip Pandoc/RMarkdown/Quarto citation syntax like [@citation-key]
206        // Citations in these formats start with @ inside brackets
207        if text.starts_with('@') {
208            return true;
209        }
210
211        // Skip table of contents markers like [TOC]
212        // Used by Python-Markdown and other processors
213        if text == "TOC" {
214            return true;
215        }
216
217        // Skip single uppercase letters (likely type parameters) like [T], [U], [K], [V]
218        if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
219            return true;
220        }
221
222        // Skip common programming type names, literals, and short identifiers
223        // that are likely not markdown references
224        let common_non_refs = [
225            // Programming types
226            "object",
227            "Object",
228            "any",
229            "Any",
230            "inv",
231            "void",
232            "bool",
233            "int",
234            "float",
235            "str",
236            "char",
237            "i8",
238            "i16",
239            "i32",
240            "i64",
241            "i128",
242            "isize",
243            "u8",
244            "u16",
245            "u32",
246            "u64",
247            "u128",
248            "usize",
249            "f32",
250            "f64",
251            // JavaScript/JSON literals (excluding "undefined" which is too ambiguous)
252            "null",
253            "true",
254            "false",
255            "NaN",
256            "Infinity",
257            // Common JavaScript output patterns
258            "object Object",
259        ];
260
261        if common_non_refs.contains(&text) {
262            return true;
263        }
264
265        false
266    }
267
268    /// Check if a position is inside any code span
269    fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
270        code_spans
271            .iter()
272            .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
273    }
274
275    /// Check if a byte position is within an HTML tag
276    fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
277        // Check HTML tags
278        for html_tag in ctx.html_tags().iter() {
279            if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
280                return true;
281            }
282        }
283        false
284    }
285
286    fn extract_references(&self, content: &str, mkdocs_mode: bool) -> HashSet<String> {
287        use crate::config::MarkdownFlavor;
288        use crate::utils::skip_context::is_mkdocs_snippet_line;
289
290        let mut references = HashSet::new();
291        let mut in_code_block = false;
292        let mut code_fence_marker = String::new();
293
294        for line in content.lines() {
295            // Skip lines that look like MkDocs snippet markers (only in MkDocs mode)
296            if is_mkdocs_snippet_line(
297                line,
298                if mkdocs_mode {
299                    MarkdownFlavor::MkDocs
300                } else {
301                    MarkdownFlavor::Standard
302                },
303            ) {
304                continue;
305            }
306            // Handle code block boundaries
307            if let Some(cap) = FENCED_CODE_START.captures(line) {
308                if let Some(fence) = cap.get(2) {
309                    // Get the fence marker (``` or ~~~) without the indentation
310                    let fence_str = fence.as_str();
311                    if !in_code_block {
312                        in_code_block = true;
313                        code_fence_marker = fence_str.to_string();
314                    } else if line.trim_start().starts_with(&code_fence_marker) {
315                        // Check if this could be a closing fence
316                        let trimmed = line.trim_start();
317                        // A closing fence should be just the fence characters, possibly with trailing whitespace
318                        if trimmed.starts_with(&code_fence_marker) {
319                            let after_fence = &trimmed[code_fence_marker.len()..];
320                            if after_fence.trim().is_empty() {
321                                in_code_block = false;
322                                code_fence_marker.clear();
323                            }
324                        }
325                    }
326                }
327                continue;
328            }
329
330            // Skip lines in code blocks
331            if in_code_block {
332                continue;
333            }
334
335            // Check for abbreviation syntax (*[ABBR]: Definition) and skip it
336            // Abbreviations are not reference links and should not be tracked
337            if line.trim_start().starts_with("*[") {
338                continue;
339            }
340
341            if let Some(cap) = REF_REGEX.captures(line) {
342                // Store references in lowercase for case-insensitive comparison
343                if let Some(reference) = cap.get(1) {
344                    references.insert(reference.as_str().to_lowercase());
345                }
346            }
347        }
348
349        references
350    }
351
352    fn find_undefined_references(
353        &self,
354        references: &HashSet<String>,
355        ctx: &crate::lint_context::LintContext,
356        mkdocs_mode: bool,
357    ) -> Vec<(usize, usize, usize, String)> {
358        let mut undefined = Vec::new();
359        let mut reported_refs = HashMap::new();
360        let mut in_code_block = false;
361        let mut code_fence_marker = String::new();
362        let mut in_example_section = false;
363
364        // Get code spans once for the entire function
365        let code_spans = ctx.code_spans();
366
367        // Use cached data for reference links and images
368        for link in &ctx.links {
369            if !link.is_reference {
370                continue; // Skip inline links
371            }
372
373            // Skip links inside Jinja templates
374            if ctx.is_in_jinja_range(link.byte_offset) {
375                continue;
376            }
377
378            // Skip links inside code spans
379            if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
380                continue;
381            }
382
383            // Skip links inside HTML comments (uses pre-computed ranges)
384            if ctx.is_in_html_comment(link.byte_offset) {
385                continue;
386            }
387
388            // Skip links inside HTML tags
389            if Self::is_in_html_tag(ctx, link.byte_offset) {
390                continue;
391            }
392
393            // Skip links inside math contexts
394            if is_in_math_context(ctx, link.byte_offset) {
395                continue;
396            }
397
398            // Skip links inside table cells
399            if is_in_table_cell(ctx, link.line, link.start_col) {
400                continue;
401            }
402
403            // Skip links inside frontmatter
404            if ctx.line_info(link.line).is_some_and(|info| info.in_front_matter) {
405                continue;
406            }
407
408            // Skip Quarto/Pandoc citations ([@citation], @citation)
409            // Citations look like reference links but are bibliography references
410            if ctx.flavor == crate::config::MarkdownFlavor::Quarto && ctx.is_in_citation(link.byte_offset) {
411                continue;
412            }
413
414            // Skip links inside shortcodes ({{< ... >}} or {{% ... %}})
415            // Shortcodes may contain template syntax that looks like reference links
416            if ctx.is_in_shortcode(link.byte_offset) {
417                continue;
418            }
419
420            if let Some(ref_id) = &link.reference_id {
421                let reference_lower = ref_id.to_lowercase();
422
423                // Skip known non-reference patterns (markdown extensions, code examples)
424                if self.is_known_non_reference_pattern(ref_id) {
425                    continue;
426                }
427
428                // Skip MkDocs auto-references if in MkDocs mode
429                // Check both the reference_id and the link text for shorthand references
430                // Strip backticks since MkDocs resolves `module.Class` as module.Class
431                let stripped_ref = Self::strip_backticks(ref_id);
432                let stripped_text = Self::strip_backticks(&link.text);
433                if mkdocs_mode
434                    && (is_mkdocs_auto_reference(stripped_ref)
435                        || is_mkdocs_auto_reference(stripped_text)
436                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
437                        || (link.text.as_ref() != stripped_text && Self::is_valid_python_identifier(stripped_text)))
438                {
439                    continue;
440                }
441
442                // Check if reference is defined
443                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
444                    // Check if the line is in an example section or list item
445                    if let Some(line_info) = ctx.line_info(link.line) {
446                        if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
447                            in_example_section = true;
448                            continue;
449                        }
450
451                        if in_example_section {
452                            continue;
453                        }
454
455                        // Skip list items
456                        if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
457                            continue;
458                        }
459
460                        // Skip lines that are HTML content
461                        let trimmed = line_info.content(ctx.content).trim_start();
462                        if trimmed.starts_with('<') {
463                            continue;
464                        }
465                    }
466
467                    let match_len = link.byte_end - link.byte_offset;
468                    undefined.push((link.line - 1, link.start_col, match_len, ref_id.to_string()));
469                    reported_refs.insert(reference_lower, true);
470                }
471            }
472        }
473
474        // Use cached data for reference images
475        for image in &ctx.images {
476            if !image.is_reference {
477                continue; // Skip inline images
478            }
479
480            // Skip images inside Jinja templates
481            if ctx.is_in_jinja_range(image.byte_offset) {
482                continue;
483            }
484
485            // Skip images inside code spans
486            if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
487                continue;
488            }
489
490            // Skip images inside HTML comments (uses pre-computed ranges)
491            if ctx.is_in_html_comment(image.byte_offset) {
492                continue;
493            }
494
495            // Skip images inside HTML tags
496            if Self::is_in_html_tag(ctx, image.byte_offset) {
497                continue;
498            }
499
500            // Skip images inside math contexts
501            if is_in_math_context(ctx, image.byte_offset) {
502                continue;
503            }
504
505            // Skip images inside table cells
506            if is_in_table_cell(ctx, image.line, image.start_col) {
507                continue;
508            }
509
510            // Skip images inside frontmatter
511            if ctx.line_info(image.line).is_some_and(|info| info.in_front_matter) {
512                continue;
513            }
514
515            if let Some(ref_id) = &image.reference_id {
516                let reference_lower = ref_id.to_lowercase();
517
518                // Skip known non-reference patterns (markdown extensions, code examples)
519                if self.is_known_non_reference_pattern(ref_id) {
520                    continue;
521                }
522
523                // Skip MkDocs auto-references if in MkDocs mode
524                // Check both the reference_id and the alt text for shorthand references
525                // Strip backticks since MkDocs resolves `module.Class` as module.Class
526                let stripped_ref = Self::strip_backticks(ref_id);
527                let stripped_alt = Self::strip_backticks(&image.alt_text);
528                if mkdocs_mode
529                    && (is_mkdocs_auto_reference(stripped_ref)
530                        || is_mkdocs_auto_reference(stripped_alt)
531                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
532                        || (image.alt_text.as_ref() != stripped_alt && Self::is_valid_python_identifier(stripped_alt)))
533                {
534                    continue;
535                }
536
537                // Check if reference is defined
538                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
539                    // Check if the line is in an example section or list item
540                    if let Some(line_info) = ctx.line_info(image.line) {
541                        if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
542                            in_example_section = true;
543                            continue;
544                        }
545
546                        if in_example_section {
547                            continue;
548                        }
549
550                        // Skip list items
551                        if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
552                            continue;
553                        }
554
555                        // Skip lines that are HTML content
556                        let trimmed = line_info.content(ctx.content).trim_start();
557                        if trimmed.starts_with('<') {
558                            continue;
559                        }
560                    }
561
562                    let match_len = image.byte_end - image.byte_offset;
563                    undefined.push((image.line - 1, image.start_col, match_len, ref_id.to_string()));
564                    reported_refs.insert(reference_lower, true);
565                }
566            }
567        }
568
569        // Build a set of byte ranges that are already covered by parsed links/images
570        let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
571
572        // Add ranges from parsed links
573        for link in &ctx.links {
574            covered_ranges.push((link.byte_offset, link.byte_end));
575        }
576
577        // Add ranges from parsed images
578        for image in &ctx.images {
579            covered_ranges.push((image.byte_offset, image.byte_end));
580        }
581
582        // Sort ranges by start position
583        covered_ranges.sort_by_key(|&(start, _)| start);
584
585        // Handle shortcut references [text] which aren't captured in ctx.links
586        // Only check these if shortcut_syntax is enabled (default: false)
587        // Shortcut syntax is ambiguous because [text] could be a reference link
588        // OR just text in brackets (like spec notation in quotes)
589        if !self.config.shortcut_syntax {
590            return undefined;
591        }
592
593        // Need to use regex for shortcut references
594        let lines = ctx.raw_lines();
595        in_example_section = false; // Reset for line-by-line processing
596
597        for (line_num, line) in lines.iter().enumerate() {
598            // Skip lines in frontmatter (convert 0-based to 1-based for line_info)
599            if ctx.line_info(line_num + 1).is_some_and(|info| info.in_front_matter) {
600                continue;
601            }
602
603            // Handle code blocks
604            if let Some(cap) = FENCED_CODE_START.captures(line) {
605                if let Some(fence) = cap.get(2) {
606                    // Get the fence marker (``` or ~~~) without the indentation
607                    let fence_str = fence.as_str();
608                    if !in_code_block {
609                        in_code_block = true;
610                        code_fence_marker = fence_str.to_string();
611                    } else if line.trim_start().starts_with(&code_fence_marker) {
612                        // Check if this could be a closing fence
613                        let trimmed = line.trim_start();
614                        // A closing fence should be just the fence characters, possibly with trailing whitespace
615                        if trimmed.starts_with(&code_fence_marker) {
616                            let after_fence = &trimmed[code_fence_marker.len()..];
617                            if after_fence.trim().is_empty() {
618                                in_code_block = false;
619                                code_fence_marker.clear();
620                            }
621                        }
622                    }
623                }
624                continue;
625            }
626
627            if in_code_block {
628                continue;
629            }
630
631            // Check for example sections
632            if OUTPUT_EXAMPLE_START.is_match(line) {
633                in_example_section = true;
634                continue;
635            }
636
637            if in_example_section {
638                // Check if we're exiting the example section (another heading)
639                if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
640                    in_example_section = false;
641                } else {
642                    continue;
643                }
644            }
645
646            // Skip list items
647            if LIST_ITEM_REGEX.is_match(line) {
648                continue;
649            }
650
651            // Skip lines that are HTML content
652            let trimmed_line = line.trim_start();
653            if trimmed_line.starts_with('<') {
654                continue;
655            }
656
657            // Skip GitHub alerts/callouts (e.g., > [!TIP])
658            if GITHUB_ALERT_REGEX.is_match(line) {
659                continue;
660            }
661
662            // Skip abbreviation definitions (*[ABBR]: Definition)
663            // These are not reference links and should not be checked
664            if trimmed_line.starts_with("*[") {
665                continue;
666            }
667
668            // Collect positions of brackets that are part of URLs (IPv6, etc.)
669            // so we can exclude them from reference checking
670            let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
671            for mat in URL_WITH_BRACKETS.find_iter(line) {
672                // Find all bracket pairs within this URL match
673                let url_str = mat.as_str();
674                let url_start = mat.start();
675
676                // Find brackets within the URL (e.g., in https://[::1]:8080)
677                let mut idx = 0;
678                while idx < url_str.len() {
679                    if let Some(bracket_start) = url_str[idx..].find('[') {
680                        let bracket_start_abs = url_start + idx + bracket_start;
681                        if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
682                            let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
683                            url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
684                            idx += bracket_start + bracket_end + 2;
685                        } else {
686                            break;
687                        }
688                    } else {
689                        break;
690                    }
691                }
692            }
693
694            // Check shortcut references: [reference]
695            if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
696                for cap in captures {
697                    if let Some(ref_match) = cap.get(1) {
698                        // Check if this bracket is part of a URL (IPv6, etc.)
699                        let bracket_start = cap.get(0).unwrap().start();
700                        let bracket_end = cap.get(0).unwrap().end();
701
702                        // Skip if this bracket pair is within any URL bracket range
703                        let is_in_url = url_bracket_ranges
704                            .iter()
705                            .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
706
707                        if is_in_url {
708                            continue;
709                        }
710
711                        // Skip Pandoc/RMarkdown inline footnotes: ^[text]
712                        // Check if there's a ^ immediately before the opening bracket
713                        if bracket_start > 0 {
714                            // bracket_start is a byte offset, so we need to check the byte before
715                            if let Some(byte) = line.as_bytes().get(bracket_start.saturating_sub(1))
716                                && *byte == b'^'
717                            {
718                                continue; // This is an inline footnote, skip it
719                            }
720                        }
721
722                        let reference = ref_match.as_str();
723                        let reference_lower = reference.to_lowercase();
724
725                        // Skip known non-reference patterns (markdown extensions, code examples)
726                        if self.is_known_non_reference_pattern(reference) {
727                            continue;
728                        }
729
730                        // Skip GitHub alerts (including extended types)
731                        if let Some(alert_type) = reference.strip_prefix('!')
732                            && matches!(
733                                alert_type,
734                                "NOTE"
735                                    | "TIP"
736                                    | "WARNING"
737                                    | "IMPORTANT"
738                                    | "CAUTION"
739                                    | "INFO"
740                                    | "SUCCESS"
741                                    | "FAILURE"
742                                    | "DANGER"
743                                    | "BUG"
744                                    | "EXAMPLE"
745                                    | "QUOTE"
746                            )
747                        {
748                            continue;
749                        }
750
751                        // Skip MkDocs snippet section markers like [start:section] or [end:section]
752                        // when they appear as part of snippet syntax (e.g., # -8<- [start:section])
753                        if mkdocs_mode
754                            && (reference.starts_with("start:") || reference.starts_with("end:"))
755                            && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
756                                || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
757                        {
758                            continue;
759                        }
760
761                        // Skip MkDocs auto-references if in MkDocs mode
762                        // Strip backticks since MkDocs resolves `module.Class` as module.Class
763                        let stripped_ref = Self::strip_backticks(reference);
764                        if mkdocs_mode
765                            && (is_mkdocs_auto_reference(stripped_ref)
766                                || (reference != stripped_ref && Self::is_valid_python_identifier(stripped_ref)))
767                        {
768                            continue;
769                        }
770
771                        if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
772                            let full_match = cap.get(0).unwrap();
773                            let col = full_match.start();
774
775                            // Skip if inside code span
776                            let code_spans = ctx.code_spans();
777                            if Self::is_in_code_span(line_num + 1, col, &code_spans) {
778                                continue;
779                            }
780
781                            // Check if this position is within a covered range
782                            let line_start_byte = ctx.line_offsets[line_num];
783                            let byte_pos = line_start_byte + col;
784
785                            // Skip if inside Jinja template
786                            if ctx.is_in_jinja_range(byte_pos) {
787                                continue;
788                            }
789
790                            // Skip if inside code block
791                            if crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block(
792                                &ctx.code_blocks,
793                                byte_pos,
794                            ) {
795                                continue;
796                            }
797
798                            // Skip if inside HTML comment (uses pre-computed ranges)
799                            if ctx.is_in_html_comment(byte_pos) {
800                                continue;
801                            }
802
803                            // Skip if inside HTML tag
804                            if Self::is_in_html_tag(ctx, byte_pos) {
805                                continue;
806                            }
807
808                            // Skip if inside math context
809                            if is_in_math_context(ctx, byte_pos) {
810                                continue;
811                            }
812
813                            // Skip if inside table cell
814                            if is_in_table_cell(ctx, line_num + 1, col) {
815                                continue;
816                            }
817
818                            let byte_end = byte_pos + (full_match.end() - full_match.start());
819
820                            // Check if this shortcut ref overlaps with any parsed link/image
821                            let mut is_covered = false;
822                            for &(range_start, range_end) in &covered_ranges {
823                                if range_start <= byte_pos && byte_end <= range_end {
824                                    // This shortcut ref is completely within a parsed link/image
825                                    is_covered = true;
826                                    break;
827                                }
828                                if range_start > byte_end {
829                                    // No need to check further (ranges are sorted)
830                                    break;
831                                }
832                            }
833
834                            if is_covered {
835                                continue;
836                            }
837
838                            // More sophisticated checks to avoid false positives
839
840                            // Check 1: If preceded by ], this might be part of [text][ref]
841                            // Look for the pattern ...][ref] and check if there's a matching [ before
842                            let line_chars: Vec<char> = line.chars().collect();
843                            if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
844                                // Look backwards for a [ that would make this [text][ref]
845                                let mut bracket_count = 1; // We already saw one ]
846                                let mut check_pos = col.saturating_sub(2);
847                                let mut found_opening = false;
848
849                                while check_pos > 0 && check_pos < line_chars.len() {
850                                    match line_chars.get(check_pos) {
851                                        Some(&']') => bracket_count += 1,
852                                        Some(&'[') => {
853                                            bracket_count -= 1;
854                                            if bracket_count == 0 {
855                                                // Check if this [ is escaped
856                                                if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
857                                                    found_opening = true;
858                                                }
859                                                break;
860                                            }
861                                        }
862                                        _ => {}
863                                    }
864                                    if check_pos == 0 {
865                                        break;
866                                    }
867                                    check_pos = check_pos.saturating_sub(1);
868                                }
869
870                                if found_opening {
871                                    // This is part of [text][ref], skip it
872                                    continue;
873                                }
874                            }
875
876                            // Check 2: If there's an escaped bracket pattern before this
877                            // e.g., \[text\][ref], the [ref] shouldn't be treated as a shortcut
878                            let before_text = &line[..col];
879                            if before_text.contains("\\]") {
880                                // Check if there's a \[ before the \]
881                                if let Some(escaped_close_pos) = before_text.rfind("\\]") {
882                                    let search_text = &before_text[..escaped_close_pos];
883                                    if search_text.contains("\\[") {
884                                        // This looks like \[...\][ref], skip it
885                                        continue;
886                                    }
887                                }
888                            }
889
890                            let match_len = full_match.end() - full_match.start();
891                            undefined.push((line_num, col, match_len, reference.to_string()));
892                            reported_refs.insert(reference_lower, true);
893                        }
894                    }
895                }
896            }
897        }
898
899        undefined
900    }
901}
902
903impl Rule for MD052ReferenceLinkImages {
904    fn name(&self) -> &'static str {
905        "MD052"
906    }
907
908    fn description(&self) -> &'static str {
909        "Reference links and images should use a reference that exists"
910    }
911
912    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
913        let content = ctx.content;
914        let mut warnings = Vec::new();
915
916        // OPTIMIZATION: Early exit if no brackets at all
917        if !content.contains('[') {
918            return Ok(warnings);
919        }
920
921        // Check if we're in MkDocs mode from the context
922        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
923
924        let references = self.extract_references(content, mkdocs_mode);
925
926        // Use optimized detection method with cached link/image data
927        let lines = ctx.raw_lines();
928        for (line_num, col, match_len, reference) in self.find_undefined_references(&references, ctx, mkdocs_mode) {
929            let line_content = lines.get(line_num).unwrap_or(&"");
930
931            // Calculate precise character range for the entire undefined reference
932            let (start_line, start_col, end_line, end_col) =
933                calculate_match_range(line_num + 1, line_content, col, match_len);
934
935            warnings.push(LintWarning {
936                rule_name: Some(self.name().to_string()),
937                line: start_line,
938                column: start_col,
939                end_line,
940                end_column: end_col,
941                message: format!("Reference '{reference}' not found"),
942                severity: Severity::Warning,
943                fix: None,
944            });
945        }
946
947        Ok(warnings)
948    }
949
950    /// Check if this rule should be skipped for performance
951    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
952        // Skip if content is empty or has no links/images
953        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
954    }
955
956    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
957        let content = ctx.content;
958        // No automatic fix available for undefined references
959        Ok(content.to_string())
960    }
961
962    fn as_any(&self) -> &dyn std::any::Any {
963        self
964    }
965
966    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
967    where
968        Self: Sized,
969    {
970        let rule_config = crate::rule_config_serde::load_rule_config::<MD052Config>(config);
971        Box::new(Self::from_config_struct(rule_config))
972    }
973}
974
975#[cfg(test)]
976mod tests {
977    use super::*;
978    use crate::lint_context::LintContext;
979
980    #[test]
981    fn test_valid_reference_link() {
982        let rule = MD052ReferenceLinkImages::new();
983        let content = "[text][ref]\n\n[ref]: https://example.com";
984        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
985        let result = rule.check(&ctx).unwrap();
986
987        assert_eq!(result.len(), 0);
988    }
989
990    #[test]
991    fn test_undefined_reference_link() {
992        let rule = MD052ReferenceLinkImages::new();
993        let content = "[text][undefined]";
994        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
995        let result = rule.check(&ctx).unwrap();
996
997        assert_eq!(result.len(), 1);
998        assert!(result[0].message.contains("Reference 'undefined' not found"));
999    }
1000
1001    #[test]
1002    fn test_valid_reference_image() {
1003        let rule = MD052ReferenceLinkImages::new();
1004        let content = "![alt][img]\n\n[img]: image.jpg";
1005        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1006        let result = rule.check(&ctx).unwrap();
1007
1008        assert_eq!(result.len(), 0);
1009    }
1010
1011    #[test]
1012    fn test_undefined_reference_image() {
1013        let rule = MD052ReferenceLinkImages::new();
1014        let content = "![alt][missing]";
1015        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1016        let result = rule.check(&ctx).unwrap();
1017
1018        assert_eq!(result.len(), 1);
1019        assert!(result[0].message.contains("Reference 'missing' not found"));
1020    }
1021
1022    #[test]
1023    fn test_case_insensitive_references() {
1024        let rule = MD052ReferenceLinkImages::new();
1025        let content = "[Text][REF]\n\n[ref]: https://example.com";
1026        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1027        let result = rule.check(&ctx).unwrap();
1028
1029        assert_eq!(result.len(), 0);
1030    }
1031
1032    #[test]
1033    fn test_shortcut_reference_valid() {
1034        let rule = MD052ReferenceLinkImages::new();
1035        let content = "[ref]\n\n[ref]: https://example.com";
1036        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1037        let result = rule.check(&ctx).unwrap();
1038
1039        assert_eq!(result.len(), 0);
1040    }
1041
1042    #[test]
1043    fn test_shortcut_reference_undefined_with_shortcut_syntax_enabled() {
1044        // Shortcut syntax checking is disabled by default
1045        // Enable it to test undefined shortcut references
1046        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1047            shortcut_syntax: true,
1048            ..Default::default()
1049        });
1050        let content = "[undefined]";
1051        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1052        let result = rule.check(&ctx).unwrap();
1053
1054        assert_eq!(result.len(), 1);
1055        assert!(result[0].message.contains("Reference 'undefined' not found"));
1056    }
1057
1058    #[test]
1059    fn test_shortcut_reference_not_checked_by_default() {
1060        // By default, shortcut references are NOT checked (matches markdownlint behavior)
1061        let rule = MD052ReferenceLinkImages::new();
1062        let content = "[undefined]";
1063        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1064        let result = rule.check(&ctx).unwrap();
1065
1066        // Should be 0 because shortcut_syntax is false by default
1067        assert_eq!(result.len(), 0);
1068    }
1069
1070    #[test]
1071    fn test_inline_links_ignored() {
1072        let rule = MD052ReferenceLinkImages::new();
1073        let content = "[text](https://example.com)";
1074        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1075        let result = rule.check(&ctx).unwrap();
1076
1077        assert_eq!(result.len(), 0);
1078    }
1079
1080    #[test]
1081    fn test_inline_images_ignored() {
1082        let rule = MD052ReferenceLinkImages::new();
1083        let content = "![alt](image.jpg)";
1084        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1085        let result = rule.check(&ctx).unwrap();
1086
1087        assert_eq!(result.len(), 0);
1088    }
1089
1090    #[test]
1091    fn test_references_in_code_blocks_ignored() {
1092        let rule = MD052ReferenceLinkImages::new();
1093        let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
1094        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1095        let result = rule.check(&ctx).unwrap();
1096
1097        assert_eq!(result.len(), 0);
1098    }
1099
1100    #[test]
1101    fn test_references_in_inline_code_ignored() {
1102        let rule = MD052ReferenceLinkImages::new();
1103        let content = "`[undefined]`";
1104        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1105        let result = rule.check(&ctx).unwrap();
1106
1107        // References inside inline code spans should be ignored
1108        assert_eq!(result.len(), 0);
1109    }
1110
1111    #[test]
1112    fn test_comprehensive_inline_code_detection() {
1113        // Enable shortcut_syntax to test comprehensive detection
1114        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1115            shortcut_syntax: true,
1116            ..Default::default()
1117        });
1118        let content = r#"# Test
1119
1120This `[inside]` should be ignored.
1121This [outside] should be flagged.
1122Reference links `[text][ref]` in code are ignored.
1123Regular reference [text][missing] should be flagged.
1124Images `![alt][img]` in code are ignored.
1125Regular image ![alt][badimg] should be flagged.
1126
1127Multiple `[one]` and `[two]` in code ignored, but [three] is not.
1128
1129```
1130[code block content] should be ignored
1131```
1132
1133`Multiple [refs] in [same] code span` ignored."#;
1134
1135        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1136        let result = rule.check(&ctx).unwrap();
1137
1138        // Should only flag: outside, missing, badimg, three (4 total)
1139        assert_eq!(result.len(), 4);
1140
1141        let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
1142        assert!(messages.iter().any(|m| m.contains("outside")));
1143        assert!(messages.iter().any(|m| m.contains("missing")));
1144        assert!(messages.iter().any(|m| m.contains("badimg")));
1145        assert!(messages.iter().any(|m| m.contains("three")));
1146
1147        // Should NOT flag any references inside code spans
1148        assert!(!messages.iter().any(|m| m.contains("inside")));
1149        assert!(!messages.iter().any(|m| m.contains("one")));
1150        assert!(!messages.iter().any(|m| m.contains("two")));
1151        assert!(!messages.iter().any(|m| m.contains("refs")));
1152        assert!(!messages.iter().any(|m| m.contains("same")));
1153    }
1154
1155    #[test]
1156    fn test_multiple_undefined_references() {
1157        let rule = MD052ReferenceLinkImages::new();
1158        let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
1159        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1160        let result = rule.check(&ctx).unwrap();
1161
1162        assert_eq!(result.len(), 3);
1163        assert!(result[0].message.contains("ref1"));
1164        assert!(result[1].message.contains("ref2"));
1165        assert!(result[2].message.contains("ref3"));
1166    }
1167
1168    #[test]
1169    fn test_mixed_valid_and_undefined() {
1170        let rule = MD052ReferenceLinkImages::new();
1171        let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
1172        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1173        let result = rule.check(&ctx).unwrap();
1174
1175        assert_eq!(result.len(), 1);
1176        assert!(result[0].message.contains("missing"));
1177    }
1178
1179    #[test]
1180    fn test_empty_reference() {
1181        let rule = MD052ReferenceLinkImages::new();
1182        let content = "[text][]\n\n[ref]: https://example.com";
1183        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1184        let result = rule.check(&ctx).unwrap();
1185
1186        // Empty reference should use the link text as reference
1187        assert_eq!(result.len(), 1);
1188    }
1189
1190    #[test]
1191    fn test_escaped_brackets_ignored() {
1192        let rule = MD052ReferenceLinkImages::new();
1193        let content = "\\[not a link\\]";
1194        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1195        let result = rule.check(&ctx).unwrap();
1196
1197        assert_eq!(result.len(), 0);
1198    }
1199
1200    #[test]
1201    fn test_list_items_ignored() {
1202        let rule = MD052ReferenceLinkImages::new();
1203        let content = "- [undefined]\n* [another]\n+ [third]";
1204        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1205        let result = rule.check(&ctx).unwrap();
1206
1207        // List items that look like shortcut references should be ignored
1208        assert_eq!(result.len(), 0);
1209    }
1210
1211    #[test]
1212    fn test_output_example_section_ignored() {
1213        // Enable shortcut_syntax to test example section handling
1214        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1215            shortcut_syntax: true,
1216            ..Default::default()
1217        });
1218        let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
1219        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1220        let result = rule.check(&ctx).unwrap();
1221
1222        // Only the reference outside the Output section should be flagged
1223        assert_eq!(result.len(), 1);
1224        assert!(result[0].message.contains("missing"));
1225    }
1226
1227    #[test]
1228    fn test_reference_definitions_in_code_blocks_ignored() {
1229        let rule = MD052ReferenceLinkImages::new();
1230        let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
1231        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1232        let result = rule.check(&ctx).unwrap();
1233
1234        // Reference defined in code block should not count
1235        assert_eq!(result.len(), 1);
1236        assert!(result[0].message.contains("ref"));
1237    }
1238
1239    #[test]
1240    fn test_multiple_references_to_same_undefined() {
1241        let rule = MD052ReferenceLinkImages::new();
1242        let content = "[first][missing] [second][missing] [third][missing]";
1243        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1244        let result = rule.check(&ctx).unwrap();
1245
1246        // Should only report once per unique reference
1247        assert_eq!(result.len(), 1);
1248        assert!(result[0].message.contains("missing"));
1249    }
1250
1251    #[test]
1252    fn test_reference_with_special_characters() {
1253        let rule = MD052ReferenceLinkImages::new();
1254        let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
1255        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1256        let result = rule.check(&ctx).unwrap();
1257
1258        assert_eq!(result.len(), 0);
1259    }
1260
1261    #[test]
1262    fn test_issue_51_html_attribute_not_reference() {
1263        // Test for issue #51 - HTML attributes with square brackets shouldn't be treated as references
1264        let rule = MD052ReferenceLinkImages::new();
1265        let content = r#"# Example
1266
1267## Test
1268
1269Want to fill out this form?
1270
1271<form method="post">
1272    <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
1273</form>"#;
1274        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1275        let result = rule.check(&ctx).unwrap();
1276
1277        assert_eq!(
1278            result.len(),
1279            0,
1280            "HTML attributes with square brackets should not be flagged as undefined references"
1281        );
1282    }
1283
1284    #[test]
1285    fn test_extract_references() {
1286        let rule = MD052ReferenceLinkImages::new();
1287        let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
1288        let refs = rule.extract_references(content, false);
1289
1290        assert_eq!(refs.len(), 3);
1291        assert!(refs.contains("ref1"));
1292        assert!(refs.contains("ref2"));
1293        assert!(refs.contains("ref3"));
1294    }
1295
1296    #[test]
1297    fn test_inline_code_not_flagged() {
1298        // Enable shortcut_syntax to test inline code detection
1299        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1300            shortcut_syntax: true,
1301            ..Default::default()
1302        });
1303
1304        // Test that arrays in inline code are not flagged as references
1305        let content = r#"# Test
1306
1307Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.
1308
1309Also, `[todo]` is not a reference link.
1310
1311But this [reference] should be flagged.
1312
1313And this `[inline code]` should not be flagged.
1314"#;
1315
1316        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1317        let warnings = rule.check(&ctx).unwrap();
1318
1319        // Should only flag [reference], not the ones in backticks
1320        assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
1321        assert!(warnings[0].message.contains("'reference'"));
1322    }
1323
1324    #[test]
1325    fn test_code_block_references_ignored() {
1326        // Enable shortcut_syntax to test code block handling
1327        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1328            shortcut_syntax: true,
1329            ..Default::default()
1330        });
1331
1332        let content = r#"# Test
1333
1334```markdown
1335[undefined] reference in code block
1336![undefined] image in code block
1337```
1338
1339[real-undefined] reference outside
1340"#;
1341
1342        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1343        let warnings = rule.check(&ctx).unwrap();
1344
1345        // Should only flag [real-undefined], not the ones in code block
1346        assert_eq!(warnings.len(), 1);
1347        assert!(warnings[0].message.contains("'real-undefined'"));
1348    }
1349
1350    #[test]
1351    fn test_html_comments_ignored() {
1352        // Test for issue #20 - MD052 should not flag content inside HTML comments
1353        let rule = MD052ReferenceLinkImages::new();
1354
1355        // Test the exact case from issue #20
1356        let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
1357<!--- set_env EDITOR 'python3 fake_editor.py' -->
1358
1359```bash
1360$ python3 vote.py
13613 votes for: 2
13622 votes for: 3, 4
1363```"#;
1364        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1365        let result = rule.check(&ctx).unwrap();
1366        assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");
1367
1368        // Test various reference patterns inside HTML comments
1369        let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
1370Normal [text][undefined]
1371<!-- Another [comment][with] references -->"#;
1372        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1373        let result = rule.check(&ctx).unwrap();
1374        assert_eq!(
1375            result.len(),
1376            1,
1377            "Should only flag the undefined reference outside comments"
1378        );
1379        assert!(result[0].message.contains("undefined"));
1380
1381        // Test multi-line HTML comments
1382        let content = r#"<!--
1383[ref1]
1384[ref2][ref3]
1385-->
1386[actual][undefined]"#;
1387        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1388        let result = rule.check(&ctx).unwrap();
1389        assert_eq!(
1390            result.len(),
1391            1,
1392            "Should not flag references in multi-line HTML comments"
1393        );
1394        assert!(result[0].message.contains("undefined"));
1395
1396        // Test mixed scenarios
1397        let content = r#"<!-- Comment with [1:] pattern -->
1398Valid [link][ref]
1399<!-- More [refs][in][comments] -->
1400![image][missing]
1401
1402[ref]: https://example.com"#;
1403        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1404        let result = rule.check(&ctx).unwrap();
1405        assert_eq!(result.len(), 1, "Should only flag missing image reference");
1406        assert!(result[0].message.contains("missing"));
1407    }
1408
1409    #[test]
1410    fn test_frontmatter_ignored() {
1411        // Test for issue #24 - MD052 should not flag content inside frontmatter
1412        // Enable shortcut_syntax to test frontmatter handling
1413        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1414            shortcut_syntax: true,
1415            ..Default::default()
1416        });
1417
1418        // Test YAML frontmatter with arrays and references
1419        let content = r#"---
1420layout: post
1421title: "My Jekyll Post"
1422date: 2023-01-01
1423categories: blog
1424tags: ["test", "example"]
1425author: John Doe
1426---
1427
1428# My Blog Post
1429
1430This is the actual markdown content that should be linted.
1431
1432[undefined] reference should be flagged.
1433
1434## Section 1
1435
1436Some content here."#;
1437        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1438        let result = rule.check(&ctx).unwrap();
1439
1440        // Should only flag [undefined] in the content, not the ["test", "example"] array in frontmatter
1441        assert_eq!(
1442            result.len(),
1443            1,
1444            "Should only flag the undefined reference outside frontmatter"
1445        );
1446        assert!(result[0].message.contains("undefined"));
1447
1448        // Test TOML frontmatter
1449        let content = r#"+++
1450title = "My Post"
1451tags = ["example", "test"]
1452+++
1453
1454# Content
1455
1456[missing] reference should be flagged."#;
1457        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1458        let result = rule.check(&ctx).unwrap();
1459        assert_eq!(
1460            result.len(),
1461            1,
1462            "Should only flag the undefined reference outside TOML frontmatter"
1463        );
1464        assert!(result[0].message.contains("missing"));
1465    }
1466
1467    #[test]
1468    fn test_mkdocs_snippet_markers_not_flagged() {
1469        // Test for issue #68 - MkDocs snippet selection markers should not be flagged as undefined references
1470        // Enable shortcut_syntax to test snippet marker handling
1471        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1472            shortcut_syntax: true,
1473            ..Default::default()
1474        });
1475
1476        // Test snippet section markers
1477        let content = r#"# Document with MkDocs Snippets
1478
1479Some content here.
1480
1481# -8<- [start:remote-content]
1482
1483This is the remote content section.
1484
1485# -8<- [end:remote-content]
1486
1487More content here.
1488
1489<!-- --8<-- [start:another-section] -->
1490Content in another section
1491<!-- --8<-- [end:another-section] -->"#;
1492        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
1493        let result = rule.check(&ctx).unwrap();
1494
1495        // Should not flag any snippet markers as undefined references
1496        assert_eq!(
1497            result.len(),
1498            0,
1499            "Should not flag MkDocs snippet markers as undefined references"
1500        );
1501
1502        // Test that the snippet marker lines are properly skipped
1503        // but regular undefined references on other lines are still caught
1504        let content = r#"# Document
1505
1506# -8<- [start:section]
1507Content with [reference] inside snippet section
1508# -8<- [end:section]
1509
1510Regular [undefined] reference outside snippet markers."#;
1511        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
1512        let result = rule.check(&ctx).unwrap();
1513
1514        assert_eq!(
1515            result.len(),
1516            2,
1517            "Should flag undefined references but skip snippet marker lines"
1518        );
1519        // The references inside the content should be flagged, but not start: and end:
1520        assert!(result[0].message.contains("reference"));
1521        assert!(result[1].message.contains("undefined"));
1522
1523        // Test in standard mode - should flag the markers as undefined
1524        let content = r#"# Document
1525
1526# -8<- [start:section]
1527# -8<- [end:section]"#;
1528        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1529        let result = rule.check(&ctx).unwrap();
1530
1531        assert_eq!(
1532            result.len(),
1533            2,
1534            "In standard mode, snippet markers should be flagged as undefined references"
1535        );
1536    }
1537
1538    #[test]
1539    fn test_pandoc_citations_not_flagged() {
1540        // Test that Pandoc/RMarkdown/Quarto citation syntax is not flagged
1541        // Enable shortcut_syntax to test citation handling
1542        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1543            shortcut_syntax: true,
1544            ..Default::default()
1545        });
1546
1547        let content = r#"# Research Paper
1548
1549We are using the **bookdown** package [@R-bookdown] in this sample book.
1550This was built on top of R Markdown and **knitr** [@xie2015].
1551
1552Multiple citations [@citation1; @citation2; @citation3] are also supported.
1553
1554Regular [undefined] reference should still be flagged.
1555"#;
1556        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1557        let result = rule.check(&ctx).unwrap();
1558
1559        // Should only flag the undefined reference, not the citations
1560        assert_eq!(
1561            result.len(),
1562            1,
1563            "Should only flag the undefined reference, not Pandoc citations"
1564        );
1565        assert!(result[0].message.contains("undefined"));
1566    }
1567
1568    #[test]
1569    fn test_pandoc_inline_footnotes_not_flagged() {
1570        // Test that Pandoc inline footnote syntax is not flagged
1571        // Enable shortcut_syntax to test inline footnote handling
1572        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1573            shortcut_syntax: true,
1574            ..Default::default()
1575        });
1576
1577        let content = r#"# Math Document
1578
1579You can use math in footnotes like this^[where we mention $p = \frac{a}{b}$].
1580
1581Another footnote^[with some text and a [link](https://example.com)].
1582
1583But this [reference] without ^ should be flagged.
1584"#;
1585        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1586        let result = rule.check(&ctx).unwrap();
1587
1588        // Should only flag the reference without ^
1589        assert_eq!(
1590            result.len(),
1591            1,
1592            "Should only flag the regular reference, not inline footnotes"
1593        );
1594        assert!(result[0].message.contains("reference"));
1595    }
1596
1597    #[test]
1598    fn test_github_alerts_not_flagged() {
1599        // Test for issue #60 - GitHub alerts should not be flagged as undefined references
1600        // Enable shortcut_syntax to test GitHub alert handling
1601        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1602            shortcut_syntax: true,
1603            ..Default::default()
1604        });
1605
1606        // Test various GitHub alert types
1607        let content = r#"# Document with GitHub Alerts
1608
1609> [!NOTE]
1610> This is a note alert.
1611
1612> [!TIP]
1613> This is a tip alert.
1614
1615> [!IMPORTANT]
1616> This is an important alert.
1617
1618> [!WARNING]
1619> This is a warning alert.
1620
1621> [!CAUTION]
1622> This is a caution alert.
1623
1624Regular content with [undefined] reference."#;
1625        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1626        let result = rule.check(&ctx).unwrap();
1627
1628        // Should only flag the undefined reference, not the GitHub alerts
1629        assert_eq!(
1630            result.len(),
1631            1,
1632            "Should only flag the undefined reference, not GitHub alerts"
1633        );
1634        assert!(result[0].message.contains("undefined"));
1635        assert_eq!(result[0].line, 18); // Line with [undefined]
1636
1637        // Test GitHub alerts with additional content
1638        let content = r#"> [!TIP]
1639> Here's a useful tip about [something].
1640> Multiple lines are allowed.
1641
1642[something] is mentioned but not defined."#;
1643        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1644        let result = rule.check(&ctx).unwrap();
1645
1646        // Should flag only the [something] outside blockquotes
1647        // The test shows we're only catching one, which might be correct behavior
1648        // matching markdownlint's approach
1649        assert_eq!(result.len(), 1, "Should flag undefined reference");
1650        assert!(result[0].message.contains("something"));
1651
1652        // Test GitHub alerts with proper references
1653        let content = r#"> [!NOTE]
1654> See [reference] for more details.
1655
1656[reference]: https://example.com"#;
1657        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1658        let result = rule.check(&ctx).unwrap();
1659
1660        // Should not flag anything - [!NOTE] is GitHub alert and [reference] is defined
1661        assert_eq!(result.len(), 0, "Should not flag GitHub alerts or defined references");
1662    }
1663
1664    #[test]
1665    fn test_ignore_config() {
1666        // Test that user-configured ignore list is respected
1667        let config = MD052Config {
1668            shortcut_syntax: true,
1669            ignore: vec!["Vec".to_string(), "HashMap".to_string(), "Option".to_string()],
1670        };
1671        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1672
1673        let content = r#"# Document with Custom Types
1674
1675Use [Vec] for dynamic arrays.
1676Use [HashMap] for key-value storage.
1677Use [Option] for nullable values.
1678Use [Result] for error handling.
1679"#;
1680        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1681        let result = rule.check(&ctx).unwrap();
1682
1683        // Should only flag [Result] because it's not in ignore
1684        assert_eq!(result.len(), 1, "Should only flag names not in ignore");
1685        assert!(result[0].message.contains("Result"));
1686    }
1687
1688    #[test]
1689    fn test_ignore_case_insensitive() {
1690        // Test that ignore list is case-insensitive
1691        let config = MD052Config {
1692            shortcut_syntax: true,
1693            ignore: vec!["Vec".to_string()],
1694        };
1695        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1696
1697        let content = r#"# Case Insensitivity Test
1698
1699[Vec] should be ignored.
1700[vec] should also be ignored (different case, same match).
1701[VEC] should also be ignored (different case, same match).
1702[undefined] should be flagged (not in ignore list).
1703"#;
1704        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1705        let result = rule.check(&ctx).unwrap();
1706
1707        // Should only flag [undefined] because ignore is case-insensitive
1708        assert_eq!(result.len(), 1, "Should only flag non-ignored reference");
1709        assert!(result[0].message.contains("undefined"));
1710    }
1711
1712    #[test]
1713    fn test_ignore_empty_by_default() {
1714        // Test that empty ignore list doesn't affect existing behavior
1715        let rule = MD052ReferenceLinkImages::new();
1716
1717        let content = "[text][undefined]";
1718        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1719        let result = rule.check(&ctx).unwrap();
1720
1721        // Should still flag undefined references
1722        assert_eq!(result.len(), 1);
1723        assert!(result[0].message.contains("undefined"));
1724    }
1725
1726    #[test]
1727    fn test_ignore_with_reference_links() {
1728        // Test ignore list with full reference link syntax [text][ref]
1729        let config = MD052Config {
1730            shortcut_syntax: false,
1731            ignore: vec!["CustomType".to_string()],
1732        };
1733        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1734
1735        let content = r#"# Test
1736
1737See [documentation][CustomType] for details.
1738See [other docs][MissingRef] for more.
1739"#;
1740        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1741        let result = rule.check(&ctx).unwrap();
1742
1743        // Debug: print warnings if test fails
1744        for (i, w) in result.iter().enumerate() {
1745            eprintln!("Warning {}: {}", i, w.message);
1746        }
1747
1748        // Should flag [MissingRef] but not [CustomType]
1749        // Note: reference IDs are lowercased in the message
1750        assert_eq!(result.len(), 1, "Expected 1 warning, got {}", result.len());
1751        assert!(
1752            result[0].message.contains("missingref"),
1753            "Expected 'missingref' in message: {}",
1754            result[0].message
1755        );
1756    }
1757
1758    #[test]
1759    fn test_ignore_multiple() {
1760        // Test multiple ignored names work correctly
1761        let config = MD052Config {
1762            shortcut_syntax: true,
1763            ignore: vec![
1764                "i32".to_string(),
1765                "u64".to_string(),
1766                "String".to_string(),
1767                "Arc".to_string(),
1768                "Mutex".to_string(),
1769            ],
1770        };
1771        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1772
1773        let content = r#"# Types
1774
1775[i32] [u64] [String] [Arc] [Mutex] [Box]
1776"#;
1777        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1778        let result = rule.check(&ctx).unwrap();
1779
1780        // Note: i32 and u64 are already in the hardcoded list, so they'd be skipped anyway
1781        // String is NOT in the hardcoded list, so we test that the user config works
1782        // [Box] should be flagged (not in ignore)
1783        assert_eq!(result.len(), 1);
1784        assert!(result[0].message.contains("Box"));
1785    }
1786}