rumdl_lib/rules/
md052_reference_links_images.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::{HTML_COMMENT_PATTERN, SHORTCUT_REF_REGEX};
5use crate::utils::skip_context::{is_in_math_context, is_in_table_cell};
6use regex::Regex;
7use std::collections::{HashMap, HashSet};
8use std::sync::LazyLock;
9
10mod md052_config;
11use md052_config::MD052Config;
12
13// Matches a reference definition line such as `[ref]: url`; capture group 1 is the label.
14// Note: the destination is matched by `.*`, so an empty definition like `[ref]:` is accepted too.
15// The capturing group handles backslash escapes and one level of nested brackets, e.g. [`union[t, none]`]:
16static REF_REGEX: LazyLock<Regex> =
17    LazyLock::new(|| Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap());
18
19// Matches the start of a `-`, `*` or `+` bullet item, optionally followed by a task checkbox (`[ ]` / `[x]`)
20static LIST_ITEM_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap());
21
22// Matches a code fence line: optional indentation (group 1), then a run of 3+ backticks or 3+ tildes (group 2)
23static FENCED_CODE_START: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap());
24
25// Matches a heading whose entire text is "Output", "Example", "Output Style" or "Output Format"
26static OUTPUT_EXAMPLE_START: LazyLock<Regex> =
27    LazyLock::new(|| Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap());
28
29// Pattern for GitHub alerts/callouts in blockquotes (e.g., > [!NOTE], > [!TIP], etc.)
30// Extended to include additional common alert types; the alert keyword must be uppercase
31static GITHUB_ALERT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
32    Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]")
33        .unwrap()
34});
35
36// Pattern to detect URLs that may contain brackets (IPv6, API endpoints, etc.)
37// This pattern specifically looks for:
38// - IPv6 addresses: https://[::1] or https://[2001:db8::1]
39// - IPv6 with zone IDs: https://[fe80::1%eth0]
40// - IPv6 mixed notation: https://[::ffff:192.0.2.1]
41// - API paths with a numeric array index: https://api.example.com/users[0]
42// But NOT markdown reference links that happen to follow URLs
43static URL_WITH_BRACKETS: LazyLock<Regex> =
44    LazyLock::new(|| Regex::new(r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])").unwrap());
45
46/// Rule MD052: Reference links and images should use a label that is defined
47///
48/// See [docs/md052.md](../../docs/md052.md) for full documentation, configuration, and examples.
49///
50/// This rule is triggered when a reference link or image uses a reference that isn't defined.
51///
52/// ## Configuration
53///
54/// - `shortcut-syntax`: Whether to check shortcut reference syntax `[text]` (default: false)
55///
56/// By default, only full (`[text][ref]`) and collapsed (`[text][]`) reference syntax is checked.
57/// Shortcut syntax is ambiguous because `[text]` could be a reference link OR just text in brackets.
58#[derive(Clone, Default)]
59pub struct MD052ReferenceLinkImages {
60    config: MD052Config,
61}
62
63impl MD052ReferenceLinkImages {
64    pub fn new() -> Self {
65        Self {
66            config: MD052Config::default(),
67        }
68    }
69
70    pub fn from_config_struct(config: MD052Config) -> Self {
71        Self { config }
72    }
73
74    /// Strip surrounding backticks from a string
75    /// Used for MkDocs auto-reference detection where `module.Class` should be treated as module.Class
76    fn strip_backticks(s: &str) -> &str {
77        s.trim_start_matches('`').trim_end_matches('`')
78    }
79
80    /// Check if a string is a valid Python identifier
81    /// Used for MkDocs auto-reference detection where single-word backtick-wrapped identifiers
82    /// like `str`, `int`, etc. should be accepted as valid auto-references
83    fn is_valid_python_identifier(s: &str) -> bool {
84        if s.is_empty() {
85            return false;
86        }
87        let first_char = s.chars().next().unwrap();
88        if !first_char.is_ascii_alphabetic() && first_char != '_' {
89            return false;
90        }
91        s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
92    }
93
94    /// Check if text matches a known non-reference pattern that should be skipped.
95    ///
96    /// These are deterministic patterns from markdown extensions or code examples,
97    /// not heuristics. Returns true for:
98    /// - Markdown extensions: [^footnote], [@citation], [!alert], [TOC]
99    /// - Programming syntax: [T], [null], [i32], ["string"]
100    /// - Descriptive text: [default: value], [0-9]
101    fn is_known_non_reference_pattern(text: &str) -> bool {
102        // Skip numeric patterns (array indices, ranges)
103        if text.chars().all(|c| c.is_ascii_digit()) {
104            return true;
105        }
106
107        // Skip numeric ranges like [1:3], [0:10], etc.
108        if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
109            return true;
110        }
111
112        // Skip patterns that look like config sections [tool.something], [section.subsection]
113        // But not if they contain other non-alphanumeric chars like hyphens, underscores, or backticks
114        // Backticks indicate intentional code formatting in a reference name (e.g., [`module.Class`])
115        if text.contains('.')
116            && !text.contains(' ')
117            && !text.contains('-')
118            && !text.contains('_')
119            && !text.contains('`')
120        {
121            // Config sections typically have dots, no spaces, and only alphanumeric + dots
122            return true;
123        }
124
125        // Skip glob/wildcard patterns like [*], [...], [**]
126        if text == "*" || text == "..." || text == "**" {
127            return true;
128        }
129
130        // Skip patterns that look like file paths [dir/file], [src/utils]
131        if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
132            return true;
133        }
134
135        // Skip programming type annotations like [int, str], [Dict[str, Any]]
136        // These typically have commas and/or nested brackets
137        if text.contains(',') || text.contains('[') || text.contains(']') {
138            // Check if it looks like a type annotation pattern
139            return true;
140        }
141
142        // Note: We don't filter out patterns with backticks because backticks in reference names
143        // are valid markdown syntax, e.g., [`dataclasses.InitVar`] is a valid reference name
144
145        // Skip patterns that look like module/class paths ONLY if they don't have backticks
146        // Backticks indicate intentional code formatting in a reference name
147        // e.g., skip [dataclasses.initvar] but allow [`typing.ClassVar`]
148        if !text.contains('`')
149            && text.contains('.')
150            && !text.contains(' ')
151            && !text.contains('-')
152            && !text.contains('_')
153        {
154            return true;
155        }
156
157        // Note: We don't filter based on word count anymore because legitimate references
158        // can have many words, like "python language reference for import statements"
159        // Word count filtering was causing false positives where valid references were
160        // being incorrectly flagged as unused
161
162        // Skip patterns that are just punctuation or operators
163        if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
164            return true;
165        }
166
167        // Skip very short non-word patterns (likely operators or syntax)
168        if text.len() <= 2 && !text.chars().all(|c| c.is_alphabetic()) {
169            return true;
170        }
171
172        // Skip quoted patterns like ["E501"], ["ALL"], ["E", "F"]
173        if (text.starts_with('"') && text.ends_with('"'))
174            || (text.starts_with('\'') && text.ends_with('\''))
175            || text.contains('"')
176            || text.contains('\'')
177        {
178            return true;
179        }
180
181        // Skip descriptive patterns with colon like [default: the project root]
182        // But allow simple numeric ranges which are handled above
183        if text.contains(':') && text.contains(' ') {
184            return true;
185        }
186
187        // Skip alert/admonition patterns like [!WARN], [!NOTE], etc.
188        if text.starts_with('!') {
189            return true;
190        }
191
192        // Skip footnote syntax like [^1], [^note], etc.
193        // Footnotes start with ^ and are a common markdown extension
194        if text.starts_with('^') {
195            return true;
196        }
197
198        // Skip Pandoc/RMarkdown/Quarto citation syntax like [@citation-key]
199        // Citations in these formats start with @ inside brackets
200        if text.starts_with('@') {
201            return true;
202        }
203
204        // Skip table of contents markers like [TOC]
205        // Used by Python-Markdown and other processors
206        if text == "TOC" {
207            return true;
208        }
209
210        // Skip single uppercase letters (likely type parameters) like [T], [U], [K], [V]
211        if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
212            return true;
213        }
214
215        // Skip common programming type names, literals, and short identifiers
216        // that are likely not markdown references
217        let common_non_refs = [
218            // Programming types
219            "object",
220            "Object",
221            "any",
222            "Any",
223            "inv",
224            "void",
225            "bool",
226            "int",
227            "float",
228            "str",
229            "char",
230            "i8",
231            "i16",
232            "i32",
233            "i64",
234            "i128",
235            "isize",
236            "u8",
237            "u16",
238            "u32",
239            "u64",
240            "u128",
241            "usize",
242            "f32",
243            "f64",
244            // JavaScript/JSON literals (excluding "undefined" which is too ambiguous)
245            "null",
246            "true",
247            "false",
248            "NaN",
249            "Infinity",
250            // Common JavaScript output patterns
251            "object Object",
252        ];
253
254        if common_non_refs.contains(&text) {
255            return true;
256        }
257
258        false
259    }
260
261    /// Check if a position is inside any code span
262    fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
263        code_spans
264            .iter()
265            .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
266    }
267
268    /// Check if a byte position is within an HTML comment
269    fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
270        for m in HTML_COMMENT_PATTERN.find_iter(content) {
271            if m.start() <= byte_pos && byte_pos < m.end() {
272                return true;
273            }
274        }
275        false
276    }
277
278    /// Check if a byte position is within an HTML tag
279    fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
280        // Check HTML tags
281        for html_tag in ctx.html_tags().iter() {
282            if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
283                return true;
284            }
285        }
286        false
287    }
288
289    fn extract_references(&self, content: &str, mkdocs_mode: bool) -> HashSet<String> {
290        use crate::config::MarkdownFlavor;
291        use crate::utils::skip_context::is_mkdocs_snippet_line;
292
293        let mut references = HashSet::new();
294        let mut in_code_block = false;
295        let mut code_fence_marker = String::new();
296
297        for line in content.lines() {
298            // Skip lines that look like MkDocs snippet markers (only in MkDocs mode)
299            if is_mkdocs_snippet_line(
300                line,
301                if mkdocs_mode {
302                    MarkdownFlavor::MkDocs
303                } else {
304                    MarkdownFlavor::Standard
305                },
306            ) {
307                continue;
308            }
309            // Handle code block boundaries
310            if let Some(cap) = FENCED_CODE_START.captures(line) {
311                if let Some(fence) = cap.get(2) {
312                    // Get the fence marker (``` or ~~~) without the indentation
313                    let fence_str = fence.as_str();
314                    if !in_code_block {
315                        in_code_block = true;
316                        code_fence_marker = fence_str.to_string();
317                    } else if line.trim_start().starts_with(&code_fence_marker) {
318                        // Check if this could be a closing fence
319                        let trimmed = line.trim_start();
320                        // A closing fence should be just the fence characters, possibly with trailing whitespace
321                        if trimmed.starts_with(&code_fence_marker) {
322                            let after_fence = &trimmed[code_fence_marker.len()..];
323                            if after_fence.trim().is_empty() {
324                                in_code_block = false;
325                                code_fence_marker.clear();
326                            }
327                        }
328                    }
329                }
330                continue;
331            }
332
333            // Skip lines in code blocks
334            if in_code_block {
335                continue;
336            }
337
338            // Check for abbreviation syntax (*[ABBR]: Definition) and skip it
339            // Abbreviations are not reference links and should not be tracked
340            if line.trim_start().starts_with("*[") {
341                continue;
342            }
343
344            if let Some(cap) = REF_REGEX.captures(line) {
345                // Store references in lowercase for case-insensitive comparison
346                if let Some(reference) = cap.get(1) {
347                    references.insert(reference.as_str().to_lowercase());
348                }
349            }
350        }
351
352        references
353    }
354
355    fn find_undefined_references(
356        &self,
357        content: &str,
358        references: &HashSet<String>,
359        ctx: &crate::lint_context::LintContext,
360        mkdocs_mode: bool,
361    ) -> Vec<(usize, usize, usize, String)> {
362        let mut undefined = Vec::new();
363        let mut reported_refs = HashMap::new();
364        let mut in_code_block = false;
365        let mut code_fence_marker = String::new();
366        let mut in_example_section = false;
367
368        // Get code spans once for the entire function
369        let code_spans = ctx.code_spans();
370
371        // Use cached data for reference links and images
372        for link in &ctx.links {
373            if !link.is_reference {
374                continue; // Skip inline links
375            }
376
377            // Skip links inside Jinja templates
378            if ctx.is_in_jinja_range(link.byte_offset) {
379                continue;
380            }
381
382            // Skip links inside code spans
383            if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
384                continue;
385            }
386
387            // Skip links inside HTML comments
388            if Self::is_in_html_comment(content, link.byte_offset) {
389                continue;
390            }
391
392            // Skip links inside HTML tags
393            if Self::is_in_html_tag(ctx, link.byte_offset) {
394                continue;
395            }
396
397            // Skip links inside math contexts
398            if is_in_math_context(ctx, link.byte_offset) {
399                continue;
400            }
401
402            // Skip links inside table cells
403            if is_in_table_cell(ctx, link.line, link.start_col) {
404                continue;
405            }
406
407            // Skip links inside frontmatter
408            if ctx.line_info(link.line).is_some_and(|info| info.in_front_matter) {
409                continue;
410            }
411
412            if let Some(ref_id) = &link.reference_id {
413                let reference_lower = ref_id.to_lowercase();
414
415                // Skip known non-reference patterns (markdown extensions, code examples)
416                if Self::is_known_non_reference_pattern(ref_id) {
417                    continue;
418                }
419
420                // Skip MkDocs auto-references if in MkDocs mode
421                // Check both the reference_id and the link text for shorthand references
422                // Strip backticks since MkDocs resolves `module.Class` as module.Class
423                let stripped_ref = Self::strip_backticks(ref_id);
424                let stripped_text = Self::strip_backticks(&link.text);
425                if mkdocs_mode
426                    && (is_mkdocs_auto_reference(stripped_ref)
427                        || is_mkdocs_auto_reference(stripped_text)
428                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
429                        || (link.text.as_ref() != stripped_text && Self::is_valid_python_identifier(stripped_text)))
430                {
431                    continue;
432                }
433
434                // Check if reference is defined
435                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
436                    // Check if the line is in an example section or list item
437                    if let Some(line_info) = ctx.line_info(link.line) {
438                        if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
439                            in_example_section = true;
440                            continue;
441                        }
442
443                        if in_example_section {
444                            continue;
445                        }
446
447                        // Skip list items
448                        if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
449                            continue;
450                        }
451
452                        // Skip lines that are HTML content
453                        let trimmed = line_info.content(ctx.content).trim_start();
454                        if trimmed.starts_with('<') {
455                            continue;
456                        }
457                    }
458
459                    let match_len = link.byte_end - link.byte_offset;
460                    undefined.push((link.line - 1, link.start_col, match_len, ref_id.to_string()));
461                    reported_refs.insert(reference_lower, true);
462                }
463            }
464        }
465
466        // Use cached data for reference images
467        for image in &ctx.images {
468            if !image.is_reference {
469                continue; // Skip inline images
470            }
471
472            // Skip images inside Jinja templates
473            if ctx.is_in_jinja_range(image.byte_offset) {
474                continue;
475            }
476
477            // Skip images inside code spans
478            if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
479                continue;
480            }
481
482            // Skip images inside HTML comments
483            if Self::is_in_html_comment(content, image.byte_offset) {
484                continue;
485            }
486
487            // Skip images inside HTML tags
488            if Self::is_in_html_tag(ctx, image.byte_offset) {
489                continue;
490            }
491
492            // Skip images inside math contexts
493            if is_in_math_context(ctx, image.byte_offset) {
494                continue;
495            }
496
497            // Skip images inside table cells
498            if is_in_table_cell(ctx, image.line, image.start_col) {
499                continue;
500            }
501
502            // Skip images inside frontmatter
503            if ctx.line_info(image.line).is_some_and(|info| info.in_front_matter) {
504                continue;
505            }
506
507            if let Some(ref_id) = &image.reference_id {
508                let reference_lower = ref_id.to_lowercase();
509
510                // Skip known non-reference patterns (markdown extensions, code examples)
511                if Self::is_known_non_reference_pattern(ref_id) {
512                    continue;
513                }
514
515                // Skip MkDocs auto-references if in MkDocs mode
516                // Check both the reference_id and the alt text for shorthand references
517                // Strip backticks since MkDocs resolves `module.Class` as module.Class
518                let stripped_ref = Self::strip_backticks(ref_id);
519                let stripped_alt = Self::strip_backticks(&image.alt_text);
520                if mkdocs_mode
521                    && (is_mkdocs_auto_reference(stripped_ref)
522                        || is_mkdocs_auto_reference(stripped_alt)
523                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
524                        || (image.alt_text.as_ref() != stripped_alt && Self::is_valid_python_identifier(stripped_alt)))
525                {
526                    continue;
527                }
528
529                // Check if reference is defined
530                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
531                    // Check if the line is in an example section or list item
532                    if let Some(line_info) = ctx.line_info(image.line) {
533                        if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
534                            in_example_section = true;
535                            continue;
536                        }
537
538                        if in_example_section {
539                            continue;
540                        }
541
542                        // Skip list items
543                        if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
544                            continue;
545                        }
546
547                        // Skip lines that are HTML content
548                        let trimmed = line_info.content(ctx.content).trim_start();
549                        if trimmed.starts_with('<') {
550                            continue;
551                        }
552                    }
553
554                    let match_len = image.byte_end - image.byte_offset;
555                    undefined.push((image.line - 1, image.start_col, match_len, ref_id.to_string()));
556                    reported_refs.insert(reference_lower, true);
557                }
558            }
559        }
560
561        // Build a set of byte ranges that are already covered by parsed links/images
562        let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
563
564        // Add ranges from parsed links
565        for link in &ctx.links {
566            covered_ranges.push((link.byte_offset, link.byte_end));
567        }
568
569        // Add ranges from parsed images
570        for image in &ctx.images {
571            covered_ranges.push((image.byte_offset, image.byte_end));
572        }
573
574        // Sort ranges by start position
575        covered_ranges.sort_by_key(|&(start, _)| start);
576
577        // Handle shortcut references [text] which aren't captured in ctx.links
578        // Only check these if shortcut_syntax is enabled (default: false)
579        // Shortcut syntax is ambiguous because [text] could be a reference link
580        // OR just text in brackets (like spec notation in quotes)
581        if !self.config.shortcut_syntax {
582            return undefined;
583        }
584
585        // Need to use regex for shortcut references
586        let lines: Vec<&str> = content.lines().collect();
587        in_example_section = false; // Reset for line-by-line processing
588
589        for (line_num, line) in lines.iter().enumerate() {
590            // Skip lines in frontmatter (convert 0-based to 1-based for line_info)
591            if ctx.line_info(line_num + 1).is_some_and(|info| info.in_front_matter) {
592                continue;
593            }
594
595            // Handle code blocks
596            if let Some(cap) = FENCED_CODE_START.captures(line) {
597                if let Some(fence) = cap.get(2) {
598                    // Get the fence marker (``` or ~~~) without the indentation
599                    let fence_str = fence.as_str();
600                    if !in_code_block {
601                        in_code_block = true;
602                        code_fence_marker = fence_str.to_string();
603                    } else if line.trim_start().starts_with(&code_fence_marker) {
604                        // Check if this could be a closing fence
605                        let trimmed = line.trim_start();
606                        // A closing fence should be just the fence characters, possibly with trailing whitespace
607                        if trimmed.starts_with(&code_fence_marker) {
608                            let after_fence = &trimmed[code_fence_marker.len()..];
609                            if after_fence.trim().is_empty() {
610                                in_code_block = false;
611                                code_fence_marker.clear();
612                            }
613                        }
614                    }
615                }
616                continue;
617            }
618
619            if in_code_block {
620                continue;
621            }
622
623            // Check for example sections
624            if OUTPUT_EXAMPLE_START.is_match(line) {
625                in_example_section = true;
626                continue;
627            }
628
629            if in_example_section {
630                // Check if we're exiting the example section (another heading)
631                if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
632                    in_example_section = false;
633                } else {
634                    continue;
635                }
636            }
637
638            // Skip list items
639            if LIST_ITEM_REGEX.is_match(line) {
640                continue;
641            }
642
643            // Skip lines that are HTML content
644            let trimmed_line = line.trim_start();
645            if trimmed_line.starts_with('<') {
646                continue;
647            }
648
649            // Skip GitHub alerts/callouts (e.g., > [!TIP])
650            if GITHUB_ALERT_REGEX.is_match(line) {
651                continue;
652            }
653
654            // Skip abbreviation definitions (*[ABBR]: Definition)
655            // These are not reference links and should not be checked
656            if trimmed_line.starts_with("*[") {
657                continue;
658            }
659
660            // Collect positions of brackets that are part of URLs (IPv6, etc.)
661            // so we can exclude them from reference checking
662            let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
663            for mat in URL_WITH_BRACKETS.find_iter(line) {
664                // Find all bracket pairs within this URL match
665                let url_str = mat.as_str();
666                let url_start = mat.start();
667
668                // Find brackets within the URL (e.g., in https://[::1]:8080)
669                let mut idx = 0;
670                while idx < url_str.len() {
671                    if let Some(bracket_start) = url_str[idx..].find('[') {
672                        let bracket_start_abs = url_start + idx + bracket_start;
673                        if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
674                            let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
675                            url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
676                            idx += bracket_start + bracket_end + 2;
677                        } else {
678                            break;
679                        }
680                    } else {
681                        break;
682                    }
683                }
684            }
685
686            // Check shortcut references: [reference]
687            if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
688                for cap in captures {
689                    if let Some(ref_match) = cap.get(1) {
690                        // Check if this bracket is part of a URL (IPv6, etc.)
691                        let bracket_start = cap.get(0).unwrap().start();
692                        let bracket_end = cap.get(0).unwrap().end();
693
694                        // Skip if this bracket pair is within any URL bracket range
695                        let is_in_url = url_bracket_ranges
696                            .iter()
697                            .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
698
699                        if is_in_url {
700                            continue;
701                        }
702
703                        // Skip Pandoc/RMarkdown inline footnotes: ^[text]
704                        // Check if there's a ^ immediately before the opening bracket
705                        if bracket_start > 0 {
706                            // bracket_start is a byte offset, so we need to check the byte before
707                            if let Some(byte) = line.as_bytes().get(bracket_start.saturating_sub(1))
708                                && *byte == b'^'
709                            {
710                                continue; // This is an inline footnote, skip it
711                            }
712                        }
713
714                        let reference = ref_match.as_str();
715                        let reference_lower = reference.to_lowercase();
716
717                        // Skip known non-reference patterns (markdown extensions, code examples)
718                        if Self::is_known_non_reference_pattern(reference) {
719                            continue;
720                        }
721
722                        // Skip GitHub alerts (including extended types)
723                        if let Some(alert_type) = reference.strip_prefix('!')
724                            && matches!(
725                                alert_type,
726                                "NOTE"
727                                    | "TIP"
728                                    | "WARNING"
729                                    | "IMPORTANT"
730                                    | "CAUTION"
731                                    | "INFO"
732                                    | "SUCCESS"
733                                    | "FAILURE"
734                                    | "DANGER"
735                                    | "BUG"
736                                    | "EXAMPLE"
737                                    | "QUOTE"
738                            )
739                        {
740                            continue;
741                        }
742
743                        // Skip MkDocs snippet section markers like [start:section] or [end:section]
744                        // when they appear as part of snippet syntax (e.g., # -8<- [start:section])
745                        if mkdocs_mode
746                            && (reference.starts_with("start:") || reference.starts_with("end:"))
747                            && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
748                                || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
749                        {
750                            continue;
751                        }
752
753                        // Skip MkDocs auto-references if in MkDocs mode
754                        // Strip backticks since MkDocs resolves `module.Class` as module.Class
755                        let stripped_ref = Self::strip_backticks(reference);
756                        if mkdocs_mode
757                            && (is_mkdocs_auto_reference(stripped_ref)
758                                || (reference != stripped_ref && Self::is_valid_python_identifier(stripped_ref)))
759                        {
760                            continue;
761                        }
762
763                        if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
764                            let full_match = cap.get(0).unwrap();
765                            let col = full_match.start();
766
767                            // Skip if inside code span
768                            let code_spans = ctx.code_spans();
769                            if Self::is_in_code_span(line_num + 1, col, &code_spans) {
770                                continue;
771                            }
772
773                            // Check if this position is within a covered range
774                            let line_start_byte = ctx.line_offsets[line_num];
775                            let byte_pos = line_start_byte + col;
776
777                            // Skip if inside Jinja template
778                            if ctx.is_in_jinja_range(byte_pos) {
779                                continue;
780                            }
781
782                            // Skip if inside code block
783                            if crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block(
784                                &ctx.code_blocks,
785                                byte_pos,
786                            ) {
787                                continue;
788                            }
789
790                            // Skip if inside HTML comment
791                            if Self::is_in_html_comment(content, byte_pos) {
792                                continue;
793                            }
794
795                            // Skip if inside HTML tag
796                            if Self::is_in_html_tag(ctx, byte_pos) {
797                                continue;
798                            }
799
800                            // Skip if inside math context
801                            if is_in_math_context(ctx, byte_pos) {
802                                continue;
803                            }
804
805                            // Skip if inside table cell
806                            if is_in_table_cell(ctx, line_num + 1, col) {
807                                continue;
808                            }
809
810                            let byte_end = byte_pos + (full_match.end() - full_match.start());
811
812                            // Check if this shortcut ref overlaps with any parsed link/image
813                            let mut is_covered = false;
814                            for &(range_start, range_end) in &covered_ranges {
815                                if range_start <= byte_pos && byte_end <= range_end {
816                                    // This shortcut ref is completely within a parsed link/image
817                                    is_covered = true;
818                                    break;
819                                }
820                                if range_start > byte_end {
821                                    // No need to check further (ranges are sorted)
822                                    break;
823                                }
824                            }
825
826                            if is_covered {
827                                continue;
828                            }
829
830                            // More sophisticated checks to avoid false positives
831
832                            // Check 1: If preceded by ], this might be part of [text][ref]
833                            // Look for the pattern ...][ref] and check if there's a matching [ before
834                            let line_chars: Vec<char> = line.chars().collect();
835                            if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
836                                // Look backwards for a [ that would make this [text][ref]
837                                let mut bracket_count = 1; // We already saw one ]
838                                let mut check_pos = col.saturating_sub(2);
839                                let mut found_opening = false;
840
841                                while check_pos > 0 && check_pos < line_chars.len() {
842                                    match line_chars.get(check_pos) {
843                                        Some(&']') => bracket_count += 1,
844                                        Some(&'[') => {
845                                            bracket_count -= 1;
846                                            if bracket_count == 0 {
847                                                // Check if this [ is escaped
848                                                if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
849                                                    found_opening = true;
850                                                }
851                                                break;
852                                            }
853                                        }
854                                        _ => {}
855                                    }
856                                    if check_pos == 0 {
857                                        break;
858                                    }
859                                    check_pos = check_pos.saturating_sub(1);
860                                }
861
862                                if found_opening {
863                                    // This is part of [text][ref], skip it
864                                    continue;
865                                }
866                            }
867
868                            // Check 2: If there's an escaped bracket pattern before this
869                            // e.g., \[text\][ref], the [ref] shouldn't be treated as a shortcut
870                            let before_text = &line[..col];
871                            if before_text.contains("\\]") {
872                                // Check if there's a \[ before the \]
873                                if let Some(escaped_close_pos) = before_text.rfind("\\]") {
874                                    let search_text = &before_text[..escaped_close_pos];
875                                    if search_text.contains("\\[") {
876                                        // This looks like \[...\][ref], skip it
877                                        continue;
878                                    }
879                                }
880                            }
881
882                            let match_len = full_match.end() - full_match.start();
883                            undefined.push((line_num, col, match_len, reference.to_string()));
884                            reported_refs.insert(reference_lower, true);
885                        }
886                    }
887                }
888            }
889        }
890
891        undefined
892    }
893}
894
895impl Rule for MD052ReferenceLinkImages {
896    fn name(&self) -> &'static str {
897        "MD052"
898    }
899
900    fn description(&self) -> &'static str {
901        "Reference links and images should use a reference that exists"
902    }
903
904    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
905        let content = ctx.content;
906        let mut warnings = Vec::new();
907
908        // OPTIMIZATION: Early exit if no brackets at all
909        if !content.contains('[') {
910            return Ok(warnings);
911        }
912
913        // Check if we're in MkDocs mode from the context
914        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
915
916        let references = self.extract_references(content, mkdocs_mode);
917
918        // Use optimized detection method with cached link/image data
919        for (line_num, col, match_len, reference) in
920            self.find_undefined_references(content, &references, ctx, mkdocs_mode)
921        {
922            let lines: Vec<&str> = content.lines().collect();
923            let line_content = lines.get(line_num).unwrap_or(&"");
924
925            // Calculate precise character range for the entire undefined reference
926            let (start_line, start_col, end_line, end_col) =
927                calculate_match_range(line_num + 1, line_content, col, match_len);
928
929            warnings.push(LintWarning {
930                rule_name: Some(self.name().to_string()),
931                line: start_line,
932                column: start_col,
933                end_line,
934                end_column: end_col,
935                message: format!("Reference '{reference}' not found"),
936                severity: Severity::Warning,
937                fix: None,
938            });
939        }
940
941        Ok(warnings)
942    }
943
944    /// Check if this rule should be skipped for performance
945    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
946        // Skip if content is empty or has no links/images
947        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
948    }
949
950    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
951        let content = ctx.content;
952        // No automatic fix available for undefined references
953        Ok(content.to_string())
954    }
955
956    fn as_any(&self) -> &dyn std::any::Any {
957        self
958    }
959
960    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
961    where
962        Self: Sized,
963    {
964        let rule_config = crate::rule_config_serde::load_rule_config::<MD052Config>(config);
965        Box::new(Self::from_config_struct(rule_config))
966    }
967}
968
969#[cfg(test)]
970mod tests {
971    use super::*;
972    use crate::lint_context::LintContext;
973
974    #[test]
975    fn test_valid_reference_link() {
976        let rule = MD052ReferenceLinkImages::new();
977        let content = "[text][ref]\n\n[ref]: https://example.com";
978        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
979        let result = rule.check(&ctx).unwrap();
980
981        assert_eq!(result.len(), 0);
982    }
983
984    #[test]
985    fn test_undefined_reference_link() {
986        let rule = MD052ReferenceLinkImages::new();
987        let content = "[text][undefined]";
988        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
989        let result = rule.check(&ctx).unwrap();
990
991        assert_eq!(result.len(), 1);
992        assert!(result[0].message.contains("Reference 'undefined' not found"));
993    }
994
995    #[test]
996    fn test_valid_reference_image() {
997        let rule = MD052ReferenceLinkImages::new();
998        let content = "![alt][img]\n\n[img]: image.jpg";
999        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1000        let result = rule.check(&ctx).unwrap();
1001
1002        assert_eq!(result.len(), 0);
1003    }
1004
1005    #[test]
1006    fn test_undefined_reference_image() {
1007        let rule = MD052ReferenceLinkImages::new();
1008        let content = "![alt][missing]";
1009        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1010        let result = rule.check(&ctx).unwrap();
1011
1012        assert_eq!(result.len(), 1);
1013        assert!(result[0].message.contains("Reference 'missing' not found"));
1014    }
1015
1016    #[test]
1017    fn test_case_insensitive_references() {
1018        let rule = MD052ReferenceLinkImages::new();
1019        let content = "[Text][REF]\n\n[ref]: https://example.com";
1020        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1021        let result = rule.check(&ctx).unwrap();
1022
1023        assert_eq!(result.len(), 0);
1024    }
1025
1026    #[test]
1027    fn test_shortcut_reference_valid() {
1028        let rule = MD052ReferenceLinkImages::new();
1029        let content = "[ref]\n\n[ref]: https://example.com";
1030        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1031        let result = rule.check(&ctx).unwrap();
1032
1033        assert_eq!(result.len(), 0);
1034    }
1035
1036    #[test]
1037    fn test_shortcut_reference_undefined_with_shortcut_syntax_enabled() {
1038        // Shortcut syntax checking is disabled by default
1039        // Enable it to test undefined shortcut references
1040        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config { shortcut_syntax: true });
1041        let content = "[undefined]";
1042        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1043        let result = rule.check(&ctx).unwrap();
1044
1045        assert_eq!(result.len(), 1);
1046        assert!(result[0].message.contains("Reference 'undefined' not found"));
1047    }
1048
1049    #[test]
1050    fn test_shortcut_reference_not_checked_by_default() {
1051        // By default, shortcut references are NOT checked (matches markdownlint behavior)
1052        let rule = MD052ReferenceLinkImages::new();
1053        let content = "[undefined]";
1054        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1055        let result = rule.check(&ctx).unwrap();
1056
1057        // Should be 0 because shortcut_syntax is false by default
1058        assert_eq!(result.len(), 0);
1059    }
1060
1061    #[test]
1062    fn test_inline_links_ignored() {
1063        let rule = MD052ReferenceLinkImages::new();
1064        let content = "[text](https://example.com)";
1065        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1066        let result = rule.check(&ctx).unwrap();
1067
1068        assert_eq!(result.len(), 0);
1069    }
1070
1071    #[test]
1072    fn test_inline_images_ignored() {
1073        let rule = MD052ReferenceLinkImages::new();
1074        let content = "![alt](image.jpg)";
1075        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1076        let result = rule.check(&ctx).unwrap();
1077
1078        assert_eq!(result.len(), 0);
1079    }
1080
1081    #[test]
1082    fn test_references_in_code_blocks_ignored() {
1083        let rule = MD052ReferenceLinkImages::new();
1084        let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
1085        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1086        let result = rule.check(&ctx).unwrap();
1087
1088        assert_eq!(result.len(), 0);
1089    }
1090
1091    #[test]
1092    fn test_references_in_inline_code_ignored() {
1093        let rule = MD052ReferenceLinkImages::new();
1094        let content = "`[undefined]`";
1095        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1096        let result = rule.check(&ctx).unwrap();
1097
1098        // References inside inline code spans should be ignored
1099        assert_eq!(result.len(), 0);
1100    }
1101
1102    #[test]
1103    fn test_comprehensive_inline_code_detection() {
1104        // Enable shortcut_syntax to test comprehensive detection
1105        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config { shortcut_syntax: true });
1106        let content = r#"# Test
1107
1108This `[inside]` should be ignored.
1109This [outside] should be flagged.
1110Reference links `[text][ref]` in code are ignored.
1111Regular reference [text][missing] should be flagged.
1112Images `![alt][img]` in code are ignored.
1113Regular image ![alt][badimg] should be flagged.
1114
1115Multiple `[one]` and `[two]` in code ignored, but [three] is not.
1116
1117```
1118[code block content] should be ignored
1119```
1120
1121`Multiple [refs] in [same] code span` ignored."#;
1122
1123        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1124        let result = rule.check(&ctx).unwrap();
1125
1126        // Should only flag: outside, missing, badimg, three (4 total)
1127        assert_eq!(result.len(), 4);
1128
1129        let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
1130        assert!(messages.iter().any(|m| m.contains("outside")));
1131        assert!(messages.iter().any(|m| m.contains("missing")));
1132        assert!(messages.iter().any(|m| m.contains("badimg")));
1133        assert!(messages.iter().any(|m| m.contains("three")));
1134
1135        // Should NOT flag any references inside code spans
1136        assert!(!messages.iter().any(|m| m.contains("inside")));
1137        assert!(!messages.iter().any(|m| m.contains("one")));
1138        assert!(!messages.iter().any(|m| m.contains("two")));
1139        assert!(!messages.iter().any(|m| m.contains("refs")));
1140        assert!(!messages.iter().any(|m| m.contains("same")));
1141    }
1142
1143    #[test]
1144    fn test_multiple_undefined_references() {
1145        let rule = MD052ReferenceLinkImages::new();
1146        let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
1147        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1148        let result = rule.check(&ctx).unwrap();
1149
1150        assert_eq!(result.len(), 3);
1151        assert!(result[0].message.contains("ref1"));
1152        assert!(result[1].message.contains("ref2"));
1153        assert!(result[2].message.contains("ref3"));
1154    }
1155
1156    #[test]
1157    fn test_mixed_valid_and_undefined() {
1158        let rule = MD052ReferenceLinkImages::new();
1159        let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
1160        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1161        let result = rule.check(&ctx).unwrap();
1162
1163        assert_eq!(result.len(), 1);
1164        assert!(result[0].message.contains("missing"));
1165    }
1166
1167    #[test]
1168    fn test_empty_reference() {
1169        let rule = MD052ReferenceLinkImages::new();
1170        let content = "[text][]\n\n[ref]: https://example.com";
1171        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1172        let result = rule.check(&ctx).unwrap();
1173
1174        // Empty reference should use the link text as reference
1175        assert_eq!(result.len(), 1);
1176    }
1177
1178    #[test]
1179    fn test_escaped_brackets_ignored() {
1180        let rule = MD052ReferenceLinkImages::new();
1181        let content = "\\[not a link\\]";
1182        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1183        let result = rule.check(&ctx).unwrap();
1184
1185        assert_eq!(result.len(), 0);
1186    }
1187
1188    #[test]
1189    fn test_list_items_ignored() {
1190        let rule = MD052ReferenceLinkImages::new();
1191        let content = "- [undefined]\n* [another]\n+ [third]";
1192        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1193        let result = rule.check(&ctx).unwrap();
1194
1195        // List items that look like shortcut references should be ignored
1196        assert_eq!(result.len(), 0);
1197    }
1198
1199    #[test]
1200    fn test_output_example_section_ignored() {
1201        // Enable shortcut_syntax to test example section handling
1202        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config { shortcut_syntax: true });
1203        let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
1204        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1205        let result = rule.check(&ctx).unwrap();
1206
1207        // Only the reference outside the Output section should be flagged
1208        assert_eq!(result.len(), 1);
1209        assert!(result[0].message.contains("missing"));
1210    }
1211
1212    #[test]
1213    fn test_reference_definitions_in_code_blocks_ignored() {
1214        let rule = MD052ReferenceLinkImages::new();
1215        let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
1216        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1217        let result = rule.check(&ctx).unwrap();
1218
1219        // Reference defined in code block should not count
1220        assert_eq!(result.len(), 1);
1221        assert!(result[0].message.contains("ref"));
1222    }
1223
1224    #[test]
1225    fn test_multiple_references_to_same_undefined() {
1226        let rule = MD052ReferenceLinkImages::new();
1227        let content = "[first][missing] [second][missing] [third][missing]";
1228        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1229        let result = rule.check(&ctx).unwrap();
1230
1231        // Should only report once per unique reference
1232        assert_eq!(result.len(), 1);
1233        assert!(result[0].message.contains("missing"));
1234    }
1235
1236    #[test]
1237    fn test_reference_with_special_characters() {
1238        let rule = MD052ReferenceLinkImages::new();
1239        let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
1240        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1241        let result = rule.check(&ctx).unwrap();
1242
1243        assert_eq!(result.len(), 0);
1244    }
1245
1246    #[test]
1247    fn test_issue_51_html_attribute_not_reference() {
1248        // Test for issue #51 - HTML attributes with square brackets shouldn't be treated as references
1249        let rule = MD052ReferenceLinkImages::new();
1250        let content = r#"# Example
1251
1252## Test
1253
1254Want to fill out this form?
1255
1256<form method="post">
1257    <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
1258</form>"#;
1259        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1260        let result = rule.check(&ctx).unwrap();
1261
1262        assert_eq!(
1263            result.len(),
1264            0,
1265            "HTML attributes with square brackets should not be flagged as undefined references"
1266        );
1267    }
1268
1269    #[test]
1270    fn test_extract_references() {
1271        let rule = MD052ReferenceLinkImages::new();
1272        let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
1273        let refs = rule.extract_references(content, false);
1274
1275        assert_eq!(refs.len(), 3);
1276        assert!(refs.contains("ref1"));
1277        assert!(refs.contains("ref2"));
1278        assert!(refs.contains("ref3"));
1279    }
1280
1281    #[test]
1282    fn test_inline_code_not_flagged() {
1283        // Enable shortcut_syntax to test inline code detection
1284        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config { shortcut_syntax: true });
1285
1286        // Test that arrays in inline code are not flagged as references
1287        let content = r#"# Test
1288
1289Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.
1290
1291Also, `[todo]` is not a reference link.
1292
1293But this [reference] should be flagged.
1294
1295And this `[inline code]` should not be flagged.
1296"#;
1297
1298        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1299        let warnings = rule.check(&ctx).unwrap();
1300
1301        // Should only flag [reference], not the ones in backticks
1302        assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
1303        assert!(warnings[0].message.contains("'reference'"));
1304    }
1305
1306    #[test]
1307    fn test_code_block_references_ignored() {
1308        // Enable shortcut_syntax to test code block handling
1309        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config { shortcut_syntax: true });
1310
1311        let content = r#"# Test
1312
1313```markdown
1314[undefined] reference in code block
1315![undefined] image in code block
1316```
1317
1318[real-undefined] reference outside
1319"#;
1320
1321        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1322        let warnings = rule.check(&ctx).unwrap();
1323
1324        // Should only flag [real-undefined], not the ones in code block
1325        assert_eq!(warnings.len(), 1);
1326        assert!(warnings[0].message.contains("'real-undefined'"));
1327    }
1328
1329    #[test]
1330    fn test_html_comments_ignored() {
1331        // Test for issue #20 - MD052 should not flag content inside HTML comments
1332        let rule = MD052ReferenceLinkImages::new();
1333
1334        // Test the exact case from issue #20
1335        let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
1336<!--- set_env EDITOR 'python3 fake_editor.py' -->
1337
1338```bash
1339$ python3 vote.py
13403 votes for: 2
13412 votes for: 3, 4
1342```"#;
1343        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1344        let result = rule.check(&ctx).unwrap();
1345        assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");
1346
1347        // Test various reference patterns inside HTML comments
1348        let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
1349Normal [text][undefined]
1350<!-- Another [comment][with] references -->"#;
1351        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1352        let result = rule.check(&ctx).unwrap();
1353        assert_eq!(
1354            result.len(),
1355            1,
1356            "Should only flag the undefined reference outside comments"
1357        );
1358        assert!(result[0].message.contains("undefined"));
1359
1360        // Test multi-line HTML comments
1361        let content = r#"<!--
1362[ref1]
1363[ref2][ref3]
1364-->
1365[actual][undefined]"#;
1366        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1367        let result = rule.check(&ctx).unwrap();
1368        assert_eq!(
1369            result.len(),
1370            1,
1371            "Should not flag references in multi-line HTML comments"
1372        );
1373        assert!(result[0].message.contains("undefined"));
1374
1375        // Test mixed scenarios
1376        let content = r#"<!-- Comment with [1:] pattern -->
1377Valid [link][ref]
1378<!-- More [refs][in][comments] -->
1379![image][missing]
1380
1381[ref]: https://example.com"#;
1382        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1383        let result = rule.check(&ctx).unwrap();
1384        assert_eq!(result.len(), 1, "Should only flag missing image reference");
1385        assert!(result[0].message.contains("missing"));
1386    }
1387
1388    #[test]
1389    fn test_frontmatter_ignored() {
1390        // Test for issue #24 - MD052 should not flag content inside frontmatter
1391        // Enable shortcut_syntax to test frontmatter handling
1392        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config { shortcut_syntax: true });
1393
1394        // Test YAML frontmatter with arrays and references
1395        let content = r#"---
1396layout: post
1397title: "My Jekyll Post"
1398date: 2023-01-01
1399categories: blog
1400tags: ["test", "example"]
1401author: John Doe
1402---
1403
1404# My Blog Post
1405
1406This is the actual markdown content that should be linted.
1407
1408[undefined] reference should be flagged.
1409
1410## Section 1
1411
1412Some content here."#;
1413        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1414        let result = rule.check(&ctx).unwrap();
1415
1416        // Should only flag [undefined] in the content, not the ["test", "example"] array in frontmatter
1417        assert_eq!(
1418            result.len(),
1419            1,
1420            "Should only flag the undefined reference outside frontmatter"
1421        );
1422        assert!(result[0].message.contains("undefined"));
1423
1424        // Test TOML frontmatter
1425        let content = r#"+++
1426title = "My Post"
1427tags = ["example", "test"]
1428+++
1429
1430# Content
1431
1432[missing] reference should be flagged."#;
1433        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1434        let result = rule.check(&ctx).unwrap();
1435        assert_eq!(
1436            result.len(),
1437            1,
1438            "Should only flag the undefined reference outside TOML frontmatter"
1439        );
1440        assert!(result[0].message.contains("missing"));
1441    }
1442
1443    #[test]
1444    fn test_mkdocs_snippet_markers_not_flagged() {
1445        // Test for issue #68 - MkDocs snippet selection markers should not be flagged as undefined references
1446        // Enable shortcut_syntax to test snippet marker handling
1447        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config { shortcut_syntax: true });
1448
1449        // Test snippet section markers
1450        let content = r#"# Document with MkDocs Snippets
1451
1452Some content here.
1453
1454# -8<- [start:remote-content]
1455
1456This is the remote content section.
1457
1458# -8<- [end:remote-content]
1459
1460More content here.
1461
1462<!-- --8<-- [start:another-section] -->
1463Content in another section
1464<!-- --8<-- [end:another-section] -->"#;
1465        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs);
1466        let result = rule.check(&ctx).unwrap();
1467
1468        // Should not flag any snippet markers as undefined references
1469        assert_eq!(
1470            result.len(),
1471            0,
1472            "Should not flag MkDocs snippet markers as undefined references"
1473        );
1474
1475        // Test that the snippet marker lines are properly skipped
1476        // but regular undefined references on other lines are still caught
1477        let content = r#"# Document
1478
1479# -8<- [start:section]
1480Content with [reference] inside snippet section
1481# -8<- [end:section]
1482
1483Regular [undefined] reference outside snippet markers."#;
1484        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs);
1485        let result = rule.check(&ctx).unwrap();
1486
1487        assert_eq!(
1488            result.len(),
1489            2,
1490            "Should flag undefined references but skip snippet marker lines"
1491        );
1492        // The references inside the content should be flagged, but not start: and end:
1493        assert!(result[0].message.contains("reference"));
1494        assert!(result[1].message.contains("undefined"));
1495
1496        // Test in standard mode - should flag the markers as undefined
1497        let content = r#"# Document
1498
1499# -8<- [start:section]
1500# -8<- [end:section]"#;
1501        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1502        let result = rule.check(&ctx).unwrap();
1503
1504        assert_eq!(
1505            result.len(),
1506            2,
1507            "In standard mode, snippet markers should be flagged as undefined references"
1508        );
1509    }
1510
1511    #[test]
1512    fn test_pandoc_citations_not_flagged() {
1513        // Test that Pandoc/RMarkdown/Quarto citation syntax is not flagged
1514        // Enable shortcut_syntax to test citation handling
1515        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config { shortcut_syntax: true });
1516
1517        let content = r#"# Research Paper
1518
1519We are using the **bookdown** package [@R-bookdown] in this sample book.
1520This was built on top of R Markdown and **knitr** [@xie2015].
1521
1522Multiple citations [@citation1; @citation2; @citation3] are also supported.
1523
1524Regular [undefined] reference should still be flagged.
1525"#;
1526        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1527        let result = rule.check(&ctx).unwrap();
1528
1529        // Should only flag the undefined reference, not the citations
1530        assert_eq!(
1531            result.len(),
1532            1,
1533            "Should only flag the undefined reference, not Pandoc citations"
1534        );
1535        assert!(result[0].message.contains("undefined"));
1536    }
1537
1538    #[test]
1539    fn test_pandoc_inline_footnotes_not_flagged() {
1540        // Test that Pandoc inline footnote syntax is not flagged
1541        // Enable shortcut_syntax to test inline footnote handling
1542        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config { shortcut_syntax: true });
1543
1544        let content = r#"# Math Document
1545
1546You can use math in footnotes like this^[where we mention $p = \frac{a}{b}$].
1547
1548Another footnote^[with some text and a [link](https://example.com)].
1549
1550But this [reference] without ^ should be flagged.
1551"#;
1552        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1553        let result = rule.check(&ctx).unwrap();
1554
1555        // Should only flag the reference without ^
1556        assert_eq!(
1557            result.len(),
1558            1,
1559            "Should only flag the regular reference, not inline footnotes"
1560        );
1561        assert!(result[0].message.contains("reference"));
1562    }
1563
1564    #[test]
1565    fn test_github_alerts_not_flagged() {
1566        // Test for issue #60 - GitHub alerts should not be flagged as undefined references
1567        // Enable shortcut_syntax to test GitHub alert handling
1568        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config { shortcut_syntax: true });
1569
1570        // Test various GitHub alert types
1571        let content = r#"# Document with GitHub Alerts
1572
1573> [!NOTE]
1574> This is a note alert.
1575
1576> [!TIP]
1577> This is a tip alert.
1578
1579> [!IMPORTANT]
1580> This is an important alert.
1581
1582> [!WARNING]
1583> This is a warning alert.
1584
1585> [!CAUTION]
1586> This is a caution alert.
1587
1588Regular content with [undefined] reference."#;
1589        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1590        let result = rule.check(&ctx).unwrap();
1591
1592        // Should only flag the undefined reference, not the GitHub alerts
1593        assert_eq!(
1594            result.len(),
1595            1,
1596            "Should only flag the undefined reference, not GitHub alerts"
1597        );
1598        assert!(result[0].message.contains("undefined"));
1599        assert_eq!(result[0].line, 18); // Line with [undefined]
1600
1601        // Test GitHub alerts with additional content
1602        let content = r#"> [!TIP]
1603> Here's a useful tip about [something].
1604> Multiple lines are allowed.
1605
1606[something] is mentioned but not defined."#;
1607        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1608        let result = rule.check(&ctx).unwrap();
1609
1610        // Should flag only the [something] outside blockquotes
1611        // The test shows we're only catching one, which might be correct behavior
1612        // matching markdownlint's approach
1613        assert_eq!(result.len(), 1, "Should flag undefined reference");
1614        assert!(result[0].message.contains("something"));
1615
1616        // Test GitHub alerts with proper references
1617        let content = r#"> [!NOTE]
1618> See [reference] for more details.
1619
1620[reference]: https://example.com"#;
1621        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1622        let result = rule.check(&ctx).unwrap();
1623
1624        // Should not flag anything - [!NOTE] is GitHub alert and [reference] is defined
1625        assert_eq!(result.len(), 0, "Should not flag GitHub alerts or defined references");
1626    }
1627}