rumdl_lib/rules/
md052_reference_links_images.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::{HTML_COMMENT_PATTERN, SHORTCUT_REF_REGEX};
5use crate::utils::skip_context::{is_in_math_context, is_in_table_cell};
6use regex::Regex;
7use std::collections::{HashMap, HashSet};
8use std::sync::LazyLock;
9
10mod md052_config;
11use md052_config::MD052Config;
12
13// Pattern to match reference definitions [ref]: url
14// Note: \S* instead of \S+ to allow empty definitions like [ref]:
15// The capturing group handles nested brackets to support cases like [`union[t, none]`]:
16static REF_REGEX: LazyLock<Regex> =
17    LazyLock::new(|| Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap());
18
19// Pattern for list items to exclude from reference checks (standard regex is fine)
20static LIST_ITEM_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap());
21
22// Pattern for code blocks (standard regex is fine)
23static FENCED_CODE_START: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap());
24
25// Pattern for output example sections (standard regex is fine)
26static OUTPUT_EXAMPLE_START: LazyLock<Regex> =
27    LazyLock::new(|| Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap());
28
29// Pattern for GitHub alerts/callouts in blockquotes (e.g., > [!NOTE], > [!TIP], etc.)
30// Extended to include additional common alert types
31static GITHUB_ALERT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
32    Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]")
33        .unwrap()
34});
35
36// Pattern to detect URLs that may contain brackets (IPv6, API endpoints, etc.)
37// This pattern specifically looks for:
38// - IPv6 addresses: https://[::1] or https://[2001:db8::1]
39// - IPv6 with zone IDs: https://[fe80::1%eth0]
40// - IPv6 mixed notation: https://[::ffff:192.0.2.1]
41// - API paths with array notation: https://api.example.com/users[0]
42// But NOT markdown reference links that happen to follow URLs
43static URL_WITH_BRACKETS: LazyLock<Regex> =
44    LazyLock::new(|| Regex::new(r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])").unwrap());
45
46/// Rule MD052: Reference links and images should use reference style
47///
48/// See [docs/md052.md](../../docs/md052.md) for full documentation, configuration, and examples.
49///
50/// This rule is triggered when a reference link or image uses a reference that isn't defined.
51///
52/// ## Configuration
53///
54/// - `shortcut-syntax`: Whether to check shortcut reference syntax `[text]` (default: false)
55///
56/// By default, only full (`[text][ref]`) and collapsed (`[text][]`) reference syntax is checked.
57/// Shortcut syntax is ambiguous because `[text]` could be a reference link OR just text in brackets.
58#[derive(Clone, Default)]
59pub struct MD052ReferenceLinkImages {
60    config: MD052Config,
61}
62
63impl MD052ReferenceLinkImages {
64    pub fn new() -> Self {
65        Self {
66            config: MD052Config::default(),
67        }
68    }
69
70    pub fn from_config_struct(config: MD052Config) -> Self {
71        Self { config }
72    }
73
74    /// Strip surrounding backticks from a string
75    /// Used for MkDocs auto-reference detection where `module.Class` should be treated as module.Class
76    fn strip_backticks(s: &str) -> &str {
77        s.trim_start_matches('`').trim_end_matches('`')
78    }
79
80    /// Check if a string is a valid Python identifier
81    /// Used for MkDocs auto-reference detection where single-word backtick-wrapped identifiers
82    /// like `str`, `int`, etc. should be accepted as valid auto-references
83    fn is_valid_python_identifier(s: &str) -> bool {
84        if s.is_empty() {
85            return false;
86        }
87        let first_char = s.chars().next().unwrap();
88        if !first_char.is_ascii_alphabetic() && first_char != '_' {
89            return false;
90        }
91        s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
92    }
93
94    /// Check if text matches a known non-reference pattern that should be skipped.
95    ///
96    /// These are deterministic patterns from markdown extensions or code examples,
97    /// not heuristics. Returns true for:
98    /// - User-configured names via `ignore` config option
99    /// - Markdown extensions: [^footnote], [@citation], [!alert], [TOC]
100    /// - Programming syntax: [T], [null], [i32], ["string"]
101    /// - Descriptive text: [default: value], [0-9]
102    fn is_known_non_reference_pattern(&self, text: &str) -> bool {
103        // Check user-configured ignore list first (case-insensitive match)
104        // Reference IDs are normalized to lowercase during parsing,
105        // so we use case-insensitive comparison for user convenience
106        if self.config.ignore.iter().any(|p| p.eq_ignore_ascii_case(text)) {
107            return true;
108        }
109        // Skip numeric patterns (array indices, ranges)
110        if text.chars().all(|c| c.is_ascii_digit()) {
111            return true;
112        }
113
114        // Skip numeric ranges like [1:3], [0:10], etc.
115        if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
116            return true;
117        }
118
119        // Skip patterns that look like config sections [tool.something], [section.subsection]
120        // But not if they contain other non-alphanumeric chars like hyphens, underscores, or backticks
121        // Backticks indicate intentional code formatting in a reference name (e.g., [`module.Class`])
122        if text.contains('.')
123            && !text.contains(' ')
124            && !text.contains('-')
125            && !text.contains('_')
126            && !text.contains('`')
127        {
128            // Config sections typically have dots, no spaces, and only alphanumeric + dots
129            return true;
130        }
131
132        // Skip glob/wildcard patterns like [*], [...], [**]
133        if text == "*" || text == "..." || text == "**" {
134            return true;
135        }
136
137        // Skip patterns that look like file paths [dir/file], [src/utils]
138        if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
139            return true;
140        }
141
142        // Skip programming type annotations like [int, str], [Dict[str, Any]]
143        // These typically have commas and/or nested brackets
144        if text.contains(',') || text.contains('[') || text.contains(']') {
145            // Check if it looks like a type annotation pattern
146            return true;
147        }
148
149        // Note: We don't filter out patterns with backticks because backticks in reference names
150        // are valid markdown syntax, e.g., [`dataclasses.InitVar`] is a valid reference name
151
152        // Skip patterns that look like module/class paths ONLY if they don't have backticks
153        // Backticks indicate intentional code formatting in a reference name
154        // e.g., skip [dataclasses.initvar] but allow [`typing.ClassVar`]
155        if !text.contains('`')
156            && text.contains('.')
157            && !text.contains(' ')
158            && !text.contains('-')
159            && !text.contains('_')
160        {
161            return true;
162        }
163
164        // Note: We don't filter based on word count anymore because legitimate references
165        // can have many words, like "python language reference for import statements"
166        // Word count filtering was causing false positives where valid references were
167        // being incorrectly flagged as unused
168
169        // Skip patterns that are just punctuation or operators
170        if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
171            return true;
172        }
173
174        // Skip very short non-word patterns (likely operators or syntax)
175        if text.len() <= 2 && !text.chars().all(|c| c.is_alphabetic()) {
176            return true;
177        }
178
179        // Skip quoted patterns like ["E501"], ["ALL"], ["E", "F"]
180        if (text.starts_with('"') && text.ends_with('"'))
181            || (text.starts_with('\'') && text.ends_with('\''))
182            || text.contains('"')
183            || text.contains('\'')
184        {
185            return true;
186        }
187
188        // Skip descriptive patterns with colon like [default: the project root]
189        // But allow simple numeric ranges which are handled above
190        if text.contains(':') && text.contains(' ') {
191            return true;
192        }
193
194        // Skip alert/admonition patterns like [!WARN], [!NOTE], etc.
195        if text.starts_with('!') {
196            return true;
197        }
198
199        // Skip footnote syntax like [^1], [^note], etc.
200        // Footnotes start with ^ and are a common markdown extension
201        if text.starts_with('^') {
202            return true;
203        }
204
205        // Skip Pandoc/RMarkdown/Quarto citation syntax like [@citation-key]
206        // Citations in these formats start with @ inside brackets
207        if text.starts_with('@') {
208            return true;
209        }
210
211        // Skip table of contents markers like [TOC]
212        // Used by Python-Markdown and other processors
213        if text == "TOC" {
214            return true;
215        }
216
217        // Skip single uppercase letters (likely type parameters) like [T], [U], [K], [V]
218        if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
219            return true;
220        }
221
222        // Skip common programming type names, literals, and short identifiers
223        // that are likely not markdown references
224        let common_non_refs = [
225            // Programming types
226            "object",
227            "Object",
228            "any",
229            "Any",
230            "inv",
231            "void",
232            "bool",
233            "int",
234            "float",
235            "str",
236            "char",
237            "i8",
238            "i16",
239            "i32",
240            "i64",
241            "i128",
242            "isize",
243            "u8",
244            "u16",
245            "u32",
246            "u64",
247            "u128",
248            "usize",
249            "f32",
250            "f64",
251            // JavaScript/JSON literals (excluding "undefined" which is too ambiguous)
252            "null",
253            "true",
254            "false",
255            "NaN",
256            "Infinity",
257            // Common JavaScript output patterns
258            "object Object",
259        ];
260
261        if common_non_refs.contains(&text) {
262            return true;
263        }
264
265        false
266    }
267
268    /// Check if a position is inside any code span
269    fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
270        code_spans
271            .iter()
272            .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
273    }
274
275    /// Check if a byte position is within an HTML comment
276    fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
277        for m in HTML_COMMENT_PATTERN.find_iter(content) {
278            if m.start() <= byte_pos && byte_pos < m.end() {
279                return true;
280            }
281        }
282        false
283    }
284
285    /// Check if a byte position is within an HTML tag
286    fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
287        // Check HTML tags
288        for html_tag in ctx.html_tags().iter() {
289            if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
290                return true;
291            }
292        }
293        false
294    }
295
296    fn extract_references(&self, content: &str, mkdocs_mode: bool) -> HashSet<String> {
297        use crate::config::MarkdownFlavor;
298        use crate::utils::skip_context::is_mkdocs_snippet_line;
299
300        let mut references = HashSet::new();
301        let mut in_code_block = false;
302        let mut code_fence_marker = String::new();
303
304        for line in content.lines() {
305            // Skip lines that look like MkDocs snippet markers (only in MkDocs mode)
306            if is_mkdocs_snippet_line(
307                line,
308                if mkdocs_mode {
309                    MarkdownFlavor::MkDocs
310                } else {
311                    MarkdownFlavor::Standard
312                },
313            ) {
314                continue;
315            }
316            // Handle code block boundaries
317            if let Some(cap) = FENCED_CODE_START.captures(line) {
318                if let Some(fence) = cap.get(2) {
319                    // Get the fence marker (``` or ~~~) without the indentation
320                    let fence_str = fence.as_str();
321                    if !in_code_block {
322                        in_code_block = true;
323                        code_fence_marker = fence_str.to_string();
324                    } else if line.trim_start().starts_with(&code_fence_marker) {
325                        // Check if this could be a closing fence
326                        let trimmed = line.trim_start();
327                        // A closing fence should be just the fence characters, possibly with trailing whitespace
328                        if trimmed.starts_with(&code_fence_marker) {
329                            let after_fence = &trimmed[code_fence_marker.len()..];
330                            if after_fence.trim().is_empty() {
331                                in_code_block = false;
332                                code_fence_marker.clear();
333                            }
334                        }
335                    }
336                }
337                continue;
338            }
339
340            // Skip lines in code blocks
341            if in_code_block {
342                continue;
343            }
344
345            // Check for abbreviation syntax (*[ABBR]: Definition) and skip it
346            // Abbreviations are not reference links and should not be tracked
347            if line.trim_start().starts_with("*[") {
348                continue;
349            }
350
351            if let Some(cap) = REF_REGEX.captures(line) {
352                // Store references in lowercase for case-insensitive comparison
353                if let Some(reference) = cap.get(1) {
354                    references.insert(reference.as_str().to_lowercase());
355                }
356            }
357        }
358
359        references
360    }
361
362    fn find_undefined_references(
363        &self,
364        content: &str,
365        references: &HashSet<String>,
366        ctx: &crate::lint_context::LintContext,
367        mkdocs_mode: bool,
368    ) -> Vec<(usize, usize, usize, String)> {
369        let mut undefined = Vec::new();
370        let mut reported_refs = HashMap::new();
371        let mut in_code_block = false;
372        let mut code_fence_marker = String::new();
373        let mut in_example_section = false;
374
375        // Get code spans once for the entire function
376        let code_spans = ctx.code_spans();
377
378        // Use cached data for reference links and images
379        for link in &ctx.links {
380            if !link.is_reference {
381                continue; // Skip inline links
382            }
383
384            // Skip links inside Jinja templates
385            if ctx.is_in_jinja_range(link.byte_offset) {
386                continue;
387            }
388
389            // Skip links inside code spans
390            if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
391                continue;
392            }
393
394            // Skip links inside HTML comments
395            if Self::is_in_html_comment(content, link.byte_offset) {
396                continue;
397            }
398
399            // Skip links inside HTML tags
400            if Self::is_in_html_tag(ctx, link.byte_offset) {
401                continue;
402            }
403
404            // Skip links inside math contexts
405            if is_in_math_context(ctx, link.byte_offset) {
406                continue;
407            }
408
409            // Skip links inside table cells
410            if is_in_table_cell(ctx, link.line, link.start_col) {
411                continue;
412            }
413
414            // Skip links inside frontmatter
415            if ctx.line_info(link.line).is_some_and(|info| info.in_front_matter) {
416                continue;
417            }
418
419            if let Some(ref_id) = &link.reference_id {
420                let reference_lower = ref_id.to_lowercase();
421
422                // Skip known non-reference patterns (markdown extensions, code examples)
423                if self.is_known_non_reference_pattern(ref_id) {
424                    continue;
425                }
426
427                // Skip MkDocs auto-references if in MkDocs mode
428                // Check both the reference_id and the link text for shorthand references
429                // Strip backticks since MkDocs resolves `module.Class` as module.Class
430                let stripped_ref = Self::strip_backticks(ref_id);
431                let stripped_text = Self::strip_backticks(&link.text);
432                if mkdocs_mode
433                    && (is_mkdocs_auto_reference(stripped_ref)
434                        || is_mkdocs_auto_reference(stripped_text)
435                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
436                        || (link.text.as_ref() != stripped_text && Self::is_valid_python_identifier(stripped_text)))
437                {
438                    continue;
439                }
440
441                // Check if reference is defined
442                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
443                    // Check if the line is in an example section or list item
444                    if let Some(line_info) = ctx.line_info(link.line) {
445                        if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
446                            in_example_section = true;
447                            continue;
448                        }
449
450                        if in_example_section {
451                            continue;
452                        }
453
454                        // Skip list items
455                        if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
456                            continue;
457                        }
458
459                        // Skip lines that are HTML content
460                        let trimmed = line_info.content(ctx.content).trim_start();
461                        if trimmed.starts_with('<') {
462                            continue;
463                        }
464                    }
465
466                    let match_len = link.byte_end - link.byte_offset;
467                    undefined.push((link.line - 1, link.start_col, match_len, ref_id.to_string()));
468                    reported_refs.insert(reference_lower, true);
469                }
470            }
471        }
472
473        // Use cached data for reference images
474        for image in &ctx.images {
475            if !image.is_reference {
476                continue; // Skip inline images
477            }
478
479            // Skip images inside Jinja templates
480            if ctx.is_in_jinja_range(image.byte_offset) {
481                continue;
482            }
483
484            // Skip images inside code spans
485            if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
486                continue;
487            }
488
489            // Skip images inside HTML comments
490            if Self::is_in_html_comment(content, image.byte_offset) {
491                continue;
492            }
493
494            // Skip images inside HTML tags
495            if Self::is_in_html_tag(ctx, image.byte_offset) {
496                continue;
497            }
498
499            // Skip images inside math contexts
500            if is_in_math_context(ctx, image.byte_offset) {
501                continue;
502            }
503
504            // Skip images inside table cells
505            if is_in_table_cell(ctx, image.line, image.start_col) {
506                continue;
507            }
508
509            // Skip images inside frontmatter
510            if ctx.line_info(image.line).is_some_and(|info| info.in_front_matter) {
511                continue;
512            }
513
514            if let Some(ref_id) = &image.reference_id {
515                let reference_lower = ref_id.to_lowercase();
516
517                // Skip known non-reference patterns (markdown extensions, code examples)
518                if self.is_known_non_reference_pattern(ref_id) {
519                    continue;
520                }
521
522                // Skip MkDocs auto-references if in MkDocs mode
523                // Check both the reference_id and the alt text for shorthand references
524                // Strip backticks since MkDocs resolves `module.Class` as module.Class
525                let stripped_ref = Self::strip_backticks(ref_id);
526                let stripped_alt = Self::strip_backticks(&image.alt_text);
527                if mkdocs_mode
528                    && (is_mkdocs_auto_reference(stripped_ref)
529                        || is_mkdocs_auto_reference(stripped_alt)
530                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
531                        || (image.alt_text.as_ref() != stripped_alt && Self::is_valid_python_identifier(stripped_alt)))
532                {
533                    continue;
534                }
535
536                // Check if reference is defined
537                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
538                    // Check if the line is in an example section or list item
539                    if let Some(line_info) = ctx.line_info(image.line) {
540                        if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
541                            in_example_section = true;
542                            continue;
543                        }
544
545                        if in_example_section {
546                            continue;
547                        }
548
549                        // Skip list items
550                        if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
551                            continue;
552                        }
553
554                        // Skip lines that are HTML content
555                        let trimmed = line_info.content(ctx.content).trim_start();
556                        if trimmed.starts_with('<') {
557                            continue;
558                        }
559                    }
560
561                    let match_len = image.byte_end - image.byte_offset;
562                    undefined.push((image.line - 1, image.start_col, match_len, ref_id.to_string()));
563                    reported_refs.insert(reference_lower, true);
564                }
565            }
566        }
567
568        // Build a set of byte ranges that are already covered by parsed links/images
569        let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
570
571        // Add ranges from parsed links
572        for link in &ctx.links {
573            covered_ranges.push((link.byte_offset, link.byte_end));
574        }
575
576        // Add ranges from parsed images
577        for image in &ctx.images {
578            covered_ranges.push((image.byte_offset, image.byte_end));
579        }
580
581        // Sort ranges by start position
582        covered_ranges.sort_by_key(|&(start, _)| start);
583
584        // Handle shortcut references [text] which aren't captured in ctx.links
585        // Only check these if shortcut_syntax is enabled (default: false)
586        // Shortcut syntax is ambiguous because [text] could be a reference link
587        // OR just text in brackets (like spec notation in quotes)
588        if !self.config.shortcut_syntax {
589            return undefined;
590        }
591
592        // Need to use regex for shortcut references
593        let lines: Vec<&str> = content.lines().collect();
594        in_example_section = false; // Reset for line-by-line processing
595
596        for (line_num, line) in lines.iter().enumerate() {
597            // Skip lines in frontmatter (convert 0-based to 1-based for line_info)
598            if ctx.line_info(line_num + 1).is_some_and(|info| info.in_front_matter) {
599                continue;
600            }
601
602            // Handle code blocks
603            if let Some(cap) = FENCED_CODE_START.captures(line) {
604                if let Some(fence) = cap.get(2) {
605                    // Get the fence marker (``` or ~~~) without the indentation
606                    let fence_str = fence.as_str();
607                    if !in_code_block {
608                        in_code_block = true;
609                        code_fence_marker = fence_str.to_string();
610                    } else if line.trim_start().starts_with(&code_fence_marker) {
611                        // Check if this could be a closing fence
612                        let trimmed = line.trim_start();
613                        // A closing fence should be just the fence characters, possibly with trailing whitespace
614                        if trimmed.starts_with(&code_fence_marker) {
615                            let after_fence = &trimmed[code_fence_marker.len()..];
616                            if after_fence.trim().is_empty() {
617                                in_code_block = false;
618                                code_fence_marker.clear();
619                            }
620                        }
621                    }
622                }
623                continue;
624            }
625
626            if in_code_block {
627                continue;
628            }
629
630            // Check for example sections
631            if OUTPUT_EXAMPLE_START.is_match(line) {
632                in_example_section = true;
633                continue;
634            }
635
636            if in_example_section {
637                // Check if we're exiting the example section (another heading)
638                if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
639                    in_example_section = false;
640                } else {
641                    continue;
642                }
643            }
644
645            // Skip list items
646            if LIST_ITEM_REGEX.is_match(line) {
647                continue;
648            }
649
650            // Skip lines that are HTML content
651            let trimmed_line = line.trim_start();
652            if trimmed_line.starts_with('<') {
653                continue;
654            }
655
656            // Skip GitHub alerts/callouts (e.g., > [!TIP])
657            if GITHUB_ALERT_REGEX.is_match(line) {
658                continue;
659            }
660
661            // Skip abbreviation definitions (*[ABBR]: Definition)
662            // These are not reference links and should not be checked
663            if trimmed_line.starts_with("*[") {
664                continue;
665            }
666
667            // Collect positions of brackets that are part of URLs (IPv6, etc.)
668            // so we can exclude them from reference checking
669            let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
670            for mat in URL_WITH_BRACKETS.find_iter(line) {
671                // Find all bracket pairs within this URL match
672                let url_str = mat.as_str();
673                let url_start = mat.start();
674
675                // Find brackets within the URL (e.g., in https://[::1]:8080)
676                let mut idx = 0;
677                while idx < url_str.len() {
678                    if let Some(bracket_start) = url_str[idx..].find('[') {
679                        let bracket_start_abs = url_start + idx + bracket_start;
680                        if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
681                            let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
682                            url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
683                            idx += bracket_start + bracket_end + 2;
684                        } else {
685                            break;
686                        }
687                    } else {
688                        break;
689                    }
690                }
691            }
692
693            // Check shortcut references: [reference]
694            if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
695                for cap in captures {
696                    if let Some(ref_match) = cap.get(1) {
697                        // Check if this bracket is part of a URL (IPv6, etc.)
698                        let bracket_start = cap.get(0).unwrap().start();
699                        let bracket_end = cap.get(0).unwrap().end();
700
701                        // Skip if this bracket pair is within any URL bracket range
702                        let is_in_url = url_bracket_ranges
703                            .iter()
704                            .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
705
706                        if is_in_url {
707                            continue;
708                        }
709
710                        // Skip Pandoc/RMarkdown inline footnotes: ^[text]
711                        // Check if there's a ^ immediately before the opening bracket
712                        if bracket_start > 0 {
713                            // bracket_start is a byte offset, so we need to check the byte before
714                            if let Some(byte) = line.as_bytes().get(bracket_start.saturating_sub(1))
715                                && *byte == b'^'
716                            {
717                                continue; // This is an inline footnote, skip it
718                            }
719                        }
720
721                        let reference = ref_match.as_str();
722                        let reference_lower = reference.to_lowercase();
723
724                        // Skip known non-reference patterns (markdown extensions, code examples)
725                        if self.is_known_non_reference_pattern(reference) {
726                            continue;
727                        }
728
729                        // Skip GitHub alerts (including extended types)
730                        if let Some(alert_type) = reference.strip_prefix('!')
731                            && matches!(
732                                alert_type,
733                                "NOTE"
734                                    | "TIP"
735                                    | "WARNING"
736                                    | "IMPORTANT"
737                                    | "CAUTION"
738                                    | "INFO"
739                                    | "SUCCESS"
740                                    | "FAILURE"
741                                    | "DANGER"
742                                    | "BUG"
743                                    | "EXAMPLE"
744                                    | "QUOTE"
745                            )
746                        {
747                            continue;
748                        }
749
750                        // Skip MkDocs snippet section markers like [start:section] or [end:section]
751                        // when they appear as part of snippet syntax (e.g., # -8<- [start:section])
752                        if mkdocs_mode
753                            && (reference.starts_with("start:") || reference.starts_with("end:"))
754                            && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
755                                || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
756                        {
757                            continue;
758                        }
759
760                        // Skip MkDocs auto-references if in MkDocs mode
761                        // Strip backticks since MkDocs resolves `module.Class` as module.Class
762                        let stripped_ref = Self::strip_backticks(reference);
763                        if mkdocs_mode
764                            && (is_mkdocs_auto_reference(stripped_ref)
765                                || (reference != stripped_ref && Self::is_valid_python_identifier(stripped_ref)))
766                        {
767                            continue;
768                        }
769
770                        if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
771                            let full_match = cap.get(0).unwrap();
772                            let col = full_match.start();
773
774                            // Skip if inside code span
775                            let code_spans = ctx.code_spans();
776                            if Self::is_in_code_span(line_num + 1, col, &code_spans) {
777                                continue;
778                            }
779
780                            // Check if this position is within a covered range
781                            let line_start_byte = ctx.line_offsets[line_num];
782                            let byte_pos = line_start_byte + col;
783
784                            // Skip if inside Jinja template
785                            if ctx.is_in_jinja_range(byte_pos) {
786                                continue;
787                            }
788
789                            // Skip if inside code block
790                            if crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block(
791                                &ctx.code_blocks,
792                                byte_pos,
793                            ) {
794                                continue;
795                            }
796
797                            // Skip if inside HTML comment
798                            if Self::is_in_html_comment(content, byte_pos) {
799                                continue;
800                            }
801
802                            // Skip if inside HTML tag
803                            if Self::is_in_html_tag(ctx, byte_pos) {
804                                continue;
805                            }
806
807                            // Skip if inside math context
808                            if is_in_math_context(ctx, byte_pos) {
809                                continue;
810                            }
811
812                            // Skip if inside table cell
813                            if is_in_table_cell(ctx, line_num + 1, col) {
814                                continue;
815                            }
816
817                            let byte_end = byte_pos + (full_match.end() - full_match.start());
818
819                            // Check if this shortcut ref overlaps with any parsed link/image
820                            let mut is_covered = false;
821                            for &(range_start, range_end) in &covered_ranges {
822                                if range_start <= byte_pos && byte_end <= range_end {
823                                    // This shortcut ref is completely within a parsed link/image
824                                    is_covered = true;
825                                    break;
826                                }
827                                if range_start > byte_end {
828                                    // No need to check further (ranges are sorted)
829                                    break;
830                                }
831                            }
832
833                            if is_covered {
834                                continue;
835                            }
836
837                            // More sophisticated checks to avoid false positives
838
839                            // Check 1: If preceded by ], this might be part of [text][ref]
840                            // Look for the pattern ...][ref] and check if there's a matching [ before
841                            let line_chars: Vec<char> = line.chars().collect();
842                            if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
843                                // Look backwards for a [ that would make this [text][ref]
844                                let mut bracket_count = 1; // We already saw one ]
845                                let mut check_pos = col.saturating_sub(2);
846                                let mut found_opening = false;
847
848                                while check_pos > 0 && check_pos < line_chars.len() {
849                                    match line_chars.get(check_pos) {
850                                        Some(&']') => bracket_count += 1,
851                                        Some(&'[') => {
852                                            bracket_count -= 1;
853                                            if bracket_count == 0 {
854                                                // Check if this [ is escaped
855                                                if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
856                                                    found_opening = true;
857                                                }
858                                                break;
859                                            }
860                                        }
861                                        _ => {}
862                                    }
863                                    if check_pos == 0 {
864                                        break;
865                                    }
866                                    check_pos = check_pos.saturating_sub(1);
867                                }
868
869                                if found_opening {
870                                    // This is part of [text][ref], skip it
871                                    continue;
872                                }
873                            }
874
875                            // Check 2: If there's an escaped bracket pattern before this
876                            // e.g., \[text\][ref], the [ref] shouldn't be treated as a shortcut
877                            let before_text = &line[..col];
878                            if before_text.contains("\\]") {
879                                // Check if there's a \[ before the \]
880                                if let Some(escaped_close_pos) = before_text.rfind("\\]") {
881                                    let search_text = &before_text[..escaped_close_pos];
882                                    if search_text.contains("\\[") {
883                                        // This looks like \[...\][ref], skip it
884                                        continue;
885                                    }
886                                }
887                            }
888
889                            let match_len = full_match.end() - full_match.start();
890                            undefined.push((line_num, col, match_len, reference.to_string()));
891                            reported_refs.insert(reference_lower, true);
892                        }
893                    }
894                }
895            }
896        }
897
898        undefined
899    }
900}
901
902impl Rule for MD052ReferenceLinkImages {
903    fn name(&self) -> &'static str {
904        "MD052"
905    }
906
907    fn description(&self) -> &'static str {
908        "Reference links and images should use a reference that exists"
909    }
910
911    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
912        let content = ctx.content;
913        let mut warnings = Vec::new();
914
915        // OPTIMIZATION: Early exit if no brackets at all
916        if !content.contains('[') {
917            return Ok(warnings);
918        }
919
920        // Check if we're in MkDocs mode from the context
921        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
922
923        let references = self.extract_references(content, mkdocs_mode);
924
925        // Use optimized detection method with cached link/image data
926        for (line_num, col, match_len, reference) in
927            self.find_undefined_references(content, &references, ctx, mkdocs_mode)
928        {
929            let lines: Vec<&str> = content.lines().collect();
930            let line_content = lines.get(line_num).unwrap_or(&"");
931
932            // Calculate precise character range for the entire undefined reference
933            let (start_line, start_col, end_line, end_col) =
934                calculate_match_range(line_num + 1, line_content, col, match_len);
935
936            warnings.push(LintWarning {
937                rule_name: Some(self.name().to_string()),
938                line: start_line,
939                column: start_col,
940                end_line,
941                end_column: end_col,
942                message: format!("Reference '{reference}' not found"),
943                severity: Severity::Warning,
944                fix: None,
945            });
946        }
947
948        Ok(warnings)
949    }
950
951    /// Check if this rule should be skipped for performance
952    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
953        // Skip if content is empty or has no links/images
954        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
955    }
956
957    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
958        let content = ctx.content;
959        // No automatic fix available for undefined references
960        Ok(content.to_string())
961    }
962
963    fn as_any(&self) -> &dyn std::any::Any {
964        self
965    }
966
967    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
968    where
969        Self: Sized,
970    {
971        let rule_config = crate::rule_config_serde::load_rule_config::<MD052Config>(config);
972        Box::new(Self::from_config_struct(rule_config))
973    }
974}
975
976#[cfg(test)]
977mod tests {
978    use super::*;
979    use crate::lint_context::LintContext;
980
981    #[test]
982    fn test_valid_reference_link() {
983        let rule = MD052ReferenceLinkImages::new();
984        let content = "[text][ref]\n\n[ref]: https://example.com";
985        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
986        let result = rule.check(&ctx).unwrap();
987
988        assert_eq!(result.len(), 0);
989    }
990
991    #[test]
992    fn test_undefined_reference_link() {
993        let rule = MD052ReferenceLinkImages::new();
994        let content = "[text][undefined]";
995        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
996        let result = rule.check(&ctx).unwrap();
997
998        assert_eq!(result.len(), 1);
999        assert!(result[0].message.contains("Reference 'undefined' not found"));
1000    }
1001
1002    #[test]
1003    fn test_valid_reference_image() {
1004        let rule = MD052ReferenceLinkImages::new();
1005        let content = "![alt][img]\n\n[img]: image.jpg";
1006        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1007        let result = rule.check(&ctx).unwrap();
1008
1009        assert_eq!(result.len(), 0);
1010    }
1011
1012    #[test]
1013    fn test_undefined_reference_image() {
1014        let rule = MD052ReferenceLinkImages::new();
1015        let content = "![alt][missing]";
1016        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1017        let result = rule.check(&ctx).unwrap();
1018
1019        assert_eq!(result.len(), 1);
1020        assert!(result[0].message.contains("Reference 'missing' not found"));
1021    }
1022
1023    #[test]
1024    fn test_case_insensitive_references() {
1025        let rule = MD052ReferenceLinkImages::new();
1026        let content = "[Text][REF]\n\n[ref]: https://example.com";
1027        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1028        let result = rule.check(&ctx).unwrap();
1029
1030        assert_eq!(result.len(), 0);
1031    }
1032
1033    #[test]
1034    fn test_shortcut_reference_valid() {
1035        let rule = MD052ReferenceLinkImages::new();
1036        let content = "[ref]\n\n[ref]: https://example.com";
1037        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1038        let result = rule.check(&ctx).unwrap();
1039
1040        assert_eq!(result.len(), 0);
1041    }
1042
1043    #[test]
1044    fn test_shortcut_reference_undefined_with_shortcut_syntax_enabled() {
1045        // Shortcut syntax checking is disabled by default
1046        // Enable it to test undefined shortcut references
1047        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1048            shortcut_syntax: true,
1049            ..Default::default()
1050        });
1051        let content = "[undefined]";
1052        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1053        let result = rule.check(&ctx).unwrap();
1054
1055        assert_eq!(result.len(), 1);
1056        assert!(result[0].message.contains("Reference 'undefined' not found"));
1057    }
1058
1059    #[test]
1060    fn test_shortcut_reference_not_checked_by_default() {
1061        // By default, shortcut references are NOT checked (matches markdownlint behavior)
1062        let rule = MD052ReferenceLinkImages::new();
1063        let content = "[undefined]";
1064        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1065        let result = rule.check(&ctx).unwrap();
1066
1067        // Should be 0 because shortcut_syntax is false by default
1068        assert_eq!(result.len(), 0);
1069    }
1070
1071    #[test]
1072    fn test_inline_links_ignored() {
1073        let rule = MD052ReferenceLinkImages::new();
1074        let content = "[text](https://example.com)";
1075        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1076        let result = rule.check(&ctx).unwrap();
1077
1078        assert_eq!(result.len(), 0);
1079    }
1080
1081    #[test]
1082    fn test_inline_images_ignored() {
1083        let rule = MD052ReferenceLinkImages::new();
1084        let content = "![alt](image.jpg)";
1085        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1086        let result = rule.check(&ctx).unwrap();
1087
1088        assert_eq!(result.len(), 0);
1089    }
1090
1091    #[test]
1092    fn test_references_in_code_blocks_ignored() {
1093        let rule = MD052ReferenceLinkImages::new();
1094        let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
1095        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1096        let result = rule.check(&ctx).unwrap();
1097
1098        assert_eq!(result.len(), 0);
1099    }
1100
1101    #[test]
1102    fn test_references_in_inline_code_ignored() {
1103        let rule = MD052ReferenceLinkImages::new();
1104        let content = "`[undefined]`";
1105        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1106        let result = rule.check(&ctx).unwrap();
1107
1108        // References inside inline code spans should be ignored
1109        assert_eq!(result.len(), 0);
1110    }
1111
1112    #[test]
1113    fn test_comprehensive_inline_code_detection() {
1114        // Enable shortcut_syntax to test comprehensive detection
1115        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1116            shortcut_syntax: true,
1117            ..Default::default()
1118        });
1119        let content = r#"# Test
1120
1121This `[inside]` should be ignored.
1122This [outside] should be flagged.
1123Reference links `[text][ref]` in code are ignored.
1124Regular reference [text][missing] should be flagged.
1125Images `![alt][img]` in code are ignored.
1126Regular image ![alt][badimg] should be flagged.
1127
1128Multiple `[one]` and `[two]` in code ignored, but [three] is not.
1129
1130```
1131[code block content] should be ignored
1132```
1133
1134`Multiple [refs] in [same] code span` ignored."#;
1135
1136        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1137        let result = rule.check(&ctx).unwrap();
1138
1139        // Should only flag: outside, missing, badimg, three (4 total)
1140        assert_eq!(result.len(), 4);
1141
1142        let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
1143        assert!(messages.iter().any(|m| m.contains("outside")));
1144        assert!(messages.iter().any(|m| m.contains("missing")));
1145        assert!(messages.iter().any(|m| m.contains("badimg")));
1146        assert!(messages.iter().any(|m| m.contains("three")));
1147
1148        // Should NOT flag any references inside code spans
1149        assert!(!messages.iter().any(|m| m.contains("inside")));
1150        assert!(!messages.iter().any(|m| m.contains("one")));
1151        assert!(!messages.iter().any(|m| m.contains("two")));
1152        assert!(!messages.iter().any(|m| m.contains("refs")));
1153        assert!(!messages.iter().any(|m| m.contains("same")));
1154    }
1155
1156    #[test]
1157    fn test_multiple_undefined_references() {
1158        let rule = MD052ReferenceLinkImages::new();
1159        let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
1160        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1161        let result = rule.check(&ctx).unwrap();
1162
1163        assert_eq!(result.len(), 3);
1164        assert!(result[0].message.contains("ref1"));
1165        assert!(result[1].message.contains("ref2"));
1166        assert!(result[2].message.contains("ref3"));
1167    }
1168
1169    #[test]
1170    fn test_mixed_valid_and_undefined() {
1171        let rule = MD052ReferenceLinkImages::new();
1172        let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
1173        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1174        let result = rule.check(&ctx).unwrap();
1175
1176        assert_eq!(result.len(), 1);
1177        assert!(result[0].message.contains("missing"));
1178    }
1179
1180    #[test]
1181    fn test_empty_reference() {
1182        let rule = MD052ReferenceLinkImages::new();
1183        let content = "[text][]\n\n[ref]: https://example.com";
1184        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1185        let result = rule.check(&ctx).unwrap();
1186
1187        // Empty reference should use the link text as reference
1188        assert_eq!(result.len(), 1);
1189    }
1190
1191    #[test]
1192    fn test_escaped_brackets_ignored() {
1193        let rule = MD052ReferenceLinkImages::new();
1194        let content = "\\[not a link\\]";
1195        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1196        let result = rule.check(&ctx).unwrap();
1197
1198        assert_eq!(result.len(), 0);
1199    }
1200
1201    #[test]
1202    fn test_list_items_ignored() {
1203        let rule = MD052ReferenceLinkImages::new();
1204        let content = "- [undefined]\n* [another]\n+ [third]";
1205        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1206        let result = rule.check(&ctx).unwrap();
1207
1208        // List items that look like shortcut references should be ignored
1209        assert_eq!(result.len(), 0);
1210    }
1211
1212    #[test]
1213    fn test_output_example_section_ignored() {
1214        // Enable shortcut_syntax to test example section handling
1215        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1216            shortcut_syntax: true,
1217            ..Default::default()
1218        });
1219        let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
1220        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1221        let result = rule.check(&ctx).unwrap();
1222
1223        // Only the reference outside the Output section should be flagged
1224        assert_eq!(result.len(), 1);
1225        assert!(result[0].message.contains("missing"));
1226    }
1227
1228    #[test]
1229    fn test_reference_definitions_in_code_blocks_ignored() {
1230        let rule = MD052ReferenceLinkImages::new();
1231        let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
1232        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1233        let result = rule.check(&ctx).unwrap();
1234
1235        // Reference defined in code block should not count
1236        assert_eq!(result.len(), 1);
1237        assert!(result[0].message.contains("ref"));
1238    }
1239
1240    #[test]
1241    fn test_multiple_references_to_same_undefined() {
1242        let rule = MD052ReferenceLinkImages::new();
1243        let content = "[first][missing] [second][missing] [third][missing]";
1244        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1245        let result = rule.check(&ctx).unwrap();
1246
1247        // Should only report once per unique reference
1248        assert_eq!(result.len(), 1);
1249        assert!(result[0].message.contains("missing"));
1250    }
1251
1252    #[test]
1253    fn test_reference_with_special_characters() {
1254        let rule = MD052ReferenceLinkImages::new();
1255        let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
1256        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1257        let result = rule.check(&ctx).unwrap();
1258
1259        assert_eq!(result.len(), 0);
1260    }
1261
1262    #[test]
1263    fn test_issue_51_html_attribute_not_reference() {
1264        // Test for issue #51 - HTML attributes with square brackets shouldn't be treated as references
1265        let rule = MD052ReferenceLinkImages::new();
1266        let content = r#"# Example
1267
1268## Test
1269
1270Want to fill out this form?
1271
1272<form method="post">
1273    <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
1274</form>"#;
1275        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1276        let result = rule.check(&ctx).unwrap();
1277
1278        assert_eq!(
1279            result.len(),
1280            0,
1281            "HTML attributes with square brackets should not be flagged as undefined references"
1282        );
1283    }
1284
1285    #[test]
1286    fn test_extract_references() {
1287        let rule = MD052ReferenceLinkImages::new();
1288        let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
1289        let refs = rule.extract_references(content, false);
1290
1291        assert_eq!(refs.len(), 3);
1292        assert!(refs.contains("ref1"));
1293        assert!(refs.contains("ref2"));
1294        assert!(refs.contains("ref3"));
1295    }
1296
1297    #[test]
1298    fn test_inline_code_not_flagged() {
1299        // Enable shortcut_syntax to test inline code detection
1300        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1301            shortcut_syntax: true,
1302            ..Default::default()
1303        });
1304
1305        // Test that arrays in inline code are not flagged as references
1306        let content = r#"# Test
1307
1308Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.
1309
1310Also, `[todo]` is not a reference link.
1311
1312But this [reference] should be flagged.
1313
1314And this `[inline code]` should not be flagged.
1315"#;
1316
1317        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1318        let warnings = rule.check(&ctx).unwrap();
1319
1320        // Should only flag [reference], not the ones in backticks
1321        assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
1322        assert!(warnings[0].message.contains("'reference'"));
1323    }
1324
1325    #[test]
1326    fn test_code_block_references_ignored() {
1327        // Enable shortcut_syntax to test code block handling
1328        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1329            shortcut_syntax: true,
1330            ..Default::default()
1331        });
1332
1333        let content = r#"# Test
1334
1335```markdown
1336[undefined] reference in code block
1337![undefined] image in code block
1338```
1339
1340[real-undefined] reference outside
1341"#;
1342
1343        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1344        let warnings = rule.check(&ctx).unwrap();
1345
1346        // Should only flag [real-undefined], not the ones in code block
1347        assert_eq!(warnings.len(), 1);
1348        assert!(warnings[0].message.contains("'real-undefined'"));
1349    }
1350
1351    #[test]
1352    fn test_html_comments_ignored() {
1353        // Test for issue #20 - MD052 should not flag content inside HTML comments
1354        let rule = MD052ReferenceLinkImages::new();
1355
1356        // Test the exact case from issue #20
1357        let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
1358<!--- set_env EDITOR 'python3 fake_editor.py' -->
1359
1360```bash
1361$ python3 vote.py
13623 votes for: 2
13632 votes for: 3, 4
1364```"#;
1365        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1366        let result = rule.check(&ctx).unwrap();
1367        assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");
1368
1369        // Test various reference patterns inside HTML comments
1370        let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
1371Normal [text][undefined]
1372<!-- Another [comment][with] references -->"#;
1373        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1374        let result = rule.check(&ctx).unwrap();
1375        assert_eq!(
1376            result.len(),
1377            1,
1378            "Should only flag the undefined reference outside comments"
1379        );
1380        assert!(result[0].message.contains("undefined"));
1381
1382        // Test multi-line HTML comments
1383        let content = r#"<!--
1384[ref1]
1385[ref2][ref3]
1386-->
1387[actual][undefined]"#;
1388        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1389        let result = rule.check(&ctx).unwrap();
1390        assert_eq!(
1391            result.len(),
1392            1,
1393            "Should not flag references in multi-line HTML comments"
1394        );
1395        assert!(result[0].message.contains("undefined"));
1396
1397        // Test mixed scenarios
1398        let content = r#"<!-- Comment with [1:] pattern -->
1399Valid [link][ref]
1400<!-- More [refs][in][comments] -->
1401![image][missing]
1402
1403[ref]: https://example.com"#;
1404        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1405        let result = rule.check(&ctx).unwrap();
1406        assert_eq!(result.len(), 1, "Should only flag missing image reference");
1407        assert!(result[0].message.contains("missing"));
1408    }
1409
1410    #[test]
1411    fn test_frontmatter_ignored() {
1412        // Test for issue #24 - MD052 should not flag content inside frontmatter
1413        // Enable shortcut_syntax to test frontmatter handling
1414        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1415            shortcut_syntax: true,
1416            ..Default::default()
1417        });
1418
1419        // Test YAML frontmatter with arrays and references
1420        let content = r#"---
1421layout: post
1422title: "My Jekyll Post"
1423date: 2023-01-01
1424categories: blog
1425tags: ["test", "example"]
1426author: John Doe
1427---
1428
1429# My Blog Post
1430
1431This is the actual markdown content that should be linted.
1432
1433[undefined] reference should be flagged.
1434
1435## Section 1
1436
1437Some content here."#;
1438        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1439        let result = rule.check(&ctx).unwrap();
1440
1441        // Should only flag [undefined] in the content, not the ["test", "example"] array in frontmatter
1442        assert_eq!(
1443            result.len(),
1444            1,
1445            "Should only flag the undefined reference outside frontmatter"
1446        );
1447        assert!(result[0].message.contains("undefined"));
1448
1449        // Test TOML frontmatter
1450        let content = r#"+++
1451title = "My Post"
1452tags = ["example", "test"]
1453+++
1454
1455# Content
1456
1457[missing] reference should be flagged."#;
1458        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1459        let result = rule.check(&ctx).unwrap();
1460        assert_eq!(
1461            result.len(),
1462            1,
1463            "Should only flag the undefined reference outside TOML frontmatter"
1464        );
1465        assert!(result[0].message.contains("missing"));
1466    }
1467
1468    #[test]
1469    fn test_mkdocs_snippet_markers_not_flagged() {
1470        // Test for issue #68 - MkDocs snippet selection markers should not be flagged as undefined references
1471        // Enable shortcut_syntax to test snippet marker handling
1472        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1473            shortcut_syntax: true,
1474            ..Default::default()
1475        });
1476
1477        // Test snippet section markers
1478        let content = r#"# Document with MkDocs Snippets
1479
1480Some content here.
1481
1482# -8<- [start:remote-content]
1483
1484This is the remote content section.
1485
1486# -8<- [end:remote-content]
1487
1488More content here.
1489
1490<!-- --8<-- [start:another-section] -->
1491Content in another section
1492<!-- --8<-- [end:another-section] -->"#;
1493        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
1494        let result = rule.check(&ctx).unwrap();
1495
1496        // Should not flag any snippet markers as undefined references
1497        assert_eq!(
1498            result.len(),
1499            0,
1500            "Should not flag MkDocs snippet markers as undefined references"
1501        );
1502
1503        // Test that the snippet marker lines are properly skipped
1504        // but regular undefined references on other lines are still caught
1505        let content = r#"# Document
1506
1507# -8<- [start:section]
1508Content with [reference] inside snippet section
1509# -8<- [end:section]
1510
1511Regular [undefined] reference outside snippet markers."#;
1512        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
1513        let result = rule.check(&ctx).unwrap();
1514
1515        assert_eq!(
1516            result.len(),
1517            2,
1518            "Should flag undefined references but skip snippet marker lines"
1519        );
1520        // The references inside the content should be flagged, but not start: and end:
1521        assert!(result[0].message.contains("reference"));
1522        assert!(result[1].message.contains("undefined"));
1523
1524        // Test in standard mode - should flag the markers as undefined
1525        let content = r#"# Document
1526
1527# -8<- [start:section]
1528# -8<- [end:section]"#;
1529        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1530        let result = rule.check(&ctx).unwrap();
1531
1532        assert_eq!(
1533            result.len(),
1534            2,
1535            "In standard mode, snippet markers should be flagged as undefined references"
1536        );
1537    }
1538
1539    #[test]
1540    fn test_pandoc_citations_not_flagged() {
1541        // Test that Pandoc/RMarkdown/Quarto citation syntax is not flagged
1542        // Enable shortcut_syntax to test citation handling
1543        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1544            shortcut_syntax: true,
1545            ..Default::default()
1546        });
1547
1548        let content = r#"# Research Paper
1549
1550We are using the **bookdown** package [@R-bookdown] in this sample book.
1551This was built on top of R Markdown and **knitr** [@xie2015].
1552
1553Multiple citations [@citation1; @citation2; @citation3] are also supported.
1554
1555Regular [undefined] reference should still be flagged.
1556"#;
1557        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1558        let result = rule.check(&ctx).unwrap();
1559
1560        // Should only flag the undefined reference, not the citations
1561        assert_eq!(
1562            result.len(),
1563            1,
1564            "Should only flag the undefined reference, not Pandoc citations"
1565        );
1566        assert!(result[0].message.contains("undefined"));
1567    }
1568
1569    #[test]
1570    fn test_pandoc_inline_footnotes_not_flagged() {
1571        // Test that Pandoc inline footnote syntax is not flagged
1572        // Enable shortcut_syntax to test inline footnote handling
1573        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1574            shortcut_syntax: true,
1575            ..Default::default()
1576        });
1577
1578        let content = r#"# Math Document
1579
1580You can use math in footnotes like this^[where we mention $p = \frac{a}{b}$].
1581
1582Another footnote^[with some text and a [link](https://example.com)].
1583
1584But this [reference] without ^ should be flagged.
1585"#;
1586        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1587        let result = rule.check(&ctx).unwrap();
1588
1589        // Should only flag the reference without ^
1590        assert_eq!(
1591            result.len(),
1592            1,
1593            "Should only flag the regular reference, not inline footnotes"
1594        );
1595        assert!(result[0].message.contains("reference"));
1596    }
1597
1598    #[test]
1599    fn test_github_alerts_not_flagged() {
1600        // Test for issue #60 - GitHub alerts should not be flagged as undefined references
1601        // Enable shortcut_syntax to test GitHub alert handling
1602        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1603            shortcut_syntax: true,
1604            ..Default::default()
1605        });
1606
1607        // Test various GitHub alert types
1608        let content = r#"# Document with GitHub Alerts
1609
1610> [!NOTE]
1611> This is a note alert.
1612
1613> [!TIP]
1614> This is a tip alert.
1615
1616> [!IMPORTANT]
1617> This is an important alert.
1618
1619> [!WARNING]
1620> This is a warning alert.
1621
1622> [!CAUTION]
1623> This is a caution alert.
1624
1625Regular content with [undefined] reference."#;
1626        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1627        let result = rule.check(&ctx).unwrap();
1628
1629        // Should only flag the undefined reference, not the GitHub alerts
1630        assert_eq!(
1631            result.len(),
1632            1,
1633            "Should only flag the undefined reference, not GitHub alerts"
1634        );
1635        assert!(result[0].message.contains("undefined"));
1636        assert_eq!(result[0].line, 18); // Line with [undefined]
1637
1638        // Test GitHub alerts with additional content
1639        let content = r#"> [!TIP]
1640> Here's a useful tip about [something].
1641> Multiple lines are allowed.
1642
1643[something] is mentioned but not defined."#;
1644        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1645        let result = rule.check(&ctx).unwrap();
1646
1647        // Should flag only the [something] outside blockquotes
1648        // The test shows we're only catching one, which might be correct behavior
1649        // matching markdownlint's approach
1650        assert_eq!(result.len(), 1, "Should flag undefined reference");
1651        assert!(result[0].message.contains("something"));
1652
1653        // Test GitHub alerts with proper references
1654        let content = r#"> [!NOTE]
1655> See [reference] for more details.
1656
1657[reference]: https://example.com"#;
1658        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1659        let result = rule.check(&ctx).unwrap();
1660
1661        // Should not flag anything - [!NOTE] is GitHub alert and [reference] is defined
1662        assert_eq!(result.len(), 0, "Should not flag GitHub alerts or defined references");
1663    }
1664
1665    #[test]
1666    fn test_ignore_config() {
1667        // Test that user-configured ignore list is respected
1668        let config = MD052Config {
1669            shortcut_syntax: true,
1670            ignore: vec!["Vec".to_string(), "HashMap".to_string(), "Option".to_string()],
1671        };
1672        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1673
1674        let content = r#"# Document with Custom Types
1675
1676Use [Vec] for dynamic arrays.
1677Use [HashMap] for key-value storage.
1678Use [Option] for nullable values.
1679Use [Result] for error handling.
1680"#;
1681        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1682        let result = rule.check(&ctx).unwrap();
1683
1684        // Should only flag [Result] because it's not in ignore
1685        assert_eq!(result.len(), 1, "Should only flag names not in ignore");
1686        assert!(result[0].message.contains("Result"));
1687    }
1688
1689    #[test]
1690    fn test_ignore_case_insensitive() {
1691        // Test that ignore list is case-insensitive
1692        let config = MD052Config {
1693            shortcut_syntax: true,
1694            ignore: vec!["Vec".to_string()],
1695        };
1696        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1697
1698        let content = r#"# Case Insensitivity Test
1699
1700[Vec] should be ignored.
1701[vec] should also be ignored (different case, same match).
1702[VEC] should also be ignored (different case, same match).
1703[undefined] should be flagged (not in ignore list).
1704"#;
1705        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1706        let result = rule.check(&ctx).unwrap();
1707
1708        // Should only flag [undefined] because ignore is case-insensitive
1709        assert_eq!(result.len(), 1, "Should only flag non-ignored reference");
1710        assert!(result[0].message.contains("undefined"));
1711    }
1712
1713    #[test]
1714    fn test_ignore_empty_by_default() {
1715        // Test that empty ignore list doesn't affect existing behavior
1716        let rule = MD052ReferenceLinkImages::new();
1717
1718        let content = "[text][undefined]";
1719        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1720        let result = rule.check(&ctx).unwrap();
1721
1722        // Should still flag undefined references
1723        assert_eq!(result.len(), 1);
1724        assert!(result[0].message.contains("undefined"));
1725    }
1726
1727    #[test]
1728    fn test_ignore_with_reference_links() {
1729        // Test ignore list with full reference link syntax [text][ref]
1730        let config = MD052Config {
1731            shortcut_syntax: false,
1732            ignore: vec!["CustomType".to_string()],
1733        };
1734        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1735
1736        let content = r#"# Test
1737
1738See [documentation][CustomType] for details.
1739See [other docs][MissingRef] for more.
1740"#;
1741        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1742        let result = rule.check(&ctx).unwrap();
1743
1744        // Debug: print warnings if test fails
1745        for (i, w) in result.iter().enumerate() {
1746            eprintln!("Warning {}: {}", i, w.message);
1747        }
1748
1749        // Should flag [MissingRef] but not [CustomType]
1750        // Note: reference IDs are lowercased in the message
1751        assert_eq!(result.len(), 1, "Expected 1 warning, got {}", result.len());
1752        assert!(
1753            result[0].message.contains("missingref"),
1754            "Expected 'missingref' in message: {}",
1755            result[0].message
1756        );
1757    }
1758
1759    #[test]
1760    fn test_ignore_multiple() {
1761        // Test multiple ignored names work correctly
1762        let config = MD052Config {
1763            shortcut_syntax: true,
1764            ignore: vec![
1765                "i32".to_string(),
1766                "u64".to_string(),
1767                "String".to_string(),
1768                "Arc".to_string(),
1769                "Mutex".to_string(),
1770            ],
1771        };
1772        let rule = MD052ReferenceLinkImages::from_config_struct(config);
1773
1774        let content = r#"# Types
1775
1776[i32] [u64] [String] [Arc] [Mutex] [Box]
1777"#;
1778        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1779        let result = rule.check(&ctx).unwrap();
1780
1781        // Note: i32 and u64 are already in the hardcoded list, so they'd be skipped anyway
1782        // String is NOT in the hardcoded list, so we test that the user config works
1783        // [Box] should be flagged (not in ignore)
1784        assert_eq!(result.len(), 1);
1785        assert!(result[0].message.contains("Box"));
1786    }
1787}