rumdl_lib/rules/
md052_reference_links_images.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::{HTML_COMMENT_PATTERN, SHORTCUT_REF_REGEX};
5use crate::utils::skip_context::{is_in_math_context, is_in_table_cell};
6use lazy_static::lazy_static;
7use regex::Regex;
8use std::collections::{HashMap, HashSet};
9
10lazy_static! {
11    // Pattern to match reference definitions [ref]: url
12    // Note: \S* instead of \S+ to allow empty definitions like [ref]:
13    // The capturing group handles nested brackets to support cases like [`union[t, none]`]:
14    static ref REF_REGEX: Regex = Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap();
15
16    // Pattern for list items to exclude from reference checks (standard regex is fine)
17    static ref LIST_ITEM_REGEX: Regex = Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap();
18
19    // Pattern for code blocks (standard regex is fine)
20    static ref FENCED_CODE_START: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap();
21
22    // Pattern for output example sections (standard regex is fine)
23    static ref OUTPUT_EXAMPLE_START: Regex = Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap();
24
25    // Pattern for GitHub alerts/callouts in blockquotes (e.g., > [!NOTE], > [!TIP], etc.)
26    // Extended to include additional common alert types
27    static ref GITHUB_ALERT_REGEX: Regex = Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]").unwrap();
28
29    // Pattern to detect URLs that may contain brackets (IPv6, API endpoints, etc.)
30    // This pattern specifically looks for:
31    // - IPv6 addresses: https://[::1] or https://[2001:db8::1]
32    // - IPv6 with zone IDs: https://[fe80::1%eth0]
33    // - IPv6 mixed notation: https://[::ffff:192.0.2.1]
34    // - API paths with array notation: https://api.example.com/users[0]
35    // But NOT markdown reference links that happen to follow URLs
36    static ref URL_WITH_BRACKETS: Regex = Regex::new(
37        r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])"
38    ).unwrap();
39}
40
/// Rule MD052: reference links and images must point at a defined reference.
///
/// A warning is raised whenever a reference-style link or image names a reference
/// for which no definition exists.
///
/// Full documentation, configuration, and examples live in
/// [docs/md052.md](../../docs/md052.md).
#[derive(Default, Clone)]
pub struct MD052ReferenceLinkImages {}
48
49impl MD052ReferenceLinkImages {
50    pub fn new() -> Self {
51        Self {}
52    }
53
/// Remove every leading and trailing backtick from `s`; backticks in the middle are kept.
///
/// MkDocs auto-reference detection uses this so that a code-formatted name is compared
/// as the bare dotted path it wraps.
fn strip_backticks(s: &str) -> &str {
    s.trim_matches('`')
}
59
/// Return `true` when `s` is a non-empty ASCII identifier: a letter or underscore followed
/// only by letters, digits, or underscores.
///
/// MkDocs auto-reference detection uses this to accept single-word, backtick-wrapped names
/// such as `str` or `int` as valid auto-references.
fn is_valid_python_identifier(s: &str) -> bool {
    let mut rest = s.chars();
    match rest.next() {
        // A valid head character; the remaining characters may additionally be digits.
        Some(head) if head == '_' || head.is_ascii_alphabetic() => {
            rest.all(|ch| ch == '_' || ch.is_ascii_alphanumeric())
        }
        // Empty string, or a head that is a digit / punctuation / non-ASCII.
        _ => false,
    }
}
73
/// Heuristically decide whether bracketed `text` is something other than a markdown reference.
///
/// Returns `true` when `text` matches any of the "not a reference" shapes checked below
/// (numbers, config sections, paths, type annotations, quoted values, ...), meaning the
/// caller should skip it. Returns `false` when none of the heuristics fire.
///
/// Dotted names that contain backticks, e.g. `` `typing.ClassVar` ``, are deliberately NOT
/// skipped by the dotted-name heuristic: backticks indicate intentional code formatting
/// inside a reference name, which is valid markdown.
///
/// There is intentionally no word-count heuristic: legitimate references can have many
/// words, like "python language reference for import statements".
fn is_likely_not_reference(text: &str) -> bool {
    // Common programming type names and short identifiers that are likely not references.
    const COMMON_NON_REFS: &[&str] = &[
        "object", "Object", "any", "Any", "inv", "void", "bool", "int", "float", "str", "char",
        "i8", "i16", "i32", "i64", "i128", "isize", "u8", "u16", "u32", "u64", "u128", "usize",
        "f32", "f64",
    ];

    // Purely numeric text such as array indices: [0], [42].
    // (`all` is vacuously true for an empty string, so empty text is skipped here too.)
    if text.chars().all(|c| c.is_ascii_digit()) {
        return true;
    }

    // Numeric ranges like [1:3], [0:10].
    if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
        return true;
    }

    // Dotted names without spaces, hyphens, or underscores: config sections such as
    // [tool.something] and module/class paths such as [dataclasses.initvar].
    // Text containing a backtick is excluded so that code-formatted reference names like
    // [`typing.ClassVar`] are still treated as references.
    if text.contains('.') && !text.contains(|c: char| matches!(c, ' ' | '-' | '_' | '`')) {
        return true;
    }

    // Glob/wildcard patterns like [*], [...], [**].
    if matches!(text, "*" | "..." | "**") {
        return true;
    }

    // File-path-like text such as [dir/file] or [src/utils] (but not URLs).
    if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
        return true;
    }

    // Type annotations like [int, str] or [Dict[str, Any]]: commas and/or nested brackets.
    if text.contains(|c: char| matches!(c, ',' | '[' | ']')) {
        return true;
    }

    // Text made up only of punctuation or operators (no alphanumerics, no spaces).
    if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
        return true;
    }

    // Very short non-word text (at most two bytes, not purely alphabetic).
    if text.len() <= 2 && !text.chars().all(|c| c.is_alphabetic()) {
        return true;
    }

    // Anything containing a quote character, e.g. ["E501"], ["ALL"], ["E", "F"].
    if text.contains('"') || text.contains('\'') {
        return true;
    }

    // Descriptive text with a colon like [default: the project root].
    // Plain numeric ranges were already handled above.
    if text.contains(':') && text.contains(' ') {
        return true;
    }

    // Alert/admonition markers like [!WARN], [!NOTE].
    if text.starts_with('!') {
        return true;
    }

    // Single uppercase ASCII letters (likely type parameters) like [T], [U], [K], [V].
    if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
        return true;
    }

    COMMON_NON_REFS.iter().any(|&name| name == text)
}
179
180    /// Check if a position is inside any code span
181    fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
182        code_spans
183            .iter()
184            .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
185    }
186
187    /// Check if a byte position is within an HTML comment
188    fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
189        for m in HTML_COMMENT_PATTERN.find_iter(content) {
190            if m.start() <= byte_pos && byte_pos < m.end() {
191                return true;
192            }
193        }
194        false
195    }
196
197    /// Check if a byte position is within an HTML tag
198    fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
199        // Check HTML tags
200        for html_tag in ctx.html_tags().iter() {
201            if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
202                return true;
203            }
204        }
205        false
206    }
207
208    fn extract_references(&self, content: &str, mkdocs_mode: bool) -> HashSet<String> {
209        use crate::config::MarkdownFlavor;
210        use crate::utils::skip_context::is_mkdocs_snippet_line;
211
212        let mut references = HashSet::new();
213        let mut in_code_block = false;
214        let mut code_fence_marker = String::new();
215
216        for line in content.lines() {
217            // Skip lines that look like MkDocs snippet markers (only in MkDocs mode)
218            if is_mkdocs_snippet_line(
219                line,
220                if mkdocs_mode {
221                    MarkdownFlavor::MkDocs
222                } else {
223                    MarkdownFlavor::Standard
224                },
225            ) {
226                continue;
227            }
228            // Handle code block boundaries
229            if let Some(cap) = FENCED_CODE_START.captures(line) {
230                if let Some(fence) = cap.get(2) {
231                    // Get the fence marker (``` or ~~~) without the indentation
232                    let fence_str = fence.as_str();
233                    if !in_code_block {
234                        in_code_block = true;
235                        code_fence_marker = fence_str.to_string();
236                    } else if line.trim_start().starts_with(&code_fence_marker) {
237                        // Check if this could be a closing fence
238                        let trimmed = line.trim_start();
239                        // A closing fence should be just the fence characters, possibly with trailing whitespace
240                        if trimmed.starts_with(&code_fence_marker) {
241                            let after_fence = &trimmed[code_fence_marker.len()..];
242                            if after_fence.trim().is_empty() {
243                                in_code_block = false;
244                                code_fence_marker.clear();
245                            }
246                        }
247                    }
248                }
249                continue;
250            }
251
252            // Skip lines in code blocks
253            if in_code_block {
254                continue;
255            }
256
257            // Check for abbreviation syntax (*[ABBR]: Definition) and skip it
258            // Abbreviations are not reference links and should not be tracked
259            if line.trim_start().starts_with("*[") {
260                continue;
261            }
262
263            if let Some(cap) = REF_REGEX.captures(line) {
264                // Store references in lowercase for case-insensitive comparison
265                if let Some(reference) = cap.get(1) {
266                    references.insert(reference.as_str().to_lowercase());
267                }
268            }
269        }
270
271        references
272    }
273
274    fn find_undefined_references(
275        &self,
276        content: &str,
277        references: &HashSet<String>,
278        ctx: &crate::lint_context::LintContext,
279        mkdocs_mode: bool,
280    ) -> Vec<(usize, usize, usize, String)> {
281        let mut undefined = Vec::new();
282        let mut reported_refs = HashMap::new();
283        let mut in_code_block = false;
284        let mut code_fence_marker = String::new();
285        let mut in_example_section = false;
286
287        // Get code spans once for the entire function
288        let code_spans = ctx.code_spans();
289
290        // Use cached data for reference links and images
291        for link in &ctx.links {
292            if !link.is_reference {
293                continue; // Skip inline links
294            }
295
296            // Skip links inside code spans
297            if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
298                continue;
299            }
300
301            // Skip links inside HTML comments
302            if Self::is_in_html_comment(content, link.byte_offset) {
303                continue;
304            }
305
306            // Skip links inside HTML tags
307            if Self::is_in_html_tag(ctx, link.byte_offset) {
308                continue;
309            }
310
311            // Skip links inside math contexts
312            if is_in_math_context(ctx, link.byte_offset) {
313                continue;
314            }
315
316            // Skip links inside table cells
317            if is_in_table_cell(ctx, link.line, link.start_col) {
318                continue;
319            }
320
321            // Skip links inside frontmatter
322            if ctx.line_info(link.line).is_some_and(|info| info.in_front_matter) {
323                continue;
324            }
325
326            if let Some(ref_id) = &link.reference_id {
327                let reference_lower = ref_id.to_lowercase();
328
329                // Skip MkDocs auto-references if in MkDocs mode
330                // Check both the reference_id and the link text for shorthand references
331                // Strip backticks since MkDocs resolves `module.Class` as module.Class
332                let stripped_ref = Self::strip_backticks(ref_id);
333                let stripped_text = Self::strip_backticks(&link.text);
334                if mkdocs_mode
335                    && (is_mkdocs_auto_reference(stripped_ref)
336                        || is_mkdocs_auto_reference(stripped_text)
337                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
338                        || (link.text.as_str() != stripped_text && Self::is_valid_python_identifier(stripped_text)))
339                {
340                    continue;
341                }
342
343                // Check if reference is defined
344                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
345                    // Check if the line is in an example section or list item
346                    if let Some(line_info) = ctx.line_info(link.line) {
347                        if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
348                            in_example_section = true;
349                            continue;
350                        }
351
352                        if in_example_section {
353                            continue;
354                        }
355
356                        // Skip list items
357                        if LIST_ITEM_REGEX.is_match(&line_info.content) {
358                            continue;
359                        }
360
361                        // Skip lines that are HTML content
362                        let trimmed = line_info.content.trim_start();
363                        if trimmed.starts_with('<') {
364                            continue;
365                        }
366                    }
367
368                    let match_len = link.byte_end - link.byte_offset;
369                    undefined.push((link.line - 1, link.start_col, match_len, ref_id.clone()));
370                    reported_refs.insert(reference_lower, true);
371                }
372            }
373        }
374
375        // Use cached data for reference images
376        for image in &ctx.images {
377            if !image.is_reference {
378                continue; // Skip inline images
379            }
380
381            // Skip images inside code spans
382            if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
383                continue;
384            }
385
386            // Skip images inside HTML comments
387            if Self::is_in_html_comment(content, image.byte_offset) {
388                continue;
389            }
390
391            // Skip images inside HTML tags
392            if Self::is_in_html_tag(ctx, image.byte_offset) {
393                continue;
394            }
395
396            // Skip images inside math contexts
397            if is_in_math_context(ctx, image.byte_offset) {
398                continue;
399            }
400
401            // Skip images inside table cells
402            if is_in_table_cell(ctx, image.line, image.start_col) {
403                continue;
404            }
405
406            // Skip images inside frontmatter
407            if ctx.line_info(image.line).is_some_and(|info| info.in_front_matter) {
408                continue;
409            }
410
411            if let Some(ref_id) = &image.reference_id {
412                let reference_lower = ref_id.to_lowercase();
413
414                // Skip MkDocs auto-references if in MkDocs mode
415                // Check both the reference_id and the alt text for shorthand references
416                // Strip backticks since MkDocs resolves `module.Class` as module.Class
417                let stripped_ref = Self::strip_backticks(ref_id);
418                let stripped_alt = Self::strip_backticks(&image.alt_text);
419                if mkdocs_mode
420                    && (is_mkdocs_auto_reference(stripped_ref)
421                        || is_mkdocs_auto_reference(stripped_alt)
422                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
423                        || (image.alt_text.as_str() != stripped_alt && Self::is_valid_python_identifier(stripped_alt)))
424                {
425                    continue;
426                }
427
428                // Check if reference is defined
429                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
430                    // Check if the line is in an example section or list item
431                    if let Some(line_info) = ctx.line_info(image.line) {
432                        if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
433                            in_example_section = true;
434                            continue;
435                        }
436
437                        if in_example_section {
438                            continue;
439                        }
440
441                        // Skip list items
442                        if LIST_ITEM_REGEX.is_match(&line_info.content) {
443                            continue;
444                        }
445
446                        // Skip lines that are HTML content
447                        let trimmed = line_info.content.trim_start();
448                        if trimmed.starts_with('<') {
449                            continue;
450                        }
451                    }
452
453                    let match_len = image.byte_end - image.byte_offset;
454                    undefined.push((image.line - 1, image.start_col, match_len, ref_id.clone()));
455                    reported_refs.insert(reference_lower, true);
456                }
457            }
458        }
459
460        // Build a set of byte ranges that are already covered by parsed links/images
461        let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
462
463        // Add ranges from parsed links
464        for link in &ctx.links {
465            covered_ranges.push((link.byte_offset, link.byte_end));
466        }
467
468        // Add ranges from parsed images
469        for image in &ctx.images {
470            covered_ranges.push((image.byte_offset, image.byte_end));
471        }
472
473        // Sort ranges by start position
474        covered_ranges.sort_by_key(|&(start, _)| start);
475
476        // Handle shortcut references [text] which aren't captured in ctx.links
477        // Need to use regex for these
478        let lines: Vec<&str> = content.lines().collect();
479        in_example_section = false; // Reset for line-by-line processing
480
481        for (line_num, line) in lines.iter().enumerate() {
482            // Skip lines in frontmatter (convert 0-based to 1-based for line_info)
483            if ctx.line_info(line_num + 1).is_some_and(|info| info.in_front_matter) {
484                continue;
485            }
486
487            // Handle code blocks
488            if let Some(cap) = FENCED_CODE_START.captures(line) {
489                if let Some(fence) = cap.get(2) {
490                    // Get the fence marker (``` or ~~~) without the indentation
491                    let fence_str = fence.as_str();
492                    if !in_code_block {
493                        in_code_block = true;
494                        code_fence_marker = fence_str.to_string();
495                    } else if line.trim_start().starts_with(&code_fence_marker) {
496                        // Check if this could be a closing fence
497                        let trimmed = line.trim_start();
498                        // A closing fence should be just the fence characters, possibly with trailing whitespace
499                        if trimmed.starts_with(&code_fence_marker) {
500                            let after_fence = &trimmed[code_fence_marker.len()..];
501                            if after_fence.trim().is_empty() {
502                                in_code_block = false;
503                                code_fence_marker.clear();
504                            }
505                        }
506                    }
507                }
508                continue;
509            }
510
511            if in_code_block {
512                continue;
513            }
514
515            // Check for example sections
516            if OUTPUT_EXAMPLE_START.is_match(line) {
517                in_example_section = true;
518                continue;
519            }
520
521            if in_example_section {
522                // Check if we're exiting the example section (another heading)
523                if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
524                    in_example_section = false;
525                } else {
526                    continue;
527                }
528            }
529
530            // Skip list items
531            if LIST_ITEM_REGEX.is_match(line) {
532                continue;
533            }
534
535            // Skip lines that are HTML content
536            let trimmed_line = line.trim_start();
537            if trimmed_line.starts_with('<') {
538                continue;
539            }
540
541            // Skip GitHub alerts/callouts (e.g., > [!TIP])
542            if GITHUB_ALERT_REGEX.is_match(line) {
543                continue;
544            }
545
546            // Skip abbreviation definitions (*[ABBR]: Definition)
547            // These are not reference links and should not be checked
548            if trimmed_line.starts_with("*[") {
549                continue;
550            }
551
552            // Collect positions of brackets that are part of URLs (IPv6, etc.)
553            // so we can exclude them from reference checking
554            let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
555            for mat in URL_WITH_BRACKETS.find_iter(line) {
556                // Find all bracket pairs within this URL match
557                let url_str = mat.as_str();
558                let url_start = mat.start();
559
560                // Find brackets within the URL (e.g., in https://[::1]:8080)
561                let mut idx = 0;
562                while idx < url_str.len() {
563                    if let Some(bracket_start) = url_str[idx..].find('[') {
564                        let bracket_start_abs = url_start + idx + bracket_start;
565                        if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
566                            let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
567                            url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
568                            idx += bracket_start + bracket_end + 2;
569                        } else {
570                            break;
571                        }
572                    } else {
573                        break;
574                    }
575                }
576            }
577
578            // Check shortcut references: [reference]
579            if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
580                for cap in captures {
581                    if let Some(ref_match) = cap.get(1) {
582                        // Check if this bracket is part of a URL (IPv6, etc.)
583                        let bracket_start = cap.get(0).unwrap().start();
584                        let bracket_end = cap.get(0).unwrap().end();
585
586                        // Skip if this bracket pair is within any URL bracket range
587                        let is_in_url = url_bracket_ranges
588                            .iter()
589                            .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
590
591                        if is_in_url {
592                            continue;
593                        }
594
595                        let reference = ref_match.as_str();
596                        let reference_lower = reference.to_lowercase();
597
598                        // Skip patterns that are likely not markdown references
599                        if Self::is_likely_not_reference(reference) {
600                            continue;
601                        }
602
603                        // Skip GitHub alerts (including extended types)
604                        if let Some(alert_type) = reference.strip_prefix('!')
605                            && matches!(
606                                alert_type,
607                                "NOTE"
608                                    | "TIP"
609                                    | "WARNING"
610                                    | "IMPORTANT"
611                                    | "CAUTION"
612                                    | "INFO"
613                                    | "SUCCESS"
614                                    | "FAILURE"
615                                    | "DANGER"
616                                    | "BUG"
617                                    | "EXAMPLE"
618                                    | "QUOTE"
619                            )
620                        {
621                            continue;
622                        }
623
624                        // Skip MkDocs snippet section markers like [start:section] or [end:section]
625                        // when they appear as part of snippet syntax (e.g., # -8<- [start:section])
626                        if mkdocs_mode
627                            && (reference.starts_with("start:") || reference.starts_with("end:"))
628                            && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
629                                || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
630                        {
631                            continue;
632                        }
633
634                        // Skip MkDocs auto-references if in MkDocs mode
635                        // Strip backticks since MkDocs resolves `module.Class` as module.Class
636                        let stripped_ref = Self::strip_backticks(reference);
637                        if mkdocs_mode
638                            && (is_mkdocs_auto_reference(stripped_ref)
639                                || (reference != stripped_ref && Self::is_valid_python_identifier(stripped_ref)))
640                        {
641                            continue;
642                        }
643
644                        if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
645                            let full_match = cap.get(0).unwrap();
646                            let col = full_match.start();
647
648                            // Skip if inside code span
649                            let code_spans = ctx.code_spans();
650                            if Self::is_in_code_span(line_num + 1, col, &code_spans) {
651                                continue;
652                            }
653
654                            // Check if this position is within a covered range
655                            let line_start_byte = ctx.line_offsets[line_num];
656                            let byte_pos = line_start_byte + col;
657
658                            // Skip if inside code block
659                            if crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block(
660                                &ctx.code_blocks,
661                                byte_pos,
662                            ) {
663                                continue;
664                            }
665
666                            // Skip if inside HTML comment
667                            if Self::is_in_html_comment(content, byte_pos) {
668                                continue;
669                            }
670
671                            // Skip if inside HTML tag
672                            if Self::is_in_html_tag(ctx, byte_pos) {
673                                continue;
674                            }
675
676                            // Skip if inside math context
677                            if is_in_math_context(ctx, byte_pos) {
678                                continue;
679                            }
680
681                            // Skip if inside table cell
682                            if is_in_table_cell(ctx, line_num + 1, col) {
683                                continue;
684                            }
685
686                            let byte_end = byte_pos + (full_match.end() - full_match.start());
687
688                            // Check if this shortcut ref overlaps with any parsed link/image
689                            let mut is_covered = false;
690                            for &(range_start, range_end) in &covered_ranges {
691                                if range_start <= byte_pos && byte_end <= range_end {
692                                    // This shortcut ref is completely within a parsed link/image
693                                    is_covered = true;
694                                    break;
695                                }
696                                if range_start > byte_end {
697                                    // No need to check further (ranges are sorted)
698                                    break;
699                                }
700                            }
701
702                            if is_covered {
703                                continue;
704                            }
705
706                            // More sophisticated checks to avoid false positives
707
708                            // Check 1: If preceded by ], this might be part of [text][ref]
709                            // Look for the pattern ...][ref] and check if there's a matching [ before
710                            let line_chars: Vec<char> = line.chars().collect();
711                            if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
712                                // Look backwards for a [ that would make this [text][ref]
713                                let mut bracket_count = 1; // We already saw one ]
714                                let mut check_pos = col.saturating_sub(2);
715                                let mut found_opening = false;
716
717                                while check_pos > 0 && check_pos < line_chars.len() {
718                                    match line_chars.get(check_pos) {
719                                        Some(&']') => bracket_count += 1,
720                                        Some(&'[') => {
721                                            bracket_count -= 1;
722                                            if bracket_count == 0 {
723                                                // Check if this [ is escaped
724                                                if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
725                                                    found_opening = true;
726                                                }
727                                                break;
728                                            }
729                                        }
730                                        _ => {}
731                                    }
732                                    if check_pos == 0 {
733                                        break;
734                                    }
735                                    check_pos = check_pos.saturating_sub(1);
736                                }
737
738                                if found_opening {
739                                    // This is part of [text][ref], skip it
740                                    continue;
741                                }
742                            }
743
744                            // Check 2: If there's an escaped bracket pattern before this
745                            // e.g., \[text\][ref], the [ref] shouldn't be treated as a shortcut
746                            let before_text = &line[..col];
747                            if before_text.contains("\\]") {
748                                // Check if there's a \[ before the \]
749                                if let Some(escaped_close_pos) = before_text.rfind("\\]") {
750                                    let search_text = &before_text[..escaped_close_pos];
751                                    if search_text.contains("\\[") {
752                                        // This looks like \[...\][ref], skip it
753                                        continue;
754                                    }
755                                }
756                            }
757
758                            let match_len = full_match.end() - full_match.start();
759                            undefined.push((line_num, col, match_len, reference.to_string()));
760                            reported_refs.insert(reference_lower, true);
761                        }
762                    }
763                }
764            }
765        }
766
767        undefined
768    }
769}
770
771impl Rule for MD052ReferenceLinkImages {
772    fn name(&self) -> &'static str {
773        "MD052"
774    }
775
776    fn description(&self) -> &'static str {
777        "Reference links and images should use a reference that exists"
778    }
779
780    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
781        let content = ctx.content;
782        let mut warnings = Vec::new();
783
784        // OPTIMIZATION: Early exit if no reference-style links/images exist
785        // Check if there are any reference-style links or images in the document
786        let has_reference_links = ctx.links.iter().any(|l| l.is_reference);
787        let has_reference_images = ctx.images.iter().any(|i| i.is_reference);
788
789        // Quick check: If document contains no brackets at all, nothing to check
790        if !content.contains('[') {
791            return Ok(warnings);
792        }
793
794        // Quick check for reference definitions
795        let has_reference_definitions = content.contains("]:");
796
797        // If we have no reference links/images AND no reference definitions,
798        // then check if we might have shortcut references [text]
799        if !has_reference_links && !has_reference_images && !has_reference_definitions {
800            // Only do expensive shortcut checking if we have brackets but no links/images/refs
801            // This handles the case where all brackets are inline links [text](url)
802            let all_brackets_are_inline = ctx.links.iter().all(|l| !l.is_reference)
803                && ctx.images.iter().all(|i| !i.is_reference)
804                && ctx.links.len() + ctx.images.len() > 0;
805
806            if all_brackets_are_inline {
807                return Ok(warnings); // All brackets accounted for as inline links/images
808            }
809        }
810
811        // Check if we're in MkDocs mode from the context
812        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
813
814        let references = self.extract_references(content, mkdocs_mode);
815
816        // Use optimized detection method with cached link/image data
817        for (line_num, col, match_len, reference) in
818            self.find_undefined_references(content, &references, ctx, mkdocs_mode)
819        {
820            let lines: Vec<&str> = content.lines().collect();
821            let line_content = lines.get(line_num).unwrap_or(&"");
822
823            // Calculate precise character range for the entire undefined reference
824            let (start_line, start_col, end_line, end_col) =
825                calculate_match_range(line_num + 1, line_content, col, match_len);
826
827            warnings.push(LintWarning {
828                rule_name: Some(self.name()),
829                line: start_line,
830                column: start_col,
831                end_line,
832                end_column: end_col,
833                message: format!("Reference '{reference}' not found"),
834                severity: Severity::Warning,
835                fix: None,
836            });
837        }
838
839        Ok(warnings)
840    }
841
842    /// Check if this rule should be skipped for performance
843    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
844        // Skip if content is empty or has no links/images
845        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
846    }
847
848    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
849        let content = ctx.content;
850        // No automatic fix available for undefined references
851        Ok(content.to_string())
852    }
853
854    fn as_any(&self) -> &dyn std::any::Any {
855        self
856    }
857
858    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
859    where
860        Self: Sized,
861    {
862        // Flavor is now accessed from LintContext during check
863        Box::new(MD052ReferenceLinkImages::new())
864    }
865}
866
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// Runs MD052 over `content` with the given flavor and returns its warnings.
    ///
    /// Panics if `check` returns an error.
    fn lint_with_flavor(content: &str, flavor: MarkdownFlavor) -> Vec<LintWarning> {
        let rule = MD052ReferenceLinkImages::new();
        let ctx = LintContext::new(content, flavor);
        rule.check(&ctx).unwrap()
    }

    /// Runs MD052 over `content` with the standard Markdown flavor.
    fn lint(content: &str) -> Vec<LintWarning> {
        lint_with_flavor(content, MarkdownFlavor::Standard)
    }

    #[test]
    fn test_valid_reference_link() {
        let result = lint("[text][ref]\n\n[ref]: https://example.com");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_undefined_reference_link() {
        let result = lint("[text][undefined]");

        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Reference 'undefined' not found"));
    }

    #[test]
    fn test_valid_reference_image() {
        let result = lint("![alt][img]\n\n[img]: image.jpg");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_undefined_reference_image() {
        let result = lint("![alt][missing]");

        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Reference 'missing' not found"));
    }

    #[test]
    fn test_case_insensitive_references() {
        let result = lint("[Text][REF]\n\n[ref]: https://example.com");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_shortcut_reference_valid() {
        let result = lint("[ref]\n\n[ref]: https://example.com");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_shortcut_reference_undefined() {
        let result = lint("[undefined]");

        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Reference 'undefined' not found"));
    }

    #[test]
    fn test_inline_links_ignored() {
        let result = lint("[text](https://example.com)");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_inline_images_ignored() {
        let result = lint("![alt](image.jpg)");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_references_in_code_blocks_ignored() {
        let result = lint("```\n[undefined]\n```\n\n[ref]: https://example.com");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_references_in_inline_code_ignored() {
        let result = lint("`[undefined]`");

        // References inside inline code spans should be ignored
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_comprehensive_inline_code_detection() {
        let content = r#"# Test

This `[inside]` should be ignored.
This [outside] should be flagged.
Reference links `[text][ref]` in code are ignored.
Regular reference [text][missing] should be flagged.
Images `![alt][img]` in code are ignored.
Regular image ![alt][badimg] should be flagged.

Multiple `[one]` and `[two]` in code ignored, but [three] is not.

```
[code block content] should be ignored
```

`Multiple [refs] in [same] code span` ignored."#;

        let result = lint(content);

        // Should only flag: outside, missing, badimg, three (4 total)
        assert_eq!(result.len(), 4);

        let messages: Vec<&str> = result.iter().map(|w| w.message.as_str()).collect();
        let mentions = |needle: &str| messages.iter().any(|m| m.contains(needle));

        assert!(mentions("outside"));
        assert!(mentions("missing"));
        assert!(mentions("badimg"));
        assert!(mentions("three"));

        // Should NOT flag any references inside code spans
        assert!(!mentions("inside"));
        assert!(!mentions("one"));
        assert!(!mentions("two"));
        assert!(!mentions("refs"));
        assert!(!mentions("same"));
    }

    #[test]
    fn test_multiple_undefined_references() {
        let result = lint("[link1][ref1] [link2][ref2] [link3][ref3]");

        assert_eq!(result.len(), 3);
        assert!(result[0].message.contains("ref1"));
        assert!(result[1].message.contains("ref2"));
        assert!(result[2].message.contains("ref3"));
    }

    #[test]
    fn test_mixed_valid_and_undefined() {
        let result = lint("[valid][ref] [invalid][missing]\n\n[ref]: https://example.com");

        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("missing"));
    }

    #[test]
    fn test_empty_reference() {
        let result = lint("[text][]\n\n[ref]: https://example.com");

        // Empty reference should use the link text as reference
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_escaped_brackets_ignored() {
        let result = lint("\\[not a link\\]");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_list_items_ignored() {
        let result = lint("- [undefined]\n* [another]\n+ [third]");

        // List items that look like shortcut references should be ignored
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_output_example_section_ignored() {
        let result = lint("## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]");

        // Only the reference outside the Output section should be flagged
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("missing"));
    }

    #[test]
    fn test_reference_definitions_in_code_blocks_ignored() {
        let result = lint("[link][ref]\n\n```\n[ref]: https://example.com\n```");

        // Reference defined in code block should not count
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("ref"));
    }

    #[test]
    fn test_multiple_references_to_same_undefined() {
        let result = lint("[first][missing] [second][missing] [third][missing]");

        // Should only report once per unique reference
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("missing"));
    }

    #[test]
    fn test_reference_with_special_characters() {
        let result = lint("[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_issue_51_html_attribute_not_reference() {
        // Test for issue #51 - HTML attributes with square brackets shouldn't be treated as references
        let content = r#"# Example

## Test

Want to fill out this form?

<form method="post">
    <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
</form>"#;
        let result = lint(content);

        assert_eq!(
            result.len(),
            0,
            "HTML attributes with square brackets should not be flagged as undefined references"
        );
    }

    #[test]
    fn test_extract_references() {
        let rule = MD052ReferenceLinkImages::new();
        let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
        let refs = rule.extract_references(content, false);

        // Labels are expected in lowercase regardless of how they were written
        assert_eq!(refs.len(), 3);
        assert!(refs.contains("ref1"));
        assert!(refs.contains("ref2"));
        assert!(refs.contains("ref3"));
    }

    #[test]
    fn test_inline_code_not_flagged() {
        // Test that arrays in inline code are not flagged as references
        let content = r#"# Test

Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.

Also, `[todo]` is not a reference link.

But this [reference] should be flagged.

And this `[inline code]` should not be flagged.
"#;

        let warnings = lint(content);

        // Should only flag [reference], not the ones in backticks
        assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
        assert!(warnings[0].message.contains("'reference'"));
    }

    #[test]
    fn test_code_block_references_ignored() {
        let content = r#"# Test

```markdown
[undefined] reference in code block
![undefined] image in code block
```

[real-undefined] reference outside
"#;

        let warnings = lint(content);

        // Should only flag [real-undefined], not the ones in code block
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("'real-undefined'"));
    }

    #[test]
    fn test_html_comments_ignored() {
        // Test for issue #20 - MD052 should not flag content inside HTML comments

        // Test the exact case from issue #20
        let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
<!--- set_env EDITOR 'python3 fake_editor.py' -->

```bash
$ python3 vote.py
3 votes for: 2
2 votes for: 3, 4
```"#;
        let result = lint(content);
        assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");

        // Test various reference patterns inside HTML comments
        let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
Normal [text][undefined]
<!-- Another [comment][with] references -->"#;
        let result = lint(content);
        assert_eq!(
            result.len(),
            1,
            "Should only flag the undefined reference outside comments"
        );
        assert!(result[0].message.contains("undefined"));

        // Test multi-line HTML comments
        let content = r#"<!--
[ref1]
[ref2][ref3]
-->
[actual][undefined]"#;
        let result = lint(content);
        assert_eq!(
            result.len(),
            1,
            "Should not flag references in multi-line HTML comments"
        );
        assert!(result[0].message.contains("undefined"));

        // Test mixed scenarios
        let content = r#"<!-- Comment with [1:] pattern -->
Valid [link][ref]
<!-- More [refs][in][comments] -->
![image][missing]

[ref]: https://example.com"#;
        let result = lint(content);
        assert_eq!(result.len(), 1, "Should only flag missing image reference");
        assert!(result[0].message.contains("missing"));
    }

    #[test]
    fn test_frontmatter_ignored() {
        // Test for issue #24 - MD052 should not flag content inside frontmatter

        // Test YAML frontmatter with arrays and references
        let content = r#"---
layout: post
title: "My Jekyll Post"
date: 2023-01-01
categories: blog
tags: ["test", "example"]
author: John Doe
---

# My Blog Post

This is the actual markdown content that should be linted.

[undefined] reference should be flagged.

## Section 1

Some content here."#;
        let result = lint(content);

        // Should only flag [undefined] in the content, not the ["test", "example"] array in frontmatter
        assert_eq!(
            result.len(),
            1,
            "Should only flag the undefined reference outside frontmatter"
        );
        assert!(result[0].message.contains("undefined"));

        // Test TOML frontmatter
        let content = r#"+++
title = "My Post"
tags = ["example", "test"]
+++

# Content

[missing] reference should be flagged."#;
        let result = lint(content);
        assert_eq!(
            result.len(),
            1,
            "Should only flag the undefined reference outside TOML frontmatter"
        );
        assert!(result[0].message.contains("missing"));
    }

    #[test]
    fn test_mkdocs_snippet_markers_not_flagged() {
        // Test for issue #68 - MkDocs snippet selection markers should not be flagged as undefined references

        // Test snippet section markers
        let content = r#"# Document with MkDocs Snippets

Some content here.

# -8<- [start:remote-content]

This is the remote content section.

# -8<- [end:remote-content]

More content here.

<!-- --8<-- [start:another-section] -->
Content in another section
<!-- --8<-- [end:another-section] -->"#;
        let result = lint_with_flavor(content, MarkdownFlavor::MkDocs);

        // Should not flag any snippet markers as undefined references
        assert_eq!(
            result.len(),
            0,
            "Should not flag MkDocs snippet markers as undefined references"
        );

        // Test that the snippet marker lines are properly skipped
        // but regular undefined references on other lines are still caught
        let content = r#"# Document

# -8<- [start:section]
Content with [reference] inside snippet section
# -8<- [end:section]

Regular [undefined] reference outside snippet markers."#;
        let result = lint_with_flavor(content, MarkdownFlavor::MkDocs);

        assert_eq!(
            result.len(),
            2,
            "Should flag undefined references but skip snippet marker lines"
        );
        // The references inside the content should be flagged, but not start: and end:
        assert!(result[0].message.contains("reference"));
        assert!(result[1].message.contains("undefined"));

        // Test in standard mode - should flag the markers as undefined
        let content = r#"# Document

# -8<- [start:section]
# -8<- [end:section]"#;
        let result = lint(content);

        assert_eq!(
            result.len(),
            2,
            "In standard mode, snippet markers should be flagged as undefined references"
        );
    }

    #[test]
    fn test_github_alerts_not_flagged() {
        // Test for issue #60 - GitHub alerts should not be flagged as undefined references

        // Test various GitHub alert types
        let content = r#"# Document with GitHub Alerts

> [!NOTE]
> This is a note alert.

> [!TIP]
> This is a tip alert.

> [!IMPORTANT]
> This is an important alert.

> [!WARNING]
> This is a warning alert.

> [!CAUTION]
> This is a caution alert.

Regular content with [undefined] reference."#;
        let result = lint(content);

        // Should only flag the undefined reference, not the GitHub alerts
        assert_eq!(
            result.len(),
            1,
            "Should only flag the undefined reference, not GitHub alerts"
        );
        assert!(result[0].message.contains("undefined"));
        assert_eq!(result[0].line, 18); // Line with [undefined]

        // Test GitHub alerts with additional content
        let content = r#"> [!TIP]
> Here's a useful tip about [something].
> Multiple lines are allowed.

[something] is mentioned but not defined."#;
        let result = lint(content);

        // `[something]` appears twice (inside the alert body and in the final
        // paragraph) but exactly one warning is expected.
        // NOTE(review): presumably because each undefined label is reported only
        // once; this test does not pin which occurrence is reported — confirm.
        assert_eq!(result.len(), 1, "Should flag undefined reference");
        assert!(result[0].message.contains("something"));

        // Test GitHub alerts with proper references
        let content = r#"> [!NOTE]
> See [reference] for more details.

[reference]: https://example.com"#;
        let result = lint(content);

        // Should not flag anything - [!NOTE] is GitHub alert and [reference] is defined
        assert_eq!(result.len(), 0, "Should not flag GitHub alerts or defined references");
    }
}