rumdl_lib/rules/
md052_reference_links_images.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::{HTML_COMMENT_PATTERN, SHORTCUT_REF_REGEX};
5use crate::utils::skip_context::{is_in_front_matter, is_in_math_context, is_in_table_cell};
6use lazy_static::lazy_static;
7use regex::Regex;
8use std::collections::{HashMap, HashSet};
9
lazy_static! {
    // Pattern to match reference definitions [ref]: url
    // Note: the part after the colon is `\s*.*`, so an empty definition like [ref]: still matches
    // The capturing group (the label) accepts escaped characters and one level of nested
    // brackets to support cases like [`union[t, none]`]:
    static ref REF_REGEX: Regex = Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap();

    // Pattern for list items to exclude from reference checks (standard regex is fine)
    // Matches a `-`, `*` or `+` bullet, optionally followed by a task checkbox such as `[ ]` or `[x]`
    static ref LIST_ITEM_REGEX: Regex = Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap();

    // Pattern for code blocks (standard regex is fine)
    // Group 1 is the indentation, group 2 is the fence marker (three or more backticks or tildes)
    static ref FENCED_CODE_START: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap();

    // Pattern for output example sections (standard regex is fine)
    // Matches a heading whose entire text is one of the listed titles
    static ref OUTPUT_EXAMPLE_START: Regex = Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap();

    // Pattern for GitHub alerts/callouts in blockquotes (e.g., > [!NOTE], > [!TIP], etc.)
    // Extended to include additional common alert types
    static ref GITHUB_ALERT_REGEX: Regex = Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]").unwrap();

    // Pattern to detect URLs that may contain brackets (IPv6, API endpoints, etc.)
    // This pattern specifically looks for:
    // - IPv6 addresses: https://[::1] or https://[2001:db8::1]
    // - IPv6 with zone IDs: https://[fe80::1%eth0]
    // - IPv6 mixed notation: https://[::ffff:192.0.2.1]
    // - API paths with array notation: https://api.example.com/users[0]
    // But NOT markdown reference links that happen to follow URLs
    static ref URL_WITH_BRACKETS: Regex = Regex::new(
        r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])"
    ).unwrap();
}
40
/// Rule MD052: Reference links and images should use a label that is defined
///
/// See [docs/md052.md](../../docs/md052.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when a reference link or image uses a reference that isn't defined.
#[derive(Clone, Default)]
pub struct MD052ReferenceLinkImages {}
48
49impl MD052ReferenceLinkImages {
50    pub fn new() -> Self {
51        Self {}
52    }
53
54    /// Strip surrounding backticks from a string
55    /// Used for MkDocs auto-reference detection where `module.Class` should be treated as module.Class
56    fn strip_backticks(s: &str) -> &str {
57        s.trim_start_matches('`').trim_end_matches('`')
58    }
59
60    /// Check if a string is a valid Python identifier
61    /// Used for MkDocs auto-reference detection where single-word backtick-wrapped identifiers
62    /// like `str`, `int`, etc. should be accepted as valid auto-references
63    fn is_valid_python_identifier(s: &str) -> bool {
64        if s.is_empty() {
65            return false;
66        }
67        let first_char = s.chars().next().unwrap();
68        if !first_char.is_ascii_alphabetic() && first_char != '_' {
69            return false;
70        }
71        s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
72    }
73
74    /// Check if a pattern is likely NOT a markdown reference
75    /// Returns true if this pattern should be skipped
76    fn is_likely_not_reference(text: &str) -> bool {
77        // Skip numeric patterns (array indices, ranges)
78        if text.chars().all(|c| c.is_ascii_digit()) {
79            return true;
80        }
81
82        // Skip numeric ranges like [1:3], [0:10], etc.
83        if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
84            return true;
85        }
86
87        // Skip patterns that look like config sections [tool.something], [section.subsection]
88        // But not if they contain other non-alphanumeric chars like hyphens or underscores
89        if text.contains('.') && !text.contains(' ') && !text.contains('-') && !text.contains('_') {
90            // Config sections typically have dots, no spaces, and only alphanumeric + dots
91            return true;
92        }
93
94        // Skip glob/wildcard patterns like [*], [...], [**]
95        if text == "*" || text == "..." || text == "**" {
96            return true;
97        }
98
99        // Skip patterns that look like file paths [dir/file], [src/utils]
100        if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
101            return true;
102        }
103
104        // Skip programming type annotations like [int, str], [Dict[str, Any]]
105        // These typically have commas and/or nested brackets
106        if text.contains(',') || text.contains('[') || text.contains(']') {
107            // Check if it looks like a type annotation pattern
108            return true;
109        }
110
111        // Note: We don't filter out patterns with backticks because backticks in reference names
112        // are valid markdown syntax, e.g., [`dataclasses.InitVar`] is a valid reference name
113
114        // Skip patterns that look like module/class paths ONLY if they don't have backticks
115        // Backticks indicate intentional code formatting in a reference name
116        // e.g., skip [dataclasses.initvar] but allow [`typing.ClassVar`]
117        if !text.contains('`')
118            && text.contains('.')
119            && !text.contains(' ')
120            && !text.contains('-')
121            && !text.contains('_')
122        {
123            return true;
124        }
125
126        // Note: We don't filter based on word count anymore because legitimate references
127        // can have many words, like "python language reference for import statements"
128        // Word count filtering was causing false positives where valid references were
129        // being incorrectly flagged as unused
130
131        // Skip patterns that are just punctuation or operators
132        if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
133            return true;
134        }
135
136        // Skip very short non-word patterns (likely operators or syntax)
137        if text.len() <= 2 && !text.chars().all(|c| c.is_alphabetic()) {
138            return true;
139        }
140
141        // Skip quoted patterns like ["E501"], ["ALL"], ["E", "F"]
142        if (text.starts_with('"') && text.ends_with('"'))
143            || (text.starts_with('\'') && text.ends_with('\''))
144            || text.contains('"')
145            || text.contains('\'')
146        {
147            return true;
148        }
149
150        // Skip descriptive patterns with colon like [default: the project root]
151        // But allow simple numeric ranges which are handled above
152        if text.contains(':') && text.contains(' ') {
153            return true;
154        }
155
156        // Skip alert/admonition patterns like [!WARN], [!NOTE], etc.
157        if text.starts_with('!') {
158            return true;
159        }
160
161        // Skip single uppercase letters (likely type parameters) like [T], [U], [K], [V]
162        if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
163            return true;
164        }
165
166        // Skip common programming type names and short identifiers
167        // that are likely not markdown references
168        let common_non_refs = [
169            "object", "Object", "any", "Any", "inv", "void", "bool", "int", "float", "str", "char", "i8", "i16", "i32",
170            "i64", "i128", "isize", "u8", "u16", "u32", "u64", "u128", "usize", "f32", "f64",
171        ];
172
173        if common_non_refs.contains(&text) {
174            return true;
175        }
176
177        false
178    }
179
180    /// Check if a position is inside any code span
181    fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
182        code_spans
183            .iter()
184            .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
185    }
186
187    /// Check if a byte position is within an HTML comment
188    fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
189        for m in HTML_COMMENT_PATTERN.find_iter(content) {
190            if m.start() <= byte_pos && byte_pos < m.end() {
191                return true;
192            }
193        }
194        false
195    }
196
197    /// Check if a byte position is within an HTML tag
198    fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
199        // Check HTML tags
200        for html_tag in ctx.html_tags().iter() {
201            if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
202                return true;
203            }
204        }
205        false
206    }
207
208    fn extract_references(&self, content: &str, mkdocs_mode: bool) -> HashSet<String> {
209        use crate::config::MarkdownFlavor;
210        use crate::utils::skip_context::is_mkdocs_snippet_line;
211
212        let mut references = HashSet::new();
213        let mut in_code_block = false;
214        let mut code_fence_marker = String::new();
215
216        for line in content.lines() {
217            // Skip lines that look like MkDocs snippet markers (only in MkDocs mode)
218            if is_mkdocs_snippet_line(
219                line,
220                if mkdocs_mode {
221                    MarkdownFlavor::MkDocs
222                } else {
223                    MarkdownFlavor::Standard
224                },
225            ) {
226                continue;
227            }
228            // Handle code block boundaries
229            if let Some(cap) = FENCED_CODE_START.captures(line) {
230                if let Some(fence) = cap.get(2) {
231                    // Get the fence marker (``` or ~~~) without the indentation
232                    let fence_str = fence.as_str();
233                    if !in_code_block {
234                        in_code_block = true;
235                        code_fence_marker = fence_str.to_string();
236                    } else if line.trim_start().starts_with(&code_fence_marker) {
237                        // Check if this could be a closing fence
238                        let trimmed = line.trim_start();
239                        // A closing fence should be just the fence characters, possibly with trailing whitespace
240                        if trimmed.starts_with(&code_fence_marker) {
241                            let after_fence = &trimmed[code_fence_marker.len()..];
242                            if after_fence.trim().is_empty() {
243                                in_code_block = false;
244                                code_fence_marker.clear();
245                            }
246                        }
247                    }
248                }
249                continue;
250            }
251
252            // Skip lines in code blocks
253            if in_code_block {
254                continue;
255            }
256
257            // Check for abbreviation syntax (*[ABBR]: Definition) and skip it
258            // Abbreviations are not reference links and should not be tracked
259            if line.trim_start().starts_with("*[") {
260                continue;
261            }
262
263            if let Some(cap) = REF_REGEX.captures(line) {
264                // Store references in lowercase for case-insensitive comparison
265                if let Some(reference) = cap.get(1) {
266                    references.insert(reference.as_str().to_lowercase());
267                }
268            }
269        }
270
271        references
272    }
273
274    fn find_undefined_references(
275        &self,
276        content: &str,
277        references: &HashSet<String>,
278        ctx: &crate::lint_context::LintContext,
279        mkdocs_mode: bool,
280    ) -> Vec<(usize, usize, usize, String)> {
281        let mut undefined = Vec::new();
282        let mut reported_refs = HashMap::new();
283        let mut in_code_block = false;
284        let mut code_fence_marker = String::new();
285        let mut in_example_section = false;
286
287        // Get code spans once for the entire function
288        let code_spans = ctx.code_spans();
289
290        // Use cached data for reference links and images
291        for link in &ctx.links {
292            if !link.is_reference {
293                continue; // Skip inline links
294            }
295
296            // Skip links inside code spans
297            if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
298                continue;
299            }
300
301            // Skip links inside HTML comments
302            if Self::is_in_html_comment(content, link.byte_offset) {
303                continue;
304            }
305
306            // Skip links inside HTML tags
307            if Self::is_in_html_tag(ctx, link.byte_offset) {
308                continue;
309            }
310
311            // Skip links inside math contexts
312            if is_in_math_context(ctx, link.byte_offset) {
313                continue;
314            }
315
316            // Skip links inside table cells
317            if is_in_table_cell(ctx, link.line, link.start_col) {
318                continue;
319            }
320
321            // Skip links inside frontmatter (convert from 1-based to 0-based line numbers)
322            if is_in_front_matter(content, link.line.saturating_sub(1)) {
323                continue;
324            }
325
326            if let Some(ref_id) = &link.reference_id {
327                let reference_lower = ref_id.to_lowercase();
328
329                // Skip MkDocs auto-references if in MkDocs mode
330                // Check both the reference_id and the link text for shorthand references
331                // Strip backticks since MkDocs resolves `module.Class` as module.Class
332                let stripped_ref = Self::strip_backticks(ref_id);
333                let stripped_text = Self::strip_backticks(&link.text);
334                if mkdocs_mode
335                    && (is_mkdocs_auto_reference(stripped_ref)
336                        || is_mkdocs_auto_reference(stripped_text)
337                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
338                        || (link.text.as_str() != stripped_text && Self::is_valid_python_identifier(stripped_text)))
339                {
340                    continue;
341                }
342
343                // Check if reference is defined
344                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
345                    // Check if the line is in an example section or list item
346                    if let Some(line_info) = ctx.line_info(link.line) {
347                        if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
348                            in_example_section = true;
349                            continue;
350                        }
351
352                        if in_example_section {
353                            continue;
354                        }
355
356                        // Skip list items
357                        if LIST_ITEM_REGEX.is_match(&line_info.content) {
358                            continue;
359                        }
360
361                        // Skip lines that are HTML content
362                        let trimmed = line_info.content.trim_start();
363                        if trimmed.starts_with('<') {
364                            continue;
365                        }
366                    }
367
368                    let match_len = link.byte_end - link.byte_offset;
369                    undefined.push((link.line - 1, link.start_col, match_len, ref_id.clone()));
370                    reported_refs.insert(reference_lower, true);
371                }
372            }
373        }
374
375        // Use cached data for reference images
376        for image in &ctx.images {
377            if !image.is_reference {
378                continue; // Skip inline images
379            }
380
381            // Skip images inside code spans
382            if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
383                continue;
384            }
385
386            // Skip images inside HTML comments
387            if Self::is_in_html_comment(content, image.byte_offset) {
388                continue;
389            }
390
391            // Skip images inside HTML tags
392            if Self::is_in_html_tag(ctx, image.byte_offset) {
393                continue;
394            }
395
396            // Skip images inside math contexts
397            if is_in_math_context(ctx, image.byte_offset) {
398                continue;
399            }
400
401            // Skip images inside table cells
402            if is_in_table_cell(ctx, image.line, image.start_col) {
403                continue;
404            }
405
406            // Skip images inside frontmatter (convert from 1-based to 0-based line numbers)
407            if is_in_front_matter(content, image.line.saturating_sub(1)) {
408                continue;
409            }
410
411            if let Some(ref_id) = &image.reference_id {
412                let reference_lower = ref_id.to_lowercase();
413
414                // Skip MkDocs auto-references if in MkDocs mode
415                // Check both the reference_id and the alt text for shorthand references
416                // Strip backticks since MkDocs resolves `module.Class` as module.Class
417                let stripped_ref = Self::strip_backticks(ref_id);
418                let stripped_alt = Self::strip_backticks(&image.alt_text);
419                if mkdocs_mode
420                    && (is_mkdocs_auto_reference(stripped_ref)
421                        || is_mkdocs_auto_reference(stripped_alt)
422                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
423                        || (image.alt_text.as_str() != stripped_alt && Self::is_valid_python_identifier(stripped_alt)))
424                {
425                    continue;
426                }
427
428                // Check if reference is defined
429                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
430                    // Check if the line is in an example section or list item
431                    if let Some(line_info) = ctx.line_info(image.line) {
432                        if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
433                            in_example_section = true;
434                            continue;
435                        }
436
437                        if in_example_section {
438                            continue;
439                        }
440
441                        // Skip list items
442                        if LIST_ITEM_REGEX.is_match(&line_info.content) {
443                            continue;
444                        }
445
446                        // Skip lines that are HTML content
447                        let trimmed = line_info.content.trim_start();
448                        if trimmed.starts_with('<') {
449                            continue;
450                        }
451                    }
452
453                    let match_len = image.byte_end - image.byte_offset;
454                    undefined.push((image.line - 1, image.start_col, match_len, ref_id.clone()));
455                    reported_refs.insert(reference_lower, true);
456                }
457            }
458        }
459
460        // Build a set of byte ranges that are already covered by parsed links/images
461        let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
462
463        // Add ranges from parsed links
464        for link in &ctx.links {
465            covered_ranges.push((link.byte_offset, link.byte_end));
466        }
467
468        // Add ranges from parsed images
469        for image in &ctx.images {
470            covered_ranges.push((image.byte_offset, image.byte_end));
471        }
472
473        // Sort ranges by start position
474        covered_ranges.sort_by_key(|&(start, _)| start);
475
476        // Handle shortcut references [text] which aren't captured in ctx.links
477        // Need to use regex for these
478        let lines: Vec<&str> = content.lines().collect();
479        in_example_section = false; // Reset for line-by-line processing
480
481        for (line_num, line) in lines.iter().enumerate() {
482            // Skip lines in frontmatter (line_num is already 0-based)
483            if is_in_front_matter(content, line_num) {
484                continue;
485            }
486
487            // Handle code blocks
488            if let Some(cap) = FENCED_CODE_START.captures(line) {
489                if let Some(fence) = cap.get(2) {
490                    // Get the fence marker (``` or ~~~) without the indentation
491                    let fence_str = fence.as_str();
492                    if !in_code_block {
493                        in_code_block = true;
494                        code_fence_marker = fence_str.to_string();
495                    } else if line.trim_start().starts_with(&code_fence_marker) {
496                        // Check if this could be a closing fence
497                        let trimmed = line.trim_start();
498                        // A closing fence should be just the fence characters, possibly with trailing whitespace
499                        if trimmed.starts_with(&code_fence_marker) {
500                            let after_fence = &trimmed[code_fence_marker.len()..];
501                            if after_fence.trim().is_empty() {
502                                in_code_block = false;
503                                code_fence_marker.clear();
504                            }
505                        }
506                    }
507                }
508                continue;
509            }
510
511            if in_code_block {
512                continue;
513            }
514
515            // Check for example sections
516            if OUTPUT_EXAMPLE_START.is_match(line) {
517                in_example_section = true;
518                continue;
519            }
520
521            if in_example_section {
522                // Check if we're exiting the example section (another heading)
523                if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
524                    in_example_section = false;
525                } else {
526                    continue;
527                }
528            }
529
530            // Skip list items
531            if LIST_ITEM_REGEX.is_match(line) {
532                continue;
533            }
534
535            // Skip lines that are HTML content
536            let trimmed_line = line.trim_start();
537            if trimmed_line.starts_with('<') {
538                continue;
539            }
540
541            // Skip GitHub alerts/callouts (e.g., > [!TIP])
542            if GITHUB_ALERT_REGEX.is_match(line) {
543                continue;
544            }
545
546            // Skip abbreviation definitions (*[ABBR]: Definition)
547            // These are not reference links and should not be checked
548            if trimmed_line.starts_with("*[") {
549                continue;
550            }
551
552            // Collect positions of brackets that are part of URLs (IPv6, etc.)
553            // so we can exclude them from reference checking
554            let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
555            for mat in URL_WITH_BRACKETS.find_iter(line) {
556                // Find all bracket pairs within this URL match
557                let url_str = mat.as_str();
558                let url_start = mat.start();
559
560                // Find brackets within the URL (e.g., in https://[::1]:8080)
561                let mut idx = 0;
562                while idx < url_str.len() {
563                    if let Some(bracket_start) = url_str[idx..].find('[') {
564                        let bracket_start_abs = url_start + idx + bracket_start;
565                        if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
566                            let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
567                            url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
568                            idx += bracket_start + bracket_end + 2;
569                        } else {
570                            break;
571                        }
572                    } else {
573                        break;
574                    }
575                }
576            }
577
578            // Check shortcut references: [reference]
579            if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
580                for cap in captures {
581                    if let Some(ref_match) = cap.get(1) {
582                        // Check if this bracket is part of a URL (IPv6, etc.)
583                        let bracket_start = cap.get(0).unwrap().start();
584                        let bracket_end = cap.get(0).unwrap().end();
585
586                        // Skip if this bracket pair is within any URL bracket range
587                        let is_in_url = url_bracket_ranges
588                            .iter()
589                            .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
590
591                        if is_in_url {
592                            continue;
593                        }
594
595                        let reference = ref_match.as_str();
596                        let reference_lower = reference.to_lowercase();
597
598                        // Skip patterns that are likely not markdown references
599                        if Self::is_likely_not_reference(reference) {
600                            continue;
601                        }
602
603                        // Skip GitHub alerts (including extended types)
604                        if let Some(alert_type) = reference.strip_prefix('!')
605                            && matches!(
606                                alert_type,
607                                "NOTE"
608                                    | "TIP"
609                                    | "WARNING"
610                                    | "IMPORTANT"
611                                    | "CAUTION"
612                                    | "INFO"
613                                    | "SUCCESS"
614                                    | "FAILURE"
615                                    | "DANGER"
616                                    | "BUG"
617                                    | "EXAMPLE"
618                                    | "QUOTE"
619                            )
620                        {
621                            continue;
622                        }
623
624                        // Skip MkDocs snippet section markers like [start:section] or [end:section]
625                        // when they appear as part of snippet syntax (e.g., # -8<- [start:section])
626                        if mkdocs_mode
627                            && (reference.starts_with("start:") || reference.starts_with("end:"))
628                            && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
629                                || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
630                        {
631                            continue;
632                        }
633
634                        // Skip MkDocs auto-references if in MkDocs mode
635                        // Strip backticks since MkDocs resolves `module.Class` as module.Class
636                        let stripped_ref = Self::strip_backticks(reference);
637                        if mkdocs_mode
638                            && (is_mkdocs_auto_reference(stripped_ref)
639                                || (reference != stripped_ref && Self::is_valid_python_identifier(stripped_ref)))
640                        {
641                            continue;
642                        }
643
644                        if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
645                            let full_match = cap.get(0).unwrap();
646                            let col = full_match.start();
647
648                            // Skip if inside code span
649                            let code_spans = ctx.code_spans();
650                            if Self::is_in_code_span(line_num + 1, col, &code_spans) {
651                                continue;
652                            }
653
654                            // Check if this position is within a covered range
655                            let line_start_byte = ctx.line_offsets[line_num];
656                            let byte_pos = line_start_byte + col;
657
658                            // Skip if inside HTML comment
659                            if Self::is_in_html_comment(content, byte_pos) {
660                                continue;
661                            }
662
663                            // Skip if inside HTML tag
664                            if Self::is_in_html_tag(ctx, byte_pos) {
665                                continue;
666                            }
667
668                            // Skip if inside math context
669                            if is_in_math_context(ctx, byte_pos) {
670                                continue;
671                            }
672
673                            // Skip if inside table cell
674                            if is_in_table_cell(ctx, line_num + 1, col) {
675                                continue;
676                            }
677
678                            let byte_end = byte_pos + (full_match.end() - full_match.start());
679
680                            // Check if this shortcut ref overlaps with any parsed link/image
681                            let mut is_covered = false;
682                            for &(range_start, range_end) in &covered_ranges {
683                                if range_start <= byte_pos && byte_end <= range_end {
684                                    // This shortcut ref is completely within a parsed link/image
685                                    is_covered = true;
686                                    break;
687                                }
688                                if range_start > byte_end {
689                                    // No need to check further (ranges are sorted)
690                                    break;
691                                }
692                            }
693
694                            if is_covered {
695                                continue;
696                            }
697
698                            // More sophisticated checks to avoid false positives
699
700                            // Check 1: If preceded by ], this might be part of [text][ref]
701                            // Look for the pattern ...][ref] and check if there's a matching [ before
702                            let line_chars: Vec<char> = line.chars().collect();
703                            if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
704                                // Look backwards for a [ that would make this [text][ref]
705                                let mut bracket_count = 1; // We already saw one ]
706                                let mut check_pos = col.saturating_sub(2);
707                                let mut found_opening = false;
708
709                                while check_pos > 0 && check_pos < line_chars.len() {
710                                    match line_chars.get(check_pos) {
711                                        Some(&']') => bracket_count += 1,
712                                        Some(&'[') => {
713                                            bracket_count -= 1;
714                                            if bracket_count == 0 {
715                                                // Check if this [ is escaped
716                                                if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
717                                                    found_opening = true;
718                                                }
719                                                break;
720                                            }
721                                        }
722                                        _ => {}
723                                    }
724                                    if check_pos == 0 {
725                                        break;
726                                    }
727                                    check_pos = check_pos.saturating_sub(1);
728                                }
729
730                                if found_opening {
731                                    // This is part of [text][ref], skip it
732                                    continue;
733                                }
734                            }
735
736                            // Check 2: If there's an escaped bracket pattern before this
737                            // e.g., \[text\][ref], the [ref] shouldn't be treated as a shortcut
738                            let before_text = &line[..col];
739                            if before_text.contains("\\]") {
740                                // Check if there's a \[ before the \]
741                                if let Some(escaped_close_pos) = before_text.rfind("\\]") {
742                                    let search_text = &before_text[..escaped_close_pos];
743                                    if search_text.contains("\\[") {
744                                        // This looks like \[...\][ref], skip it
745                                        continue;
746                                    }
747                                }
748                            }
749
750                            let match_len = full_match.end() - full_match.start();
751                            undefined.push((line_num, col, match_len, reference.to_string()));
752                            reported_refs.insert(reference_lower, true);
753                        }
754                    }
755                }
756            }
757        }
758
759        undefined
760    }
761}
762
763impl Rule for MD052ReferenceLinkImages {
764    fn name(&self) -> &'static str {
765        "MD052"
766    }
767
768    fn description(&self) -> &'static str {
769        "Reference links and images should use a reference that exists"
770    }
771
772    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
773        let content = ctx.content;
774        let mut warnings = Vec::new();
775
776        // OPTIMIZATION: Early exit if no reference-style links/images exist
777        // Check if there are any reference-style links or images in the document
778        let has_reference_links = ctx.links.iter().any(|l| l.is_reference);
779        let has_reference_images = ctx.images.iter().any(|i| i.is_reference);
780
781        // Quick check: If document contains no brackets at all, nothing to check
782        if !content.contains('[') {
783            return Ok(warnings);
784        }
785
786        // Quick check for reference definitions
787        let has_reference_definitions = content.contains("]:");
788
789        // If we have no reference links/images AND no reference definitions,
790        // then check if we might have shortcut references [text]
791        if !has_reference_links && !has_reference_images && !has_reference_definitions {
792            // Only do expensive shortcut checking if we have brackets but no links/images/refs
793            // This handles the case where all brackets are inline links [text](url)
794            let all_brackets_are_inline = ctx.links.iter().all(|l| !l.is_reference)
795                && ctx.images.iter().all(|i| !i.is_reference)
796                && ctx.links.len() + ctx.images.len() > 0;
797
798            if all_brackets_are_inline {
799                return Ok(warnings); // All brackets accounted for as inline links/images
800            }
801        }
802
803        // Check if we're in MkDocs mode from the context
804        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
805
806        let references = self.extract_references(content, mkdocs_mode);
807
808        // Use optimized detection method with cached link/image data
809        for (line_num, col, match_len, reference) in
810            self.find_undefined_references(content, &references, ctx, mkdocs_mode)
811        {
812            let lines: Vec<&str> = content.lines().collect();
813            let line_content = lines.get(line_num).unwrap_or(&"");
814
815            // Calculate precise character range for the entire undefined reference
816            let (start_line, start_col, end_line, end_col) =
817                calculate_match_range(line_num + 1, line_content, col, match_len);
818
819            warnings.push(LintWarning {
820                rule_name: Some(self.name()),
821                line: start_line,
822                column: start_col,
823                end_line,
824                end_column: end_col,
825                message: format!("Reference '{reference}' not found"),
826                severity: Severity::Warning,
827                fix: None,
828            });
829        }
830
831        Ok(warnings)
832    }
833
834    /// Check if this rule should be skipped for performance
835    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
836        // Skip if content is empty or has no links/images
837        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
838    }
839
840    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
841        let content = ctx.content;
842        // No automatic fix available for undefined references
843        Ok(content.to_string())
844    }
845
846    fn as_any(&self) -> &dyn std::any::Any {
847        self
848    }
849
850    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
851    where
852        Self: Sized,
853    {
854        // Flavor is now accessed from LintContext during check
855        Box::new(MD052ReferenceLinkImages::new())
856    }
857}
858
859#[cfg(test)]
860mod tests {
861    use super::*;
862    use crate::lint_context::LintContext;
863
864    #[test]
865    fn test_valid_reference_link() {
866        let rule = MD052ReferenceLinkImages::new();
867        let content = "[text][ref]\n\n[ref]: https://example.com";
868        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
869        let result = rule.check(&ctx).unwrap();
870
871        assert_eq!(result.len(), 0);
872    }
873
874    #[test]
875    fn test_undefined_reference_link() {
876        let rule = MD052ReferenceLinkImages::new();
877        let content = "[text][undefined]";
878        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
879        let result = rule.check(&ctx).unwrap();
880
881        assert_eq!(result.len(), 1);
882        assert!(result[0].message.contains("Reference 'undefined' not found"));
883    }
884
885    #[test]
886    fn test_valid_reference_image() {
887        let rule = MD052ReferenceLinkImages::new();
888        let content = "![alt][img]\n\n[img]: image.jpg";
889        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
890        let result = rule.check(&ctx).unwrap();
891
892        assert_eq!(result.len(), 0);
893    }
894
895    #[test]
896    fn test_undefined_reference_image() {
897        let rule = MD052ReferenceLinkImages::new();
898        let content = "![alt][missing]";
899        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
900        let result = rule.check(&ctx).unwrap();
901
902        assert_eq!(result.len(), 1);
903        assert!(result[0].message.contains("Reference 'missing' not found"));
904    }
905
906    #[test]
907    fn test_case_insensitive_references() {
908        let rule = MD052ReferenceLinkImages::new();
909        let content = "[Text][REF]\n\n[ref]: https://example.com";
910        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
911        let result = rule.check(&ctx).unwrap();
912
913        assert_eq!(result.len(), 0);
914    }
915
916    #[test]
917    fn test_shortcut_reference_valid() {
918        let rule = MD052ReferenceLinkImages::new();
919        let content = "[ref]\n\n[ref]: https://example.com";
920        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
921        let result = rule.check(&ctx).unwrap();
922
923        assert_eq!(result.len(), 0);
924    }
925
926    #[test]
927    fn test_shortcut_reference_undefined() {
928        let rule = MD052ReferenceLinkImages::new();
929        let content = "[undefined]";
930        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
931        let result = rule.check(&ctx).unwrap();
932
933        assert_eq!(result.len(), 1);
934        assert!(result[0].message.contains("Reference 'undefined' not found"));
935    }
936
937    #[test]
938    fn test_inline_links_ignored() {
939        let rule = MD052ReferenceLinkImages::new();
940        let content = "[text](https://example.com)";
941        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
942        let result = rule.check(&ctx).unwrap();
943
944        assert_eq!(result.len(), 0);
945    }
946
947    #[test]
948    fn test_inline_images_ignored() {
949        let rule = MD052ReferenceLinkImages::new();
950        let content = "![alt](image.jpg)";
951        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
952        let result = rule.check(&ctx).unwrap();
953
954        assert_eq!(result.len(), 0);
955    }
956
957    #[test]
958    fn test_references_in_code_blocks_ignored() {
959        let rule = MD052ReferenceLinkImages::new();
960        let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
961        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
962        let result = rule.check(&ctx).unwrap();
963
964        assert_eq!(result.len(), 0);
965    }
966
967    #[test]
968    fn test_references_in_inline_code_ignored() {
969        let rule = MD052ReferenceLinkImages::new();
970        let content = "`[undefined]`";
971        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
972        let result = rule.check(&ctx).unwrap();
973
974        // References inside inline code spans should be ignored
975        assert_eq!(result.len(), 0);
976    }
977
978    #[test]
979    fn test_comprehensive_inline_code_detection() {
980        let rule = MD052ReferenceLinkImages::new();
981        let content = r#"# Test
982
983This `[inside]` should be ignored.
984This [outside] should be flagged.
985Reference links `[text][ref]` in code are ignored.
986Regular reference [text][missing] should be flagged.
987Images `![alt][img]` in code are ignored.
988Regular image ![alt][badimg] should be flagged.
989
990Multiple `[one]` and `[two]` in code ignored, but [three] is not.
991
992```
993[code block content] should be ignored
994```
995
996`Multiple [refs] in [same] code span` ignored."#;
997
998        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
999        let result = rule.check(&ctx).unwrap();
1000
1001        // Should only flag: outside, missing, badimg, three (4 total)
1002        assert_eq!(result.len(), 4);
1003
1004        let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
1005        assert!(messages.iter().any(|m| m.contains("outside")));
1006        assert!(messages.iter().any(|m| m.contains("missing")));
1007        assert!(messages.iter().any(|m| m.contains("badimg")));
1008        assert!(messages.iter().any(|m| m.contains("three")));
1009
1010        // Should NOT flag any references inside code spans
1011        assert!(!messages.iter().any(|m| m.contains("inside")));
1012        assert!(!messages.iter().any(|m| m.contains("one")));
1013        assert!(!messages.iter().any(|m| m.contains("two")));
1014        assert!(!messages.iter().any(|m| m.contains("refs")));
1015        assert!(!messages.iter().any(|m| m.contains("same")));
1016    }
1017
1018    #[test]
1019    fn test_multiple_undefined_references() {
1020        let rule = MD052ReferenceLinkImages::new();
1021        let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
1022        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1023        let result = rule.check(&ctx).unwrap();
1024
1025        assert_eq!(result.len(), 3);
1026        assert!(result[0].message.contains("ref1"));
1027        assert!(result[1].message.contains("ref2"));
1028        assert!(result[2].message.contains("ref3"));
1029    }
1030
1031    #[test]
1032    fn test_mixed_valid_and_undefined() {
1033        let rule = MD052ReferenceLinkImages::new();
1034        let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
1035        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1036        let result = rule.check(&ctx).unwrap();
1037
1038        assert_eq!(result.len(), 1);
1039        assert!(result[0].message.contains("missing"));
1040    }
1041
1042    #[test]
1043    fn test_empty_reference() {
1044        let rule = MD052ReferenceLinkImages::new();
1045        let content = "[text][]\n\n[ref]: https://example.com";
1046        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1047        let result = rule.check(&ctx).unwrap();
1048
1049        // Empty reference should use the link text as reference
1050        assert_eq!(result.len(), 1);
1051    }
1052
1053    #[test]
1054    fn test_escaped_brackets_ignored() {
1055        let rule = MD052ReferenceLinkImages::new();
1056        let content = "\\[not a link\\]";
1057        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1058        let result = rule.check(&ctx).unwrap();
1059
1060        assert_eq!(result.len(), 0);
1061    }
1062
1063    #[test]
1064    fn test_list_items_ignored() {
1065        let rule = MD052ReferenceLinkImages::new();
1066        let content = "- [undefined]\n* [another]\n+ [third]";
1067        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1068        let result = rule.check(&ctx).unwrap();
1069
1070        // List items that look like shortcut references should be ignored
1071        assert_eq!(result.len(), 0);
1072    }
1073
1074    #[test]
1075    fn test_output_example_section_ignored() {
1076        let rule = MD052ReferenceLinkImages::new();
1077        let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
1078        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1079        let result = rule.check(&ctx).unwrap();
1080
1081        // Only the reference outside the Output section should be flagged
1082        assert_eq!(result.len(), 1);
1083        assert!(result[0].message.contains("missing"));
1084    }
1085
1086    #[test]
1087    fn test_reference_definitions_in_code_blocks_ignored() {
1088        let rule = MD052ReferenceLinkImages::new();
1089        let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
1090        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1091        let result = rule.check(&ctx).unwrap();
1092
1093        // Reference defined in code block should not count
1094        assert_eq!(result.len(), 1);
1095        assert!(result[0].message.contains("ref"));
1096    }
1097
1098    #[test]
1099    fn test_multiple_references_to_same_undefined() {
1100        let rule = MD052ReferenceLinkImages::new();
1101        let content = "[first][missing] [second][missing] [third][missing]";
1102        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1103        let result = rule.check(&ctx).unwrap();
1104
1105        // Should only report once per unique reference
1106        assert_eq!(result.len(), 1);
1107        assert!(result[0].message.contains("missing"));
1108    }
1109
1110    #[test]
1111    fn test_reference_with_special_characters() {
1112        let rule = MD052ReferenceLinkImages::new();
1113        let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
1114        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1115        let result = rule.check(&ctx).unwrap();
1116
1117        assert_eq!(result.len(), 0);
1118    }
1119
1120    #[test]
1121    fn test_issue_51_html_attribute_not_reference() {
1122        // Test for issue #51 - HTML attributes with square brackets shouldn't be treated as references
1123        let rule = MD052ReferenceLinkImages::new();
1124        let content = r#"# Example
1125
1126## Test
1127
1128Want to fill out this form?
1129
1130<form method="post">
1131    <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
1132</form>"#;
1133        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1134        let result = rule.check(&ctx).unwrap();
1135
1136        assert_eq!(
1137            result.len(),
1138            0,
1139            "HTML attributes with square brackets should not be flagged as undefined references"
1140        );
1141    }
1142
1143    #[test]
1144    fn test_extract_references() {
1145        let rule = MD052ReferenceLinkImages::new();
1146        let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
1147        let refs = rule.extract_references(content, false);
1148
1149        assert_eq!(refs.len(), 3);
1150        assert!(refs.contains("ref1"));
1151        assert!(refs.contains("ref2"));
1152        assert!(refs.contains("ref3"));
1153    }
1154
1155    #[test]
1156    fn test_inline_code_not_flagged() {
1157        let rule = MD052ReferenceLinkImages::new();
1158
1159        // Test that arrays in inline code are not flagged as references
1160        let content = r#"# Test
1161
1162Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.
1163
1164Also, `[todo]` is not a reference link.
1165
1166But this [reference] should be flagged.
1167
1168And this `[inline code]` should not be flagged.
1169"#;
1170
1171        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1172        let warnings = rule.check(&ctx).unwrap();
1173
1174        // Should only flag [reference], not the ones in backticks
1175        assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
1176        assert!(warnings[0].message.contains("'reference'"));
1177    }
1178
1179    #[test]
1180    fn test_code_block_references_ignored() {
1181        let rule = MD052ReferenceLinkImages::new();
1182
1183        let content = r#"# Test
1184
1185```markdown
1186[undefined] reference in code block
1187![undefined] image in code block
1188```
1189
1190[real-undefined] reference outside
1191"#;
1192
1193        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1194        let warnings = rule.check(&ctx).unwrap();
1195
1196        // Should only flag [real-undefined], not the ones in code block
1197        assert_eq!(warnings.len(), 1);
1198        assert!(warnings[0].message.contains("'real-undefined'"));
1199    }
1200
1201    #[test]
1202    fn test_html_comments_ignored() {
1203        // Test for issue #20 - MD052 should not flag content inside HTML comments
1204        let rule = MD052ReferenceLinkImages::new();
1205
1206        // Test the exact case from issue #20
1207        let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
1208<!--- set_env EDITOR 'python3 fake_editor.py' -->
1209
1210```bash
1211$ python3 vote.py
12123 votes for: 2
12132 votes for: 3, 4
1214```"#;
1215        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1216        let result = rule.check(&ctx).unwrap();
1217        assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");
1218
1219        // Test various reference patterns inside HTML comments
1220        let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
1221Normal [text][undefined]
1222<!-- Another [comment][with] references -->"#;
1223        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1224        let result = rule.check(&ctx).unwrap();
1225        assert_eq!(
1226            result.len(),
1227            1,
1228            "Should only flag the undefined reference outside comments"
1229        );
1230        assert!(result[0].message.contains("undefined"));
1231
1232        // Test multi-line HTML comments
1233        let content = r#"<!--
1234[ref1]
1235[ref2][ref3]
1236-->
1237[actual][undefined]"#;
1238        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1239        let result = rule.check(&ctx).unwrap();
1240        assert_eq!(
1241            result.len(),
1242            1,
1243            "Should not flag references in multi-line HTML comments"
1244        );
1245        assert!(result[0].message.contains("undefined"));
1246
1247        // Test mixed scenarios
1248        let content = r#"<!-- Comment with [1:] pattern -->
1249Valid [link][ref]
1250<!-- More [refs][in][comments] -->
1251![image][missing]
1252
1253[ref]: https://example.com"#;
1254        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1255        let result = rule.check(&ctx).unwrap();
1256        assert_eq!(result.len(), 1, "Should only flag missing image reference");
1257        assert!(result[0].message.contains("missing"));
1258    }
1259
1260    #[test]
1261    fn test_frontmatter_ignored() {
1262        // Test for issue #24 - MD052 should not flag content inside frontmatter
1263        let rule = MD052ReferenceLinkImages::new();
1264
1265        // Test YAML frontmatter with arrays and references
1266        let content = r#"---
1267layout: post
1268title: "My Jekyll Post"
1269date: 2023-01-01
1270categories: blog
1271tags: ["test", "example"]
1272author: John Doe
1273---
1274
1275# My Blog Post
1276
1277This is the actual markdown content that should be linted.
1278
1279[undefined] reference should be flagged.
1280
1281## Section 1
1282
1283Some content here."#;
1284        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1285        let result = rule.check(&ctx).unwrap();
1286
1287        // Should only flag [undefined] in the content, not the ["test", "example"] array in frontmatter
1288        assert_eq!(
1289            result.len(),
1290            1,
1291            "Should only flag the undefined reference outside frontmatter"
1292        );
1293        assert!(result[0].message.contains("undefined"));
1294
1295        // Test TOML frontmatter
1296        let content = r#"+++
1297title = "My Post"
1298tags = ["example", "test"]
1299+++
1300
1301# Content
1302
1303[missing] reference should be flagged."#;
1304        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1305        let result = rule.check(&ctx).unwrap();
1306        assert_eq!(
1307            result.len(),
1308            1,
1309            "Should only flag the undefined reference outside TOML frontmatter"
1310        );
1311        assert!(result[0].message.contains("missing"));
1312    }
1313
1314    #[test]
1315    fn test_mkdocs_snippet_markers_not_flagged() {
1316        // Test for issue #68 - MkDocs snippet selection markers should not be flagged as undefined references
1317        let rule = MD052ReferenceLinkImages::new();
1318
1319        // Test snippet section markers
1320        let content = r#"# Document with MkDocs Snippets
1321
1322Some content here.
1323
1324# -8<- [start:remote-content]
1325
1326This is the remote content section.
1327
1328# -8<- [end:remote-content]
1329
1330More content here.
1331
1332<!-- --8<-- [start:another-section] -->
1333Content in another section
1334<!-- --8<-- [end:another-section] -->"#;
1335        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs);
1336        let result = rule.check(&ctx).unwrap();
1337
1338        // Should not flag any snippet markers as undefined references
1339        assert_eq!(
1340            result.len(),
1341            0,
1342            "Should not flag MkDocs snippet markers as undefined references"
1343        );
1344
1345        // Test that the snippet marker lines are properly skipped
1346        // but regular undefined references on other lines are still caught
1347        let content = r#"# Document
1348
1349# -8<- [start:section]
1350Content with [reference] inside snippet section
1351# -8<- [end:section]
1352
1353Regular [undefined] reference outside snippet markers."#;
1354        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs);
1355        let result = rule.check(&ctx).unwrap();
1356
1357        assert_eq!(
1358            result.len(),
1359            2,
1360            "Should flag undefined references but skip snippet marker lines"
1361        );
1362        // The references inside the content should be flagged, but not start: and end:
1363        assert!(result[0].message.contains("reference"));
1364        assert!(result[1].message.contains("undefined"));
1365
1366        // Test in standard mode - should flag the markers as undefined
1367        let content = r#"# Document
1368
1369# -8<- [start:section]
1370# -8<- [end:section]"#;
1371        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1372        let result = rule.check(&ctx).unwrap();
1373
1374        assert_eq!(
1375            result.len(),
1376            2,
1377            "In standard mode, snippet markers should be flagged as undefined references"
1378        );
1379    }
1380
1381    #[test]
1382    fn test_github_alerts_not_flagged() {
1383        // Test for issue #60 - GitHub alerts should not be flagged as undefined references
1384        let rule = MD052ReferenceLinkImages::new();
1385
1386        // Test various GitHub alert types
1387        let content = r#"# Document with GitHub Alerts
1388
1389> [!NOTE]
1390> This is a note alert.
1391
1392> [!TIP]
1393> This is a tip alert.
1394
1395> [!IMPORTANT]
1396> This is an important alert.
1397
1398> [!WARNING]
1399> This is a warning alert.
1400
1401> [!CAUTION]
1402> This is a caution alert.
1403
1404Regular content with [undefined] reference."#;
1405        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1406        let result = rule.check(&ctx).unwrap();
1407
1408        // Should only flag the undefined reference, not the GitHub alerts
1409        assert_eq!(
1410            result.len(),
1411            1,
1412            "Should only flag the undefined reference, not GitHub alerts"
1413        );
1414        assert!(result[0].message.contains("undefined"));
1415        assert_eq!(result[0].line, 18); // Line with [undefined]
1416
1417        // Test GitHub alerts with additional content
1418        let content = r#"> [!TIP]
1419> Here's a useful tip about [something].
1420> Multiple lines are allowed.
1421
1422[something] is mentioned but not defined."#;
1423        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1424        let result = rule.check(&ctx).unwrap();
1425
1426        // Should flag only the [something] outside blockquotes
1427        // The test shows we're only catching one, which might be correct behavior
1428        // matching markdownlint's approach
1429        assert_eq!(result.len(), 1, "Should flag undefined reference");
1430        assert!(result[0].message.contains("something"));
1431
1432        // Test GitHub alerts with proper references
1433        let content = r#"> [!NOTE]
1434> See [reference] for more details.
1435
1436[reference]: https://example.com"#;
1437        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1438        let result = rule.check(&ctx).unwrap();
1439
1440        // Should not flag anything - [!NOTE] is GitHub alert and [reference] is defined
1441        assert_eq!(result.len(), 0, "Should not flag GitHub alerts or defined references");
1442    }
1443}