rumdl_lib/rules/
md052_reference_links_images.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::{HTML_COMMENT_PATTERN, SHORTCUT_REF_REGEX};
5use crate::utils::skip_context::{is_in_math_context, is_in_table_cell};
6use lazy_static::lazy_static;
7use regex::Regex;
8use std::collections::{HashMap, HashSet};
9
lazy_static! {
    // Matches a reference definition line: `[ref]: url`.
    // Capture group 1 is the label between the outer brackets.
    // The part after the colon is `\s*.*`, so empty definitions like `[ref]:` also match.
    // The label group tolerates escaped characters and one level of nested brackets,
    // to support cases like [`union[t, none]`]:
    static ref REF_REGEX: Regex = Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap();

    // Matches the start of an unordered list item (`-`, `*` or `+` followed by whitespace),
    // optionally followed by a task checkbox such as `[ ]`, `[x]` or `[X]`.
    // Used to exclude list items from reference checks.
    static ref LIST_ITEM_REGEX: Regex = Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap();

    // Matches the start of a fenced code block line.
    // Group 1 is the leading indentation, group 2 the fence run (3+ backticks or 3+ tildes).
    static ref FENCED_CODE_START: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap();

    // Matches a heading whose entire text is one of "Output", "Example",
    // "Output Style" or "Output Format"; such headings start an output example section.
    static ref OUTPUT_EXAMPLE_START: Regex = Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap();

    // Matches GitHub alerts/callouts in blockquotes (e.g., > [!NOTE], > [!TIP], etc.).
    // The alternation also includes additional common alert types beyond the basic ones.
    static ref GITHUB_ALERT_REGEX: Regex = Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]").unwrap();

    // Detects http(s) URLs that legitimately contain brackets, so those brackets are not
    // mistaken for reference links. This pattern specifically looks for:
    // - IPv6 addresses: https://[::1] or https://[2001:db8::1]
    // - IPv6 with zone IDs: https://[fe80::1%eth0]
    // - IPv6 mixed notation: https://[::ffff:192.0.2.1]
    // - API paths with array notation: https://api.example.com/users[0]
    // But NOT markdown reference links that happen to follow URLs
    static ref URL_WITH_BRACKETS: Regex = Regex::new(
        r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])"
    ).unwrap();
}
40
/// Rule MD052: Reference links and images should use a reference that is defined
///
/// See [docs/md052.md](../../docs/md052.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when a reference link or image uses a reference that isn't defined.
/// The rule carries no state of its own, so instances are freely cloneable and
/// `Default`-constructible.
#[derive(Debug, Clone, Default)]
pub struct MD052ReferenceLinkImages {}
48
49impl MD052ReferenceLinkImages {
50    pub fn new() -> Self {
51        Self {}
52    }
53
54    /// Strip surrounding backticks from a string
55    /// Used for MkDocs auto-reference detection where `module.Class` should be treated as module.Class
56    fn strip_backticks(s: &str) -> &str {
57        s.trim_start_matches('`').trim_end_matches('`')
58    }
59
60    /// Check if a string is a valid Python identifier
61    /// Used for MkDocs auto-reference detection where single-word backtick-wrapped identifiers
62    /// like `str`, `int`, etc. should be accepted as valid auto-references
63    fn is_valid_python_identifier(s: &str) -> bool {
64        if s.is_empty() {
65            return false;
66        }
67        let first_char = s.chars().next().unwrap();
68        if !first_char.is_ascii_alphabetic() && first_char != '_' {
69            return false;
70        }
71        s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
72    }
73
74    /// Check if a pattern is likely NOT a markdown reference
75    /// Returns true if this pattern should be skipped
76    fn is_likely_not_reference(text: &str) -> bool {
77        // Skip numeric patterns (array indices, ranges)
78        if text.chars().all(|c| c.is_ascii_digit()) {
79            return true;
80        }
81
82        // Skip numeric ranges like [1:3], [0:10], etc.
83        if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
84            return true;
85        }
86
87        // Skip patterns that look like config sections [tool.something], [section.subsection]
88        // But not if they contain other non-alphanumeric chars like hyphens or underscores
89        if text.contains('.') && !text.contains(' ') && !text.contains('-') && !text.contains('_') {
90            // Config sections typically have dots, no spaces, and only alphanumeric + dots
91            return true;
92        }
93
94        // Skip glob/wildcard patterns like [*], [...], [**]
95        if text == "*" || text == "..." || text == "**" {
96            return true;
97        }
98
99        // Skip patterns that look like file paths [dir/file], [src/utils]
100        if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
101            return true;
102        }
103
104        // Skip programming type annotations like [int, str], [Dict[str, Any]]
105        // These typically have commas and/or nested brackets
106        if text.contains(',') || text.contains('[') || text.contains(']') {
107            // Check if it looks like a type annotation pattern
108            return true;
109        }
110
111        // Note: We don't filter out patterns with backticks because backticks in reference names
112        // are valid markdown syntax, e.g., [`dataclasses.InitVar`] is a valid reference name
113
114        // Skip patterns that look like module/class paths ONLY if they don't have backticks
115        // Backticks indicate intentional code formatting in a reference name
116        // e.g., skip [dataclasses.initvar] but allow [`typing.ClassVar`]
117        if !text.contains('`')
118            && text.contains('.')
119            && !text.contains(' ')
120            && !text.contains('-')
121            && !text.contains('_')
122        {
123            return true;
124        }
125
126        // Note: We don't filter based on word count anymore because legitimate references
127        // can have many words, like "python language reference for import statements"
128        // Word count filtering was causing false positives where valid references were
129        // being incorrectly flagged as unused
130
131        // Skip patterns that are just punctuation or operators
132        if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
133            return true;
134        }
135
136        // Skip very short non-word patterns (likely operators or syntax)
137        if text.len() <= 2 && !text.chars().all(|c| c.is_alphabetic()) {
138            return true;
139        }
140
141        // Skip quoted patterns like ["E501"], ["ALL"], ["E", "F"]
142        if (text.starts_with('"') && text.ends_with('"'))
143            || (text.starts_with('\'') && text.ends_with('\''))
144            || text.contains('"')
145            || text.contains('\'')
146        {
147            return true;
148        }
149
150        // Skip descriptive patterns with colon like [default: the project root]
151        // But allow simple numeric ranges which are handled above
152        if text.contains(':') && text.contains(' ') {
153            return true;
154        }
155
156        // Skip alert/admonition patterns like [!WARN], [!NOTE], etc.
157        if text.starts_with('!') {
158            return true;
159        }
160
161        // Skip footnote syntax like [^1], [^note], etc.
162        // Footnotes start with ^ and are a common markdown extension
163        if text.starts_with('^') {
164            return true;
165        }
166
167        // Skip table of contents markers like [TOC]
168        // Used by Python-Markdown and other processors
169        if text == "TOC" {
170            return true;
171        }
172
173        // Skip single uppercase letters (likely type parameters) like [T], [U], [K], [V]
174        if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
175            return true;
176        }
177
178        // Skip common programming type names, literals, and short identifiers
179        // that are likely not markdown references
180        let common_non_refs = [
181            // Programming types
182            "object",
183            "Object",
184            "any",
185            "Any",
186            "inv",
187            "void",
188            "bool",
189            "int",
190            "float",
191            "str",
192            "char",
193            "i8",
194            "i16",
195            "i32",
196            "i64",
197            "i128",
198            "isize",
199            "u8",
200            "u16",
201            "u32",
202            "u64",
203            "u128",
204            "usize",
205            "f32",
206            "f64",
207            // JavaScript/JSON literals (excluding "undefined" which is too ambiguous)
208            "null",
209            "true",
210            "false",
211            "NaN",
212            "Infinity",
213            // Common JavaScript output patterns
214            "object Object",
215        ];
216
217        if common_non_refs.contains(&text) {
218            return true;
219        }
220
221        false
222    }
223
224    /// Check if a position is inside any code span
225    fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
226        code_spans
227            .iter()
228            .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
229    }
230
231    /// Check if a byte position is within an HTML comment
232    fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
233        for m in HTML_COMMENT_PATTERN.find_iter(content) {
234            if m.start() <= byte_pos && byte_pos < m.end() {
235                return true;
236            }
237        }
238        false
239    }
240
241    /// Check if a byte position is within an HTML tag
242    fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
243        // Check HTML tags
244        for html_tag in ctx.html_tags().iter() {
245            if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
246                return true;
247            }
248        }
249        false
250    }
251
252    fn extract_references(&self, content: &str, mkdocs_mode: bool) -> HashSet<String> {
253        use crate::config::MarkdownFlavor;
254        use crate::utils::skip_context::is_mkdocs_snippet_line;
255
256        let mut references = HashSet::new();
257        let mut in_code_block = false;
258        let mut code_fence_marker = String::new();
259
260        for line in content.lines() {
261            // Skip lines that look like MkDocs snippet markers (only in MkDocs mode)
262            if is_mkdocs_snippet_line(
263                line,
264                if mkdocs_mode {
265                    MarkdownFlavor::MkDocs
266                } else {
267                    MarkdownFlavor::Standard
268                },
269            ) {
270                continue;
271            }
272            // Handle code block boundaries
273            if let Some(cap) = FENCED_CODE_START.captures(line) {
274                if let Some(fence) = cap.get(2) {
275                    // Get the fence marker (``` or ~~~) without the indentation
276                    let fence_str = fence.as_str();
277                    if !in_code_block {
278                        in_code_block = true;
279                        code_fence_marker = fence_str.to_string();
280                    } else if line.trim_start().starts_with(&code_fence_marker) {
281                        // Check if this could be a closing fence
282                        let trimmed = line.trim_start();
283                        // A closing fence should be just the fence characters, possibly with trailing whitespace
284                        if trimmed.starts_with(&code_fence_marker) {
285                            let after_fence = &trimmed[code_fence_marker.len()..];
286                            if after_fence.trim().is_empty() {
287                                in_code_block = false;
288                                code_fence_marker.clear();
289                            }
290                        }
291                    }
292                }
293                continue;
294            }
295
296            // Skip lines in code blocks
297            if in_code_block {
298                continue;
299            }
300
301            // Check for abbreviation syntax (*[ABBR]: Definition) and skip it
302            // Abbreviations are not reference links and should not be tracked
303            if line.trim_start().starts_with("*[") {
304                continue;
305            }
306
307            if let Some(cap) = REF_REGEX.captures(line) {
308                // Store references in lowercase for case-insensitive comparison
309                if let Some(reference) = cap.get(1) {
310                    references.insert(reference.as_str().to_lowercase());
311                }
312            }
313        }
314
315        references
316    }
317
318    fn find_undefined_references(
319        &self,
320        content: &str,
321        references: &HashSet<String>,
322        ctx: &crate::lint_context::LintContext,
323        mkdocs_mode: bool,
324    ) -> Vec<(usize, usize, usize, String)> {
325        let mut undefined = Vec::new();
326        let mut reported_refs = HashMap::new();
327        let mut in_code_block = false;
328        let mut code_fence_marker = String::new();
329        let mut in_example_section = false;
330
331        // Get code spans once for the entire function
332        let code_spans = ctx.code_spans();
333
334        // Use cached data for reference links and images
335        for link in &ctx.links {
336            if !link.is_reference {
337                continue; // Skip inline links
338            }
339
340            // Skip links inside code spans
341            if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
342                continue;
343            }
344
345            // Skip links inside HTML comments
346            if Self::is_in_html_comment(content, link.byte_offset) {
347                continue;
348            }
349
350            // Skip links inside HTML tags
351            if Self::is_in_html_tag(ctx, link.byte_offset) {
352                continue;
353            }
354
355            // Skip links inside math contexts
356            if is_in_math_context(ctx, link.byte_offset) {
357                continue;
358            }
359
360            // Skip links inside table cells
361            if is_in_table_cell(ctx, link.line, link.start_col) {
362                continue;
363            }
364
365            // Skip links inside frontmatter
366            if ctx.line_info(link.line).is_some_and(|info| info.in_front_matter) {
367                continue;
368            }
369
370            if let Some(ref_id) = &link.reference_id {
371                let reference_lower = ref_id.to_lowercase();
372
373                // Skip MkDocs auto-references if in MkDocs mode
374                // Check both the reference_id and the link text for shorthand references
375                // Strip backticks since MkDocs resolves `module.Class` as module.Class
376                let stripped_ref = Self::strip_backticks(ref_id);
377                let stripped_text = Self::strip_backticks(&link.text);
378                if mkdocs_mode
379                    && (is_mkdocs_auto_reference(stripped_ref)
380                        || is_mkdocs_auto_reference(stripped_text)
381                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
382                        || (link.text.as_str() != stripped_text && Self::is_valid_python_identifier(stripped_text)))
383                {
384                    continue;
385                }
386
387                // Check if reference is defined
388                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
389                    // Check if the line is in an example section or list item
390                    if let Some(line_info) = ctx.line_info(link.line) {
391                        if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
392                            in_example_section = true;
393                            continue;
394                        }
395
396                        if in_example_section {
397                            continue;
398                        }
399
400                        // Skip list items
401                        if LIST_ITEM_REGEX.is_match(&line_info.content) {
402                            continue;
403                        }
404
405                        // Skip lines that are HTML content
406                        let trimmed = line_info.content.trim_start();
407                        if trimmed.starts_with('<') {
408                            continue;
409                        }
410                    }
411
412                    let match_len = link.byte_end - link.byte_offset;
413                    undefined.push((link.line - 1, link.start_col, match_len, ref_id.clone()));
414                    reported_refs.insert(reference_lower, true);
415                }
416            }
417        }
418
419        // Use cached data for reference images
420        for image in &ctx.images {
421            if !image.is_reference {
422                continue; // Skip inline images
423            }
424
425            // Skip images inside code spans
426            if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
427                continue;
428            }
429
430            // Skip images inside HTML comments
431            if Self::is_in_html_comment(content, image.byte_offset) {
432                continue;
433            }
434
435            // Skip images inside HTML tags
436            if Self::is_in_html_tag(ctx, image.byte_offset) {
437                continue;
438            }
439
440            // Skip images inside math contexts
441            if is_in_math_context(ctx, image.byte_offset) {
442                continue;
443            }
444
445            // Skip images inside table cells
446            if is_in_table_cell(ctx, image.line, image.start_col) {
447                continue;
448            }
449
450            // Skip images inside frontmatter
451            if ctx.line_info(image.line).is_some_and(|info| info.in_front_matter) {
452                continue;
453            }
454
455            if let Some(ref_id) = &image.reference_id {
456                let reference_lower = ref_id.to_lowercase();
457
458                // Skip MkDocs auto-references if in MkDocs mode
459                // Check both the reference_id and the alt text for shorthand references
460                // Strip backticks since MkDocs resolves `module.Class` as module.Class
461                let stripped_ref = Self::strip_backticks(ref_id);
462                let stripped_alt = Self::strip_backticks(&image.alt_text);
463                if mkdocs_mode
464                    && (is_mkdocs_auto_reference(stripped_ref)
465                        || is_mkdocs_auto_reference(stripped_alt)
466                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
467                        || (image.alt_text.as_str() != stripped_alt && Self::is_valid_python_identifier(stripped_alt)))
468                {
469                    continue;
470                }
471
472                // Check if reference is defined
473                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
474                    // Check if the line is in an example section or list item
475                    if let Some(line_info) = ctx.line_info(image.line) {
476                        if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
477                            in_example_section = true;
478                            continue;
479                        }
480
481                        if in_example_section {
482                            continue;
483                        }
484
485                        // Skip list items
486                        if LIST_ITEM_REGEX.is_match(&line_info.content) {
487                            continue;
488                        }
489
490                        // Skip lines that are HTML content
491                        let trimmed = line_info.content.trim_start();
492                        if trimmed.starts_with('<') {
493                            continue;
494                        }
495                    }
496
497                    let match_len = image.byte_end - image.byte_offset;
498                    undefined.push((image.line - 1, image.start_col, match_len, ref_id.clone()));
499                    reported_refs.insert(reference_lower, true);
500                }
501            }
502        }
503
504        // Build a set of byte ranges that are already covered by parsed links/images
505        let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
506
507        // Add ranges from parsed links
508        for link in &ctx.links {
509            covered_ranges.push((link.byte_offset, link.byte_end));
510        }
511
512        // Add ranges from parsed images
513        for image in &ctx.images {
514            covered_ranges.push((image.byte_offset, image.byte_end));
515        }
516
517        // Sort ranges by start position
518        covered_ranges.sort_by_key(|&(start, _)| start);
519
520        // Handle shortcut references [text] which aren't captured in ctx.links
521        // Need to use regex for these
522        let lines: Vec<&str> = content.lines().collect();
523        in_example_section = false; // Reset for line-by-line processing
524
525        for (line_num, line) in lines.iter().enumerate() {
526            // Skip lines in frontmatter (convert 0-based to 1-based for line_info)
527            if ctx.line_info(line_num + 1).is_some_and(|info| info.in_front_matter) {
528                continue;
529            }
530
531            // Handle code blocks
532            if let Some(cap) = FENCED_CODE_START.captures(line) {
533                if let Some(fence) = cap.get(2) {
534                    // Get the fence marker (``` or ~~~) without the indentation
535                    let fence_str = fence.as_str();
536                    if !in_code_block {
537                        in_code_block = true;
538                        code_fence_marker = fence_str.to_string();
539                    } else if line.trim_start().starts_with(&code_fence_marker) {
540                        // Check if this could be a closing fence
541                        let trimmed = line.trim_start();
542                        // A closing fence should be just the fence characters, possibly with trailing whitespace
543                        if trimmed.starts_with(&code_fence_marker) {
544                            let after_fence = &trimmed[code_fence_marker.len()..];
545                            if after_fence.trim().is_empty() {
546                                in_code_block = false;
547                                code_fence_marker.clear();
548                            }
549                        }
550                    }
551                }
552                continue;
553            }
554
555            if in_code_block {
556                continue;
557            }
558
559            // Check for example sections
560            if OUTPUT_EXAMPLE_START.is_match(line) {
561                in_example_section = true;
562                continue;
563            }
564
565            if in_example_section {
566                // Check if we're exiting the example section (another heading)
567                if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
568                    in_example_section = false;
569                } else {
570                    continue;
571                }
572            }
573
574            // Skip list items
575            if LIST_ITEM_REGEX.is_match(line) {
576                continue;
577            }
578
579            // Skip lines that are HTML content
580            let trimmed_line = line.trim_start();
581            if trimmed_line.starts_with('<') {
582                continue;
583            }
584
585            // Skip GitHub alerts/callouts (e.g., > [!TIP])
586            if GITHUB_ALERT_REGEX.is_match(line) {
587                continue;
588            }
589
590            // Skip abbreviation definitions (*[ABBR]: Definition)
591            // These are not reference links and should not be checked
592            if trimmed_line.starts_with("*[") {
593                continue;
594            }
595
596            // Collect positions of brackets that are part of URLs (IPv6, etc.)
597            // so we can exclude them from reference checking
598            let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
599            for mat in URL_WITH_BRACKETS.find_iter(line) {
600                // Find all bracket pairs within this URL match
601                let url_str = mat.as_str();
602                let url_start = mat.start();
603
604                // Find brackets within the URL (e.g., in https://[::1]:8080)
605                let mut idx = 0;
606                while idx < url_str.len() {
607                    if let Some(bracket_start) = url_str[idx..].find('[') {
608                        let bracket_start_abs = url_start + idx + bracket_start;
609                        if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
610                            let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
611                            url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
612                            idx += bracket_start + bracket_end + 2;
613                        } else {
614                            break;
615                        }
616                    } else {
617                        break;
618                    }
619                }
620            }
621
622            // Check shortcut references: [reference]
623            if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
624                for cap in captures {
625                    if let Some(ref_match) = cap.get(1) {
626                        // Check if this bracket is part of a URL (IPv6, etc.)
627                        let bracket_start = cap.get(0).unwrap().start();
628                        let bracket_end = cap.get(0).unwrap().end();
629
630                        // Skip if this bracket pair is within any URL bracket range
631                        let is_in_url = url_bracket_ranges
632                            .iter()
633                            .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
634
635                        if is_in_url {
636                            continue;
637                        }
638
639                        let reference = ref_match.as_str();
640                        let reference_lower = reference.to_lowercase();
641
642                        // Skip patterns that are likely not markdown references
643                        if Self::is_likely_not_reference(reference) {
644                            continue;
645                        }
646
647                        // Skip GitHub alerts (including extended types)
648                        if let Some(alert_type) = reference.strip_prefix('!')
649                            && matches!(
650                                alert_type,
651                                "NOTE"
652                                    | "TIP"
653                                    | "WARNING"
654                                    | "IMPORTANT"
655                                    | "CAUTION"
656                                    | "INFO"
657                                    | "SUCCESS"
658                                    | "FAILURE"
659                                    | "DANGER"
660                                    | "BUG"
661                                    | "EXAMPLE"
662                                    | "QUOTE"
663                            )
664                        {
665                            continue;
666                        }
667
668                        // Skip MkDocs snippet section markers like [start:section] or [end:section]
669                        // when they appear as part of snippet syntax (e.g., # -8<- [start:section])
670                        if mkdocs_mode
671                            && (reference.starts_with("start:") || reference.starts_with("end:"))
672                            && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
673                                || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
674                        {
675                            continue;
676                        }
677
678                        // Skip MkDocs auto-references if in MkDocs mode
679                        // Strip backticks since MkDocs resolves `module.Class` as module.Class
680                        let stripped_ref = Self::strip_backticks(reference);
681                        if mkdocs_mode
682                            && (is_mkdocs_auto_reference(stripped_ref)
683                                || (reference != stripped_ref && Self::is_valid_python_identifier(stripped_ref)))
684                        {
685                            continue;
686                        }
687
688                        if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
689                            let full_match = cap.get(0).unwrap();
690                            let col = full_match.start();
691
692                            // Skip if inside code span
693                            let code_spans = ctx.code_spans();
694                            if Self::is_in_code_span(line_num + 1, col, &code_spans) {
695                                continue;
696                            }
697
698                            // Check if this position is within a covered range
699                            let line_start_byte = ctx.line_offsets[line_num];
700                            let byte_pos = line_start_byte + col;
701
702                            // Skip if inside code block
703                            if crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block(
704                                &ctx.code_blocks,
705                                byte_pos,
706                            ) {
707                                continue;
708                            }
709
710                            // Skip if inside HTML comment
711                            if Self::is_in_html_comment(content, byte_pos) {
712                                continue;
713                            }
714
715                            // Skip if inside HTML tag
716                            if Self::is_in_html_tag(ctx, byte_pos) {
717                                continue;
718                            }
719
720                            // Skip if inside math context
721                            if is_in_math_context(ctx, byte_pos) {
722                                continue;
723                            }
724
725                            // Skip if inside table cell
726                            if is_in_table_cell(ctx, line_num + 1, col) {
727                                continue;
728                            }
729
730                            let byte_end = byte_pos + (full_match.end() - full_match.start());
731
732                            // Check if this shortcut ref overlaps with any parsed link/image
733                            let mut is_covered = false;
734                            for &(range_start, range_end) in &covered_ranges {
735                                if range_start <= byte_pos && byte_end <= range_end {
736                                    // This shortcut ref is completely within a parsed link/image
737                                    is_covered = true;
738                                    break;
739                                }
740                                if range_start > byte_end {
741                                    // No need to check further (ranges are sorted)
742                                    break;
743                                }
744                            }
745
746                            if is_covered {
747                                continue;
748                            }
749
750                            // More sophisticated checks to avoid false positives
751
752                            // Check 1: If preceded by ], this might be part of [text][ref]
753                            // Look for the pattern ...][ref] and check if there's a matching [ before
754                            let line_chars: Vec<char> = line.chars().collect();
755                            if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
756                                // Look backwards for a [ that would make this [text][ref]
757                                let mut bracket_count = 1; // We already saw one ]
758                                let mut check_pos = col.saturating_sub(2);
759                                let mut found_opening = false;
760
761                                while check_pos > 0 && check_pos < line_chars.len() {
762                                    match line_chars.get(check_pos) {
763                                        Some(&']') => bracket_count += 1,
764                                        Some(&'[') => {
765                                            bracket_count -= 1;
766                                            if bracket_count == 0 {
767                                                // Check if this [ is escaped
768                                                if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
769                                                    found_opening = true;
770                                                }
771                                                break;
772                                            }
773                                        }
774                                        _ => {}
775                                    }
776                                    if check_pos == 0 {
777                                        break;
778                                    }
779                                    check_pos = check_pos.saturating_sub(1);
780                                }
781
782                                if found_opening {
783                                    // This is part of [text][ref], skip it
784                                    continue;
785                                }
786                            }
787
788                            // Check 2: If there's an escaped bracket pattern before this
789                            // e.g., \[text\][ref], the [ref] shouldn't be treated as a shortcut
790                            let before_text = &line[..col];
791                            if before_text.contains("\\]") {
792                                // Check if there's a \[ before the \]
793                                if let Some(escaped_close_pos) = before_text.rfind("\\]") {
794                                    let search_text = &before_text[..escaped_close_pos];
795                                    if search_text.contains("\\[") {
796                                        // This looks like \[...\][ref], skip it
797                                        continue;
798                                    }
799                                }
800                            }
801
802                            let match_len = full_match.end() - full_match.start();
803                            undefined.push((line_num, col, match_len, reference.to_string()));
804                            reported_refs.insert(reference_lower, true);
805                        }
806                    }
807                }
808            }
809        }
810
811        undefined
812    }
813}
814
815impl Rule for MD052ReferenceLinkImages {
816    fn name(&self) -> &'static str {
817        "MD052"
818    }
819
820    fn description(&self) -> &'static str {
821        "Reference links and images should use a reference that exists"
822    }
823
824    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
825        let content = ctx.content;
826        let mut warnings = Vec::new();
827
828        // OPTIMIZATION: Early exit if no reference-style links/images exist
829        // Check if there are any reference-style links or images in the document
830        let has_reference_links = ctx.links.iter().any(|l| l.is_reference);
831        let has_reference_images = ctx.images.iter().any(|i| i.is_reference);
832
833        // Quick check: If document contains no brackets at all, nothing to check
834        if !content.contains('[') {
835            return Ok(warnings);
836        }
837
838        // Quick check for reference definitions
839        let has_reference_definitions = content.contains("]:");
840
841        // If we have no reference links/images AND no reference definitions,
842        // then check if we might have shortcut references [text]
843        if !has_reference_links && !has_reference_images && !has_reference_definitions {
844            // Only do expensive shortcut checking if we have brackets but no links/images/refs
845            // This handles the case where all brackets are inline links [text](url)
846            let all_brackets_are_inline = ctx.links.iter().all(|l| !l.is_reference)
847                && ctx.images.iter().all(|i| !i.is_reference)
848                && ctx.links.len() + ctx.images.len() > 0;
849
850            if all_brackets_are_inline {
851                return Ok(warnings); // All brackets accounted for as inline links/images
852            }
853        }
854
855        // Check if we're in MkDocs mode from the context
856        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
857
858        let references = self.extract_references(content, mkdocs_mode);
859
860        // Use optimized detection method with cached link/image data
861        for (line_num, col, match_len, reference) in
862            self.find_undefined_references(content, &references, ctx, mkdocs_mode)
863        {
864            let lines: Vec<&str> = content.lines().collect();
865            let line_content = lines.get(line_num).unwrap_or(&"");
866
867            // Calculate precise character range for the entire undefined reference
868            let (start_line, start_col, end_line, end_col) =
869                calculate_match_range(line_num + 1, line_content, col, match_len);
870
871            warnings.push(LintWarning {
872                rule_name: Some(self.name()),
873                line: start_line,
874                column: start_col,
875                end_line,
876                end_column: end_col,
877                message: format!("Reference '{reference}' not found"),
878                severity: Severity::Warning,
879                fix: None,
880            });
881        }
882
883        Ok(warnings)
884    }
885
886    /// Check if this rule should be skipped for performance
887    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
888        // Skip if content is empty or has no links/images
889        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
890    }
891
892    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
893        let content = ctx.content;
894        // No automatic fix available for undefined references
895        Ok(content.to_string())
896    }
897
898    fn as_any(&self) -> &dyn std::any::Any {
899        self
900    }
901
902    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
903    where
904        Self: Sized,
905    {
906        // Flavor is now accessed from LintContext during check
907        Box::new(MD052ReferenceLinkImages::new())
908    }
909}
910
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Runs MD052 over `content` with a fresh rule instance and the given flavor.
    fn lint(content: &str, flavor: crate::config::MarkdownFlavor) -> Vec<LintWarning> {
        let rule = MD052ReferenceLinkImages::new();
        let ctx = LintContext::new(content, flavor);
        rule.check(&ctx).unwrap()
    }

    /// Runs MD052 over `content` using the standard Markdown flavor.
    fn lint_standard(content: &str) -> Vec<LintWarning> {
        lint(content, crate::config::MarkdownFlavor::Standard)
    }

    /// Runs MD052 over `content` using the MkDocs flavor.
    fn lint_mkdocs(content: &str) -> Vec<LintWarning> {
        lint(content, crate::config::MarkdownFlavor::MkDocs)
    }

    #[test]
    fn test_valid_reference_link() {
        let found = lint_standard("[text][ref]\n\n[ref]: https://example.com");

        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_undefined_reference_link() {
        let found = lint_standard("[text][undefined]");

        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains("Reference 'undefined' not found"));
    }

    #[test]
    fn test_valid_reference_image() {
        let found = lint_standard("![alt][img]\n\n[img]: image.jpg");

        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_undefined_reference_image() {
        let found = lint_standard("![alt][missing]");

        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains("Reference 'missing' not found"));
    }

    #[test]
    fn test_case_insensitive_references() {
        let found = lint_standard("[Text][REF]\n\n[ref]: https://example.com");

        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_shortcut_reference_valid() {
        let found = lint_standard("[ref]\n\n[ref]: https://example.com");

        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_shortcut_reference_undefined() {
        let found = lint_standard("[undefined]");

        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains("Reference 'undefined' not found"));
    }

    #[test]
    fn test_inline_links_ignored() {
        let found = lint_standard("[text](https://example.com)");

        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_inline_images_ignored() {
        let found = lint_standard("![alt](image.jpg)");

        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_references_in_code_blocks_ignored() {
        let found = lint_standard("```\n[undefined]\n```\n\n[ref]: https://example.com");

        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_references_in_inline_code_ignored() {
        let found = lint_standard("`[undefined]`");

        // Anything inside an inline code span is literal text, not a reference
        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_comprehensive_inline_code_detection() {
        let content = r#"# Test

This `[inside]` should be ignored.
This [outside] should be flagged.
Reference links `[text][ref]` in code are ignored.
Regular reference [text][missing] should be flagged.
Images `![alt][img]` in code are ignored.
Regular image ![alt][badimg] should be flagged.

Multiple `[one]` and `[two]` in code ignored, but [three] is not.

```
[code block content] should be ignored
```

`Multiple [refs] in [same] code span` ignored."#;

        let found = lint_standard(content);

        // Expected findings: outside, missing, badimg, three
        assert_eq!(found.len(), 4);

        let mentions = |needle: &str| found.iter().any(|w| w.message.as_str().contains(needle));

        for expected in ["outside", "missing", "badimg", "three"] {
            assert!(mentions(expected));
        }

        // Nothing that lives inside a code span may be reported
        for unexpected in ["inside", "one", "two", "refs", "same"] {
            assert!(!mentions(unexpected));
        }
    }

    #[test]
    fn test_multiple_undefined_references() {
        let found = lint_standard("[link1][ref1] [link2][ref2] [link3][ref3]");

        assert_eq!(found.len(), 3);
        assert!(found[0].message.contains("ref1"));
        assert!(found[1].message.contains("ref2"));
        assert!(found[2].message.contains("ref3"));
    }

    #[test]
    fn test_mixed_valid_and_undefined() {
        let found = lint_standard("[valid][ref] [invalid][missing]\n\n[ref]: https://example.com");

        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains("missing"));
    }

    #[test]
    fn test_empty_reference() {
        let found = lint_standard("[text][]\n\n[ref]: https://example.com");

        // A collapsed reference `[text][]` resolves against its own link text
        assert_eq!(found.len(), 1);
    }

    #[test]
    fn test_escaped_brackets_ignored() {
        let found = lint_standard("\\[not a link\\]");

        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_list_items_ignored() {
        let found = lint_standard("- [undefined]\n* [another]\n+ [third]");

        // Bracketed text directly after a list marker is not treated as a shortcut reference
        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_output_example_section_ignored() {
        let found = lint_standard("## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]");

        // The Output section is exempt; only the later section is checked
        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains("missing"));
    }

    #[test]
    fn test_reference_definitions_in_code_blocks_ignored() {
        let found = lint_standard("[link][ref]\n\n```\n[ref]: https://example.com\n```");

        // A definition inside a code block does not define anything
        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains("ref"));
    }

    #[test]
    fn test_multiple_references_to_same_undefined() {
        let found = lint_standard("[first][missing] [second][missing] [third][missing]");

        // Each unique undefined label is reported a single time
        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains("missing"));
    }

    #[test]
    fn test_reference_with_special_characters() {
        let found =
            lint_standard("[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com");

        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_issue_51_html_attribute_not_reference() {
        // Issue #51: square brackets inside HTML attribute values are not references
        let content = r#"# Example

## Test

Want to fill out this form?

<form method="post">
    <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
</form>"#;
        let found = lint_standard(content);

        assert_eq!(
            found.len(),
            0,
            "HTML attributes with square brackets should not be flagged as undefined references"
        );
    }

    #[test]
    fn test_extract_references() {
        let rule = MD052ReferenceLinkImages::new();
        let refs = rule.extract_references("[ref1]: url1\n[Ref2]: url2\n[REF3]: url3", false);

        assert_eq!(refs.len(), 3);
        assert!(refs.contains("ref1"));
        assert!(refs.contains("ref2"));
        assert!(refs.contains("ref3"));
    }

    #[test]
    fn test_inline_code_not_flagged() {
        // Array literals and bracketed words inside backticks must not be reported
        let content = r#"# Test

Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.

Also, `[todo]` is not a reference link.

But this [reference] should be flagged.

And this `[inline code]` should not be flagged.
"#;

        let warnings = lint_standard(content);

        // Only the bare [reference] is outside of backticks
        assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
        assert!(warnings[0].message.contains("'reference'"));
    }

    #[test]
    fn test_code_block_references_ignored() {
        let content = r#"# Test

```markdown
[undefined] reference in code block
![undefined] image in code block
```

[real-undefined] reference outside
"#;

        let warnings = lint_standard(content);

        // Only [real-undefined] sits outside the fenced block
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("'real-undefined'"));
    }

    #[test]
    fn test_html_comments_ignored() {
        // Issue #20: nothing inside an HTML comment may be reported

        // The exact document from issue #20
        let issue_doc = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
<!--- set_env EDITOR 'python3 fake_editor.py' -->

```bash
$ python3 vote.py
3 votes for: 2
2 votes for: 3, 4
```"#;
        let found = lint_standard(issue_doc);
        assert_eq!(found.len(), 0, "Should not flag [1:] inside HTML comments");

        // Several reference shapes inside single-line comments
        let single_line = r#"<!-- This is [ref1] and [ref2][ref3] -->
Normal [text][undefined]
<!-- Another [comment][with] references -->"#;
        let found = lint_standard(single_line);
        assert_eq!(
            found.len(),
            1,
            "Should only flag the undefined reference outside comments"
        );
        assert!(found[0].message.contains("undefined"));

        // A comment spanning multiple lines
        let multi_line = r#"<!--
[ref1]
[ref2][ref3]
-->
[actual][undefined]"#;
        let found = lint_standard(multi_line);
        assert_eq!(
            found.len(),
            1,
            "Should not flag references in multi-line HTML comments"
        );
        assert!(found[0].message.contains("undefined"));

        // Comments interleaved with valid and invalid references
        let mixed = r#"<!-- Comment with [1:] pattern -->
Valid [link][ref]
<!-- More [refs][in][comments] -->
![image][missing]

[ref]: https://example.com"#;
        let found = lint_standard(mixed);
        assert_eq!(found.len(), 1, "Should only flag missing image reference");
        assert!(found[0].message.contains("missing"));
    }

    #[test]
    fn test_frontmatter_ignored() {
        // Issue #24: frontmatter is metadata, not Markdown body

        // YAML frontmatter containing an array literal
        let yaml_doc = r#"---
layout: post
title: "My Jekyll Post"
date: 2023-01-01
categories: blog
tags: ["test", "example"]
author: John Doe
---

# My Blog Post

This is the actual markdown content that should be linted.

[undefined] reference should be flagged.

## Section 1

Some content here."#;
        let found = lint_standard(yaml_doc);

        // The ["test", "example"] array is frontmatter; only [undefined] counts
        assert_eq!(
            found.len(),
            1,
            "Should only flag the undefined reference outside frontmatter"
        );
        assert!(found[0].message.contains("undefined"));

        // TOML frontmatter
        let toml_doc = r#"+++
title = "My Post"
tags = ["example", "test"]
+++

# Content

[missing] reference should be flagged."#;
        let found = lint_standard(toml_doc);
        assert_eq!(
            found.len(),
            1,
            "Should only flag the undefined reference outside TOML frontmatter"
        );
        assert!(found[0].message.contains("missing"));
    }

    #[test]
    fn test_mkdocs_snippet_markers_not_flagged() {
        // Issue #68: MkDocs snippet selection markers are not references

        // Section markers in headings and in HTML comments
        let markers_only = r#"# Document with MkDocs Snippets

Some content here.

# -8<- [start:remote-content]

This is the remote content section.

# -8<- [end:remote-content]

More content here.

<!-- --8<-- [start:another-section] -->
Content in another section
<!-- --8<-- [end:another-section] -->"#;
        let found = lint_mkdocs(markers_only);

        assert_eq!(
            found.len(),
            0,
            "Should not flag MkDocs snippet markers as undefined references"
        );

        // Marker lines are skipped, but ordinary undefined references on
        // other lines are still reported
        let markers_and_refs = r#"# Document

# -8<- [start:section]
Content with [reference] inside snippet section
# -8<- [end:section]

Regular [undefined] reference outside snippet markers."#;
        let found = lint_mkdocs(markers_and_refs);

        assert_eq!(
            found.len(),
            2,
            "Should flag undefined references but skip snippet marker lines"
        );
        // The two content references are reported; start: and end: are not
        assert!(found[0].message.contains("reference"));
        assert!(found[1].message.contains("undefined"));

        // Under the standard flavor the markers get no special treatment
        let standard_doc = r#"# Document

# -8<- [start:section]
# -8<- [end:section]"#;
        let found = lint_standard(standard_doc);

        assert_eq!(
            found.len(),
            2,
            "In standard mode, snippet markers should be flagged as undefined references"
        );
    }

    #[test]
    fn test_github_alerts_not_flagged() {
        // Issue #60: GitHub alert markers are not references

        // One alert of each basic type followed by a real undefined reference
        let alerts_doc = r#"# Document with GitHub Alerts

> [!NOTE]
> This is a note alert.

> [!TIP]
> This is a tip alert.

> [!IMPORTANT]
> This is an important alert.

> [!WARNING]
> This is a warning alert.

> [!CAUTION]
> This is a caution alert.

Regular content with [undefined] reference."#;
        let found = lint_standard(alerts_doc);

        // The alert markers are exempt; only [undefined] is reported
        assert_eq!(
            found.len(),
            1,
            "Should only flag the undefined reference, not GitHub alerts"
        );
        assert!(found[0].message.contains("undefined"));
        assert_eq!(found[0].line, 18); // Line with [undefined]

        // An alert whose body mentions an undefined label that is also used outside
        let alert_with_ref = r#"> [!TIP]
> Here's a useful tip about [something].
> Multiple lines are allowed.

[something] is mentioned but not defined."#;
        let found = lint_standard(alert_with_ref);

        // A single warning is produced for [something]; this may well be the
        // intended behavior, in line with markdownlint's approach
        assert_eq!(found.len(), 1, "Should flag undefined reference");
        assert!(found[0].message.contains("something"));

        // An alert whose body uses a properly defined reference
        let alert_defined = r#"> [!NOTE]
> See [reference] for more details.

[reference]: https://example.com"#;
        let found = lint_standard(alert_defined);

        // [!NOTE] is an alert marker and [reference] has a definition
        assert_eq!(found.len(), 0, "Should not flag GitHub alerts or defined references");
    }
}