rumdl_lib/rules/
md052_reference_links_images.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::{HTML_COMMENT_PATTERN, SHORTCUT_REF_REGEX};
5use crate::utils::skip_context::{is_in_front_matter, is_in_math_context, is_in_table_cell};
6use lazy_static::lazy_static;
7use regex::Regex;
8use std::collections::{HashMap, HashSet};
9
lazy_static! {
    // Matches a reference definition line such as `[ref]: url`.
    // Capture group 1 is the label; it tolerates backslash escapes and one level of
    // nested brackets so labels like [`union[t, none]`]: are captured whole.
    // The trailing `\s*.*` accepts anything after the colon, so an empty
    // definition like `[ref]:` also matches.
    static ref REF_REGEX: Regex = Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap();

    // Matches the start of a bullet list item (`-`, `*` or `+`), optionally followed
    // by a task checkbox such as `[ ]` or `[x]`. Lines matching this are excluded
    // from reference checks.
    static ref LIST_ITEM_REGEX: Regex = Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap();

    // Matches a fenced code block marker: group 1 is the indentation, group 2 is the
    // run of three or more backticks or tildes.
    static ref FENCED_CODE_START: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap();

    // Matches a heading whose entire text is "Output", "Example", "Output Style" or
    // "Output Format"; such headings open an example section that is not checked.
    static ref OUTPUT_EXAMPLE_START: Regex = Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap();

    // Matches GitHub alerts/callouts in blockquotes (e.g., > [!NOTE], > [!TIP], etc.),
    // including additional common alert types beyond GitHub's core set.
    static ref GITHUB_ALERT_REGEX: Regex = Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]").unwrap();

    // Detects URLs that legitimately contain brackets, so those brackets are not
    // mistaken for reference labels. The pattern specifically looks for:
    // - IPv6 addresses: https://[::1] or https://[2001:db8::1]
    // - IPv6 with zone IDs: https://[fe80::1%eth0]
    // - IPv6 mixed notation: https://[::ffff:192.0.2.1]
    // - API paths with array notation: https://api.example.com/users[0]
    // It is deliberately narrow so that a markdown reference link which merely
    // follows a URL is NOT swallowed.
    static ref URL_WITH_BRACKETS: Regex = Regex::new(
        r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])"
    ).unwrap();
}
40
/// Rule MD052: Reference links and images should use a reference that exists
///
/// See [docs/md052.md](../../docs/md052.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when a reference link or image uses a reference that isn't defined.
///
/// The struct has no fields; the Markdown flavor is read from the lint context
/// when the rule is checked.
#[derive(Clone, Default)]
pub struct MD052ReferenceLinkImages {}
48
49impl MD052ReferenceLinkImages {
50    pub fn new() -> Self {
51        Self {}
52    }
53
54    /// Check if a pattern is likely NOT a markdown reference
55    /// Returns true if this pattern should be skipped
56    fn is_likely_not_reference(text: &str) -> bool {
57        // Skip numeric patterns (array indices, ranges)
58        if text.chars().all(|c| c.is_ascii_digit()) {
59            return true;
60        }
61
62        // Skip numeric ranges like [1:3], [0:10], etc.
63        if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
64            return true;
65        }
66
67        // Skip patterns that look like config sections [tool.something], [section.subsection]
68        // But not if they contain other non-alphanumeric chars like hyphens or underscores
69        if text.contains('.') && !text.contains(' ') && !text.contains('-') && !text.contains('_') {
70            // Config sections typically have dots, no spaces, and only alphanumeric + dots
71            return true;
72        }
73
74        // Skip glob/wildcard patterns like [*], [...], [**]
75        if text == "*" || text == "..." || text == "**" {
76            return true;
77        }
78
79        // Skip patterns that look like file paths [dir/file], [src/utils]
80        if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
81            return true;
82        }
83
84        // Skip patterns that are just punctuation or operators
85        if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
86            return true;
87        }
88
89        // Skip very short non-word patterns (likely operators or syntax)
90        if text.len() <= 2 && !text.chars().all(|c| c.is_alphabetic()) {
91            return true;
92        }
93
94        // Skip quoted patterns like ["E501"], ["ALL"], ["E", "F"]
95        if (text.starts_with('"') && text.ends_with('"'))
96            || (text.starts_with('\'') && text.ends_with('\''))
97            || text.contains('"')
98            || text.contains('\'')
99        {
100            return true;
101        }
102
103        // Skip descriptive patterns with colon like [default: the project root]
104        // But allow simple numeric ranges which are handled above
105        if text.contains(':') && text.contains(' ') {
106            return true;
107        }
108
109        // Skip alert/admonition patterns like [!WARN], [!NOTE], etc.
110        if text.starts_with('!') {
111            return true;
112        }
113
114        // Skip single uppercase letters (likely type parameters) like [T], [U], [K], [V]
115        if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
116            return true;
117        }
118
119        // Skip common programming type names and short identifiers
120        // that are likely not markdown references
121        let common_non_refs = [
122            "object", "Object", "any", "Any", "inv", "void", "bool", "int", "float", "str", "char", "i8", "i16", "i32",
123            "i64", "i128", "isize", "u8", "u16", "u32", "u64", "u128", "usize", "f32", "f64",
124        ];
125
126        if common_non_refs.contains(&text) {
127            return true;
128        }
129
130        false
131    }
132
133    /// Check if a position is inside any code span
134    fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
135        code_spans
136            .iter()
137            .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
138    }
139
140    /// Check if a byte position is within an HTML comment
141    fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
142        for m in HTML_COMMENT_PATTERN.find_iter(content) {
143            if m.start() <= byte_pos && byte_pos < m.end() {
144                return true;
145            }
146        }
147        false
148    }
149
150    /// Check if a byte position is within an HTML tag
151    fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
152        // Check HTML tags
153        for html_tag in ctx.html_tags().iter() {
154            if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
155                return true;
156            }
157        }
158        false
159    }
160
161    fn extract_references(&self, content: &str, mkdocs_mode: bool) -> HashSet<String> {
162        use crate::config::MarkdownFlavor;
163        use crate::utils::skip_context::is_mkdocs_snippet_line;
164
165        let mut references = HashSet::new();
166        let mut in_code_block = false;
167        let mut code_fence_marker = String::new();
168
169        for line in content.lines() {
170            // Skip lines that look like MkDocs snippet markers (only in MkDocs mode)
171            if is_mkdocs_snippet_line(
172                line,
173                if mkdocs_mode {
174                    MarkdownFlavor::MkDocs
175                } else {
176                    MarkdownFlavor::Standard
177                },
178            ) {
179                continue;
180            }
181            // Handle code block boundaries
182            if let Some(cap) = FENCED_CODE_START.captures(line) {
183                if let Some(fence) = cap.get(2) {
184                    // Get the fence marker (``` or ~~~) without the indentation
185                    let fence_str = fence.as_str();
186                    if !in_code_block {
187                        in_code_block = true;
188                        code_fence_marker = fence_str.to_string();
189                    } else if line.trim_start().starts_with(&code_fence_marker) {
190                        // Check if this could be a closing fence
191                        let trimmed = line.trim_start();
192                        // A closing fence should be just the fence characters, possibly with trailing whitespace
193                        if trimmed.starts_with(&code_fence_marker) {
194                            let after_fence = &trimmed[code_fence_marker.len()..];
195                            if after_fence.trim().is_empty() {
196                                in_code_block = false;
197                                code_fence_marker.clear();
198                            }
199                        }
200                    }
201                }
202                continue;
203            }
204
205            // Skip lines in code blocks
206            if in_code_block {
207                continue;
208            }
209
210            if let Some(cap) = REF_REGEX.captures(line) {
211                // Store references in lowercase for case-insensitive comparison
212                if let Some(reference) = cap.get(1) {
213                    references.insert(reference.as_str().to_lowercase());
214                }
215            }
216        }
217
218        references
219    }
220
221    fn find_undefined_references(
222        &self,
223        content: &str,
224        references: &HashSet<String>,
225        ctx: &crate::lint_context::LintContext,
226        mkdocs_mode: bool,
227    ) -> Vec<(usize, usize, usize, String)> {
228        let mut undefined = Vec::new();
229        let mut reported_refs = HashMap::new();
230        let mut in_code_block = false;
231        let mut code_fence_marker = String::new();
232        let mut in_example_section = false;
233
234        // Get code spans once for the entire function
235        let code_spans = ctx.code_spans();
236
237        // Use cached data for reference links and images
238        for link in &ctx.links {
239            if !link.is_reference {
240                continue; // Skip inline links
241            }
242
243            // Skip links inside code spans
244            if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
245                continue;
246            }
247
248            // Skip links inside HTML comments
249            if Self::is_in_html_comment(content, link.byte_offset) {
250                continue;
251            }
252
253            // Skip links inside HTML tags
254            if Self::is_in_html_tag(ctx, link.byte_offset) {
255                continue;
256            }
257
258            // Skip links inside math contexts
259            if is_in_math_context(ctx, link.byte_offset) {
260                continue;
261            }
262
263            // Skip links inside table cells
264            if is_in_table_cell(ctx, link.line, link.start_col) {
265                continue;
266            }
267
268            // Skip links inside frontmatter (convert from 1-based to 0-based line numbers)
269            if is_in_front_matter(content, link.line.saturating_sub(1)) {
270                continue;
271            }
272
273            if let Some(ref_id) = &link.reference_id {
274                let reference_lower = ref_id.to_lowercase();
275
276                // Skip MkDocs auto-references if in MkDocs mode
277                // Check both the reference_id and the link text for shorthand references
278                if mkdocs_mode && (is_mkdocs_auto_reference(ref_id) || is_mkdocs_auto_reference(&link.text)) {
279                    continue;
280                }
281
282                // Check if reference is defined
283                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
284                    // Check if the line is in an example section or list item
285                    if let Some(line_info) = ctx.line_info(link.line) {
286                        if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
287                            in_example_section = true;
288                            continue;
289                        }
290
291                        if in_example_section {
292                            continue;
293                        }
294
295                        // Skip list items
296                        if LIST_ITEM_REGEX.is_match(&line_info.content) {
297                            continue;
298                        }
299
300                        // Skip lines that are HTML content
301                        let trimmed = line_info.content.trim_start();
302                        if trimmed.starts_with('<') {
303                            continue;
304                        }
305                    }
306
307                    let match_len = link.byte_end - link.byte_offset;
308                    undefined.push((link.line - 1, link.start_col, match_len, ref_id.clone()));
309                    reported_refs.insert(reference_lower, true);
310                }
311            }
312        }
313
314        // Use cached data for reference images
315        for image in &ctx.images {
316            if !image.is_reference {
317                continue; // Skip inline images
318            }
319
320            // Skip images inside code spans
321            if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
322                continue;
323            }
324
325            // Skip images inside HTML comments
326            if Self::is_in_html_comment(content, image.byte_offset) {
327                continue;
328            }
329
330            // Skip images inside HTML tags
331            if Self::is_in_html_tag(ctx, image.byte_offset) {
332                continue;
333            }
334
335            // Skip images inside math contexts
336            if is_in_math_context(ctx, image.byte_offset) {
337                continue;
338            }
339
340            // Skip images inside table cells
341            if is_in_table_cell(ctx, image.line, image.start_col) {
342                continue;
343            }
344
345            // Skip images inside frontmatter (convert from 1-based to 0-based line numbers)
346            if is_in_front_matter(content, image.line.saturating_sub(1)) {
347                continue;
348            }
349
350            if let Some(ref_id) = &image.reference_id {
351                let reference_lower = ref_id.to_lowercase();
352
353                // Skip MkDocs auto-references if in MkDocs mode
354                // Check both the reference_id and the alt text for shorthand references
355                if mkdocs_mode && (is_mkdocs_auto_reference(ref_id) || is_mkdocs_auto_reference(&image.alt_text)) {
356                    continue;
357                }
358
359                // Check if reference is defined
360                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
361                    // Check if the line is in an example section or list item
362                    if let Some(line_info) = ctx.line_info(image.line) {
363                        if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
364                            in_example_section = true;
365                            continue;
366                        }
367
368                        if in_example_section {
369                            continue;
370                        }
371
372                        // Skip list items
373                        if LIST_ITEM_REGEX.is_match(&line_info.content) {
374                            continue;
375                        }
376
377                        // Skip lines that are HTML content
378                        let trimmed = line_info.content.trim_start();
379                        if trimmed.starts_with('<') {
380                            continue;
381                        }
382                    }
383
384                    let match_len = image.byte_end - image.byte_offset;
385                    undefined.push((image.line - 1, image.start_col, match_len, ref_id.clone()));
386                    reported_refs.insert(reference_lower, true);
387                }
388            }
389        }
390
391        // Build a set of byte ranges that are already covered by parsed links/images
392        let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
393
394        // Add ranges from parsed links
395        for link in &ctx.links {
396            covered_ranges.push((link.byte_offset, link.byte_end));
397        }
398
399        // Add ranges from parsed images
400        for image in &ctx.images {
401            covered_ranges.push((image.byte_offset, image.byte_end));
402        }
403
404        // Sort ranges by start position
405        covered_ranges.sort_by_key(|&(start, _)| start);
406
407        // Handle shortcut references [text] which aren't captured in ctx.links
408        // Need to use regex for these
409        let lines: Vec<&str> = content.lines().collect();
410        in_example_section = false; // Reset for line-by-line processing
411
412        for (line_num, line) in lines.iter().enumerate() {
413            // Skip lines in frontmatter (line_num is already 0-based)
414            if is_in_front_matter(content, line_num) {
415                continue;
416            }
417
418            // Handle code blocks
419            if let Some(cap) = FENCED_CODE_START.captures(line) {
420                if let Some(fence) = cap.get(2) {
421                    // Get the fence marker (``` or ~~~) without the indentation
422                    let fence_str = fence.as_str();
423                    if !in_code_block {
424                        in_code_block = true;
425                        code_fence_marker = fence_str.to_string();
426                    } else if line.trim_start().starts_with(&code_fence_marker) {
427                        // Check if this could be a closing fence
428                        let trimmed = line.trim_start();
429                        // A closing fence should be just the fence characters, possibly with trailing whitespace
430                        if trimmed.starts_with(&code_fence_marker) {
431                            let after_fence = &trimmed[code_fence_marker.len()..];
432                            if after_fence.trim().is_empty() {
433                                in_code_block = false;
434                                code_fence_marker.clear();
435                            }
436                        }
437                    }
438                }
439                continue;
440            }
441
442            if in_code_block {
443                continue;
444            }
445
446            // Check for example sections
447            if OUTPUT_EXAMPLE_START.is_match(line) {
448                in_example_section = true;
449                continue;
450            }
451
452            if in_example_section {
453                // Check if we're exiting the example section (another heading)
454                if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
455                    in_example_section = false;
456                } else {
457                    continue;
458                }
459            }
460
461            // Skip list items
462            if LIST_ITEM_REGEX.is_match(line) {
463                continue;
464            }
465
466            // Skip lines that are HTML content
467            let trimmed_line = line.trim_start();
468            if trimmed_line.starts_with('<') {
469                continue;
470            }
471
472            // Skip GitHub alerts/callouts (e.g., > [!TIP])
473            if GITHUB_ALERT_REGEX.is_match(line) {
474                continue;
475            }
476
477            // Collect positions of brackets that are part of URLs (IPv6, etc.)
478            // so we can exclude them from reference checking
479            let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
480            for mat in URL_WITH_BRACKETS.find_iter(line) {
481                // Find all bracket pairs within this URL match
482                let url_str = mat.as_str();
483                let url_start = mat.start();
484
485                // Find brackets within the URL (e.g., in https://[::1]:8080)
486                let mut idx = 0;
487                while idx < url_str.len() {
488                    if let Some(bracket_start) = url_str[idx..].find('[') {
489                        let bracket_start_abs = url_start + idx + bracket_start;
490                        if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
491                            let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
492                            url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
493                            idx += bracket_start + bracket_end + 2;
494                        } else {
495                            break;
496                        }
497                    } else {
498                        break;
499                    }
500                }
501            }
502
503            // Check shortcut references: [reference]
504            if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
505                for cap in captures {
506                    if let Some(ref_match) = cap.get(1) {
507                        // Check if this bracket is part of a URL (IPv6, etc.)
508                        let bracket_start = cap.get(0).unwrap().start();
509                        let bracket_end = cap.get(0).unwrap().end();
510
511                        // Skip if this bracket pair is within any URL bracket range
512                        let is_in_url = url_bracket_ranges
513                            .iter()
514                            .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
515
516                        if is_in_url {
517                            continue;
518                        }
519
520                        let reference = ref_match.as_str();
521                        let reference_lower = reference.to_lowercase();
522
523                        // Skip patterns that are likely not markdown references
524                        if Self::is_likely_not_reference(reference) {
525                            continue;
526                        }
527
528                        // Skip GitHub alerts (including extended types)
529                        if let Some(alert_type) = reference.strip_prefix('!')
530                            && matches!(
531                                alert_type,
532                                "NOTE"
533                                    | "TIP"
534                                    | "WARNING"
535                                    | "IMPORTANT"
536                                    | "CAUTION"
537                                    | "INFO"
538                                    | "SUCCESS"
539                                    | "FAILURE"
540                                    | "DANGER"
541                                    | "BUG"
542                                    | "EXAMPLE"
543                                    | "QUOTE"
544                            )
545                        {
546                            continue;
547                        }
548
549                        // Skip MkDocs snippet section markers like [start:section] or [end:section]
550                        // when they appear as part of snippet syntax (e.g., # -8<- [start:section])
551                        if mkdocs_mode
552                            && (reference.starts_with("start:") || reference.starts_with("end:"))
553                            && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
554                                || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
555                        {
556                            continue;
557                        }
558
559                        // Skip MkDocs auto-references if in MkDocs mode
560                        if mkdocs_mode && is_mkdocs_auto_reference(reference) {
561                            continue;
562                        }
563
564                        if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
565                            let full_match = cap.get(0).unwrap();
566                            let col = full_match.start();
567
568                            // Skip if inside code span
569                            let code_spans = ctx.code_spans();
570                            if Self::is_in_code_span(line_num + 1, col, &code_spans) {
571                                continue;
572                            }
573
574                            // Check if this position is within a covered range
575                            let line_start_byte = ctx.line_offsets[line_num];
576                            let byte_pos = line_start_byte + col;
577
578                            // Skip if inside HTML comment
579                            if Self::is_in_html_comment(content, byte_pos) {
580                                continue;
581                            }
582
583                            // Skip if inside HTML tag
584                            if Self::is_in_html_tag(ctx, byte_pos) {
585                                continue;
586                            }
587
588                            // Skip if inside math context
589                            if is_in_math_context(ctx, byte_pos) {
590                                continue;
591                            }
592
593                            // Skip if inside table cell
594                            if is_in_table_cell(ctx, line_num + 1, col) {
595                                continue;
596                            }
597
598                            let byte_end = byte_pos + (full_match.end() - full_match.start());
599
600                            // Check if this shortcut ref overlaps with any parsed link/image
601                            let mut is_covered = false;
602                            for &(range_start, range_end) in &covered_ranges {
603                                if range_start <= byte_pos && byte_end <= range_end {
604                                    // This shortcut ref is completely within a parsed link/image
605                                    is_covered = true;
606                                    break;
607                                }
608                                if range_start > byte_end {
609                                    // No need to check further (ranges are sorted)
610                                    break;
611                                }
612                            }
613
614                            if is_covered {
615                                continue;
616                            }
617
618                            // More sophisticated checks to avoid false positives
619
620                            // Check 1: If preceded by ], this might be part of [text][ref]
621                            // Look for the pattern ...][ref] and check if there's a matching [ before
622                            let line_chars: Vec<char> = line.chars().collect();
623                            if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
624                                // Look backwards for a [ that would make this [text][ref]
625                                let mut bracket_count = 1; // We already saw one ]
626                                let mut check_pos = col.saturating_sub(2);
627                                let mut found_opening = false;
628
629                                while check_pos > 0 && check_pos < line_chars.len() {
630                                    match line_chars.get(check_pos) {
631                                        Some(&']') => bracket_count += 1,
632                                        Some(&'[') => {
633                                            bracket_count -= 1;
634                                            if bracket_count == 0 {
635                                                // Check if this [ is escaped
636                                                if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
637                                                    found_opening = true;
638                                                }
639                                                break;
640                                            }
641                                        }
642                                        _ => {}
643                                    }
644                                    if check_pos == 0 {
645                                        break;
646                                    }
647                                    check_pos = check_pos.saturating_sub(1);
648                                }
649
650                                if found_opening {
651                                    // This is part of [text][ref], skip it
652                                    continue;
653                                }
654                            }
655
656                            // Check 2: If there's an escaped bracket pattern before this
657                            // e.g., \[text\][ref], the [ref] shouldn't be treated as a shortcut
658                            let before_text = &line[..col];
659                            if before_text.contains("\\]") {
660                                // Check if there's a \[ before the \]
661                                if let Some(escaped_close_pos) = before_text.rfind("\\]") {
662                                    let search_text = &before_text[..escaped_close_pos];
663                                    if search_text.contains("\\[") {
664                                        // This looks like \[...\][ref], skip it
665                                        continue;
666                                    }
667                                }
668                            }
669
670                            let match_len = full_match.end() - full_match.start();
671                            undefined.push((line_num, col, match_len, reference.to_string()));
672                            reported_refs.insert(reference_lower, true);
673                        }
674                    }
675                }
676            }
677        }
678
679        undefined
680    }
681}
682
683impl Rule for MD052ReferenceLinkImages {
684    fn name(&self) -> &'static str {
685        "MD052"
686    }
687
688    fn description(&self) -> &'static str {
689        "Reference links and images should use a reference that exists"
690    }
691
692    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
693        let content = ctx.content;
694        let mut warnings = Vec::new();
695
696        // Check if we're in MkDocs mode from the context
697        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
698
699        let references = self.extract_references(content, mkdocs_mode);
700
701        // Use optimized detection method with cached link/image data
702        for (line_num, col, match_len, reference) in
703            self.find_undefined_references(content, &references, ctx, mkdocs_mode)
704        {
705            let lines: Vec<&str> = content.lines().collect();
706            let line_content = lines.get(line_num).unwrap_or(&"");
707
708            // Calculate precise character range for the entire undefined reference
709            let (start_line, start_col, end_line, end_col) =
710                calculate_match_range(line_num + 1, line_content, col, match_len);
711
712            warnings.push(LintWarning {
713                rule_name: Some(self.name()),
714                line: start_line,
715                column: start_col,
716                end_line,
717                end_column: end_col,
718                message: format!("Reference '{reference}' not found"),
719                severity: Severity::Warning,
720                fix: None,
721            });
722        }
723
724        Ok(warnings)
725    }
726
727    /// Check if this rule should be skipped for performance
728    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
729        // Skip if content is empty or has no reference-style links/images
730        ctx.content.is_empty() || (!ctx.content.contains("](") && !ctx.content.contains("]["))
731    }
732
733    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
734        let content = ctx.content;
735        // No automatic fix available for undefined references
736        Ok(content.to_string())
737    }
738
739    fn as_any(&self) -> &dyn std::any::Any {
740        self
741    }
742
743    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
744    where
745        Self: Sized,
746    {
747        // Flavor is now accessed from LintContext during check
748        Box::new(MD052ReferenceLinkImages::new())
749    }
750}
751
752#[cfg(test)]
753mod tests {
754    use super::*;
755    use crate::lint_context::LintContext;
756
757    #[test]
758    fn test_valid_reference_link() {
759        let rule = MD052ReferenceLinkImages::new();
760        let content = "[text][ref]\n\n[ref]: https://example.com";
761        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
762        let result = rule.check(&ctx).unwrap();
763
764        assert_eq!(result.len(), 0);
765    }
766
767    #[test]
768    fn test_undefined_reference_link() {
769        let rule = MD052ReferenceLinkImages::new();
770        let content = "[text][undefined]";
771        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
772        let result = rule.check(&ctx).unwrap();
773
774        assert_eq!(result.len(), 1);
775        assert!(result[0].message.contains("Reference 'undefined' not found"));
776    }
777
778    #[test]
779    fn test_valid_reference_image() {
780        let rule = MD052ReferenceLinkImages::new();
781        let content = "![alt][img]\n\n[img]: image.jpg";
782        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
783        let result = rule.check(&ctx).unwrap();
784
785        assert_eq!(result.len(), 0);
786    }
787
788    #[test]
789    fn test_undefined_reference_image() {
790        let rule = MD052ReferenceLinkImages::new();
791        let content = "![alt][missing]";
792        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
793        let result = rule.check(&ctx).unwrap();
794
795        assert_eq!(result.len(), 1);
796        assert!(result[0].message.contains("Reference 'missing' not found"));
797    }
798
799    #[test]
800    fn test_case_insensitive_references() {
801        let rule = MD052ReferenceLinkImages::new();
802        let content = "[Text][REF]\n\n[ref]: https://example.com";
803        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
804        let result = rule.check(&ctx).unwrap();
805
806        assert_eq!(result.len(), 0);
807    }
808
809    #[test]
810    fn test_shortcut_reference_valid() {
811        let rule = MD052ReferenceLinkImages::new();
812        let content = "[ref]\n\n[ref]: https://example.com";
813        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
814        let result = rule.check(&ctx).unwrap();
815
816        assert_eq!(result.len(), 0);
817    }
818
819    #[test]
820    fn test_shortcut_reference_undefined() {
821        let rule = MD052ReferenceLinkImages::new();
822        let content = "[undefined]";
823        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
824        let result = rule.check(&ctx).unwrap();
825
826        assert_eq!(result.len(), 1);
827        assert!(result[0].message.contains("Reference 'undefined' not found"));
828    }
829
830    #[test]
831    fn test_inline_links_ignored() {
832        let rule = MD052ReferenceLinkImages::new();
833        let content = "[text](https://example.com)";
834        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
835        let result = rule.check(&ctx).unwrap();
836
837        assert_eq!(result.len(), 0);
838    }
839
840    #[test]
841    fn test_inline_images_ignored() {
842        let rule = MD052ReferenceLinkImages::new();
843        let content = "![alt](image.jpg)";
844        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
845        let result = rule.check(&ctx).unwrap();
846
847        assert_eq!(result.len(), 0);
848    }
849
850    #[test]
851    fn test_references_in_code_blocks_ignored() {
852        let rule = MD052ReferenceLinkImages::new();
853        let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
854        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
855        let result = rule.check(&ctx).unwrap();
856
857        assert_eq!(result.len(), 0);
858    }
859
860    #[test]
861    fn test_references_in_inline_code_ignored() {
862        let rule = MD052ReferenceLinkImages::new();
863        let content = "`[undefined]`";
864        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
865        let result = rule.check(&ctx).unwrap();
866
867        // References inside inline code spans should be ignored
868        assert_eq!(result.len(), 0);
869    }
870
871    #[test]
872    fn test_comprehensive_inline_code_detection() {
873        let rule = MD052ReferenceLinkImages::new();
874        let content = r#"# Test
875
876This `[inside]` should be ignored.
877This [outside] should be flagged.
878Reference links `[text][ref]` in code are ignored.
879Regular reference [text][missing] should be flagged.
880Images `![alt][img]` in code are ignored.
881Regular image ![alt][badimg] should be flagged.
882
883Multiple `[one]` and `[two]` in code ignored, but [three] is not.
884
885```
886[code block content] should be ignored
887```
888
889`Multiple [refs] in [same] code span` ignored."#;
890
891        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
892        let result = rule.check(&ctx).unwrap();
893
894        // Should only flag: outside, missing, badimg, three (4 total)
895        assert_eq!(result.len(), 4);
896
897        let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
898        assert!(messages.iter().any(|m| m.contains("outside")));
899        assert!(messages.iter().any(|m| m.contains("missing")));
900        assert!(messages.iter().any(|m| m.contains("badimg")));
901        assert!(messages.iter().any(|m| m.contains("three")));
902
903        // Should NOT flag any references inside code spans
904        assert!(!messages.iter().any(|m| m.contains("inside")));
905        assert!(!messages.iter().any(|m| m.contains("one")));
906        assert!(!messages.iter().any(|m| m.contains("two")));
907        assert!(!messages.iter().any(|m| m.contains("refs")));
908        assert!(!messages.iter().any(|m| m.contains("same")));
909    }
910
911    #[test]
912    fn test_multiple_undefined_references() {
913        let rule = MD052ReferenceLinkImages::new();
914        let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
915        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
916        let result = rule.check(&ctx).unwrap();
917
918        assert_eq!(result.len(), 3);
919        assert!(result[0].message.contains("ref1"));
920        assert!(result[1].message.contains("ref2"));
921        assert!(result[2].message.contains("ref3"));
922    }
923
924    #[test]
925    fn test_mixed_valid_and_undefined() {
926        let rule = MD052ReferenceLinkImages::new();
927        let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
928        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
929        let result = rule.check(&ctx).unwrap();
930
931        assert_eq!(result.len(), 1);
932        assert!(result[0].message.contains("missing"));
933    }
934
935    #[test]
936    fn test_empty_reference() {
937        let rule = MD052ReferenceLinkImages::new();
938        let content = "[text][]\n\n[ref]: https://example.com";
939        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
940        let result = rule.check(&ctx).unwrap();
941
942        // Empty reference should use the link text as reference
943        assert_eq!(result.len(), 1);
944    }
945
946    #[test]
947    fn test_escaped_brackets_ignored() {
948        let rule = MD052ReferenceLinkImages::new();
949        let content = "\\[not a link\\]";
950        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
951        let result = rule.check(&ctx).unwrap();
952
953        assert_eq!(result.len(), 0);
954    }
955
956    #[test]
957    fn test_list_items_ignored() {
958        let rule = MD052ReferenceLinkImages::new();
959        let content = "- [undefined]\n* [another]\n+ [third]";
960        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
961        let result = rule.check(&ctx).unwrap();
962
963        // List items that look like shortcut references should be ignored
964        assert_eq!(result.len(), 0);
965    }
966
967    #[test]
968    fn test_output_example_section_ignored() {
969        let rule = MD052ReferenceLinkImages::new();
970        let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
971        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
972        let result = rule.check(&ctx).unwrap();
973
974        // Only the reference outside the Output section should be flagged
975        assert_eq!(result.len(), 1);
976        assert!(result[0].message.contains("missing"));
977    }
978
979    #[test]
980    fn test_reference_definitions_in_code_blocks_ignored() {
981        let rule = MD052ReferenceLinkImages::new();
982        let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
983        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
984        let result = rule.check(&ctx).unwrap();
985
986        // Reference defined in code block should not count
987        assert_eq!(result.len(), 1);
988        assert!(result[0].message.contains("ref"));
989    }
990
991    #[test]
992    fn test_multiple_references_to_same_undefined() {
993        let rule = MD052ReferenceLinkImages::new();
994        let content = "[first][missing] [second][missing] [third][missing]";
995        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
996        let result = rule.check(&ctx).unwrap();
997
998        // Should only report once per unique reference
999        assert_eq!(result.len(), 1);
1000        assert!(result[0].message.contains("missing"));
1001    }
1002
1003    #[test]
1004    fn test_reference_with_special_characters() {
1005        let rule = MD052ReferenceLinkImages::new();
1006        let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
1007        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1008        let result = rule.check(&ctx).unwrap();
1009
1010        assert_eq!(result.len(), 0);
1011    }
1012
1013    #[test]
1014    fn test_issue_51_html_attribute_not_reference() {
1015        // Test for issue #51 - HTML attributes with square brackets shouldn't be treated as references
1016        let rule = MD052ReferenceLinkImages::new();
1017        let content = r#"# Example
1018
1019## Test
1020
1021Want to fill out this form?
1022
1023<form method="post">
1024    <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
1025</form>"#;
1026        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1027        let result = rule.check(&ctx).unwrap();
1028
1029        assert_eq!(
1030            result.len(),
1031            0,
1032            "HTML attributes with square brackets should not be flagged as undefined references"
1033        );
1034    }
1035
1036    #[test]
1037    fn test_extract_references() {
1038        let rule = MD052ReferenceLinkImages::new();
1039        let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
1040        let refs = rule.extract_references(content, false);
1041
1042        assert_eq!(refs.len(), 3);
1043        assert!(refs.contains("ref1"));
1044        assert!(refs.contains("ref2"));
1045        assert!(refs.contains("ref3"));
1046    }
1047
1048    #[test]
1049    fn test_inline_code_not_flagged() {
1050        let rule = MD052ReferenceLinkImages::new();
1051
1052        // Test that arrays in inline code are not flagged as references
1053        let content = r#"# Test
1054
1055Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.
1056
1057Also, `[todo]` is not a reference link.
1058
1059But this [reference] should be flagged.
1060
1061And this `[inline code]` should not be flagged.
1062"#;
1063
1064        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1065        let warnings = rule.check(&ctx).unwrap();
1066
1067        // Should only flag [reference], not the ones in backticks
1068        assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
1069        assert!(warnings[0].message.contains("'reference'"));
1070    }
1071
1072    #[test]
1073    fn test_code_block_references_ignored() {
1074        let rule = MD052ReferenceLinkImages::new();
1075
1076        let content = r#"# Test
1077
1078```markdown
1079[undefined] reference in code block
1080![undefined] image in code block
1081```
1082
1083[real-undefined] reference outside
1084"#;
1085
1086        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1087        let warnings = rule.check(&ctx).unwrap();
1088
1089        // Should only flag [real-undefined], not the ones in code block
1090        assert_eq!(warnings.len(), 1);
1091        assert!(warnings[0].message.contains("'real-undefined'"));
1092    }
1093
1094    #[test]
1095    fn test_html_comments_ignored() {
1096        // Test for issue #20 - MD052 should not flag content inside HTML comments
1097        let rule = MD052ReferenceLinkImages::new();
1098
1099        // Test the exact case from issue #20
1100        let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
1101<!--- set_env EDITOR 'python3 fake_editor.py' -->
1102
1103```bash
1104$ python3 vote.py
11053 votes for: 2
11062 votes for: 3, 4
1107```"#;
1108        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1109        let result = rule.check(&ctx).unwrap();
1110        assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");
1111
1112        // Test various reference patterns inside HTML comments
1113        let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
1114Normal [text][undefined]
1115<!-- Another [comment][with] references -->"#;
1116        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1117        let result = rule.check(&ctx).unwrap();
1118        assert_eq!(
1119            result.len(),
1120            1,
1121            "Should only flag the undefined reference outside comments"
1122        );
1123        assert!(result[0].message.contains("undefined"));
1124
1125        // Test multi-line HTML comments
1126        let content = r#"<!--
1127[ref1]
1128[ref2][ref3]
1129-->
1130[actual][undefined]"#;
1131        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1132        let result = rule.check(&ctx).unwrap();
1133        assert_eq!(
1134            result.len(),
1135            1,
1136            "Should not flag references in multi-line HTML comments"
1137        );
1138        assert!(result[0].message.contains("undefined"));
1139
1140        // Test mixed scenarios
1141        let content = r#"<!-- Comment with [1:] pattern -->
1142Valid [link][ref]
1143<!-- More [refs][in][comments] -->
1144![image][missing]
1145
1146[ref]: https://example.com"#;
1147        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1148        let result = rule.check(&ctx).unwrap();
1149        assert_eq!(result.len(), 1, "Should only flag missing image reference");
1150        assert!(result[0].message.contains("missing"));
1151    }
1152
1153    #[test]
1154    fn test_frontmatter_ignored() {
1155        // Test for issue #24 - MD052 should not flag content inside frontmatter
1156        let rule = MD052ReferenceLinkImages::new();
1157
1158        // Test YAML frontmatter with arrays and references
1159        let content = r#"---
1160layout: post
1161title: "My Jekyll Post"
1162date: 2023-01-01
1163categories: blog
1164tags: ["test", "example"]
1165author: John Doe
1166---
1167
1168# My Blog Post
1169
1170This is the actual markdown content that should be linted.
1171
1172[undefined] reference should be flagged.
1173
1174## Section 1
1175
1176Some content here."#;
1177        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1178        let result = rule.check(&ctx).unwrap();
1179
1180        // Should only flag [undefined] in the content, not the ["test", "example"] array in frontmatter
1181        assert_eq!(
1182            result.len(),
1183            1,
1184            "Should only flag the undefined reference outside frontmatter"
1185        );
1186        assert!(result[0].message.contains("undefined"));
1187
1188        // Test TOML frontmatter
1189        let content = r#"+++
1190title = "My Post"
1191tags = ["example", "test"]
1192+++
1193
1194# Content
1195
1196[missing] reference should be flagged."#;
1197        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1198        let result = rule.check(&ctx).unwrap();
1199        assert_eq!(
1200            result.len(),
1201            1,
1202            "Should only flag the undefined reference outside TOML frontmatter"
1203        );
1204        assert!(result[0].message.contains("missing"));
1205    }
1206
1207    #[test]
1208    fn test_mkdocs_snippet_markers_not_flagged() {
1209        // Test for issue #68 - MkDocs snippet selection markers should not be flagged as undefined references
1210        let rule = MD052ReferenceLinkImages::new();
1211
1212        // Test snippet section markers
1213        let content = r#"# Document with MkDocs Snippets
1214
1215Some content here.
1216
1217# -8<- [start:remote-content]
1218
1219This is the remote content section.
1220
1221# -8<- [end:remote-content]
1222
1223More content here.
1224
1225<!-- --8<-- [start:another-section] -->
1226Content in another section
1227<!-- --8<-- [end:another-section] -->"#;
1228        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs);
1229        let result = rule.check(&ctx).unwrap();
1230
1231        // Should not flag any snippet markers as undefined references
1232        assert_eq!(
1233            result.len(),
1234            0,
1235            "Should not flag MkDocs snippet markers as undefined references"
1236        );
1237
1238        // Test that the snippet marker lines are properly skipped
1239        // but regular undefined references on other lines are still caught
1240        let content = r#"# Document
1241
1242# -8<- [start:section]
1243Content with [reference] inside snippet section
1244# -8<- [end:section]
1245
1246Regular [undefined] reference outside snippet markers."#;
1247        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs);
1248        let result = rule.check(&ctx).unwrap();
1249
1250        assert_eq!(
1251            result.len(),
1252            2,
1253            "Should flag undefined references but skip snippet marker lines"
1254        );
1255        // The references inside the content should be flagged, but not start: and end:
1256        assert!(result[0].message.contains("reference"));
1257        assert!(result[1].message.contains("undefined"));
1258
1259        // Test in standard mode - should flag the markers as undefined
1260        let content = r#"# Document
1261
1262# -8<- [start:section]
1263# -8<- [end:section]"#;
1264        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1265        let result = rule.check(&ctx).unwrap();
1266
1267        assert_eq!(
1268            result.len(),
1269            2,
1270            "In standard mode, snippet markers should be flagged as undefined references"
1271        );
1272    }
1273
1274    #[test]
1275    fn test_github_alerts_not_flagged() {
1276        // Test for issue #60 - GitHub alerts should not be flagged as undefined references
1277        let rule = MD052ReferenceLinkImages::new();
1278
1279        // Test various GitHub alert types
1280        let content = r#"# Document with GitHub Alerts
1281
1282> [!NOTE]
1283> This is a note alert.
1284
1285> [!TIP]
1286> This is a tip alert.
1287
1288> [!IMPORTANT]
1289> This is an important alert.
1290
1291> [!WARNING]
1292> This is a warning alert.
1293
1294> [!CAUTION]
1295> This is a caution alert.
1296
1297Regular content with [undefined] reference."#;
1298        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1299        let result = rule.check(&ctx).unwrap();
1300
1301        // Should only flag the undefined reference, not the GitHub alerts
1302        assert_eq!(
1303            result.len(),
1304            1,
1305            "Should only flag the undefined reference, not GitHub alerts"
1306        );
1307        assert!(result[0].message.contains("undefined"));
1308        assert_eq!(result[0].line, 18); // Line with [undefined]
1309
1310        // Test GitHub alerts with additional content
1311        let content = r#"> [!TIP]
1312> Here's a useful tip about [something].
1313> Multiple lines are allowed.
1314
1315[something] is mentioned but not defined."#;
1316        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1317        let result = rule.check(&ctx).unwrap();
1318
1319        // Should flag only the [something] outside blockquotes
1320        // The test shows we're only catching one, which might be correct behavior
1321        // matching markdownlint's approach
1322        assert_eq!(result.len(), 1, "Should flag undefined reference");
1323        assert!(result[0].message.contains("something"));
1324
1325        // Test GitHub alerts with proper references
1326        let content = r#"> [!NOTE]
1327> See [reference] for more details.
1328
1329[reference]: https://example.com"#;
1330        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1331        let result = rule.check(&ctx).unwrap();
1332
1333        // Should not flag anything - [!NOTE] is GitHub alert and [reference] is defined
1334        assert_eq!(result.len(), 0, "Should not flag GitHub alerts or defined references");
1335    }
1336}