rumdl_lib/rules/
md052_reference_links_images.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::{HTML_COMMENT_PATTERN, SHORTCUT_REF_REGEX};
5use crate::utils::skip_context::{is_in_front_matter, is_in_math_context, is_in_table_cell};
6use lazy_static::lazy_static;
7use regex::Regex;
8use std::collections::{HashMap, HashSet};
9
lazy_static! {
    // Pattern to match reference definitions `[ref]: url` (standard regex is fine).
    // Note: the trailing `\s*.*` accepts anything after the colon, including nothing,
    // so empty definitions like `[ref]:` are still recognized.
    // Capture group 1 is the label between the brackets.
    static ref REF_REGEX: Regex = Regex::new(r"^\s*\[([^\]]+)\]:\s*.*").unwrap();

    // Pattern for list items to exclude from reference checks (standard regex is fine).
    // Only bullet markers (`-`, `*`, `+`) are matched; the optional group also consumes a
    // task-list checkbox such as `[ ]` or `[x]`.
    static ref LIST_ITEM_REGEX: Regex = Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap();

    // Pattern for code block fences (standard regex is fine).
    // Anchored at the start of the line with no leading whitespace, so indented fences
    // are not matched by this pattern.
    static ref FENCED_CODE_START: Regex = Regex::new(r"^(`{3,}|~{3,})").unwrap();

    // Pattern for output example sections (standard regex is fine).
    // Matches a heading whose entire text is one of the listed titles.
    static ref OUTPUT_EXAMPLE_START: Regex = Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap();

    // Pattern for GitHub alerts/callouts in blockquotes (e.g., > [!NOTE], > [!TIP], etc.)
    // Extended to include additional common alert types
    static ref GITHUB_ALERT_REGEX: Regex = Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]").unwrap();

    // Pattern to detect URLs that may contain brackets (IPv6, API endpoints, etc.)
    // This pattern specifically looks for:
    // - IPv6 addresses: https://[::1] or https://[2001:db8::1]
    // - IPv6 with zone IDs: https://[fe80::1%eth0]
    // - IPv6 mixed notation: https://[::ffff:192.0.2.1]
    // - API paths with array notation: https://api.example.com/users[0]
    // But NOT markdown reference links that happen to follow URLs
    static ref URL_WITH_BRACKETS: Regex = Regex::new(
        r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])"
    ).unwrap();
}
39
/// Rule MD052: Reference links and images should use a reference that exists
///
/// See [docs/md052.md](../../docs/md052.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when a reference link or image uses a reference that isn't defined.
///
/// The struct carries no state; the Markdown flavor is read from the lint context during
/// `check`.
#[derive(Clone, Default)]
pub struct MD052ReferenceLinkImages {}
47
48impl MD052ReferenceLinkImages {
49    pub fn new() -> Self {
50        Self {}
51    }
52
53    /// Check if a position is inside any code span
54    fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
55        code_spans
56            .iter()
57            .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
58    }
59
60    /// Check if a byte position is within an HTML comment
61    fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
62        for m in HTML_COMMENT_PATTERN.find_iter(content) {
63            if m.start() <= byte_pos && byte_pos < m.end() {
64                return true;
65            }
66        }
67        false
68    }
69
70    /// Check if a byte position is within an HTML tag
71    fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
72        // Check HTML tags
73        for html_tag in ctx.html_tags().iter() {
74            if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
75                return true;
76            }
77        }
78        false
79    }
80
81    fn extract_references(&self, content: &str, mkdocs_mode: bool) -> HashSet<String> {
82        use crate::config::MarkdownFlavor;
83        use crate::utils::skip_context::is_mkdocs_snippet_line;
84
85        let mut references = HashSet::new();
86        let mut in_code_block = false;
87        let mut code_fence_marker = String::new();
88
89        for line in content.lines() {
90            // Skip lines that look like MkDocs snippet markers (only in MkDocs mode)
91            if is_mkdocs_snippet_line(
92                line,
93                if mkdocs_mode {
94                    MarkdownFlavor::MkDocs
95                } else {
96                    MarkdownFlavor::Standard
97                },
98            ) {
99                continue;
100            }
101            // Handle code block boundaries
102            if let Some(cap) = FENCED_CODE_START.captures(line) {
103                if let Some(marker) = cap.get(0) {
104                    let marker_str = marker.as_str().to_string();
105                    if !in_code_block {
106                        in_code_block = true;
107                        code_fence_marker = marker_str;
108                    } else if line.trim().starts_with(&code_fence_marker) {
109                        in_code_block = false;
110                        code_fence_marker.clear();
111                    }
112                }
113                continue;
114            }
115
116            // Skip lines in code blocks
117            if in_code_block {
118                continue;
119            }
120
121            if let Some(cap) = REF_REGEX.captures(line) {
122                // Store references in lowercase for case-insensitive comparison
123                if let Some(reference) = cap.get(1) {
124                    references.insert(reference.as_str().to_lowercase());
125                }
126            }
127        }
128
129        references
130    }
131
132    fn find_undefined_references(
133        &self,
134        content: &str,
135        references: &HashSet<String>,
136        ctx: &crate::lint_context::LintContext,
137        mkdocs_mode: bool,
138    ) -> Vec<(usize, usize, usize, String)> {
139        let mut undefined = Vec::new();
140        let mut reported_refs = HashMap::new();
141        let mut in_code_block = false;
142        let mut code_fence_marker = String::new();
143        let mut in_example_section = false;
144
145        // Get code spans once for the entire function
146        let code_spans = ctx.code_spans();
147
148        // Use cached data for reference links and images
149        for link in &ctx.links {
150            if !link.is_reference {
151                continue; // Skip inline links
152            }
153
154            // Skip links inside code spans
155            if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
156                continue;
157            }
158
159            // Skip links inside HTML comments
160            if Self::is_in_html_comment(content, link.byte_offset) {
161                continue;
162            }
163
164            // Skip links inside math contexts
165            if is_in_math_context(ctx, link.byte_offset) {
166                continue;
167            }
168
169            // Skip links inside table cells
170            if is_in_table_cell(ctx, link.line, link.start_col) {
171                continue;
172            }
173
174            // Skip links inside frontmatter (convert from 1-based to 0-based line numbers)
175            if is_in_front_matter(content, link.line.saturating_sub(1)) {
176                continue;
177            }
178
179            if let Some(ref_id) = &link.reference_id {
180                let reference_lower = ref_id.to_lowercase();
181
182                // Skip MkDocs auto-references if in MkDocs mode
183                // Check both the reference_id and the link text for shorthand references
184                if mkdocs_mode && (is_mkdocs_auto_reference(ref_id) || is_mkdocs_auto_reference(&link.text)) {
185                    continue;
186                }
187
188                // Check if reference is defined
189                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
190                    // Check if the line is in an example section or list item
191                    if let Some(line_info) = ctx.line_info(link.line) {
192                        if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
193                            in_example_section = true;
194                            continue;
195                        }
196
197                        if in_example_section {
198                            continue;
199                        }
200
201                        // Skip list items
202                        if LIST_ITEM_REGEX.is_match(&line_info.content) {
203                            continue;
204                        }
205                    }
206
207                    let match_len = link.byte_end - link.byte_offset;
208                    undefined.push((link.line - 1, link.start_col, match_len, ref_id.clone()));
209                    reported_refs.insert(reference_lower, true);
210                }
211            }
212        }
213
214        // Use cached data for reference images
215        for image in &ctx.images {
216            if !image.is_reference {
217                continue; // Skip inline images
218            }
219
220            // Skip images inside code spans
221            if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
222                continue;
223            }
224
225            // Skip images inside HTML comments
226            if Self::is_in_html_comment(content, image.byte_offset) {
227                continue;
228            }
229
230            // Skip images inside math contexts
231            if is_in_math_context(ctx, image.byte_offset) {
232                continue;
233            }
234
235            // Skip images inside table cells
236            if is_in_table_cell(ctx, image.line, image.start_col) {
237                continue;
238            }
239
240            // Skip images inside frontmatter (convert from 1-based to 0-based line numbers)
241            if is_in_front_matter(content, image.line.saturating_sub(1)) {
242                continue;
243            }
244
245            if let Some(ref_id) = &image.reference_id {
246                let reference_lower = ref_id.to_lowercase();
247
248                // Skip MkDocs auto-references if in MkDocs mode
249                // Check both the reference_id and the alt text for shorthand references
250                if mkdocs_mode && (is_mkdocs_auto_reference(ref_id) || is_mkdocs_auto_reference(&image.alt_text)) {
251                    continue;
252                }
253
254                // Check if reference is defined
255                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
256                    // Check if the line is in an example section or list item
257                    if let Some(line_info) = ctx.line_info(image.line) {
258                        if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
259                            in_example_section = true;
260                            continue;
261                        }
262
263                        if in_example_section {
264                            continue;
265                        }
266
267                        // Skip list items
268                        if LIST_ITEM_REGEX.is_match(&line_info.content) {
269                            continue;
270                        }
271                    }
272
273                    let match_len = image.byte_end - image.byte_offset;
274                    undefined.push((image.line - 1, image.start_col, match_len, ref_id.clone()));
275                    reported_refs.insert(reference_lower, true);
276                }
277            }
278        }
279
280        // Build a set of byte ranges that are already covered by parsed links/images
281        let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
282
283        // Add ranges from parsed links
284        for link in &ctx.links {
285            covered_ranges.push((link.byte_offset, link.byte_end));
286        }
287
288        // Add ranges from parsed images
289        for image in &ctx.images {
290            covered_ranges.push((image.byte_offset, image.byte_end));
291        }
292
293        // Sort ranges by start position
294        covered_ranges.sort_by_key(|&(start, _)| start);
295
296        // Handle shortcut references [text] which aren't captured in ctx.links
297        // Need to use regex for these
298        let lines: Vec<&str> = content.lines().collect();
299        in_example_section = false; // Reset for line-by-line processing
300
301        for (line_num, line) in lines.iter().enumerate() {
302            // Skip lines in frontmatter (line_num is already 0-based)
303            if is_in_front_matter(content, line_num) {
304                continue;
305            }
306
307            // Handle code blocks
308            if let Some(cap) = FENCED_CODE_START.captures(line) {
309                if let Some(marker) = cap.get(0) {
310                    let marker_str = marker.as_str().to_string();
311                    if !in_code_block {
312                        in_code_block = true;
313                        code_fence_marker = marker_str;
314                    } else if line.trim().starts_with(&code_fence_marker) {
315                        in_code_block = false;
316                        code_fence_marker.clear();
317                    }
318                }
319                continue;
320            }
321
322            if in_code_block {
323                continue;
324            }
325
326            // Check for example sections
327            if OUTPUT_EXAMPLE_START.is_match(line) {
328                in_example_section = true;
329                continue;
330            }
331
332            if in_example_section {
333                // Check if we're exiting the example section (another heading)
334                if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
335                    in_example_section = false;
336                } else {
337                    continue;
338                }
339            }
340
341            // Skip list items
342            if LIST_ITEM_REGEX.is_match(line) {
343                continue;
344            }
345
346            // Skip GitHub alerts/callouts (e.g., > [!TIP])
347            if GITHUB_ALERT_REGEX.is_match(line) {
348                continue;
349            }
350
351            // Collect positions of brackets that are part of URLs (IPv6, etc.)
352            // so we can exclude them from reference checking
353            let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
354            for mat in URL_WITH_BRACKETS.find_iter(line) {
355                // Find all bracket pairs within this URL match
356                let url_str = mat.as_str();
357                let url_start = mat.start();
358
359                // Find brackets within the URL (e.g., in https://[::1]:8080)
360                let mut idx = 0;
361                while idx < url_str.len() {
362                    if let Some(bracket_start) = url_str[idx..].find('[') {
363                        let bracket_start_abs = url_start + idx + bracket_start;
364                        if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
365                            let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
366                            url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
367                            idx += bracket_start + bracket_end + 2;
368                        } else {
369                            break;
370                        }
371                    } else {
372                        break;
373                    }
374                }
375            }
376
377            // Check shortcut references: [reference]
378            if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
379                for cap in captures {
380                    if let Some(ref_match) = cap.get(1) {
381                        // Check if this bracket is part of a URL (IPv6, etc.)
382                        let bracket_start = cap.get(0).unwrap().start();
383                        let bracket_end = cap.get(0).unwrap().end();
384
385                        // Skip if this bracket pair is within any URL bracket range
386                        let is_in_url = url_bracket_ranges
387                            .iter()
388                            .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
389
390                        if is_in_url {
391                            continue;
392                        }
393
394                        let reference = ref_match.as_str();
395                        let reference_lower = reference.to_lowercase();
396
397                        // Skip GitHub alerts (including extended types)
398                        if let Some(alert_type) = reference.strip_prefix('!')
399                            && matches!(
400                                alert_type,
401                                "NOTE"
402                                    | "TIP"
403                                    | "WARNING"
404                                    | "IMPORTANT"
405                                    | "CAUTION"
406                                    | "INFO"
407                                    | "SUCCESS"
408                                    | "FAILURE"
409                                    | "DANGER"
410                                    | "BUG"
411                                    | "EXAMPLE"
412                                    | "QUOTE"
413                            )
414                        {
415                            continue;
416                        }
417
418                        // Skip MkDocs snippet section markers like [start:section] or [end:section]
419                        // when they appear as part of snippet syntax (e.g., # -8<- [start:section])
420                        if mkdocs_mode
421                            && (reference.starts_with("start:") || reference.starts_with("end:"))
422                            && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
423                                || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
424                        {
425                            continue;
426                        }
427
428                        // Skip MkDocs auto-references if in MkDocs mode
429                        if mkdocs_mode && is_mkdocs_auto_reference(reference) {
430                            continue;
431                        }
432
433                        if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
434                            let full_match = cap.get(0).unwrap();
435                            let col = full_match.start();
436
437                            // Skip if inside code span
438                            let code_spans = ctx.code_spans();
439                            if Self::is_in_code_span(line_num + 1, col, &code_spans) {
440                                continue;
441                            }
442
443                            // Check if this position is within a covered range
444                            let line_start_byte = ctx.line_offsets[line_num];
445                            let byte_pos = line_start_byte + col;
446
447                            // Skip if inside HTML comment
448                            if Self::is_in_html_comment(content, byte_pos) {
449                                continue;
450                            }
451
452                            // Skip if inside HTML tag
453                            if Self::is_in_html_tag(ctx, byte_pos) {
454                                continue;
455                            }
456
457                            // Skip if inside math context
458                            if is_in_math_context(ctx, byte_pos) {
459                                continue;
460                            }
461
462                            // Skip if inside table cell
463                            if is_in_table_cell(ctx, line_num + 1, col) {
464                                continue;
465                            }
466
467                            let byte_end = byte_pos + (full_match.end() - full_match.start());
468
469                            // Check if this shortcut ref overlaps with any parsed link/image
470                            let mut is_covered = false;
471                            for &(range_start, range_end) in &covered_ranges {
472                                if range_start <= byte_pos && byte_end <= range_end {
473                                    // This shortcut ref is completely within a parsed link/image
474                                    is_covered = true;
475                                    break;
476                                }
477                                if range_start > byte_end {
478                                    // No need to check further (ranges are sorted)
479                                    break;
480                                }
481                            }
482
483                            if is_covered {
484                                continue;
485                            }
486
487                            // More sophisticated checks to avoid false positives
488
489                            // Check 1: If preceded by ], this might be part of [text][ref]
490                            // Look for the pattern ...][ref] and check if there's a matching [ before
491                            let line_chars: Vec<char> = line.chars().collect();
492                            if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
493                                // Look backwards for a [ that would make this [text][ref]
494                                let mut bracket_count = 1; // We already saw one ]
495                                let mut check_pos = col.saturating_sub(2);
496                                let mut found_opening = false;
497
498                                while check_pos > 0 && check_pos < line_chars.len() {
499                                    match line_chars.get(check_pos) {
500                                        Some(&']') => bracket_count += 1,
501                                        Some(&'[') => {
502                                            bracket_count -= 1;
503                                            if bracket_count == 0 {
504                                                // Check if this [ is escaped
505                                                if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
506                                                    found_opening = true;
507                                                }
508                                                break;
509                                            }
510                                        }
511                                        _ => {}
512                                    }
513                                    if check_pos == 0 {
514                                        break;
515                                    }
516                                    check_pos = check_pos.saturating_sub(1);
517                                }
518
519                                if found_opening {
520                                    // This is part of [text][ref], skip it
521                                    continue;
522                                }
523                            }
524
525                            // Check 2: If there's an escaped bracket pattern before this
526                            // e.g., \[text\][ref], the [ref] shouldn't be treated as a shortcut
527                            let before_text = &line[..col];
528                            if before_text.contains("\\]") {
529                                // Check if there's a \[ before the \]
530                                if let Some(escaped_close_pos) = before_text.rfind("\\]") {
531                                    let search_text = &before_text[..escaped_close_pos];
532                                    if search_text.contains("\\[") {
533                                        // This looks like \[...\][ref], skip it
534                                        continue;
535                                    }
536                                }
537                            }
538
539                            let match_len = full_match.end() - full_match.start();
540                            undefined.push((line_num, col, match_len, reference.to_string()));
541                            reported_refs.insert(reference_lower, true);
542                        }
543                    }
544                }
545            }
546        }
547
548        undefined
549    }
550}
551
552impl Rule for MD052ReferenceLinkImages {
553    fn name(&self) -> &'static str {
554        "MD052"
555    }
556
557    fn description(&self) -> &'static str {
558        "Reference links and images should use a reference that exists"
559    }
560
561    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
562        let content = ctx.content;
563        let mut warnings = Vec::new();
564
565        // Check if we're in MkDocs mode from the context
566        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
567
568        let references = self.extract_references(content, mkdocs_mode);
569
570        // Use optimized detection method with cached link/image data
571        for (line_num, col, match_len, reference) in
572            self.find_undefined_references(content, &references, ctx, mkdocs_mode)
573        {
574            let lines: Vec<&str> = content.lines().collect();
575            let line_content = lines.get(line_num).unwrap_or(&"");
576
577            // Calculate precise character range for the entire undefined reference
578            let (start_line, start_col, end_line, end_col) =
579                calculate_match_range(line_num + 1, line_content, col, match_len);
580
581            warnings.push(LintWarning {
582                rule_name: Some(self.name()),
583                line: start_line,
584                column: start_col,
585                end_line,
586                end_column: end_col,
587                message: format!("Reference '{reference}' not found"),
588                severity: Severity::Warning,
589                fix: None,
590            });
591        }
592
593        Ok(warnings)
594    }
595
596    /// Check if this rule should be skipped for performance
597    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
598        // Skip if content is empty or has no reference-style links/images
599        ctx.content.is_empty() || (!ctx.content.contains("](") && !ctx.content.contains("]["))
600    }
601
602    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
603        let content = ctx.content;
604        // No automatic fix available for undefined references
605        Ok(content.to_string())
606    }
607
608    fn as_any(&self) -> &dyn std::any::Any {
609        self
610    }
611
612    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
613    where
614        Self: Sized,
615    {
616        // Flavor is now accessed from LintContext during check
617        Box::new(MD052ReferenceLinkImages::new())
618    }
619}
620
621#[cfg(test)]
622mod tests {
623    use super::*;
624    use crate::lint_context::LintContext;
625
626    #[test]
627    fn test_valid_reference_link() {
628        let rule = MD052ReferenceLinkImages::new();
629        let content = "[text][ref]\n\n[ref]: https://example.com";
630        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
631        let result = rule.check(&ctx).unwrap();
632
633        assert_eq!(result.len(), 0);
634    }
635
636    #[test]
637    fn test_undefined_reference_link() {
638        let rule = MD052ReferenceLinkImages::new();
639        let content = "[text][undefined]";
640        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
641        let result = rule.check(&ctx).unwrap();
642
643        assert_eq!(result.len(), 1);
644        assert!(result[0].message.contains("Reference 'undefined' not found"));
645    }
646
647    #[test]
648    fn test_valid_reference_image() {
649        let rule = MD052ReferenceLinkImages::new();
650        let content = "![alt][img]\n\n[img]: image.jpg";
651        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
652        let result = rule.check(&ctx).unwrap();
653
654        assert_eq!(result.len(), 0);
655    }
656
657    #[test]
658    fn test_undefined_reference_image() {
659        let rule = MD052ReferenceLinkImages::new();
660        let content = "![alt][missing]";
661        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
662        let result = rule.check(&ctx).unwrap();
663
664        assert_eq!(result.len(), 1);
665        assert!(result[0].message.contains("Reference 'missing' not found"));
666    }
667
668    #[test]
669    fn test_case_insensitive_references() {
670        let rule = MD052ReferenceLinkImages::new();
671        let content = "[Text][REF]\n\n[ref]: https://example.com";
672        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
673        let result = rule.check(&ctx).unwrap();
674
675        assert_eq!(result.len(), 0);
676    }
677
678    #[test]
679    fn test_shortcut_reference_valid() {
680        let rule = MD052ReferenceLinkImages::new();
681        let content = "[ref]\n\n[ref]: https://example.com";
682        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
683        let result = rule.check(&ctx).unwrap();
684
685        assert_eq!(result.len(), 0);
686    }
687
688    #[test]
689    fn test_shortcut_reference_undefined() {
690        let rule = MD052ReferenceLinkImages::new();
691        let content = "[undefined]";
692        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
693        let result = rule.check(&ctx).unwrap();
694
695        assert_eq!(result.len(), 1);
696        assert!(result[0].message.contains("Reference 'undefined' not found"));
697    }
698
699    #[test]
700    fn test_inline_links_ignored() {
701        let rule = MD052ReferenceLinkImages::new();
702        let content = "[text](https://example.com)";
703        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
704        let result = rule.check(&ctx).unwrap();
705
706        assert_eq!(result.len(), 0);
707    }
708
709    #[test]
710    fn test_inline_images_ignored() {
711        let rule = MD052ReferenceLinkImages::new();
712        let content = "![alt](image.jpg)";
713        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
714        let result = rule.check(&ctx).unwrap();
715
716        assert_eq!(result.len(), 0);
717    }
718
719    #[test]
720    fn test_references_in_code_blocks_ignored() {
721        let rule = MD052ReferenceLinkImages::new();
722        let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
723        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
724        let result = rule.check(&ctx).unwrap();
725
726        assert_eq!(result.len(), 0);
727    }
728
729    #[test]
730    fn test_references_in_inline_code_ignored() {
731        let rule = MD052ReferenceLinkImages::new();
732        let content = "`[undefined]`";
733        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
734        let result = rule.check(&ctx).unwrap();
735
736        // References inside inline code spans should be ignored
737        assert_eq!(result.len(), 0);
738    }
739
740    #[test]
741    fn test_comprehensive_inline_code_detection() {
742        let rule = MD052ReferenceLinkImages::new();
743        let content = r#"# Test
744
745This `[inside]` should be ignored.
746This [outside] should be flagged.
747Reference links `[text][ref]` in code are ignored.
748Regular reference [text][missing] should be flagged.
749Images `![alt][img]` in code are ignored.
750Regular image ![alt][badimg] should be flagged.
751
752Multiple `[one]` and `[two]` in code ignored, but [three] is not.
753
754```
755[code block content] should be ignored
756```
757
758`Multiple [refs] in [same] code span` ignored."#;
759
760        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
761        let result = rule.check(&ctx).unwrap();
762
763        // Should only flag: outside, missing, badimg, three (4 total)
764        assert_eq!(result.len(), 4);
765
766        let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
767        assert!(messages.iter().any(|m| m.contains("outside")));
768        assert!(messages.iter().any(|m| m.contains("missing")));
769        assert!(messages.iter().any(|m| m.contains("badimg")));
770        assert!(messages.iter().any(|m| m.contains("three")));
771
772        // Should NOT flag any references inside code spans
773        assert!(!messages.iter().any(|m| m.contains("inside")));
774        assert!(!messages.iter().any(|m| m.contains("one")));
775        assert!(!messages.iter().any(|m| m.contains("two")));
776        assert!(!messages.iter().any(|m| m.contains("refs")));
777        assert!(!messages.iter().any(|m| m.contains("same")));
778    }
779
780    #[test]
781    fn test_multiple_undefined_references() {
782        let rule = MD052ReferenceLinkImages::new();
783        let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
784        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
785        let result = rule.check(&ctx).unwrap();
786
787        assert_eq!(result.len(), 3);
788        assert!(result[0].message.contains("ref1"));
789        assert!(result[1].message.contains("ref2"));
790        assert!(result[2].message.contains("ref3"));
791    }
792
793    #[test]
794    fn test_mixed_valid_and_undefined() {
795        let rule = MD052ReferenceLinkImages::new();
796        let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
797        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
798        let result = rule.check(&ctx).unwrap();
799
800        assert_eq!(result.len(), 1);
801        assert!(result[0].message.contains("missing"));
802    }
803
804    #[test]
805    fn test_empty_reference() {
806        let rule = MD052ReferenceLinkImages::new();
807        let content = "[text][]\n\n[ref]: https://example.com";
808        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
809        let result = rule.check(&ctx).unwrap();
810
811        // Empty reference should use the link text as reference
812        assert_eq!(result.len(), 1);
813    }
814
815    #[test]
816    fn test_escaped_brackets_ignored() {
817        let rule = MD052ReferenceLinkImages::new();
818        let content = "\\[not a link\\]";
819        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
820        let result = rule.check(&ctx).unwrap();
821
822        assert_eq!(result.len(), 0);
823    }
824
825    #[test]
826    fn test_list_items_ignored() {
827        let rule = MD052ReferenceLinkImages::new();
828        let content = "- [undefined]\n* [another]\n+ [third]";
829        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
830        let result = rule.check(&ctx).unwrap();
831
832        // List items that look like shortcut references should be ignored
833        assert_eq!(result.len(), 0);
834    }
835
836    #[test]
837    fn test_output_example_section_ignored() {
838        let rule = MD052ReferenceLinkImages::new();
839        let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
840        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
841        let result = rule.check(&ctx).unwrap();
842
843        // Only the reference outside the Output section should be flagged
844        assert_eq!(result.len(), 1);
845        assert!(result[0].message.contains("missing"));
846    }
847
848    #[test]
849    fn test_reference_definitions_in_code_blocks_ignored() {
850        let rule = MD052ReferenceLinkImages::new();
851        let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
852        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
853        let result = rule.check(&ctx).unwrap();
854
855        // Reference defined in code block should not count
856        assert_eq!(result.len(), 1);
857        assert!(result[0].message.contains("ref"));
858    }
859
860    #[test]
861    fn test_multiple_references_to_same_undefined() {
862        let rule = MD052ReferenceLinkImages::new();
863        let content = "[first][missing] [second][missing] [third][missing]";
864        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
865        let result = rule.check(&ctx).unwrap();
866
867        // Should only report once per unique reference
868        assert_eq!(result.len(), 1);
869        assert!(result[0].message.contains("missing"));
870    }
871
872    #[test]
873    fn test_reference_with_special_characters() {
874        let rule = MD052ReferenceLinkImages::new();
875        let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
876        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
877        let result = rule.check(&ctx).unwrap();
878
879        assert_eq!(result.len(), 0);
880    }
881
882    #[test]
883    fn test_issue_51_html_attribute_not_reference() {
884        // Test for issue #51 - HTML attributes with square brackets shouldn't be treated as references
885        let rule = MD052ReferenceLinkImages::new();
886        let content = r#"# Example
887
888## Test
889
890Want to fill out this form?
891
892<form method="post">
893    <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
894</form>"#;
895        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
896        let result = rule.check(&ctx).unwrap();
897
898        assert_eq!(
899            result.len(),
900            0,
901            "HTML attributes with square brackets should not be flagged as undefined references"
902        );
903    }
904
905    #[test]
906    fn test_extract_references() {
907        let rule = MD052ReferenceLinkImages::new();
908        let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
909        let refs = rule.extract_references(content, false);
910
911        assert_eq!(refs.len(), 3);
912        assert!(refs.contains("ref1"));
913        assert!(refs.contains("ref2"));
914        assert!(refs.contains("ref3"));
915    }
916
917    #[test]
918    fn test_inline_code_not_flagged() {
919        let rule = MD052ReferenceLinkImages::new();
920
921        // Test that arrays in inline code are not flagged as references
922        let content = r#"# Test
923
924Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.
925
926Also, `[todo]` is not a reference link.
927
928But this [reference] should be flagged.
929
930And this `[inline code]` should not be flagged.
931"#;
932
933        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
934        let warnings = rule.check(&ctx).unwrap();
935
936        // Should only flag [reference], not the ones in backticks
937        assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
938        assert!(warnings[0].message.contains("'reference'"));
939    }
940
941    #[test]
942    fn test_code_block_references_ignored() {
943        let rule = MD052ReferenceLinkImages::new();
944
945        let content = r#"# Test
946
947```markdown
948[undefined] reference in code block
949![undefined] image in code block
950```
951
952[real-undefined] reference outside
953"#;
954
955        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
956        let warnings = rule.check(&ctx).unwrap();
957
958        // Should only flag [real-undefined], not the ones in code block
959        assert_eq!(warnings.len(), 1);
960        assert!(warnings[0].message.contains("'real-undefined'"));
961    }
962
963    #[test]
964    fn test_html_comments_ignored() {
965        // Test for issue #20 - MD052 should not flag content inside HTML comments
966        let rule = MD052ReferenceLinkImages::new();
967
968        // Test the exact case from issue #20
969        let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
970<!--- set_env EDITOR 'python3 fake_editor.py' -->
971
972```bash
973$ python3 vote.py
9743 votes for: 2
9752 votes for: 3, 4
976```"#;
977        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
978        let result = rule.check(&ctx).unwrap();
979        assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");
980
981        // Test various reference patterns inside HTML comments
982        let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
983Normal [text][undefined]
984<!-- Another [comment][with] references -->"#;
985        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
986        let result = rule.check(&ctx).unwrap();
987        assert_eq!(
988            result.len(),
989            1,
990            "Should only flag the undefined reference outside comments"
991        );
992        assert!(result[0].message.contains("undefined"));
993
994        // Test multi-line HTML comments
995        let content = r#"<!--
996[ref1]
997[ref2][ref3]
998-->
999[actual][undefined]"#;
1000        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1001        let result = rule.check(&ctx).unwrap();
1002        assert_eq!(
1003            result.len(),
1004            1,
1005            "Should not flag references in multi-line HTML comments"
1006        );
1007        assert!(result[0].message.contains("undefined"));
1008
1009        // Test mixed scenarios
1010        let content = r#"<!-- Comment with [1:] pattern -->
1011Valid [link][ref]
1012<!-- More [refs][in][comments] -->
1013![image][missing]
1014
1015[ref]: https://example.com"#;
1016        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1017        let result = rule.check(&ctx).unwrap();
1018        assert_eq!(result.len(), 1, "Should only flag missing image reference");
1019        assert!(result[0].message.contains("missing"));
1020    }
1021
1022    #[test]
1023    fn test_frontmatter_ignored() {
1024        // Test for issue #24 - MD052 should not flag content inside frontmatter
1025        let rule = MD052ReferenceLinkImages::new();
1026
1027        // Test YAML frontmatter with arrays and references
1028        let content = r#"---
1029layout: post
1030title: "My Jekyll Post"
1031date: 2023-01-01
1032categories: blog
1033tags: ["test", "example"]
1034author: John Doe
1035---
1036
1037# My Blog Post
1038
1039This is the actual markdown content that should be linted.
1040
1041[undefined] reference should be flagged.
1042
1043## Section 1
1044
1045Some content here."#;
1046        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1047        let result = rule.check(&ctx).unwrap();
1048
1049        // Should only flag [undefined] in the content, not the ["test", "example"] array in frontmatter
1050        assert_eq!(
1051            result.len(),
1052            1,
1053            "Should only flag the undefined reference outside frontmatter"
1054        );
1055        assert!(result[0].message.contains("undefined"));
1056
1057        // Test TOML frontmatter
1058        let content = r#"+++
1059title = "My Post"
1060tags = ["example", "test"]
1061+++
1062
1063# Content
1064
1065[missing] reference should be flagged."#;
1066        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1067        let result = rule.check(&ctx).unwrap();
1068        assert_eq!(
1069            result.len(),
1070            1,
1071            "Should only flag the undefined reference outside TOML frontmatter"
1072        );
1073        assert!(result[0].message.contains("missing"));
1074    }
1075
1076    #[test]
1077    fn test_mkdocs_snippet_markers_not_flagged() {
1078        // Test for issue #68 - MkDocs snippet selection markers should not be flagged as undefined references
1079        let rule = MD052ReferenceLinkImages::new();
1080
1081        // Test snippet section markers
1082        let content = r#"# Document with MkDocs Snippets
1083
1084Some content here.
1085
1086# -8<- [start:remote-content]
1087
1088This is the remote content section.
1089
1090# -8<- [end:remote-content]
1091
1092More content here.
1093
1094<!-- --8<-- [start:another-section] -->
1095Content in another section
1096<!-- --8<-- [end:another-section] -->"#;
1097        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs);
1098        let result = rule.check(&ctx).unwrap();
1099
1100        // Should not flag any snippet markers as undefined references
1101        assert_eq!(
1102            result.len(),
1103            0,
1104            "Should not flag MkDocs snippet markers as undefined references"
1105        );
1106
1107        // Test that the snippet marker lines are properly skipped
1108        // but regular undefined references on other lines are still caught
1109        let content = r#"# Document
1110
1111# -8<- [start:section]
1112Content with [reference] inside snippet section
1113# -8<- [end:section]
1114
1115Regular [undefined] reference outside snippet markers."#;
1116        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs);
1117        let result = rule.check(&ctx).unwrap();
1118
1119        assert_eq!(
1120            result.len(),
1121            2,
1122            "Should flag undefined references but skip snippet marker lines"
1123        );
1124        // The references inside the content should be flagged, but not start: and end:
1125        assert!(result[0].message.contains("reference"));
1126        assert!(result[1].message.contains("undefined"));
1127
1128        // Test in standard mode - should flag the markers as undefined
1129        let content = r#"# Document
1130
1131# -8<- [start:section]
1132# -8<- [end:section]"#;
1133        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1134        let result = rule.check(&ctx).unwrap();
1135
1136        assert_eq!(
1137            result.len(),
1138            2,
1139            "In standard mode, snippet markers should be flagged as undefined references"
1140        );
1141    }
1142
1143    #[test]
1144    fn test_github_alerts_not_flagged() {
1145        // Test for issue #60 - GitHub alerts should not be flagged as undefined references
1146        let rule = MD052ReferenceLinkImages::new();
1147
1148        // Test various GitHub alert types
1149        let content = r#"# Document with GitHub Alerts
1150
1151> [!NOTE]
1152> This is a note alert.
1153
1154> [!TIP]
1155> This is a tip alert.
1156
1157> [!IMPORTANT]
1158> This is an important alert.
1159
1160> [!WARNING]
1161> This is a warning alert.
1162
1163> [!CAUTION]
1164> This is a caution alert.
1165
1166Regular content with [undefined] reference."#;
1167        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1168        let result = rule.check(&ctx).unwrap();
1169
1170        // Should only flag the undefined reference, not the GitHub alerts
1171        assert_eq!(
1172            result.len(),
1173            1,
1174            "Should only flag the undefined reference, not GitHub alerts"
1175        );
1176        assert!(result[0].message.contains("undefined"));
1177        assert_eq!(result[0].line, 18); // Line with [undefined]
1178
1179        // Test GitHub alerts with additional content
1180        let content = r#"> [!TIP]
1181> Here's a useful tip about [something].
1182> Multiple lines are allowed.
1183
1184[something] is mentioned but not defined."#;
1185        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1186        let result = rule.check(&ctx).unwrap();
1187
1188        // Should flag only the [something] outside blockquotes
1189        // The test shows we're only catching one, which might be correct behavior
1190        // matching markdownlint's approach
1191        assert_eq!(result.len(), 1, "Should flag undefined reference");
1192        assert!(result[0].message.contains("something"));
1193
1194        // Test GitHub alerts with proper references
1195        let content = r#"> [!NOTE]
1196> See [reference] for more details.
1197
1198[reference]: https://example.com"#;
1199        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1200        let result = rule.check(&ctx).unwrap();
1201
1202        // Should not flag anything - [!NOTE] is GitHub alert and [reference] is defined
1203        assert_eq!(result.len(), 0, "Should not flag GitHub alerts or defined references");
1204    }
1205}