rumdl_lib/rules/
md052_reference_links_images.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::{HTML_COMMENT_PATTERN, SHORTCUT_REF_REGEX};
5use crate::utils::skip_context::{is_in_front_matter, is_in_math_context, is_in_table_cell};
6use lazy_static::lazy_static;
7use regex::Regex;
8use std::collections::{HashMap, HashSet};
9
10lazy_static! {
11    // Pattern to match reference definitions [ref]: url (standard regex is fine)
12    // Note: \S* instead of \S+ to allow empty definitions like [ref]:
13    static ref REF_REGEX: Regex = Regex::new(r"^\s*\[([^\]]+)\]:\s*.*").unwrap();
14
15    // Pattern for list items to exclude from reference checks (standard regex is fine)
16    static ref LIST_ITEM_REGEX: Regex = Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap();
17
18    // Pattern for code blocks (standard regex is fine)
19    static ref FENCED_CODE_START: Regex = Regex::new(r"^(`{3,}|~{3,})").unwrap();
20
21    // Pattern for output example sections (standard regex is fine)
22    static ref OUTPUT_EXAMPLE_START: Regex = Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap();
23
24    // Pattern for GitHub alerts/callouts in blockquotes (e.g., > [!NOTE], > [!TIP], etc.)
25    // Extended to include additional common alert types
26    static ref GITHUB_ALERT_REGEX: Regex = Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]").unwrap();
27
28    // Pattern to detect URLs that may contain brackets (IPv6, API endpoints, etc.)
29    // This pattern specifically looks for:
30    // - IPv6 addresses: https://[::1] or https://[2001:db8::1]
31    // - IPv6 with zone IDs: https://[fe80::1%eth0]
32    // - IPv6 mixed notation: https://[::ffff:192.0.2.1]
33    // - API paths with array notation: https://api.example.com/users[0]
34    // But NOT markdown reference links that happen to follow URLs
35    static ref URL_WITH_BRACKETS: Regex = Regex::new(
36        r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])"
37    ).unwrap();
38}
39
/// Rule MD052: Reference links and images should use a reference that exists
///
/// See [docs/md052.md](../../docs/md052.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when a reference link or image uses a reference that isn't defined.
/// The rule itself carries no configuration state.
#[derive(Debug, Clone, Default)]
pub struct MD052ReferenceLinkImages {}
47
48impl MD052ReferenceLinkImages {
49    pub fn new() -> Self {
50        Self {}
51    }
52
53    /// Check if a position is inside any code span
54    fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
55        code_spans
56            .iter()
57            .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
58    }
59
60    /// Check if a byte position is within an HTML comment
61    fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
62        for m in HTML_COMMENT_PATTERN.find_iter(content) {
63            if m.start() <= byte_pos && byte_pos < m.end() {
64                return true;
65            }
66        }
67        false
68    }
69
70    /// Check if a byte position is within an HTML tag
71    fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
72        // Check HTML tags
73        for html_tag in ctx.html_tags().iter() {
74            if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
75                return true;
76            }
77        }
78        false
79    }
80
81    fn extract_references(&self, content: &str, mkdocs_mode: bool) -> HashSet<String> {
82        use crate::config::MarkdownFlavor;
83        use crate::utils::skip_context::is_mkdocs_snippet_line;
84
85        let mut references = HashSet::new();
86        let mut in_code_block = false;
87        let mut code_fence_marker = String::new();
88
89        for line in content.lines() {
90            // Skip lines that look like MkDocs snippet markers (only in MkDocs mode)
91            if is_mkdocs_snippet_line(
92                line,
93                if mkdocs_mode {
94                    MarkdownFlavor::MkDocs
95                } else {
96                    MarkdownFlavor::Standard
97                },
98            ) {
99                continue;
100            }
101            // Handle code block boundaries
102            if let Some(cap) = FENCED_CODE_START.captures(line) {
103                if let Some(marker) = cap.get(0) {
104                    let marker_str = marker.as_str().to_string();
105                    if !in_code_block {
106                        in_code_block = true;
107                        code_fence_marker = marker_str;
108                    } else if line.trim().starts_with(&code_fence_marker) {
109                        in_code_block = false;
110                        code_fence_marker.clear();
111                    }
112                }
113                continue;
114            }
115
116            // Skip lines in code blocks
117            if in_code_block {
118                continue;
119            }
120
121            if let Some(cap) = REF_REGEX.captures(line) {
122                // Store references in lowercase for case-insensitive comparison
123                if let Some(reference) = cap.get(1) {
124                    references.insert(reference.as_str().to_lowercase());
125                }
126            }
127        }
128
129        references
130    }
131
132    fn find_undefined_references(
133        &self,
134        content: &str,
135        references: &HashSet<String>,
136        ctx: &crate::lint_context::LintContext,
137        mkdocs_mode: bool,
138    ) -> Vec<(usize, usize, usize, String)> {
139        let mut undefined = Vec::new();
140        let mut reported_refs = HashMap::new();
141        let mut in_code_block = false;
142        let mut code_fence_marker = String::new();
143        let mut in_example_section = false;
144
145        // Get code spans once for the entire function
146        let code_spans = ctx.code_spans();
147
148        // Use cached data for reference links and images
149        for link in &ctx.links {
150            if !link.is_reference {
151                continue; // Skip inline links
152            }
153
154            // Skip links inside code spans
155            if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
156                continue;
157            }
158
159            // Skip links inside HTML comments
160            if Self::is_in_html_comment(content, link.byte_offset) {
161                continue;
162            }
163
164            // Skip links inside math contexts
165            if is_in_math_context(ctx, link.byte_offset) {
166                continue;
167            }
168
169            // Skip links inside table cells
170            if is_in_table_cell(ctx, link.line, link.start_col) {
171                continue;
172            }
173
174            // Skip links inside frontmatter (convert from 1-based to 0-based line numbers)
175            if is_in_front_matter(content, link.line.saturating_sub(1)) {
176                continue;
177            }
178
179            if let Some(ref_id) = &link.reference_id {
180                let reference_lower = ref_id.to_lowercase();
181
182                // Skip MkDocs auto-references if in MkDocs mode
183                // Check both the reference_id and the link text for shorthand references
184                if mkdocs_mode && (is_mkdocs_auto_reference(ref_id) || is_mkdocs_auto_reference(&link.text)) {
185                    continue;
186                }
187
188                // Check if reference is defined
189                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
190                    // Check if the line is in an example section or list item
191                    if let Some(line_info) = ctx.line_info(link.line) {
192                        if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
193                            in_example_section = true;
194                            continue;
195                        }
196
197                        if in_example_section {
198                            continue;
199                        }
200
201                        // Skip list items
202                        if LIST_ITEM_REGEX.is_match(&line_info.content) {
203                            continue;
204                        }
205                    }
206
207                    let match_len = link.byte_end - link.byte_offset;
208                    undefined.push((link.line - 1, link.start_col, match_len, ref_id.clone()));
209                    reported_refs.insert(reference_lower, true);
210                }
211            }
212        }
213
214        // Use cached data for reference images
215        for image in &ctx.images {
216            if !image.is_reference {
217                continue; // Skip inline images
218            }
219
220            // Skip images inside code spans
221            if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
222                continue;
223            }
224
225            // Skip images inside HTML comments
226            if Self::is_in_html_comment(content, image.byte_offset) {
227                continue;
228            }
229
230            // Skip images inside math contexts
231            if is_in_math_context(ctx, image.byte_offset) {
232                continue;
233            }
234
235            // Skip images inside table cells
236            if is_in_table_cell(ctx, image.line, image.start_col) {
237                continue;
238            }
239
240            // Skip images inside frontmatter (convert from 1-based to 0-based line numbers)
241            if is_in_front_matter(content, image.line.saturating_sub(1)) {
242                continue;
243            }
244
245            if let Some(ref_id) = &image.reference_id {
246                let reference_lower = ref_id.to_lowercase();
247
248                // Skip MkDocs auto-references if in MkDocs mode
249                // Check both the reference_id and the alt text for shorthand references
250                if mkdocs_mode && (is_mkdocs_auto_reference(ref_id) || is_mkdocs_auto_reference(&image.alt_text)) {
251                    continue;
252                }
253
254                // Check if reference is defined
255                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
256                    // Check if the line is in an example section or list item
257                    if let Some(line_info) = ctx.line_info(image.line) {
258                        if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
259                            in_example_section = true;
260                            continue;
261                        }
262
263                        if in_example_section {
264                            continue;
265                        }
266
267                        // Skip list items
268                        if LIST_ITEM_REGEX.is_match(&line_info.content) {
269                            continue;
270                        }
271                    }
272
273                    let match_len = image.byte_end - image.byte_offset;
274                    undefined.push((image.line - 1, image.start_col, match_len, ref_id.clone()));
275                    reported_refs.insert(reference_lower, true);
276                }
277            }
278        }
279
280        // Build a set of byte ranges that are already covered by parsed links/images
281        let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
282
283        // Add ranges from parsed links
284        for link in &ctx.links {
285            covered_ranges.push((link.byte_offset, link.byte_end));
286        }
287
288        // Add ranges from parsed images
289        for image in &ctx.images {
290            covered_ranges.push((image.byte_offset, image.byte_end));
291        }
292
293        // Sort ranges by start position
294        covered_ranges.sort_by_key(|&(start, _)| start);
295
296        // Handle shortcut references [text] which aren't captured in ctx.links
297        // Need to use regex for these
298        let lines: Vec<&str> = content.lines().collect();
299        in_example_section = false; // Reset for line-by-line processing
300
301        for (line_num, line) in lines.iter().enumerate() {
302            // Skip lines in frontmatter (line_num is already 0-based)
303            if is_in_front_matter(content, line_num) {
304                continue;
305            }
306
307            // Handle code blocks
308            if let Some(cap) = FENCED_CODE_START.captures(line) {
309                if let Some(marker) = cap.get(0) {
310                    let marker_str = marker.as_str().to_string();
311                    if !in_code_block {
312                        in_code_block = true;
313                        code_fence_marker = marker_str;
314                    } else if line.trim().starts_with(&code_fence_marker) {
315                        in_code_block = false;
316                        code_fence_marker.clear();
317                    }
318                }
319                continue;
320            }
321
322            if in_code_block {
323                continue;
324            }
325
326            // Check for example sections
327            if OUTPUT_EXAMPLE_START.is_match(line) {
328                in_example_section = true;
329                continue;
330            }
331
332            if in_example_section {
333                // Check if we're exiting the example section (another heading)
334                if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
335                    in_example_section = false;
336                } else {
337                    continue;
338                }
339            }
340
341            // Skip list items
342            if LIST_ITEM_REGEX.is_match(line) {
343                continue;
344            }
345
346            // Skip GitHub alerts/callouts (e.g., > [!TIP])
347            if GITHUB_ALERT_REGEX.is_match(line) {
348                continue;
349            }
350
351            // Collect positions of brackets that are part of URLs (IPv6, etc.)
352            // so we can exclude them from reference checking
353            let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
354            for mat in URL_WITH_BRACKETS.find_iter(line) {
355                // Find all bracket pairs within this URL match
356                let url_str = mat.as_str();
357                let url_start = mat.start();
358
359                // Find brackets within the URL (e.g., in https://[::1]:8080)
360                let mut idx = 0;
361                while idx < url_str.len() {
362                    if let Some(bracket_start) = url_str[idx..].find('[') {
363                        let bracket_start_abs = url_start + idx + bracket_start;
364                        if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
365                            let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
366                            url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
367                            idx += bracket_start + bracket_end + 2;
368                        } else {
369                            break;
370                        }
371                    } else {
372                        break;
373                    }
374                }
375            }
376
377            // Check shortcut references: [reference]
378            if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
379                for cap in captures {
380                    if let Some(ref_match) = cap.get(1) {
381                        // Check if this bracket is part of a URL (IPv6, etc.)
382                        let bracket_start = cap.get(0).unwrap().start();
383                        let bracket_end = cap.get(0).unwrap().end();
384
385                        // Skip if this bracket pair is within any URL bracket range
386                        let is_in_url = url_bracket_ranges
387                            .iter()
388                            .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
389
390                        if is_in_url {
391                            continue;
392                        }
393
394                        let reference = ref_match.as_str();
395                        let reference_lower = reference.to_lowercase();
396
397                        // Skip GitHub alerts (including extended types)
398                        if let Some(alert_type) = reference.strip_prefix('!')
399                            && matches!(
400                                alert_type,
401                                "NOTE"
402                                    | "TIP"
403                                    | "WARNING"
404                                    | "IMPORTANT"
405                                    | "CAUTION"
406                                    | "INFO"
407                                    | "SUCCESS"
408                                    | "FAILURE"
409                                    | "DANGER"
410                                    | "BUG"
411                                    | "EXAMPLE"
412                                    | "QUOTE"
413                            )
414                        {
415                            continue;
416                        }
417
418                        // Skip MkDocs snippet section markers like [start:section] or [end:section]
419                        // when they appear as part of snippet syntax (e.g., # -8<- [start:section])
420                        if mkdocs_mode
421                            && (reference.starts_with("start:") || reference.starts_with("end:"))
422                            && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
423                                || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
424                        {
425                            continue;
426                        }
427
428                        // Skip MkDocs auto-references if in MkDocs mode
429                        if mkdocs_mode && is_mkdocs_auto_reference(reference) {
430                            continue;
431                        }
432
433                        if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
434                            let full_match = cap.get(0).unwrap();
435                            let col = full_match.start();
436
437                            // Skip if inside code span
438                            let code_spans = ctx.code_spans();
439                            if Self::is_in_code_span(line_num + 1, col, &code_spans) {
440                                continue;
441                            }
442
443                            // Check if this position is within a covered range
444                            let line_start_byte = ctx.line_offsets[line_num];
445                            let byte_pos = line_start_byte + col;
446
447                            // Skip if inside HTML comment
448                            if Self::is_in_html_comment(content, byte_pos) {
449                                continue;
450                            }
451
452                            // Skip if inside HTML tag
453                            if Self::is_in_html_tag(ctx, byte_pos) {
454                                continue;
455                            }
456
457                            // Skip if inside math context
458                            if is_in_math_context(ctx, byte_pos) {
459                                continue;
460                            }
461
462                            // Skip if inside table cell
463                            if is_in_table_cell(ctx, line_num + 1, col) {
464                                continue;
465                            }
466
467                            let byte_end = byte_pos + (full_match.end() - full_match.start());
468
469                            // Check if this shortcut ref overlaps with any parsed link/image
470                            let mut is_covered = false;
471                            for &(range_start, range_end) in &covered_ranges {
472                                if range_start <= byte_pos && byte_end <= range_end {
473                                    // This shortcut ref is completely within a parsed link/image
474                                    is_covered = true;
475                                    break;
476                                }
477                                if range_start > byte_end {
478                                    // No need to check further (ranges are sorted)
479                                    break;
480                                }
481                            }
482
483                            if is_covered {
484                                continue;
485                            }
486
487                            // More sophisticated checks to avoid false positives
488
489                            // Check 1: If preceded by ], this might be part of [text][ref]
490                            // Look for the pattern ...][ref] and check if there's a matching [ before
491                            if col > 0 && line.chars().nth(col.saturating_sub(1)) == Some(']') {
492                                // Look backwards for a [ that would make this [text][ref]
493                                let mut bracket_count = 1; // We already saw one ]
494                                let mut check_pos = col.saturating_sub(2);
495                                let mut found_opening = false;
496
497                                while check_pos > 0 {
498                                    match line.chars().nth(check_pos) {
499                                        Some(']') => bracket_count += 1,
500                                        Some('[') => {
501                                            bracket_count -= 1;
502                                            if bracket_count == 0 {
503                                                // Check if this [ is escaped
504                                                if check_pos == 0 || line.chars().nth(check_pos - 1) != Some('\\') {
505                                                    found_opening = true;
506                                                }
507                                                break;
508                                            }
509                                        }
510                                        _ => {}
511                                    }
512                                    if check_pos == 0 {
513                                        break;
514                                    }
515                                    check_pos = check_pos.saturating_sub(1);
516                                }
517
518                                if found_opening {
519                                    // This is part of [text][ref], skip it
520                                    continue;
521                                }
522                            }
523
524                            // Check 2: If there's an escaped bracket pattern before this
525                            // e.g., \[text\][ref], the [ref] shouldn't be treated as a shortcut
526                            let before_text = &line[..col];
527                            if before_text.contains("\\]") {
528                                // Check if there's a \[ before the \]
529                                if let Some(escaped_close_pos) = before_text.rfind("\\]") {
530                                    let search_text = &before_text[..escaped_close_pos];
531                                    if search_text.contains("\\[") {
532                                        // This looks like \[...\][ref], skip it
533                                        continue;
534                                    }
535                                }
536                            }
537
538                            let match_len = full_match.end() - full_match.start();
539                            undefined.push((line_num, col, match_len, reference.to_string()));
540                            reported_refs.insert(reference_lower, true);
541                        }
542                    }
543                }
544            }
545        }
546
547        undefined
548    }
549}
550
551impl Rule for MD052ReferenceLinkImages {
552    fn name(&self) -> &'static str {
553        "MD052"
554    }
555
556    fn description(&self) -> &'static str {
557        "Reference links and images should use a reference that exists"
558    }
559
560    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
561        let content = ctx.content;
562        let mut warnings = Vec::new();
563
564        // Check if we're in MkDocs mode from the context
565        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
566
567        let references = self.extract_references(content, mkdocs_mode);
568
569        // Use optimized detection method with cached link/image data
570        for (line_num, col, match_len, reference) in
571            self.find_undefined_references(content, &references, ctx, mkdocs_mode)
572        {
573            let lines: Vec<&str> = content.lines().collect();
574            let line_content = lines.get(line_num).unwrap_or(&"");
575
576            // Calculate precise character range for the entire undefined reference
577            let (start_line, start_col, end_line, end_col) =
578                calculate_match_range(line_num + 1, line_content, col, match_len);
579
580            warnings.push(LintWarning {
581                rule_name: Some(self.name()),
582                line: start_line,
583                column: start_col,
584                end_line,
585                end_column: end_col,
586                message: format!("Reference '{reference}' not found"),
587                severity: Severity::Warning,
588                fix: None,
589            });
590        }
591
592        Ok(warnings)
593    }
594
595    /// Check if this rule should be skipped for performance
596    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
597        // Skip if content is empty or has no reference-style links/images
598        ctx.content.is_empty() || (!ctx.content.contains("](") && !ctx.content.contains("]["))
599    }
600
601    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
602        let content = ctx.content;
603        // No automatic fix available for undefined references
604        Ok(content.to_string())
605    }
606
607    fn as_any(&self) -> &dyn std::any::Any {
608        self
609    }
610
611    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
612    where
613        Self: Sized,
614    {
615        // Flavor is now accessed from LintContext during check
616        Box::new(MD052ReferenceLinkImages::new())
617    }
618}
619
620#[cfg(test)]
621mod tests {
622    use super::*;
623    use crate::lint_context::LintContext;
624
625    #[test]
626    fn test_valid_reference_link() {
627        let rule = MD052ReferenceLinkImages::new();
628        let content = "[text][ref]\n\n[ref]: https://example.com";
629        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
630        let result = rule.check(&ctx).unwrap();
631
632        assert_eq!(result.len(), 0);
633    }
634
635    #[test]
636    fn test_undefined_reference_link() {
637        let rule = MD052ReferenceLinkImages::new();
638        let content = "[text][undefined]";
639        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
640        let result = rule.check(&ctx).unwrap();
641
642        assert_eq!(result.len(), 1);
643        assert!(result[0].message.contains("Reference 'undefined' not found"));
644    }
645
646    #[test]
647    fn test_valid_reference_image() {
648        let rule = MD052ReferenceLinkImages::new();
649        let content = "![alt][img]\n\n[img]: image.jpg";
650        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
651        let result = rule.check(&ctx).unwrap();
652
653        assert_eq!(result.len(), 0);
654    }
655
656    #[test]
657    fn test_undefined_reference_image() {
658        let rule = MD052ReferenceLinkImages::new();
659        let content = "![alt][missing]";
660        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
661        let result = rule.check(&ctx).unwrap();
662
663        assert_eq!(result.len(), 1);
664        assert!(result[0].message.contains("Reference 'missing' not found"));
665    }
666
667    #[test]
668    fn test_case_insensitive_references() {
669        let rule = MD052ReferenceLinkImages::new();
670        let content = "[Text][REF]\n\n[ref]: https://example.com";
671        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
672        let result = rule.check(&ctx).unwrap();
673
674        assert_eq!(result.len(), 0);
675    }
676
677    #[test]
678    fn test_shortcut_reference_valid() {
679        let rule = MD052ReferenceLinkImages::new();
680        let content = "[ref]\n\n[ref]: https://example.com";
681        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
682        let result = rule.check(&ctx).unwrap();
683
684        assert_eq!(result.len(), 0);
685    }
686
687    #[test]
688    fn test_shortcut_reference_undefined() {
689        let rule = MD052ReferenceLinkImages::new();
690        let content = "[undefined]";
691        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
692        let result = rule.check(&ctx).unwrap();
693
694        assert_eq!(result.len(), 1);
695        assert!(result[0].message.contains("Reference 'undefined' not found"));
696    }
697
698    #[test]
699    fn test_inline_links_ignored() {
700        let rule = MD052ReferenceLinkImages::new();
701        let content = "[text](https://example.com)";
702        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
703        let result = rule.check(&ctx).unwrap();
704
705        assert_eq!(result.len(), 0);
706    }
707
708    #[test]
709    fn test_inline_images_ignored() {
710        let rule = MD052ReferenceLinkImages::new();
711        let content = "![alt](image.jpg)";
712        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
713        let result = rule.check(&ctx).unwrap();
714
715        assert_eq!(result.len(), 0);
716    }
717
718    #[test]
719    fn test_references_in_code_blocks_ignored() {
720        let rule = MD052ReferenceLinkImages::new();
721        let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
722        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
723        let result = rule.check(&ctx).unwrap();
724
725        assert_eq!(result.len(), 0);
726    }
727
728    #[test]
729    fn test_references_in_inline_code_ignored() {
730        let rule = MD052ReferenceLinkImages::new();
731        let content = "`[undefined]`";
732        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
733        let result = rule.check(&ctx).unwrap();
734
735        // References inside inline code spans should be ignored
736        assert_eq!(result.len(), 0);
737    }
738
739    #[test]
740    fn test_comprehensive_inline_code_detection() {
741        let rule = MD052ReferenceLinkImages::new();
742        let content = r#"# Test
743
744This `[inside]` should be ignored.
745This [outside] should be flagged.
746Reference links `[text][ref]` in code are ignored.
747Regular reference [text][missing] should be flagged.
748Images `![alt][img]` in code are ignored.
749Regular image ![alt][badimg] should be flagged.
750
751Multiple `[one]` and `[two]` in code ignored, but [three] is not.
752
753```
754[code block content] should be ignored
755```
756
757`Multiple [refs] in [same] code span` ignored."#;
758
759        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
760        let result = rule.check(&ctx).unwrap();
761
762        // Should only flag: outside, missing, badimg, three (4 total)
763        assert_eq!(result.len(), 4);
764
765        let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
766        assert!(messages.iter().any(|m| m.contains("outside")));
767        assert!(messages.iter().any(|m| m.contains("missing")));
768        assert!(messages.iter().any(|m| m.contains("badimg")));
769        assert!(messages.iter().any(|m| m.contains("three")));
770
771        // Should NOT flag any references inside code spans
772        assert!(!messages.iter().any(|m| m.contains("inside")));
773        assert!(!messages.iter().any(|m| m.contains("one")));
774        assert!(!messages.iter().any(|m| m.contains("two")));
775        assert!(!messages.iter().any(|m| m.contains("refs")));
776        assert!(!messages.iter().any(|m| m.contains("same")));
777    }
778
779    #[test]
780    fn test_multiple_undefined_references() {
781        let rule = MD052ReferenceLinkImages::new();
782        let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
783        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
784        let result = rule.check(&ctx).unwrap();
785
786        assert_eq!(result.len(), 3);
787        assert!(result[0].message.contains("ref1"));
788        assert!(result[1].message.contains("ref2"));
789        assert!(result[2].message.contains("ref3"));
790    }
791
792    #[test]
793    fn test_mixed_valid_and_undefined() {
794        let rule = MD052ReferenceLinkImages::new();
795        let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
796        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
797        let result = rule.check(&ctx).unwrap();
798
799        assert_eq!(result.len(), 1);
800        assert!(result[0].message.contains("missing"));
801    }
802
803    #[test]
804    fn test_empty_reference() {
805        let rule = MD052ReferenceLinkImages::new();
806        let content = "[text][]\n\n[ref]: https://example.com";
807        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
808        let result = rule.check(&ctx).unwrap();
809
810        // Empty reference should use the link text as reference
811        assert_eq!(result.len(), 1);
812    }
813
814    #[test]
815    fn test_escaped_brackets_ignored() {
816        let rule = MD052ReferenceLinkImages::new();
817        let content = "\\[not a link\\]";
818        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
819        let result = rule.check(&ctx).unwrap();
820
821        assert_eq!(result.len(), 0);
822    }
823
824    #[test]
825    fn test_list_items_ignored() {
826        let rule = MD052ReferenceLinkImages::new();
827        let content = "- [undefined]\n* [another]\n+ [third]";
828        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
829        let result = rule.check(&ctx).unwrap();
830
831        // List items that look like shortcut references should be ignored
832        assert_eq!(result.len(), 0);
833    }
834
835    #[test]
836    fn test_output_example_section_ignored() {
837        let rule = MD052ReferenceLinkImages::new();
838        let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
839        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
840        let result = rule.check(&ctx).unwrap();
841
842        // Only the reference outside the Output section should be flagged
843        assert_eq!(result.len(), 1);
844        assert!(result[0].message.contains("missing"));
845    }
846
847    #[test]
848    fn test_reference_definitions_in_code_blocks_ignored() {
849        let rule = MD052ReferenceLinkImages::new();
850        let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
851        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
852        let result = rule.check(&ctx).unwrap();
853
854        // Reference defined in code block should not count
855        assert_eq!(result.len(), 1);
856        assert!(result[0].message.contains("ref"));
857    }
858
859    #[test]
860    fn test_multiple_references_to_same_undefined() {
861        let rule = MD052ReferenceLinkImages::new();
862        let content = "[first][missing] [second][missing] [third][missing]";
863        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
864        let result = rule.check(&ctx).unwrap();
865
866        // Should only report once per unique reference
867        assert_eq!(result.len(), 1);
868        assert!(result[0].message.contains("missing"));
869    }
870
871    #[test]
872    fn test_reference_with_special_characters() {
873        let rule = MD052ReferenceLinkImages::new();
874        let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
875        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
876        let result = rule.check(&ctx).unwrap();
877
878        assert_eq!(result.len(), 0);
879    }
880
881    #[test]
882    fn test_issue_51_html_attribute_not_reference() {
883        // Test for issue #51 - HTML attributes with square brackets shouldn't be treated as references
884        let rule = MD052ReferenceLinkImages::new();
885        let content = r#"# Example
886
887## Test
888
889Want to fill out this form?
890
891<form method="post">
892    <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
893</form>"#;
894        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
895        let result = rule.check(&ctx).unwrap();
896
897        assert_eq!(
898            result.len(),
899            0,
900            "HTML attributes with square brackets should not be flagged as undefined references"
901        );
902    }
903
904    #[test]
905    fn test_extract_references() {
906        let rule = MD052ReferenceLinkImages::new();
907        let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
908        let refs = rule.extract_references(content, false);
909
910        assert_eq!(refs.len(), 3);
911        assert!(refs.contains("ref1"));
912        assert!(refs.contains("ref2"));
913        assert!(refs.contains("ref3"));
914    }
915
916    #[test]
917    fn test_inline_code_not_flagged() {
918        let rule = MD052ReferenceLinkImages::new();
919
920        // Test that arrays in inline code are not flagged as references
921        let content = r#"# Test
922
923Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.
924
925Also, `[todo]` is not a reference link.
926
927But this [reference] should be flagged.
928
929And this `[inline code]` should not be flagged.
930"#;
931
932        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
933        let warnings = rule.check(&ctx).unwrap();
934
935        // Should only flag [reference], not the ones in backticks
936        assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
937        assert!(warnings[0].message.contains("'reference'"));
938    }
939
940    #[test]
941    fn test_code_block_references_ignored() {
942        let rule = MD052ReferenceLinkImages::new();
943
944        let content = r#"# Test
945
946```markdown
947[undefined] reference in code block
948![undefined] image in code block
949```
950
951[real-undefined] reference outside
952"#;
953
954        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
955        let warnings = rule.check(&ctx).unwrap();
956
957        // Should only flag [real-undefined], not the ones in code block
958        assert_eq!(warnings.len(), 1);
959        assert!(warnings[0].message.contains("'real-undefined'"));
960    }
961
962    #[test]
963    fn test_html_comments_ignored() {
964        // Test for issue #20 - MD052 should not flag content inside HTML comments
965        let rule = MD052ReferenceLinkImages::new();
966
967        // Test the exact case from issue #20
968        let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
969<!--- set_env EDITOR 'python3 fake_editor.py' -->
970
971```bash
972$ python3 vote.py
9733 votes for: 2
9742 votes for: 3, 4
975```"#;
976        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
977        let result = rule.check(&ctx).unwrap();
978        assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");
979
980        // Test various reference patterns inside HTML comments
981        let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
982Normal [text][undefined]
983<!-- Another [comment][with] references -->"#;
984        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
985        let result = rule.check(&ctx).unwrap();
986        assert_eq!(
987            result.len(),
988            1,
989            "Should only flag the undefined reference outside comments"
990        );
991        assert!(result[0].message.contains("undefined"));
992
993        // Test multi-line HTML comments
994        let content = r#"<!--
995[ref1]
996[ref2][ref3]
997-->
998[actual][undefined]"#;
999        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1000        let result = rule.check(&ctx).unwrap();
1001        assert_eq!(
1002            result.len(),
1003            1,
1004            "Should not flag references in multi-line HTML comments"
1005        );
1006        assert!(result[0].message.contains("undefined"));
1007
1008        // Test mixed scenarios
1009        let content = r#"<!-- Comment with [1:] pattern -->
1010Valid [link][ref]
1011<!-- More [refs][in][comments] -->
1012![image][missing]
1013
1014[ref]: https://example.com"#;
1015        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1016        let result = rule.check(&ctx).unwrap();
1017        assert_eq!(result.len(), 1, "Should only flag missing image reference");
1018        assert!(result[0].message.contains("missing"));
1019    }
1020
1021    #[test]
1022    fn test_frontmatter_ignored() {
1023        // Test for issue #24 - MD052 should not flag content inside frontmatter
1024        let rule = MD052ReferenceLinkImages::new();
1025
1026        // Test YAML frontmatter with arrays and references
1027        let content = r#"---
1028layout: post
1029title: "My Jekyll Post"
1030date: 2023-01-01
1031categories: blog
1032tags: ["test", "example"]
1033author: John Doe
1034---
1035
1036# My Blog Post
1037
1038This is the actual markdown content that should be linted.
1039
1040[undefined] reference should be flagged.
1041
1042## Section 1
1043
1044Some content here."#;
1045        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1046        let result = rule.check(&ctx).unwrap();
1047
1048        // Should only flag [undefined] in the content, not the ["test", "example"] array in frontmatter
1049        assert_eq!(
1050            result.len(),
1051            1,
1052            "Should only flag the undefined reference outside frontmatter"
1053        );
1054        assert!(result[0].message.contains("undefined"));
1055
1056        // Test TOML frontmatter
1057        let content = r#"+++
1058title = "My Post"
1059tags = ["example", "test"]
1060+++
1061
1062# Content
1063
1064[missing] reference should be flagged."#;
1065        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1066        let result = rule.check(&ctx).unwrap();
1067        assert_eq!(
1068            result.len(),
1069            1,
1070            "Should only flag the undefined reference outside TOML frontmatter"
1071        );
1072        assert!(result[0].message.contains("missing"));
1073    }
1074
1075    #[test]
1076    fn test_mkdocs_snippet_markers_not_flagged() {
1077        // Test for issue #68 - MkDocs snippet selection markers should not be flagged as undefined references
1078        let rule = MD052ReferenceLinkImages::new();
1079
1080        // Test snippet section markers
1081        let content = r#"# Document with MkDocs Snippets
1082
1083Some content here.
1084
1085# -8<- [start:remote-content]
1086
1087This is the remote content section.
1088
1089# -8<- [end:remote-content]
1090
1091More content here.
1092
1093<!-- --8<-- [start:another-section] -->
1094Content in another section
1095<!-- --8<-- [end:another-section] -->"#;
1096        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs);
1097        let result = rule.check(&ctx).unwrap();
1098
1099        // Should not flag any snippet markers as undefined references
1100        assert_eq!(
1101            result.len(),
1102            0,
1103            "Should not flag MkDocs snippet markers as undefined references"
1104        );
1105
1106        // Test that the snippet marker lines are properly skipped
1107        // but regular undefined references on other lines are still caught
1108        let content = r#"# Document
1109
1110# -8<- [start:section]
1111Content with [reference] inside snippet section
1112# -8<- [end:section]
1113
1114Regular [undefined] reference outside snippet markers."#;
1115        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs);
1116        let result = rule.check(&ctx).unwrap();
1117
1118        assert_eq!(
1119            result.len(),
1120            2,
1121            "Should flag undefined references but skip snippet marker lines"
1122        );
1123        // The references inside the content should be flagged, but not start: and end:
1124        assert!(result[0].message.contains("reference"));
1125        assert!(result[1].message.contains("undefined"));
1126
1127        // Test in standard mode - should flag the markers as undefined
1128        let content = r#"# Document
1129
1130# -8<- [start:section]
1131# -8<- [end:section]"#;
1132        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1133        let result = rule.check(&ctx).unwrap();
1134
1135        assert_eq!(
1136            result.len(),
1137            2,
1138            "In standard mode, snippet markers should be flagged as undefined references"
1139        );
1140    }
1141
1142    #[test]
1143    fn test_github_alerts_not_flagged() {
1144        // Test for issue #60 - GitHub alerts should not be flagged as undefined references
1145        let rule = MD052ReferenceLinkImages::new();
1146
1147        // Test various GitHub alert types
1148        let content = r#"# Document with GitHub Alerts
1149
1150> [!NOTE]
1151> This is a note alert.
1152
1153> [!TIP]
1154> This is a tip alert.
1155
1156> [!IMPORTANT]
1157> This is an important alert.
1158
1159> [!WARNING]
1160> This is a warning alert.
1161
1162> [!CAUTION]
1163> This is a caution alert.
1164
1165Regular content with [undefined] reference."#;
1166        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1167        let result = rule.check(&ctx).unwrap();
1168
1169        // Should only flag the undefined reference, not the GitHub alerts
1170        assert_eq!(
1171            result.len(),
1172            1,
1173            "Should only flag the undefined reference, not GitHub alerts"
1174        );
1175        assert!(result[0].message.contains("undefined"));
1176        assert_eq!(result[0].line, 18); // Line with [undefined]
1177
1178        // Test GitHub alerts with additional content
1179        let content = r#"> [!TIP]
1180> Here's a useful tip about [something].
1181> Multiple lines are allowed.
1182
1183[something] is mentioned but not defined."#;
1184        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1185        let result = rule.check(&ctx).unwrap();
1186
1187        // Should flag only the [something] outside blockquotes
1188        // The test shows we're only catching one, which might be correct behavior
1189        // matching markdownlint's approach
1190        assert_eq!(result.len(), 1, "Should flag undefined reference");
1191        assert!(result[0].message.contains("something"));
1192
1193        // Test GitHub alerts with proper references
1194        let content = r#"> [!NOTE]
1195> See [reference] for more details.
1196
1197[reference]: https://example.com"#;
1198        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1199        let result = rule.check(&ctx).unwrap();
1200
1201        // Should not flag anything - [!NOTE] is GitHub alert and [reference] is defined
1202        assert_eq!(result.len(), 0, "Should not flag GitHub alerts or defined references");
1203    }
1204}