1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::{HTML_COMMENT_PATTERN, SHORTCUT_REF_REGEX};
5use crate::utils::skip_context::{is_in_front_matter, is_in_math_context, is_in_table_cell};
6use lazy_static::lazy_static;
7use regex::Regex;
8use std::collections::{HashMap, HashSet};
9
lazy_static! {
    // A reference definition line: optional leading whitespace, `[label]:`, then the rest of
    // the line. Group 1 is the label; it may contain backslash escapes and one level of
    // nested `[...]`.
    static ref REF_REGEX: Regex = Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap();

    // A bullet list item (`-`, `*` or `+` followed by whitespace), optionally followed by a
    // task checkbox such as `[ ]` or `[x]`.
    static ref LIST_ITEM_REGEX: Regex = Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap();

    // Start of a fence line: group 1 is the leading whitespace, group 2 the run of three or
    // more backticks or tildes.
    static ref FENCED_CODE_START: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap();

    // A heading whose entire text is "Output", "Example", "Output Style" or "Output Format".
    static ref OUTPUT_EXAMPLE_START: Regex = Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap();

    // A blockquote line that begins with an alert marker such as `> [!NOTE]`.
    static ref GITHUB_ALERT_REGEX: Regex = Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]").unwrap();

    // An http(s) URL that contains square brackets: either a bracketed host made of hex
    // digits, `:`, `.` and `%` (IPv6-literal shape), or a path that ends in `[digits]`.
    static ref URL_WITH_BRACKETS: Regex = Regex::new(
        r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])"
    ).unwrap();
}
40
/// Rule MD052: reference links and images should use a reference that exists.
///
/// The struct has no fields, so the rule carries no state or configuration.
#[derive(Clone, Default)]
pub struct MD052ReferenceLinkImages {}
48
49impl MD052ReferenceLinkImages {
    /// Creates a new instance of the rule.
    pub fn new() -> Self {
        Self {}
    }
53
54 fn strip_backticks(s: &str) -> &str {
57 s.trim_start_matches('`').trim_end_matches('`')
58 }
59
60 fn is_valid_python_identifier(s: &str) -> bool {
64 if s.is_empty() {
65 return false;
66 }
67 let first_char = s.chars().next().unwrap();
68 if !first_char.is_ascii_alphabetic() && first_char != '_' {
69 return false;
70 }
71 s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
72 }
73
74 fn is_likely_not_reference(text: &str) -> bool {
77 if text.chars().all(|c| c.is_ascii_digit()) {
79 return true;
80 }
81
82 if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
84 return true;
85 }
86
87 if text.contains('.') && !text.contains(' ') && !text.contains('-') && !text.contains('_') {
90 return true;
92 }
93
94 if text == "*" || text == "..." || text == "**" {
96 return true;
97 }
98
99 if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
101 return true;
102 }
103
104 if text.contains(',') || text.contains('[') || text.contains(']') {
107 return true;
109 }
110
111 if !text.contains('`')
118 && text.contains('.')
119 && !text.contains(' ')
120 && !text.contains('-')
121 && !text.contains('_')
122 {
123 return true;
124 }
125
126 if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
133 return true;
134 }
135
136 if text.len() <= 2 && !text.chars().all(|c| c.is_alphabetic()) {
138 return true;
139 }
140
141 if (text.starts_with('"') && text.ends_with('"'))
143 || (text.starts_with('\'') && text.ends_with('\''))
144 || text.contains('"')
145 || text.contains('\'')
146 {
147 return true;
148 }
149
150 if text.contains(':') && text.contains(' ') {
153 return true;
154 }
155
156 if text.starts_with('!') {
158 return true;
159 }
160
161 if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
163 return true;
164 }
165
166 let common_non_refs = [
169 "object", "Object", "any", "Any", "inv", "void", "bool", "int", "float", "str", "char", "i8", "i16", "i32",
170 "i64", "i128", "isize", "u8", "u16", "u32", "u64", "u128", "usize", "f32", "f64",
171 ];
172
173 if common_non_refs.contains(&text) {
174 return true;
175 }
176
177 false
178 }
179
180 fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
182 code_spans
183 .iter()
184 .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
185 }
186
187 fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
189 for m in HTML_COMMENT_PATTERN.find_iter(content) {
190 if m.start() <= byte_pos && byte_pos < m.end() {
191 return true;
192 }
193 }
194 false
195 }
196
197 fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
199 for html_tag in ctx.html_tags().iter() {
201 if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
202 return true;
203 }
204 }
205 false
206 }
207
208 fn extract_references(&self, content: &str, mkdocs_mode: bool) -> HashSet<String> {
209 use crate::config::MarkdownFlavor;
210 use crate::utils::skip_context::is_mkdocs_snippet_line;
211
212 let mut references = HashSet::new();
213 let mut in_code_block = false;
214 let mut code_fence_marker = String::new();
215
216 for line in content.lines() {
217 if is_mkdocs_snippet_line(
219 line,
220 if mkdocs_mode {
221 MarkdownFlavor::MkDocs
222 } else {
223 MarkdownFlavor::Standard
224 },
225 ) {
226 continue;
227 }
228 if let Some(cap) = FENCED_CODE_START.captures(line) {
230 if let Some(fence) = cap.get(2) {
231 let fence_str = fence.as_str();
233 if !in_code_block {
234 in_code_block = true;
235 code_fence_marker = fence_str.to_string();
236 } else if line.trim_start().starts_with(&code_fence_marker) {
237 let trimmed = line.trim_start();
239 if trimmed.starts_with(&code_fence_marker) {
241 let after_fence = &trimmed[code_fence_marker.len()..];
242 if after_fence.trim().is_empty() {
243 in_code_block = false;
244 code_fence_marker.clear();
245 }
246 }
247 }
248 }
249 continue;
250 }
251
252 if in_code_block {
254 continue;
255 }
256
257 if line.trim_start().starts_with("*[") {
260 continue;
261 }
262
263 if let Some(cap) = REF_REGEX.captures(line) {
264 if let Some(reference) = cap.get(1) {
266 references.insert(reference.as_str().to_lowercase());
267 }
268 }
269 }
270
271 references
272 }
273
    /// Finds reference-style links, images and shortcut references whose label is not in
    /// `references`.
    ///
    /// Runs three passes: (1) reference links in `ctx.links`, (2) reference images in
    /// `ctx.images`, (3) a line-by-line scan with `SHORTCUT_REF_REGEX` for bracketed text
    /// not covered by a parsed link or image. Each lower-cased label is reported at most
    /// once across all passes.
    ///
    /// `references` is expected to hold lower-cased labels, since lookups use the
    /// lower-cased id. `mkdocs_mode` enables the MkDocs auto-reference and snippet-marker
    /// exemptions.
    ///
    /// Returns `(line_index, column, match_len, reference)` tuples. `line_index` is
    /// zero-based (`link.line - 1` / the `enumerate` index). In passes 1 and 2 the column
    /// is `start_col` of the link/image; in pass 3 it is the regex match start within the
    /// line (a byte offset).
    fn find_undefined_references(
        &self,
        content: &str,
        references: &HashSet<String>,
        ctx: &crate::lint_context::LintContext,
        mkdocs_mode: bool,
    ) -> Vec<(usize, usize, usize, String)> {
        let mut undefined = Vec::new();
        // Used as a set of already-reported lower-cased labels; the `bool` value is never read.
        let mut reported_refs = HashMap::new();
        // Fence state for the line scan (pass 3).
        let mut in_code_block = false;
        let mut code_fence_marker = String::new();
        let mut in_example_section = false;

        let code_spans = ctx.code_spans();

        // Pass 1: parsed reference-style links.
        for link in &ctx.links {
            if !link.is_reference {
                continue;
            }

            // Skip links located in contexts where markdown link syntax is not meaningful.
            if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
                continue;
            }

            if Self::is_in_html_comment(content, link.byte_offset) {
                continue;
            }

            if Self::is_in_html_tag(ctx, link.byte_offset) {
                continue;
            }

            if is_in_math_context(ctx, link.byte_offset) {
                continue;
            }

            if is_in_table_cell(ctx, link.line, link.start_col) {
                continue;
            }

            // `link.line` is treated as one-based here; the helper receives a zero-based index.
            if is_in_front_matter(content, link.line.saturating_sub(1)) {
                continue;
            }

            if let Some(ref_id) = &link.reference_id {
                let reference_lower = ref_id.to_lowercase();

                // MkDocs: skip auto-references, and backtick-wrapped identifiers
                // (the `!=` comparison is true only when backticks were actually stripped).
                let stripped_ref = Self::strip_backticks(ref_id);
                let stripped_text = Self::strip_backticks(&link.text);
                if mkdocs_mode
                    && (is_mkdocs_auto_reference(stripped_ref)
                        || is_mkdocs_auto_reference(stripped_text)
                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
                        || (link.text.as_str() != stripped_text && Self::is_valid_python_identifier(stripped_text)))
                {
                    continue;
                }

                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
                    if let Some(line_info) = ctx.line_info(link.line) {
                        // NOTE(review): this tests the link's own line against a heading-only
                        // pattern, and the flag is not cleared again within this pass — confirm
                        // this is intended.
                        if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
                            in_example_section = true;
                            continue;
                        }

                        if in_example_section {
                            continue;
                        }

                        // Links on bullet list lines are not reported.
                        if LIST_ITEM_REGEX.is_match(&line_info.content) {
                            continue;
                        }

                        // Lines that start with `<` are not reported.
                        let trimmed = line_info.content.trim_start();
                        if trimmed.starts_with('<') {
                            continue;
                        }
                    }

                    let match_len = link.byte_end - link.byte_offset;
                    undefined.push((link.line - 1, link.start_col, match_len, ref_id.clone()));
                    reported_refs.insert(reference_lower, true);
                }
            }
        }

        // Pass 2: parsed reference-style images; mirrors pass 1, using the alt text in
        // place of the link text.
        for image in &ctx.images {
            if !image.is_reference {
                continue;
            }

            if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
                continue;
            }

            if Self::is_in_html_comment(content, image.byte_offset) {
                continue;
            }

            if Self::is_in_html_tag(ctx, image.byte_offset) {
                continue;
            }

            if is_in_math_context(ctx, image.byte_offset) {
                continue;
            }

            if is_in_table_cell(ctx, image.line, image.start_col) {
                continue;
            }

            if is_in_front_matter(content, image.line.saturating_sub(1)) {
                continue;
            }

            if let Some(ref_id) = &image.reference_id {
                let reference_lower = ref_id.to_lowercase();

                let stripped_ref = Self::strip_backticks(ref_id);
                let stripped_alt = Self::strip_backticks(&image.alt_text);
                if mkdocs_mode
                    && (is_mkdocs_auto_reference(stripped_ref)
                        || is_mkdocs_auto_reference(stripped_alt)
                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
                        || (image.alt_text.as_str() != stripped_alt && Self::is_valid_python_identifier(stripped_alt)))
                {
                    continue;
                }

                if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
                    if let Some(line_info) = ctx.line_info(image.line) {
                        if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
                            in_example_section = true;
                            continue;
                        }

                        if in_example_section {
                            continue;
                        }

                        if LIST_ITEM_REGEX.is_match(&line_info.content) {
                            continue;
                        }

                        let trimmed = line_info.content.trim_start();
                        if trimmed.starts_with('<') {
                            continue;
                        }
                    }

                    let match_len = image.byte_end - image.byte_offset;
                    undefined.push((image.line - 1, image.start_col, match_len, ref_id.clone()));
                    reported_refs.insert(reference_lower, true);
                }
            }
        }

        // Byte ranges of every parsed link and image (inline ones included), sorted by
        // start, so pass 3 can skip brackets that belong to them.
        let mut covered_ranges: Vec<(usize, usize)> = Vec::new();

        for link in &ctx.links {
            covered_ranges.push((link.byte_offset, link.byte_end));
        }

        for image in &ctx.images {
            covered_ranges.push((image.byte_offset, image.byte_end));
        }

        covered_ranges.sort_by_key(|&(start, _)| start);

        // Pass 3: line scan for shortcut references.
        let lines: Vec<&str> = content.lines().collect();
        // Start the scan with a clean example-section state.
        in_example_section = false;
        for (line_num, line) in lines.iter().enumerate() {
            if is_in_front_matter(content, line_num) {
                continue;
            }

            // Track fenced code blocks; every fence-like line is skipped.
            if let Some(cap) = FENCED_CODE_START.captures(line) {
                if let Some(fence) = cap.get(2) {
                    let fence_str = fence.as_str();
                    if !in_code_block {
                        in_code_block = true;
                        code_fence_marker = fence_str.to_string();
                    } else if line.trim_start().starts_with(&code_fence_marker) {
                        let trimmed = line.trim_start();
                        if trimmed.starts_with(&code_fence_marker) {
                            // The block closes only when nothing but whitespace follows
                            // the opening marker text.
                            let after_fence = &trimmed[code_fence_marker.len()..];
                            if after_fence.trim().is_empty() {
                                in_code_block = false;
                                code_fence_marker.clear();
                            }
                        }
                    }
                }
                continue;
            }

            if in_code_block {
                continue;
            }

            // An "Output"/"Example" heading opens an example section ...
            if OUTPUT_EXAMPLE_START.is_match(line) {
                in_example_section = true;
                continue;
            }

            // ... which lasts until the next line starting with `#`.
            if in_example_section {
                if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
                    in_example_section = false;
                } else {
                    continue;
                }
            }

            if LIST_ITEM_REGEX.is_match(line) {
                continue;
            }

            let trimmed_line = line.trim_start();
            if trimmed_line.starts_with('<') {
                continue;
            }

            if GITHUB_ALERT_REGEX.is_match(line) {
                continue;
            }

            if trimmed_line.starts_with("*[") {
                continue;
            }

            // Byte ranges (within the line) of `[...]` groups that are part of a URL.
            let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
            for mat in URL_WITH_BRACKETS.find_iter(line) {
                let url_str = mat.as_str();
                let url_start = mat.start();

                let mut idx = 0;
                while idx < url_str.len() {
                    if let Some(bracket_start) = url_str[idx..].find('[') {
                        let bracket_start_abs = url_start + idx + bracket_start;
                        if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
                            // End is exclusive: one past the closing `]`.
                            let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
                            url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
                            idx += bracket_start + bracket_end + 2;
                        } else {
                            break;
                        }
                    } else {
                        break;
                    }
                }
            }

            // If any regex step yields an error, the whole line is silently skipped.
            if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
                for cap in captures {
                    if let Some(ref_match) = cap.get(1) {
                        let bracket_start = cap.get(0).unwrap().start();
                        let bracket_end = cap.get(0).unwrap().end();

                        let is_in_url = url_bracket_ranges
                            .iter()
                            .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);

                        if is_in_url {
                            continue;
                        }

                        let reference = ref_match.as_str();
                        let reference_lower = reference.to_lowercase();

                        if Self::is_likely_not_reference(reference) {
                            continue;
                        }

                        // NOTE(review): `is_likely_not_reference` already returns true for text
                        // starting with `!`, so this alert check looks unreachable — confirm.
                        if let Some(alert_type) = reference.strip_prefix('!')
                            && matches!(
                                alert_type,
                                "NOTE"
                                    | "TIP"
                                    | "WARNING"
                                    | "IMPORTANT"
                                    | "CAUTION"
                                    | "INFO"
                                    | "SUCCESS"
                                    | "FAILURE"
                                    | "DANGER"
                                    | "BUG"
                                    | "EXAMPLE"
                                    | "QUOTE"
                            )
                        {
                            continue;
                        }

                        // MkDocs snippet section markers such as `[start:name]` / `[end:name]`.
                        if mkdocs_mode
                            && (reference.starts_with("start:") || reference.starts_with("end:"))
                            && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
                                || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
                        {
                            continue;
                        }

                        let stripped_ref = Self::strip_backticks(reference);
                        if mkdocs_mode
                            && (is_mkdocs_auto_reference(stripped_ref)
                                || (reference != stripped_ref && Self::is_valid_python_identifier(stripped_ref)))
                        {
                            continue;
                        }

                        if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
                            let full_match = cap.get(0).unwrap();
                            // Byte offset of the match within the line.
                            let col = full_match.start();

                            // NOTE(review): shadows the `code_spans` fetched before pass 1 and
                            // is re-fetched for every candidate.
                            let code_spans = ctx.code_spans();
                            if Self::is_in_code_span(line_num + 1, col, &code_spans) {
                                continue;
                            }

                            let line_start_byte = ctx.line_offsets[line_num];
                            let byte_pos = line_start_byte + col;

                            if Self::is_in_html_comment(content, byte_pos) {
                                continue;
                            }

                            if Self::is_in_html_tag(ctx, byte_pos) {
                                continue;
                            }

                            if is_in_math_context(ctx, byte_pos) {
                                continue;
                            }

                            if is_in_table_cell(ctx, line_num + 1, col) {
                                continue;
                            }

                            let byte_end = byte_pos + (full_match.end() - full_match.start());

                            // Skip matches fully contained in a parsed link or image. The
                            // ranges are sorted by start, so the scan can stop early.
                            let mut is_covered = false;
                            for &(range_start, range_end) in &covered_ranges {
                                if range_start <= byte_pos && byte_end <= range_end {
                                    is_covered = true;
                                    break;
                                }
                                if range_start > byte_end {
                                    break;
                                }
                            }

                            if is_covered {
                                continue;
                            }

                            // If the match is directly preceded by `]`, walk backwards to find
                            // the matching unescaped `[`; if found, this bracket is the second
                            // half of a `[text][ref]` form and is skipped.
                            // NOTE(review): `col` is a byte offset but indexes a `Vec<char>`;
                            // the two differ on lines with multi-byte characters.
                            let line_chars: Vec<char> = line.chars().collect();
                            if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
                                let mut bracket_count = 1;
                                let mut check_pos = col.saturating_sub(2);
                                let mut found_opening = false;

                                // NOTE(review): the `check_pos > 0` condition means index 0 is
                                // never examined, so an opening `[` at the very start of the
                                // line is not found and the `check_pos == 0` tests below
                                // cannot be true inside the loop.
                                while check_pos > 0 && check_pos < line_chars.len() {
                                    match line_chars.get(check_pos) {
                                        Some(&']') => bracket_count += 1,
                                        Some(&'[') => {
                                            bracket_count -= 1;
                                            if bracket_count == 0 {
                                                if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
                                                    found_opening = true;
                                                }
                                                break;
                                            }
                                        }
                                        _ => {}
                                    }
                                    if check_pos == 0 {
                                        break;
                                    }
                                    check_pos = check_pos.saturating_sub(1);
                                }

                                if found_opening {
                                    continue;
                                }
                            }

                            // Skip when an escaped pair `\[ ... \]` appears earlier on the line
                            // (an escaped `\]` with an escaped `\[` somewhere before it).
                            let before_text = &line[..col];
                            if before_text.contains("\\]") {
                                if let Some(escaped_close_pos) = before_text.rfind("\\]") {
                                    let search_text = &before_text[..escaped_close_pos];
                                    if search_text.contains("\\[") {
                                        continue;
                                    }
                                }
                            }

                            let match_len = full_match.end() - full_match.start();
                            undefined.push((line_num, col, match_len, reference.to_string()));
                            reported_refs.insert(reference_lower, true);
                        }
                    }
                }
            }
        }

        undefined
    }
761}
762
763impl Rule for MD052ReferenceLinkImages {
764 fn name(&self) -> &'static str {
765 "MD052"
766 }
767
768 fn description(&self) -> &'static str {
769 "Reference links and images should use a reference that exists"
770 }
771
772 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
773 let content = ctx.content;
774 let mut warnings = Vec::new();
775
776 let has_reference_links = ctx.links.iter().any(|l| l.is_reference);
779 let has_reference_images = ctx.images.iter().any(|i| i.is_reference);
780
781 if !content.contains('[') {
783 return Ok(warnings);
784 }
785
786 let has_reference_definitions = content.contains("]:");
788
789 if !has_reference_links && !has_reference_images && !has_reference_definitions {
792 let all_brackets_are_inline = ctx.links.iter().all(|l| !l.is_reference)
795 && ctx.images.iter().all(|i| !i.is_reference)
796 && ctx.links.len() + ctx.images.len() > 0;
797
798 if all_brackets_are_inline {
799 return Ok(warnings); }
801 }
802
803 let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
805
806 let references = self.extract_references(content, mkdocs_mode);
807
808 for (line_num, col, match_len, reference) in
810 self.find_undefined_references(content, &references, ctx, mkdocs_mode)
811 {
812 let lines: Vec<&str> = content.lines().collect();
813 let line_content = lines.get(line_num).unwrap_or(&"");
814
815 let (start_line, start_col, end_line, end_col) =
817 calculate_match_range(line_num + 1, line_content, col, match_len);
818
819 warnings.push(LintWarning {
820 rule_name: Some(self.name()),
821 line: start_line,
822 column: start_col,
823 end_line,
824 end_column: end_col,
825 message: format!("Reference '{reference}' not found"),
826 severity: Severity::Warning,
827 fix: None,
828 });
829 }
830
831 Ok(warnings)
832 }
833
834 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
836 ctx.content.is_empty() || !ctx.likely_has_links_or_images()
838 }
839
840 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
841 let content = ctx.content;
842 Ok(content.to_string())
844 }
845
846 fn as_any(&self) -> &dyn std::any::Any {
847 self
848 }
849
850 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
851 where
852 Self: Sized,
853 {
854 Box::new(MD052ReferenceLinkImages::new())
856 }
857}
858
859#[cfg(test)]
860mod tests {
861 use super::*;
862 use crate::lint_context::LintContext;
863
864 #[test]
865 fn test_valid_reference_link() {
866 let rule = MD052ReferenceLinkImages::new();
867 let content = "[text][ref]\n\n[ref]: https://example.com";
868 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
869 let result = rule.check(&ctx).unwrap();
870
871 assert_eq!(result.len(), 0);
872 }
873
874 #[test]
875 fn test_undefined_reference_link() {
876 let rule = MD052ReferenceLinkImages::new();
877 let content = "[text][undefined]";
878 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
879 let result = rule.check(&ctx).unwrap();
880
881 assert_eq!(result.len(), 1);
882 assert!(result[0].message.contains("Reference 'undefined' not found"));
883 }
884
885 #[test]
886 fn test_valid_reference_image() {
887 let rule = MD052ReferenceLinkImages::new();
888 let content = "![alt][img]\n\n[img]: image.jpg";
889 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
890 let result = rule.check(&ctx).unwrap();
891
892 assert_eq!(result.len(), 0);
893 }
894
895 #[test]
896 fn test_undefined_reference_image() {
897 let rule = MD052ReferenceLinkImages::new();
898 let content = "![alt][missing]";
899 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
900 let result = rule.check(&ctx).unwrap();
901
902 assert_eq!(result.len(), 1);
903 assert!(result[0].message.contains("Reference 'missing' not found"));
904 }
905
906 #[test]
907 fn test_case_insensitive_references() {
908 let rule = MD052ReferenceLinkImages::new();
909 let content = "[Text][REF]\n\n[ref]: https://example.com";
910 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
911 let result = rule.check(&ctx).unwrap();
912
913 assert_eq!(result.len(), 0);
914 }
915
916 #[test]
917 fn test_shortcut_reference_valid() {
918 let rule = MD052ReferenceLinkImages::new();
919 let content = "[ref]\n\n[ref]: https://example.com";
920 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
921 let result = rule.check(&ctx).unwrap();
922
923 assert_eq!(result.len(), 0);
924 }
925
926 #[test]
927 fn test_shortcut_reference_undefined() {
928 let rule = MD052ReferenceLinkImages::new();
929 let content = "[undefined]";
930 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
931 let result = rule.check(&ctx).unwrap();
932
933 assert_eq!(result.len(), 1);
934 assert!(result[0].message.contains("Reference 'undefined' not found"));
935 }
936
937 #[test]
938 fn test_inline_links_ignored() {
939 let rule = MD052ReferenceLinkImages::new();
940 let content = "[text](https://example.com)";
941 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
942 let result = rule.check(&ctx).unwrap();
943
944 assert_eq!(result.len(), 0);
945 }
946
947 #[test]
948 fn test_inline_images_ignored() {
949 let rule = MD052ReferenceLinkImages::new();
950 let content = "";
951 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
952 let result = rule.check(&ctx).unwrap();
953
954 assert_eq!(result.len(), 0);
955 }
956
957 #[test]
958 fn test_references_in_code_blocks_ignored() {
959 let rule = MD052ReferenceLinkImages::new();
960 let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
961 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
962 let result = rule.check(&ctx).unwrap();
963
964 assert_eq!(result.len(), 0);
965 }
966
967 #[test]
968 fn test_references_in_inline_code_ignored() {
969 let rule = MD052ReferenceLinkImages::new();
970 let content = "`[undefined]`";
971 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
972 let result = rule.check(&ctx).unwrap();
973
974 assert_eq!(result.len(), 0);
976 }
977
    /// Mixes references inside and outside code spans/blocks and expects only the four
    /// outside ones (`outside`, `missing`, `badimg`, `three`) to be reported.
    #[test]
    fn test_comprehensive_inline_code_detection() {
        let rule = MD052ReferenceLinkImages::new();
        let content = r#"# Test

This `[inside]` should be ignored.
This [outside] should be flagged.
Reference links `[text][ref]` in code are ignored.
Regular reference [text][missing] should be flagged.
Images `![alt][img]` in code are ignored.
Regular image ![alt][badimg] should be flagged.

Multiple `[one]` and `[two]` in code ignored, but [three] is not.

```
[code block content] should be ignored
```

`Multiple [refs] in [same] code span` ignored."#;

        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 4);

        // References outside code must each appear in some message.
        let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
        assert!(messages.iter().any(|m| m.contains("outside")));
        assert!(messages.iter().any(|m| m.contains("missing")));
        assert!(messages.iter().any(|m| m.contains("badimg")));
        assert!(messages.iter().any(|m| m.contains("three")));

        // References inside code must not appear in any message.
        assert!(!messages.iter().any(|m| m.contains("inside")));
        assert!(!messages.iter().any(|m| m.contains("one")));
        assert!(!messages.iter().any(|m| m.contains("two")));
        assert!(!messages.iter().any(|m| m.contains("refs")));
        assert!(!messages.iter().any(|m| m.contains("same")));
    }
1017
1018 #[test]
1019 fn test_multiple_undefined_references() {
1020 let rule = MD052ReferenceLinkImages::new();
1021 let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
1022 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1023 let result = rule.check(&ctx).unwrap();
1024
1025 assert_eq!(result.len(), 3);
1026 assert!(result[0].message.contains("ref1"));
1027 assert!(result[1].message.contains("ref2"));
1028 assert!(result[2].message.contains("ref3"));
1029 }
1030
1031 #[test]
1032 fn test_mixed_valid_and_undefined() {
1033 let rule = MD052ReferenceLinkImages::new();
1034 let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
1035 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1036 let result = rule.check(&ctx).unwrap();
1037
1038 assert_eq!(result.len(), 1);
1039 assert!(result[0].message.contains("missing"));
1040 }
1041
1042 #[test]
1043 fn test_empty_reference() {
1044 let rule = MD052ReferenceLinkImages::new();
1045 let content = "[text][]\n\n[ref]: https://example.com";
1046 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1047 let result = rule.check(&ctx).unwrap();
1048
1049 assert_eq!(result.len(), 1);
1051 }
1052
1053 #[test]
1054 fn test_escaped_brackets_ignored() {
1055 let rule = MD052ReferenceLinkImages::new();
1056 let content = "\\[not a link\\]";
1057 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1058 let result = rule.check(&ctx).unwrap();
1059
1060 assert_eq!(result.len(), 0);
1061 }
1062
1063 #[test]
1064 fn test_list_items_ignored() {
1065 let rule = MD052ReferenceLinkImages::new();
1066 let content = "- [undefined]\n* [another]\n+ [third]";
1067 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1068 let result = rule.check(&ctx).unwrap();
1069
1070 assert_eq!(result.len(), 0);
1072 }
1073
1074 #[test]
1075 fn test_output_example_section_ignored() {
1076 let rule = MD052ReferenceLinkImages::new();
1077 let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
1078 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1079 let result = rule.check(&ctx).unwrap();
1080
1081 assert_eq!(result.len(), 1);
1083 assert!(result[0].message.contains("missing"));
1084 }
1085
1086 #[test]
1087 fn test_reference_definitions_in_code_blocks_ignored() {
1088 let rule = MD052ReferenceLinkImages::new();
1089 let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
1090 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1091 let result = rule.check(&ctx).unwrap();
1092
1093 assert_eq!(result.len(), 1);
1095 assert!(result[0].message.contains("ref"));
1096 }
1097
1098 #[test]
1099 fn test_multiple_references_to_same_undefined() {
1100 let rule = MD052ReferenceLinkImages::new();
1101 let content = "[first][missing] [second][missing] [third][missing]";
1102 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1103 let result = rule.check(&ctx).unwrap();
1104
1105 assert_eq!(result.len(), 1);
1107 assert!(result[0].message.contains("missing"));
1108 }
1109
1110 #[test]
1111 fn test_reference_with_special_characters() {
1112 let rule = MD052ReferenceLinkImages::new();
1113 let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
1114 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1115 let result = rule.check(&ctx).unwrap();
1116
1117 assert_eq!(result.len(), 0);
1118 }
1119
    /// Regression test: square brackets inside an HTML attribute value
    /// (`name="fields[email]"`) must not be reported as an undefined reference.
    #[test]
    fn test_issue_51_html_attribute_not_reference() {
        let rule = MD052ReferenceLinkImages::new();
        let content = r#"# Example

## Test

Want to fill out this form?

<form method="post">
    <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
</form>"#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(
            result.len(),
            0,
            "HTML attributes with square brackets should not be flagged as undefined references"
        );
    }
1142
1143 #[test]
1144 fn test_extract_references() {
1145 let rule = MD052ReferenceLinkImages::new();
1146 let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
1147 let refs = rule.extract_references(content, false);
1148
1149 assert_eq!(refs.len(), 3);
1150 assert!(refs.contains("ref1"));
1151 assert!(refs.contains("ref2"));
1152 assert!(refs.contains("ref3"));
1153 }
1154
    /// Bracketed text in inline code (including a JSON-like array) is ignored, while a
    /// plain `[reference]` in prose is reported.
    #[test]
    fn test_inline_code_not_flagged() {
        let rule = MD052ReferenceLinkImages::new();

        let content = r#"# Test

Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.

Also, `[todo]` is not a reference link.

But this [reference] should be flagged.

And this `[inline code]` should not be flagged.
"#;

        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let warnings = rule.check(&ctx).unwrap();

        assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
        assert!(warnings[0].message.contains("'reference'"));
    }
1178
    /// References inside a fenced block with an info string (```` ```markdown ````) are
    /// ignored; the one after the block is reported.
    #[test]
    fn test_code_block_references_ignored() {
        let rule = MD052ReferenceLinkImages::new();

        let content = r#"# Test

```markdown
[undefined] reference in code block
![undefined] image in code block
```

[real-undefined] reference outside
"#;

        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let warnings = rule.check(&ctx).unwrap();

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("'real-undefined'"));
    }
1200
    /// Bracketed text inside HTML comments (single-line and multi-line) is ignored, while
    /// undefined references outside comments are still reported.
    #[test]
    fn test_html_comments_ignored() {
        let rule = MD052ReferenceLinkImages::new();

        // Case 1: `[1:]` inside a comment, followed by a fenced block.
        let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
<!--- set_env EDITOR 'python3 fake_editor.py' -->

```bash
$ python3 vote.py
3 votes for: 2
2 votes for: 3, 4
```"#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");

        // Case 2: references in single-line comments around a real undefined reference.
        let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
Normal [text][undefined]
<!-- Another [comment][with] references -->"#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(
            result.len(),
            1,
            "Should only flag the undefined reference outside comments"
        );
        assert!(result[0].message.contains("undefined"));

        // Case 3: references inside a multi-line comment.
        let content = r#"<!--
[ref1]
[ref2][ref3]
-->
[actual][undefined]"#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(
            result.len(),
            1,
            "Should not flag references in multi-line HTML comments"
        );
        assert!(result[0].message.contains("undefined"));

        // Case 4: comments mixed with a valid link and an undefined image reference.
        let content = r#"<!-- Comment with [1:] pattern -->
Valid [link][ref]
<!-- More [refs][in][comments] -->
![image][missing]

[ref]: https://example.com"#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1, "Should only flag missing image reference");
        assert!(result[0].message.contains("missing"));
    }
1259
    /// Bracketed values in front matter (`---` and `+++` delimited) are ignored; references
    /// in the body are still reported.
    #[test]
    fn test_frontmatter_ignored() {
        let rule = MD052ReferenceLinkImages::new();

        // Front matter delimited by `---`, containing a bracketed `tags` list.
        let content = r#"---
layout: post
title: "My Jekyll Post"
date: 2023-01-01
categories: blog
tags: ["test", "example"]
author: John Doe
---

# My Blog Post

This is the actual markdown content that should be linted.

[undefined] reference should be flagged.

## Section 1

Some content here."#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(
            result.len(),
            1,
            "Should only flag the undefined reference outside frontmatter"
        );
        assert!(result[0].message.contains("undefined"));

        // Front matter delimited by `+++`.
        let content = r#"+++
title = "My Post"
tags = ["example", "test"]
+++

# Content

[missing] reference should be flagged."#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(
            result.len(),
            1,
            "Should only flag the undefined reference outside TOML frontmatter"
        );
        assert!(result[0].message.contains("missing"));
    }
1313
    /// In the MkDocs flavor, `[start:...]` / `[end:...]` snippet markers are not reported;
    /// in the Standard flavor they are.
    #[test]
    fn test_mkdocs_snippet_markers_not_flagged() {
        let rule = MD052ReferenceLinkImages::new();

        // Case 1 (MkDocs): only snippet markers, in heading-style and comment-style lines.
        let content = r#"# Document with MkDocs Snippets

Some content here.

# -8<- [start:remote-content]

This is the remote content section.

# -8<- [end:remote-content]

More content here.

<!-- --8<-- [start:another-section] -->
Content in another section
<!-- --8<-- [end:another-section] -->"#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(
            result.len(),
            0,
            "Should not flag MkDocs snippet markers as undefined references"
        );

        // Case 2 (MkDocs): real undefined references between and after markers are reported.
        let content = r#"# Document

# -8<- [start:section]
Content with [reference] inside snippet section
# -8<- [end:section]

Regular [undefined] reference outside snippet markers."#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(
            result.len(),
            2,
            "Should flag undefined references but skip snippet marker lines"
        );
        assert!(result[0].message.contains("reference"));
        assert!(result[1].message.contains("undefined"));

        // Case 3 (Standard): the same markers count as undefined references.
        let content = r#"# Document

# -8<- [start:section]
# -8<- [end:section]"#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(
            result.len(),
            2,
            "In standard mode, snippet markers should be flagged as undefined references"
        );
    }
1380
    /// Alert markers such as `> [!NOTE]` are not reported; ordinary undefined references
    /// elsewhere in the document still are.
    #[test]
    fn test_github_alerts_not_flagged() {
        let rule = MD052ReferenceLinkImages::new();

        // Case 1: five alert blocks followed by one undefined reference on line 18.
        let content = r#"# Document with GitHub Alerts

> [!NOTE]
> This is a note alert.

> [!TIP]
> This is a tip alert.

> [!IMPORTANT]
> This is an important alert.

> [!WARNING]
> This is a warning alert.

> [!CAUTION]
> This is a caution alert.

Regular content with [undefined] reference."#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(
            result.len(),
            1,
            "Should only flag the undefined reference, not GitHub alerts"
        );
        assert!(result[0].message.contains("undefined"));
        assert_eq!(result[0].line, 18);

        // Case 2: the same undefined label inside and outside an alert is reported once.
        let content = r#"> [!TIP]
> Here's a useful tip about [something].
> Multiple lines are allowed.

[something] is mentioned but not defined."#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 1, "Should flag undefined reference");
        assert!(result[0].message.contains("something"));

        // Case 3: a defined reference used inside an alert block produces nothing.
        let content = r#"> [!NOTE]
> See [reference] for more details.

[reference]: https://example.com"#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 0, "Should not flag GitHub alerts or defined references");
    }
1443}