1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::{HTML_COMMENT_PATTERN, SHORTCUT_REF_REGEX};
5use crate::utils::skip_context::{is_in_math_context, is_in_table_cell};
6use lazy_static::lazy_static;
7use regex::Regex;
8use std::collections::{HashMap, HashSet};
9
lazy_static! {
    // Matches a reference-definition line: optional leading whitespace, a bracketed
    // label (capture group 1; backslash escapes and one level of nested brackets are
    // accepted inside it), a colon, and then the remainder of the line.
    static ref REF_REGEX: Regex = Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap();

    // Matches the start of a `-`, `*` or `+` list item, optionally followed by a
    // bracketed single character (`x`, `X` or whitespace), i.e. a task-list checkbox.
    static ref LIST_ITEM_REGEX: Regex = Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap();

    // Matches a line that starts (after optional whitespace, group 1) with a run of
    // three or more backticks or three or more tildes (group 2).
    static ref FENCED_CODE_START: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap();

    // Matches a heading line whose entire text is "Output", "Example",
    // "Output Style" or "Output Format".
    static ref OUTPUT_EXAMPLE_START: Regex = Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap();

    // Matches a blockquote line that opens with an alert marker such as `> [!NOTE]`.
    static ref GITHUB_ALERT_REGEX: Regex = Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]").unwrap();

    // Matches http(s) URLs that legitimately contain square brackets: either a
    // bracketed host made of hex digits, colons, dots and percent signs (presumably
    // an IPv6 literal), or a URL with a path that contains a `[digits]` segment.
    static ref URL_WITH_BRACKETS: Regex = Regex::new(
        r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])"
    ).unwrap();
}
40
/// Lint rule MD052: reference links and images should use a reference that exists.
///
/// The rule holds no state or configuration of its own.
#[derive(Clone, Default)]
pub struct MD052ReferenceLinkImages {}
48
49impl MD052ReferenceLinkImages {
50 pub fn new() -> Self {
51 Self {}
52 }
53
54 fn strip_backticks(s: &str) -> &str {
57 s.trim_start_matches('`').trim_end_matches('`')
58 }
59
60 fn is_valid_python_identifier(s: &str) -> bool {
64 if s.is_empty() {
65 return false;
66 }
67 let first_char = s.chars().next().unwrap();
68 if !first_char.is_ascii_alphabetic() && first_char != '_' {
69 return false;
70 }
71 s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
72 }
73
74 fn is_known_non_reference_pattern(text: &str) -> bool {
82 if text.chars().all(|c| c.is_ascii_digit()) {
84 return true;
85 }
86
87 if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
89 return true;
90 }
91
92 if text.contains('.')
96 && !text.contains(' ')
97 && !text.contains('-')
98 && !text.contains('_')
99 && !text.contains('`')
100 {
101 return true;
103 }
104
105 if text == "*" || text == "..." || text == "**" {
107 return true;
108 }
109
110 if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
112 return true;
113 }
114
115 if text.contains(',') || text.contains('[') || text.contains(']') {
118 return true;
120 }
121
122 if !text.contains('`')
129 && text.contains('.')
130 && !text.contains(' ')
131 && !text.contains('-')
132 && !text.contains('_')
133 {
134 return true;
135 }
136
137 if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
144 return true;
145 }
146
147 if text.len() <= 2 && !text.chars().all(|c| c.is_alphabetic()) {
149 return true;
150 }
151
152 if (text.starts_with('"') && text.ends_with('"'))
154 || (text.starts_with('\'') && text.ends_with('\''))
155 || text.contains('"')
156 || text.contains('\'')
157 {
158 return true;
159 }
160
161 if text.contains(':') && text.contains(' ') {
164 return true;
165 }
166
167 if text.starts_with('!') {
169 return true;
170 }
171
172 if text.starts_with('^') {
175 return true;
176 }
177
178 if text.starts_with('@') {
181 return true;
182 }
183
184 if text == "TOC" {
187 return true;
188 }
189
190 if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
192 return true;
193 }
194
195 let common_non_refs = [
198 "object",
200 "Object",
201 "any",
202 "Any",
203 "inv",
204 "void",
205 "bool",
206 "int",
207 "float",
208 "str",
209 "char",
210 "i8",
211 "i16",
212 "i32",
213 "i64",
214 "i128",
215 "isize",
216 "u8",
217 "u16",
218 "u32",
219 "u64",
220 "u128",
221 "usize",
222 "f32",
223 "f64",
224 "null",
226 "true",
227 "false",
228 "NaN",
229 "Infinity",
230 "object Object",
232 ];
233
234 if common_non_refs.contains(&text) {
235 return true;
236 }
237
238 false
239 }
240
241 fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
243 code_spans
244 .iter()
245 .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
246 }
247
248 fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
250 for m in HTML_COMMENT_PATTERN.find_iter(content) {
251 if m.start() <= byte_pos && byte_pos < m.end() {
252 return true;
253 }
254 }
255 false
256 }
257
258 fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
260 for html_tag in ctx.html_tags().iter() {
262 if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
263 return true;
264 }
265 }
266 false
267 }
268
269 fn extract_references(&self, content: &str, mkdocs_mode: bool) -> HashSet<String> {
270 use crate::config::MarkdownFlavor;
271 use crate::utils::skip_context::is_mkdocs_snippet_line;
272
273 let mut references = HashSet::new();
274 let mut in_code_block = false;
275 let mut code_fence_marker = String::new();
276
277 for line in content.lines() {
278 if is_mkdocs_snippet_line(
280 line,
281 if mkdocs_mode {
282 MarkdownFlavor::MkDocs
283 } else {
284 MarkdownFlavor::Standard
285 },
286 ) {
287 continue;
288 }
289 if let Some(cap) = FENCED_CODE_START.captures(line) {
291 if let Some(fence) = cap.get(2) {
292 let fence_str = fence.as_str();
294 if !in_code_block {
295 in_code_block = true;
296 code_fence_marker = fence_str.to_string();
297 } else if line.trim_start().starts_with(&code_fence_marker) {
298 let trimmed = line.trim_start();
300 if trimmed.starts_with(&code_fence_marker) {
302 let after_fence = &trimmed[code_fence_marker.len()..];
303 if after_fence.trim().is_empty() {
304 in_code_block = false;
305 code_fence_marker.clear();
306 }
307 }
308 }
309 }
310 continue;
311 }
312
313 if in_code_block {
315 continue;
316 }
317
318 if line.trim_start().starts_with("*[") {
321 continue;
322 }
323
324 if let Some(cap) = REF_REGEX.captures(line) {
325 if let Some(reference) = cap.get(1) {
327 references.insert(reference.as_str().to_lowercase());
328 }
329 }
330 }
331
332 references
333 }
334
335 fn find_undefined_references(
336 &self,
337 content: &str,
338 references: &HashSet<String>,
339 ctx: &crate::lint_context::LintContext,
340 mkdocs_mode: bool,
341 ) -> Vec<(usize, usize, usize, String)> {
342 let mut undefined = Vec::new();
343 let mut reported_refs = HashMap::new();
344 let mut in_code_block = false;
345 let mut code_fence_marker = String::new();
346 let mut in_example_section = false;
347
348 let code_spans = ctx.code_spans();
350
351 for link in &ctx.links {
353 if !link.is_reference {
354 continue; }
356
357 if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
359 continue;
360 }
361
362 if Self::is_in_html_comment(content, link.byte_offset) {
364 continue;
365 }
366
367 if Self::is_in_html_tag(ctx, link.byte_offset) {
369 continue;
370 }
371
372 if is_in_math_context(ctx, link.byte_offset) {
374 continue;
375 }
376
377 if is_in_table_cell(ctx, link.line, link.start_col) {
379 continue;
380 }
381
382 if ctx.line_info(link.line).is_some_and(|info| info.in_front_matter) {
384 continue;
385 }
386
387 if let Some(ref_id) = &link.reference_id {
388 let reference_lower = ref_id.to_lowercase();
389
390 if Self::is_known_non_reference_pattern(ref_id) {
392 continue;
393 }
394
395 let stripped_ref = Self::strip_backticks(ref_id);
399 let stripped_text = Self::strip_backticks(&link.text);
400 if mkdocs_mode
401 && (is_mkdocs_auto_reference(stripped_ref)
402 || is_mkdocs_auto_reference(stripped_text)
403 || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
404 || (link.text.as_str() != stripped_text && Self::is_valid_python_identifier(stripped_text)))
405 {
406 continue;
407 }
408
409 if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
411 if let Some(line_info) = ctx.line_info(link.line) {
413 if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
414 in_example_section = true;
415 continue;
416 }
417
418 if in_example_section {
419 continue;
420 }
421
422 if LIST_ITEM_REGEX.is_match(&line_info.content) {
424 continue;
425 }
426
427 let trimmed = line_info.content.trim_start();
429 if trimmed.starts_with('<') {
430 continue;
431 }
432 }
433
434 let match_len = link.byte_end - link.byte_offset;
435 undefined.push((link.line - 1, link.start_col, match_len, ref_id.clone()));
436 reported_refs.insert(reference_lower, true);
437 }
438 }
439 }
440
441 for image in &ctx.images {
443 if !image.is_reference {
444 continue; }
446
447 if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
449 continue;
450 }
451
452 if Self::is_in_html_comment(content, image.byte_offset) {
454 continue;
455 }
456
457 if Self::is_in_html_tag(ctx, image.byte_offset) {
459 continue;
460 }
461
462 if is_in_math_context(ctx, image.byte_offset) {
464 continue;
465 }
466
467 if is_in_table_cell(ctx, image.line, image.start_col) {
469 continue;
470 }
471
472 if ctx.line_info(image.line).is_some_and(|info| info.in_front_matter) {
474 continue;
475 }
476
477 if let Some(ref_id) = &image.reference_id {
478 let reference_lower = ref_id.to_lowercase();
479
480 if Self::is_known_non_reference_pattern(ref_id) {
482 continue;
483 }
484
485 let stripped_ref = Self::strip_backticks(ref_id);
489 let stripped_alt = Self::strip_backticks(&image.alt_text);
490 if mkdocs_mode
491 && (is_mkdocs_auto_reference(stripped_ref)
492 || is_mkdocs_auto_reference(stripped_alt)
493 || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
494 || (image.alt_text.as_str() != stripped_alt && Self::is_valid_python_identifier(stripped_alt)))
495 {
496 continue;
497 }
498
499 if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
501 if let Some(line_info) = ctx.line_info(image.line) {
503 if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
504 in_example_section = true;
505 continue;
506 }
507
508 if in_example_section {
509 continue;
510 }
511
512 if LIST_ITEM_REGEX.is_match(&line_info.content) {
514 continue;
515 }
516
517 let trimmed = line_info.content.trim_start();
519 if trimmed.starts_with('<') {
520 continue;
521 }
522 }
523
524 let match_len = image.byte_end - image.byte_offset;
525 undefined.push((image.line - 1, image.start_col, match_len, ref_id.clone()));
526 reported_refs.insert(reference_lower, true);
527 }
528 }
529 }
530
531 let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
533
534 for link in &ctx.links {
536 covered_ranges.push((link.byte_offset, link.byte_end));
537 }
538
539 for image in &ctx.images {
541 covered_ranges.push((image.byte_offset, image.byte_end));
542 }
543
544 covered_ranges.sort_by_key(|&(start, _)| start);
546
547 let lines: Vec<&str> = content.lines().collect();
550 in_example_section = false; for (line_num, line) in lines.iter().enumerate() {
553 if ctx.line_info(line_num + 1).is_some_and(|info| info.in_front_matter) {
555 continue;
556 }
557
558 if let Some(cap) = FENCED_CODE_START.captures(line) {
560 if let Some(fence) = cap.get(2) {
561 let fence_str = fence.as_str();
563 if !in_code_block {
564 in_code_block = true;
565 code_fence_marker = fence_str.to_string();
566 } else if line.trim_start().starts_with(&code_fence_marker) {
567 let trimmed = line.trim_start();
569 if trimmed.starts_with(&code_fence_marker) {
571 let after_fence = &trimmed[code_fence_marker.len()..];
572 if after_fence.trim().is_empty() {
573 in_code_block = false;
574 code_fence_marker.clear();
575 }
576 }
577 }
578 }
579 continue;
580 }
581
582 if in_code_block {
583 continue;
584 }
585
586 if OUTPUT_EXAMPLE_START.is_match(line) {
588 in_example_section = true;
589 continue;
590 }
591
592 if in_example_section {
593 if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
595 in_example_section = false;
596 } else {
597 continue;
598 }
599 }
600
601 if LIST_ITEM_REGEX.is_match(line) {
603 continue;
604 }
605
606 let trimmed_line = line.trim_start();
608 if trimmed_line.starts_with('<') {
609 continue;
610 }
611
612 if GITHUB_ALERT_REGEX.is_match(line) {
614 continue;
615 }
616
617 if trimmed_line.starts_with("*[") {
620 continue;
621 }
622
623 let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
626 for mat in URL_WITH_BRACKETS.find_iter(line) {
627 let url_str = mat.as_str();
629 let url_start = mat.start();
630
631 let mut idx = 0;
633 while idx < url_str.len() {
634 if let Some(bracket_start) = url_str[idx..].find('[') {
635 let bracket_start_abs = url_start + idx + bracket_start;
636 if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
637 let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
638 url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
639 idx += bracket_start + bracket_end + 2;
640 } else {
641 break;
642 }
643 } else {
644 break;
645 }
646 }
647 }
648
649 if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
651 for cap in captures {
652 if let Some(ref_match) = cap.get(1) {
653 let bracket_start = cap.get(0).unwrap().start();
655 let bracket_end = cap.get(0).unwrap().end();
656
657 let is_in_url = url_bracket_ranges
659 .iter()
660 .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
661
662 if is_in_url {
663 continue;
664 }
665
666 if bracket_start > 0 {
669 if let Some(byte) = line.as_bytes().get(bracket_start.saturating_sub(1))
671 && *byte == b'^'
672 {
673 continue; }
675 }
676
677 let reference = ref_match.as_str();
678 let reference_lower = reference.to_lowercase();
679
680 if Self::is_known_non_reference_pattern(reference) {
682 continue;
683 }
684
685 if let Some(alert_type) = reference.strip_prefix('!')
687 && matches!(
688 alert_type,
689 "NOTE"
690 | "TIP"
691 | "WARNING"
692 | "IMPORTANT"
693 | "CAUTION"
694 | "INFO"
695 | "SUCCESS"
696 | "FAILURE"
697 | "DANGER"
698 | "BUG"
699 | "EXAMPLE"
700 | "QUOTE"
701 )
702 {
703 continue;
704 }
705
706 if mkdocs_mode
709 && (reference.starts_with("start:") || reference.starts_with("end:"))
710 && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
711 || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
712 {
713 continue;
714 }
715
716 let stripped_ref = Self::strip_backticks(reference);
719 if mkdocs_mode
720 && (is_mkdocs_auto_reference(stripped_ref)
721 || (reference != stripped_ref && Self::is_valid_python_identifier(stripped_ref)))
722 {
723 continue;
724 }
725
726 if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
727 let full_match = cap.get(0).unwrap();
728 let col = full_match.start();
729
730 let code_spans = ctx.code_spans();
732 if Self::is_in_code_span(line_num + 1, col, &code_spans) {
733 continue;
734 }
735
736 let line_start_byte = ctx.line_offsets[line_num];
738 let byte_pos = line_start_byte + col;
739
740 if crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block(
742 &ctx.code_blocks,
743 byte_pos,
744 ) {
745 continue;
746 }
747
748 if Self::is_in_html_comment(content, byte_pos) {
750 continue;
751 }
752
753 if Self::is_in_html_tag(ctx, byte_pos) {
755 continue;
756 }
757
758 if is_in_math_context(ctx, byte_pos) {
760 continue;
761 }
762
763 if is_in_table_cell(ctx, line_num + 1, col) {
765 continue;
766 }
767
768 let byte_end = byte_pos + (full_match.end() - full_match.start());
769
770 let mut is_covered = false;
772 for &(range_start, range_end) in &covered_ranges {
773 if range_start <= byte_pos && byte_end <= range_end {
774 is_covered = true;
776 break;
777 }
778 if range_start > byte_end {
779 break;
781 }
782 }
783
784 if is_covered {
785 continue;
786 }
787
788 let line_chars: Vec<char> = line.chars().collect();
793 if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
794 let mut bracket_count = 1; let mut check_pos = col.saturating_sub(2);
797 let mut found_opening = false;
798
799 while check_pos > 0 && check_pos < line_chars.len() {
800 match line_chars.get(check_pos) {
801 Some(&']') => bracket_count += 1,
802 Some(&'[') => {
803 bracket_count -= 1;
804 if bracket_count == 0 {
805 if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
807 found_opening = true;
808 }
809 break;
810 }
811 }
812 _ => {}
813 }
814 if check_pos == 0 {
815 break;
816 }
817 check_pos = check_pos.saturating_sub(1);
818 }
819
820 if found_opening {
821 continue;
823 }
824 }
825
826 let before_text = &line[..col];
829 if before_text.contains("\\]") {
830 if let Some(escaped_close_pos) = before_text.rfind("\\]") {
832 let search_text = &before_text[..escaped_close_pos];
833 if search_text.contains("\\[") {
834 continue;
836 }
837 }
838 }
839
840 let match_len = full_match.end() - full_match.start();
841 undefined.push((line_num, col, match_len, reference.to_string()));
842 reported_refs.insert(reference_lower, true);
843 }
844 }
845 }
846 }
847 }
848
849 undefined
850 }
851}
852
853impl Rule for MD052ReferenceLinkImages {
854 fn name(&self) -> &'static str {
855 "MD052"
856 }
857
858 fn description(&self) -> &'static str {
859 "Reference links and images should use a reference that exists"
860 }
861
862 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
863 let content = ctx.content;
864 let mut warnings = Vec::new();
865
866 if !content.contains('[') {
868 return Ok(warnings);
869 }
870
871 let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
873
874 let references = self.extract_references(content, mkdocs_mode);
875
876 for (line_num, col, match_len, reference) in
878 self.find_undefined_references(content, &references, ctx, mkdocs_mode)
879 {
880 let lines: Vec<&str> = content.lines().collect();
881 let line_content = lines.get(line_num).unwrap_or(&"");
882
883 let (start_line, start_col, end_line, end_col) =
885 calculate_match_range(line_num + 1, line_content, col, match_len);
886
887 warnings.push(LintWarning {
888 rule_name: Some(self.name().to_string()),
889 line: start_line,
890 column: start_col,
891 end_line,
892 end_column: end_col,
893 message: format!("Reference '{reference}' not found"),
894 severity: Severity::Warning,
895 fix: None,
896 });
897 }
898
899 Ok(warnings)
900 }
901
902 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
904 ctx.content.is_empty() || !ctx.likely_has_links_or_images()
906 }
907
908 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
909 let content = ctx.content;
910 Ok(content.to_string())
912 }
913
914 fn as_any(&self) -> &dyn std::any::Any {
915 self
916 }
917
918 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
919 where
920 Self: Sized,
921 {
922 Box::new(MD052ReferenceLinkImages::new())
924 }
925}
926
927#[cfg(test)]
928mod tests {
929 use super::*;
930 use crate::lint_context::LintContext;
931
932 #[test]
933 fn test_valid_reference_link() {
934 let rule = MD052ReferenceLinkImages::new();
935 let content = "[text][ref]\n\n[ref]: https://example.com";
936 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
937 let result = rule.check(&ctx).unwrap();
938
939 assert_eq!(result.len(), 0);
940 }
941
942 #[test]
943 fn test_undefined_reference_link() {
944 let rule = MD052ReferenceLinkImages::new();
945 let content = "[text][undefined]";
946 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
947 let result = rule.check(&ctx).unwrap();
948
949 assert_eq!(result.len(), 1);
950 assert!(result[0].message.contains("Reference 'undefined' not found"));
951 }
952
953 #[test]
954 fn test_valid_reference_image() {
955 let rule = MD052ReferenceLinkImages::new();
956 let content = "![alt][img]\n\n[img]: image.jpg";
957 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
958 let result = rule.check(&ctx).unwrap();
959
960 assert_eq!(result.len(), 0);
961 }
962
963 #[test]
964 fn test_undefined_reference_image() {
965 let rule = MD052ReferenceLinkImages::new();
966 let content = "![alt][missing]";
967 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
968 let result = rule.check(&ctx).unwrap();
969
970 assert_eq!(result.len(), 1);
971 assert!(result[0].message.contains("Reference 'missing' not found"));
972 }
973
974 #[test]
975 fn test_case_insensitive_references() {
976 let rule = MD052ReferenceLinkImages::new();
977 let content = "[Text][REF]\n\n[ref]: https://example.com";
978 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
979 let result = rule.check(&ctx).unwrap();
980
981 assert_eq!(result.len(), 0);
982 }
983
984 #[test]
985 fn test_shortcut_reference_valid() {
986 let rule = MD052ReferenceLinkImages::new();
987 let content = "[ref]\n\n[ref]: https://example.com";
988 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
989 let result = rule.check(&ctx).unwrap();
990
991 assert_eq!(result.len(), 0);
992 }
993
994 #[test]
995 fn test_shortcut_reference_undefined() {
996 let rule = MD052ReferenceLinkImages::new();
997 let content = "[undefined]";
998 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
999 let result = rule.check(&ctx).unwrap();
1000
1001 assert_eq!(result.len(), 1);
1002 assert!(result[0].message.contains("Reference 'undefined' not found"));
1003 }
1004
1005 #[test]
1006 fn test_inline_links_ignored() {
1007 let rule = MD052ReferenceLinkImages::new();
1008 let content = "[text](https://example.com)";
1009 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1010 let result = rule.check(&ctx).unwrap();
1011
1012 assert_eq!(result.len(), 0);
1013 }
1014
1015 #[test]
1016 fn test_inline_images_ignored() {
1017 let rule = MD052ReferenceLinkImages::new();
1018 let content = "";
1019 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1020 let result = rule.check(&ctx).unwrap();
1021
1022 assert_eq!(result.len(), 0);
1023 }
1024
1025 #[test]
1026 fn test_references_in_code_blocks_ignored() {
1027 let rule = MD052ReferenceLinkImages::new();
1028 let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
1029 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1030 let result = rule.check(&ctx).unwrap();
1031
1032 assert_eq!(result.len(), 0);
1033 }
1034
1035 #[test]
1036 fn test_references_in_inline_code_ignored() {
1037 let rule = MD052ReferenceLinkImages::new();
1038 let content = "`[undefined]`";
1039 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1040 let result = rule.check(&ctx).unwrap();
1041
1042 assert_eq!(result.len(), 0);
1044 }
1045
1046 #[test]
1047 fn test_comprehensive_inline_code_detection() {
1048 let rule = MD052ReferenceLinkImages::new();
1049 let content = r#"# Test
1050
1051This `[inside]` should be ignored.
1052This [outside] should be flagged.
1053Reference links `[text][ref]` in code are ignored.
1054Regular reference [text][missing] should be flagged.
1055Images `![alt][img]` in code are ignored.
1056Regular image ![alt][badimg] should be flagged.
1057
1058Multiple `[one]` and `[two]` in code ignored, but [three] is not.
1059
1060```
1061[code block content] should be ignored
1062```
1063
1064`Multiple [refs] in [same] code span` ignored."#;
1065
1066 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1067 let result = rule.check(&ctx).unwrap();
1068
1069 assert_eq!(result.len(), 4);
1071
1072 let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
1073 assert!(messages.iter().any(|m| m.contains("outside")));
1074 assert!(messages.iter().any(|m| m.contains("missing")));
1075 assert!(messages.iter().any(|m| m.contains("badimg")));
1076 assert!(messages.iter().any(|m| m.contains("three")));
1077
1078 assert!(!messages.iter().any(|m| m.contains("inside")));
1080 assert!(!messages.iter().any(|m| m.contains("one")));
1081 assert!(!messages.iter().any(|m| m.contains("two")));
1082 assert!(!messages.iter().any(|m| m.contains("refs")));
1083 assert!(!messages.iter().any(|m| m.contains("same")));
1084 }
1085
1086 #[test]
1087 fn test_multiple_undefined_references() {
1088 let rule = MD052ReferenceLinkImages::new();
1089 let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
1090 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1091 let result = rule.check(&ctx).unwrap();
1092
1093 assert_eq!(result.len(), 3);
1094 assert!(result[0].message.contains("ref1"));
1095 assert!(result[1].message.contains("ref2"));
1096 assert!(result[2].message.contains("ref3"));
1097 }
1098
1099 #[test]
1100 fn test_mixed_valid_and_undefined() {
1101 let rule = MD052ReferenceLinkImages::new();
1102 let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
1103 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1104 let result = rule.check(&ctx).unwrap();
1105
1106 assert_eq!(result.len(), 1);
1107 assert!(result[0].message.contains("missing"));
1108 }
1109
1110 #[test]
1111 fn test_empty_reference() {
1112 let rule = MD052ReferenceLinkImages::new();
1113 let content = "[text][]\n\n[ref]: https://example.com";
1114 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1115 let result = rule.check(&ctx).unwrap();
1116
1117 assert_eq!(result.len(), 1);
1119 }
1120
1121 #[test]
1122 fn test_escaped_brackets_ignored() {
1123 let rule = MD052ReferenceLinkImages::new();
1124 let content = "\\[not a link\\]";
1125 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1126 let result = rule.check(&ctx).unwrap();
1127
1128 assert_eq!(result.len(), 0);
1129 }
1130
1131 #[test]
1132 fn test_list_items_ignored() {
1133 let rule = MD052ReferenceLinkImages::new();
1134 let content = "- [undefined]\n* [another]\n+ [third]";
1135 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1136 let result = rule.check(&ctx).unwrap();
1137
1138 assert_eq!(result.len(), 0);
1140 }
1141
1142 #[test]
1143 fn test_output_example_section_ignored() {
1144 let rule = MD052ReferenceLinkImages::new();
1145 let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
1146 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1147 let result = rule.check(&ctx).unwrap();
1148
1149 assert_eq!(result.len(), 1);
1151 assert!(result[0].message.contains("missing"));
1152 }
1153
1154 #[test]
1155 fn test_reference_definitions_in_code_blocks_ignored() {
1156 let rule = MD052ReferenceLinkImages::new();
1157 let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
1158 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1159 let result = rule.check(&ctx).unwrap();
1160
1161 assert_eq!(result.len(), 1);
1163 assert!(result[0].message.contains("ref"));
1164 }
1165
1166 #[test]
1167 fn test_multiple_references_to_same_undefined() {
1168 let rule = MD052ReferenceLinkImages::new();
1169 let content = "[first][missing] [second][missing] [third][missing]";
1170 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1171 let result = rule.check(&ctx).unwrap();
1172
1173 assert_eq!(result.len(), 1);
1175 assert!(result[0].message.contains("missing"));
1176 }
1177
1178 #[test]
1179 fn test_reference_with_special_characters() {
1180 let rule = MD052ReferenceLinkImages::new();
1181 let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
1182 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1183 let result = rule.check(&ctx).unwrap();
1184
1185 assert_eq!(result.len(), 0);
1186 }
1187
1188 #[test]
1189 fn test_issue_51_html_attribute_not_reference() {
1190 let rule = MD052ReferenceLinkImages::new();
1192 let content = r#"# Example
1193
1194## Test
1195
1196Want to fill out this form?
1197
1198<form method="post">
1199 <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
1200</form>"#;
1201 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1202 let result = rule.check(&ctx).unwrap();
1203
1204 assert_eq!(
1205 result.len(),
1206 0,
1207 "HTML attributes with square brackets should not be flagged as undefined references"
1208 );
1209 }
1210
1211 #[test]
1212 fn test_extract_references() {
1213 let rule = MD052ReferenceLinkImages::new();
1214 let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
1215 let refs = rule.extract_references(content, false);
1216
1217 assert_eq!(refs.len(), 3);
1218 assert!(refs.contains("ref1"));
1219 assert!(refs.contains("ref2"));
1220 assert!(refs.contains("ref3"));
1221 }
1222
1223 #[test]
1224 fn test_inline_code_not_flagged() {
1225 let rule = MD052ReferenceLinkImages::new();
1226
1227 let content = r#"# Test
1229
1230Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.
1231
1232Also, `[todo]` is not a reference link.
1233
1234But this [reference] should be flagged.
1235
1236And this `[inline code]` should not be flagged.
1237"#;
1238
1239 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1240 let warnings = rule.check(&ctx).unwrap();
1241
1242 assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
1244 assert!(warnings[0].message.contains("'reference'"));
1245 }
1246
1247 #[test]
1248 fn test_code_block_references_ignored() {
1249 let rule = MD052ReferenceLinkImages::new();
1250
1251 let content = r#"# Test
1252
1253```markdown
1254[undefined] reference in code block
1255![undefined] image in code block
1256```
1257
1258[real-undefined] reference outside
1259"#;
1260
1261 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1262 let warnings = rule.check(&ctx).unwrap();
1263
1264 assert_eq!(warnings.len(), 1);
1266 assert!(warnings[0].message.contains("'real-undefined'"));
1267 }
1268
1269 #[test]
1270 fn test_html_comments_ignored() {
1271 let rule = MD052ReferenceLinkImages::new();
1273
1274 let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
1276<!--- set_env EDITOR 'python3 fake_editor.py' -->
1277
1278```bash
1279$ python3 vote.py
12803 votes for: 2
12812 votes for: 3, 4
1282```"#;
1283 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1284 let result = rule.check(&ctx).unwrap();
1285 assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");
1286
1287 let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
1289Normal [text][undefined]
1290<!-- Another [comment][with] references -->"#;
1291 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1292 let result = rule.check(&ctx).unwrap();
1293 assert_eq!(
1294 result.len(),
1295 1,
1296 "Should only flag the undefined reference outside comments"
1297 );
1298 assert!(result[0].message.contains("undefined"));
1299
1300 let content = r#"<!--
1302[ref1]
1303[ref2][ref3]
1304-->
1305[actual][undefined]"#;
1306 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1307 let result = rule.check(&ctx).unwrap();
1308 assert_eq!(
1309 result.len(),
1310 1,
1311 "Should not flag references in multi-line HTML comments"
1312 );
1313 assert!(result[0].message.contains("undefined"));
1314
1315 let content = r#"<!-- Comment with [1:] pattern -->
1317Valid [link][ref]
1318<!-- More [refs][in][comments] -->
1319![image][missing]
1320
1321[ref]: https://example.com"#;
1322 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1323 let result = rule.check(&ctx).unwrap();
1324 assert_eq!(result.len(), 1, "Should only flag missing image reference");
1325 assert!(result[0].message.contains("missing"));
1326 }
1327
1328 #[test]
1329 fn test_frontmatter_ignored() {
1330 let rule = MD052ReferenceLinkImages::new();
1332
1333 let content = r#"---
1335layout: post
1336title: "My Jekyll Post"
1337date: 2023-01-01
1338categories: blog
1339tags: ["test", "example"]
1340author: John Doe
1341---
1342
1343# My Blog Post
1344
1345This is the actual markdown content that should be linted.
1346
1347[undefined] reference should be flagged.
1348
1349## Section 1
1350
1351Some content here."#;
1352 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1353 let result = rule.check(&ctx).unwrap();
1354
1355 assert_eq!(
1357 result.len(),
1358 1,
1359 "Should only flag the undefined reference outside frontmatter"
1360 );
1361 assert!(result[0].message.contains("undefined"));
1362
1363 let content = r#"+++
1365title = "My Post"
1366tags = ["example", "test"]
1367+++
1368
1369# Content
1370
1371[missing] reference should be flagged."#;
1372 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1373 let result = rule.check(&ctx).unwrap();
1374 assert_eq!(
1375 result.len(),
1376 1,
1377 "Should only flag the undefined reference outside TOML frontmatter"
1378 );
1379 assert!(result[0].message.contains("missing"));
1380 }
1381
1382 #[test]
1383 fn test_mkdocs_snippet_markers_not_flagged() {
1384 let rule = MD052ReferenceLinkImages::new();
1386
1387 let content = r#"# Document with MkDocs Snippets
1389
1390Some content here.
1391
1392# -8<- [start:remote-content]
1393
1394This is the remote content section.
1395
1396# -8<- [end:remote-content]
1397
1398More content here.
1399
1400<!-- --8<-- [start:another-section] -->
1401Content in another section
1402<!-- --8<-- [end:another-section] -->"#;
1403 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs);
1404 let result = rule.check(&ctx).unwrap();
1405
1406 assert_eq!(
1408 result.len(),
1409 0,
1410 "Should not flag MkDocs snippet markers as undefined references"
1411 );
1412
1413 let content = r#"# Document
1416
1417# -8<- [start:section]
1418Content with [reference] inside snippet section
1419# -8<- [end:section]
1420
1421Regular [undefined] reference outside snippet markers."#;
1422 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs);
1423 let result = rule.check(&ctx).unwrap();
1424
1425 assert_eq!(
1426 result.len(),
1427 2,
1428 "Should flag undefined references but skip snippet marker lines"
1429 );
1430 assert!(result[0].message.contains("reference"));
1432 assert!(result[1].message.contains("undefined"));
1433
1434 let content = r#"# Document
1436
1437# -8<- [start:section]
1438# -8<- [end:section]"#;
1439 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1440 let result = rule.check(&ctx).unwrap();
1441
1442 assert_eq!(
1443 result.len(),
1444 2,
1445 "In standard mode, snippet markers should be flagged as undefined references"
1446 );
1447 }
1448
1449 #[test]
1450 fn test_pandoc_citations_not_flagged() {
1451 let rule = MD052ReferenceLinkImages::new();
1453
1454 let content = r#"# Research Paper
1455
1456We are using the **bookdown** package [@R-bookdown] in this sample book.
1457This was built on top of R Markdown and **knitr** [@xie2015].
1458
1459Multiple citations [@citation1; @citation2; @citation3] are also supported.
1460
1461Regular [undefined] reference should still be flagged.
1462"#;
1463 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1464 let result = rule.check(&ctx).unwrap();
1465
1466 assert_eq!(
1468 result.len(),
1469 1,
1470 "Should only flag the undefined reference, not Pandoc citations"
1471 );
1472 assert!(result[0].message.contains("undefined"));
1473 }
1474
1475 #[test]
1476 fn test_pandoc_inline_footnotes_not_flagged() {
1477 let rule = MD052ReferenceLinkImages::new();
1479
1480 let content = r#"# Math Document
1481
1482You can use math in footnotes like this^[where we mention $p = \frac{a}{b}$].
1483
1484Another footnote^[with some text and a [link](https://example.com)].
1485
1486But this [reference] without ^ should be flagged.
1487"#;
1488 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1489 let result = rule.check(&ctx).unwrap();
1490
1491 assert_eq!(
1493 result.len(),
1494 1,
1495 "Should only flag the regular reference, not inline footnotes"
1496 );
1497 assert!(result[0].message.contains("reference"));
1498 }
1499
1500 #[test]
1501 fn test_github_alerts_not_flagged() {
1502 let rule = MD052ReferenceLinkImages::new();
1504
1505 let content = r#"# Document with GitHub Alerts
1507
1508> [!NOTE]
1509> This is a note alert.
1510
1511> [!TIP]
1512> This is a tip alert.
1513
1514> [!IMPORTANT]
1515> This is an important alert.
1516
1517> [!WARNING]
1518> This is a warning alert.
1519
1520> [!CAUTION]
1521> This is a caution alert.
1522
1523Regular content with [undefined] reference."#;
1524 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1525 let result = rule.check(&ctx).unwrap();
1526
1527 assert_eq!(
1529 result.len(),
1530 1,
1531 "Should only flag the undefined reference, not GitHub alerts"
1532 );
1533 assert!(result[0].message.contains("undefined"));
1534 assert_eq!(result[0].line, 18); let content = r#"> [!TIP]
1538> Here's a useful tip about [something].
1539> Multiple lines are allowed.
1540
1541[something] is mentioned but not defined."#;
1542 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1543 let result = rule.check(&ctx).unwrap();
1544
1545 assert_eq!(result.len(), 1, "Should flag undefined reference");
1549 assert!(result[0].message.contains("something"));
1550
1551 let content = r#"> [!NOTE]
1553> See [reference] for more details.
1554
1555[reference]: https://example.com"#;
1556 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1557 let result = rule.check(&ctx).unwrap();
1558
1559 assert_eq!(result.len(), 0, "Should not flag GitHub alerts or defined references");
1561 }
1562}