1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::{HTML_COMMENT_PATTERN, SHORTCUT_REF_REGEX};
5use crate::utils::skip_context::{is_in_math_context, is_in_table_cell};
6use regex::Regex;
7use std::collections::{HashMap, HashSet};
8use std::sync::LazyLock;
9
10mod md052_config;
11use md052_config::MD052Config;
12
/// Matches a reference definition line such as `[label]: target`.
///
/// Leading whitespace is allowed. Capture group 1 is the label, which may
/// contain backslash-escaped characters and one level of nested `[...]`.
static REF_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap());
18
/// Matches the start of an unordered list item (`-`, `*` or `+` followed by
/// whitespace), optionally followed by a task checkbox such as `[ ]` or `[x]`.
static LIST_ITEM_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap());
21
/// Matches a line beginning with a code fence: optional indentation (group 1)
/// followed by a run of three or more backticks or tildes (group 2).
static FENCED_CODE_START: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap());
24
/// Matches an ATX heading whose entire text is `Output`, `Example`,
/// `Output Style` or `Output Format`.
static OUTPUT_EXAMPLE_START: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap());
28
/// Matches a blockquote line that opens with an alert marker such as
/// `> [!NOTE]` or `> [!WARNING]`.
static GITHUB_ALERT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]")
        .unwrap()
});
35
/// Matches `http://` / `https://` URLs that legitimately contain square
/// brackets: either a bracketed host made of hex digits, `:`, `.` and `%`
/// (IPv6-literal style), or a path that contains a `[digits]` segment.
static URL_WITH_BRACKETS: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])").unwrap());
45
/// Rule MD052: reference links and images should use a reference that exists.
#[derive(Clone, Default)]
pub struct MD052ReferenceLinkImages {
    /// Rule configuration; its `shortcut_syntax` flag controls whether bare
    /// `[label]` shortcut references are checked.
    config: MD052Config,
}
62
63impl MD052ReferenceLinkImages {
64 pub fn new() -> Self {
65 Self {
66 config: MD052Config::default(),
67 }
68 }
69
/// Creates the rule from an explicit configuration value.
pub fn from_config_struct(config: MD052Config) -> Self {
    Self { config }
}
73
/// Returns `s` with every leading and trailing backtick removed.
///
/// Backticks in the interior of the string are left untouched.
fn strip_backticks(s: &str) -> &str {
    s.trim_matches('`')
}
79
/// Returns `true` when `s` is a non-empty ASCII identifier: it starts with an
/// ASCII letter or `_` and continues with ASCII letters, digits or `_`.
fn is_valid_python_identifier(s: &str) -> bool {
    let mut rest = s.chars();
    match rest.next() {
        Some(head) if head.is_ascii_alphabetic() || head == '_' => {
            rest.all(|c| c.is_ascii_alphanumeric() || c == '_')
        }
        // Empty input or an invalid leading character.
        _ => false,
    }
}
93
/// Returns `true` when bracketed `text` matches one of the heuristics for
/// "not a markdown reference label" (array indices, type names, paths, ...).
///
/// Every check is independent and only ever answers `true`, so the order of
/// the checks does not affect the result.
fn is_known_non_reference_pattern(text: &str) -> bool {
    /// Common type names and literals that show up in brackets in prose.
    const COMMON_NON_REFS: [&str; 31] = [
        "object",
        "Object",
        "any",
        "Any",
        "inv",
        "void",
        "bool",
        "int",
        "float",
        "str",
        "char",
        "i8",
        "i16",
        "i32",
        "i64",
        "i128",
        "isize",
        "u8",
        "u16",
        "u32",
        "u64",
        "u128",
        "usize",
        "f32",
        "f64",
        "null",
        "true",
        "false",
        "NaN",
        "Infinity",
        "object Object",
    ];

    // Digits only, e.g. `[0]` (also true for the empty string).
    if text.chars().all(|c| c.is_ascii_digit()) {
        return true;
    }

    // Digits and colons only, e.g. `[1:3]`.
    if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
        return true;
    }

    // Dotted names without spaces, hyphens, underscores or backticks,
    // e.g. `[config.value]`.
    if text.contains('.')
        && !text.contains(' ')
        && !text.contains('-')
        && !text.contains('_')
        && !text.contains('`')
    {
        return true;
    }

    // Wildcards and ellipsis.
    if text == "*" || text == "..." || text == "**" {
        return true;
    }

    // Path-like text that is not a URL.
    if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
        return true;
    }

    // Comma-separated lists and nested brackets.
    if text.contains(',') || text.contains('[') || text.contains(']') {
        return true;
    }

    // Nothing but punctuation/symbols.
    if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
        return true;
    }

    // Very short text (at most two bytes) that is not purely alphabetic.
    if text.len() <= 2 && !text.chars().all(|c| c.is_alphabetic()) {
        return true;
    }

    // Any quote character; this also covers fully quoted text.
    if text.contains('"') || text.contains('\'') {
        return true;
    }

    // "key: value" style text.
    if text.contains(':') && text.contains(' ') {
        return true;
    }

    // Leading `!`, `^` or `@`, e.g. `[!NOTE]`, `[^1]`, `[@user]`.
    if text.starts_with('!') || text.starts_with('^') || text.starts_with('@') {
        return true;
    }

    if text == "TOC" {
        return true;
    }

    // A single uppercase ASCII letter, e.g. `[T]`.
    if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
        return true;
    }

    COMMON_NON_REFS.contains(&text)
}
260
261 fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
263 code_spans
264 .iter()
265 .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
266 }
267
268 fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
270 for m in HTML_COMMENT_PATTERN.find_iter(content) {
271 if m.start() <= byte_pos && byte_pos < m.end() {
272 return true;
273 }
274 }
275 false
276 }
277
278 fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
280 for html_tag in ctx.html_tags().iter() {
282 if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
283 return true;
284 }
285 }
286 false
287 }
288
289 fn extract_references(&self, content: &str, mkdocs_mode: bool) -> HashSet<String> {
290 use crate::config::MarkdownFlavor;
291 use crate::utils::skip_context::is_mkdocs_snippet_line;
292
293 let mut references = HashSet::new();
294 let mut in_code_block = false;
295 let mut code_fence_marker = String::new();
296
297 for line in content.lines() {
298 if is_mkdocs_snippet_line(
300 line,
301 if mkdocs_mode {
302 MarkdownFlavor::MkDocs
303 } else {
304 MarkdownFlavor::Standard
305 },
306 ) {
307 continue;
308 }
309 if let Some(cap) = FENCED_CODE_START.captures(line) {
311 if let Some(fence) = cap.get(2) {
312 let fence_str = fence.as_str();
314 if !in_code_block {
315 in_code_block = true;
316 code_fence_marker = fence_str.to_string();
317 } else if line.trim_start().starts_with(&code_fence_marker) {
318 let trimmed = line.trim_start();
320 if trimmed.starts_with(&code_fence_marker) {
322 let after_fence = &trimmed[code_fence_marker.len()..];
323 if after_fence.trim().is_empty() {
324 in_code_block = false;
325 code_fence_marker.clear();
326 }
327 }
328 }
329 }
330 continue;
331 }
332
333 if in_code_block {
335 continue;
336 }
337
338 if line.trim_start().starts_with("*[") {
341 continue;
342 }
343
344 if let Some(cap) = REF_REGEX.captures(line) {
345 if let Some(reference) = cap.get(1) {
347 references.insert(reference.as_str().to_lowercase());
348 }
349 }
350 }
351
352 references
353 }
354
355 fn find_undefined_references(
356 &self,
357 content: &str,
358 references: &HashSet<String>,
359 ctx: &crate::lint_context::LintContext,
360 mkdocs_mode: bool,
361 ) -> Vec<(usize, usize, usize, String)> {
362 let mut undefined = Vec::new();
363 let mut reported_refs = HashMap::new();
364 let mut in_code_block = false;
365 let mut code_fence_marker = String::new();
366 let mut in_example_section = false;
367
368 let code_spans = ctx.code_spans();
370
371 for link in &ctx.links {
373 if !link.is_reference {
374 continue; }
376
377 if ctx.is_in_jinja_range(link.byte_offset) {
379 continue;
380 }
381
382 if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
384 continue;
385 }
386
387 if Self::is_in_html_comment(content, link.byte_offset) {
389 continue;
390 }
391
392 if Self::is_in_html_tag(ctx, link.byte_offset) {
394 continue;
395 }
396
397 if is_in_math_context(ctx, link.byte_offset) {
399 continue;
400 }
401
402 if is_in_table_cell(ctx, link.line, link.start_col) {
404 continue;
405 }
406
407 if ctx.line_info(link.line).is_some_and(|info| info.in_front_matter) {
409 continue;
410 }
411
412 if let Some(ref_id) = &link.reference_id {
413 let reference_lower = ref_id.to_lowercase();
414
415 if Self::is_known_non_reference_pattern(ref_id) {
417 continue;
418 }
419
420 let stripped_ref = Self::strip_backticks(ref_id);
424 let stripped_text = Self::strip_backticks(&link.text);
425 if mkdocs_mode
426 && (is_mkdocs_auto_reference(stripped_ref)
427 || is_mkdocs_auto_reference(stripped_text)
428 || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
429 || (link.text.as_ref() != stripped_text && Self::is_valid_python_identifier(stripped_text)))
430 {
431 continue;
432 }
433
434 if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
436 if let Some(line_info) = ctx.line_info(link.line) {
438 if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
439 in_example_section = true;
440 continue;
441 }
442
443 if in_example_section {
444 continue;
445 }
446
447 if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
449 continue;
450 }
451
452 let trimmed = line_info.content(ctx.content).trim_start();
454 if trimmed.starts_with('<') {
455 continue;
456 }
457 }
458
459 let match_len = link.byte_end - link.byte_offset;
460 undefined.push((link.line - 1, link.start_col, match_len, ref_id.to_string()));
461 reported_refs.insert(reference_lower, true);
462 }
463 }
464 }
465
466 for image in &ctx.images {
468 if !image.is_reference {
469 continue; }
471
472 if ctx.is_in_jinja_range(image.byte_offset) {
474 continue;
475 }
476
477 if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
479 continue;
480 }
481
482 if Self::is_in_html_comment(content, image.byte_offset) {
484 continue;
485 }
486
487 if Self::is_in_html_tag(ctx, image.byte_offset) {
489 continue;
490 }
491
492 if is_in_math_context(ctx, image.byte_offset) {
494 continue;
495 }
496
497 if is_in_table_cell(ctx, image.line, image.start_col) {
499 continue;
500 }
501
502 if ctx.line_info(image.line).is_some_and(|info| info.in_front_matter) {
504 continue;
505 }
506
507 if let Some(ref_id) = &image.reference_id {
508 let reference_lower = ref_id.to_lowercase();
509
510 if Self::is_known_non_reference_pattern(ref_id) {
512 continue;
513 }
514
515 let stripped_ref = Self::strip_backticks(ref_id);
519 let stripped_alt = Self::strip_backticks(&image.alt_text);
520 if mkdocs_mode
521 && (is_mkdocs_auto_reference(stripped_ref)
522 || is_mkdocs_auto_reference(stripped_alt)
523 || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
524 || (image.alt_text.as_ref() != stripped_alt && Self::is_valid_python_identifier(stripped_alt)))
525 {
526 continue;
527 }
528
529 if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
531 if let Some(line_info) = ctx.line_info(image.line) {
533 if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
534 in_example_section = true;
535 continue;
536 }
537
538 if in_example_section {
539 continue;
540 }
541
542 if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
544 continue;
545 }
546
547 let trimmed = line_info.content(ctx.content).trim_start();
549 if trimmed.starts_with('<') {
550 continue;
551 }
552 }
553
554 let match_len = image.byte_end - image.byte_offset;
555 undefined.push((image.line - 1, image.start_col, match_len, ref_id.to_string()));
556 reported_refs.insert(reference_lower, true);
557 }
558 }
559 }
560
561 let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
563
564 for link in &ctx.links {
566 covered_ranges.push((link.byte_offset, link.byte_end));
567 }
568
569 for image in &ctx.images {
571 covered_ranges.push((image.byte_offset, image.byte_end));
572 }
573
574 covered_ranges.sort_by_key(|&(start, _)| start);
576
577 if !self.config.shortcut_syntax {
582 return undefined;
583 }
584
585 let lines: Vec<&str> = content.lines().collect();
587 in_example_section = false; for (line_num, line) in lines.iter().enumerate() {
590 if ctx.line_info(line_num + 1).is_some_and(|info| info.in_front_matter) {
592 continue;
593 }
594
595 if let Some(cap) = FENCED_CODE_START.captures(line) {
597 if let Some(fence) = cap.get(2) {
598 let fence_str = fence.as_str();
600 if !in_code_block {
601 in_code_block = true;
602 code_fence_marker = fence_str.to_string();
603 } else if line.trim_start().starts_with(&code_fence_marker) {
604 let trimmed = line.trim_start();
606 if trimmed.starts_with(&code_fence_marker) {
608 let after_fence = &trimmed[code_fence_marker.len()..];
609 if after_fence.trim().is_empty() {
610 in_code_block = false;
611 code_fence_marker.clear();
612 }
613 }
614 }
615 }
616 continue;
617 }
618
619 if in_code_block {
620 continue;
621 }
622
623 if OUTPUT_EXAMPLE_START.is_match(line) {
625 in_example_section = true;
626 continue;
627 }
628
629 if in_example_section {
630 if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
632 in_example_section = false;
633 } else {
634 continue;
635 }
636 }
637
638 if LIST_ITEM_REGEX.is_match(line) {
640 continue;
641 }
642
643 let trimmed_line = line.trim_start();
645 if trimmed_line.starts_with('<') {
646 continue;
647 }
648
649 if GITHUB_ALERT_REGEX.is_match(line) {
651 continue;
652 }
653
654 if trimmed_line.starts_with("*[") {
657 continue;
658 }
659
660 let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
663 for mat in URL_WITH_BRACKETS.find_iter(line) {
664 let url_str = mat.as_str();
666 let url_start = mat.start();
667
668 let mut idx = 0;
670 while idx < url_str.len() {
671 if let Some(bracket_start) = url_str[idx..].find('[') {
672 let bracket_start_abs = url_start + idx + bracket_start;
673 if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
674 let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
675 url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
676 idx += bracket_start + bracket_end + 2;
677 } else {
678 break;
679 }
680 } else {
681 break;
682 }
683 }
684 }
685
686 if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
688 for cap in captures {
689 if let Some(ref_match) = cap.get(1) {
690 let bracket_start = cap.get(0).unwrap().start();
692 let bracket_end = cap.get(0).unwrap().end();
693
694 let is_in_url = url_bracket_ranges
696 .iter()
697 .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
698
699 if is_in_url {
700 continue;
701 }
702
703 if bracket_start > 0 {
706 if let Some(byte) = line.as_bytes().get(bracket_start.saturating_sub(1))
708 && *byte == b'^'
709 {
710 continue; }
712 }
713
714 let reference = ref_match.as_str();
715 let reference_lower = reference.to_lowercase();
716
717 if Self::is_known_non_reference_pattern(reference) {
719 continue;
720 }
721
722 if let Some(alert_type) = reference.strip_prefix('!')
724 && matches!(
725 alert_type,
726 "NOTE"
727 | "TIP"
728 | "WARNING"
729 | "IMPORTANT"
730 | "CAUTION"
731 | "INFO"
732 | "SUCCESS"
733 | "FAILURE"
734 | "DANGER"
735 | "BUG"
736 | "EXAMPLE"
737 | "QUOTE"
738 )
739 {
740 continue;
741 }
742
743 if mkdocs_mode
746 && (reference.starts_with("start:") || reference.starts_with("end:"))
747 && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
748 || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
749 {
750 continue;
751 }
752
753 let stripped_ref = Self::strip_backticks(reference);
756 if mkdocs_mode
757 && (is_mkdocs_auto_reference(stripped_ref)
758 || (reference != stripped_ref && Self::is_valid_python_identifier(stripped_ref)))
759 {
760 continue;
761 }
762
763 if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
764 let full_match = cap.get(0).unwrap();
765 let col = full_match.start();
766
767 let code_spans = ctx.code_spans();
769 if Self::is_in_code_span(line_num + 1, col, &code_spans) {
770 continue;
771 }
772
773 let line_start_byte = ctx.line_offsets[line_num];
775 let byte_pos = line_start_byte + col;
776
777 if ctx.is_in_jinja_range(byte_pos) {
779 continue;
780 }
781
782 if crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block(
784 &ctx.code_blocks,
785 byte_pos,
786 ) {
787 continue;
788 }
789
790 if Self::is_in_html_comment(content, byte_pos) {
792 continue;
793 }
794
795 if Self::is_in_html_tag(ctx, byte_pos) {
797 continue;
798 }
799
800 if is_in_math_context(ctx, byte_pos) {
802 continue;
803 }
804
805 if is_in_table_cell(ctx, line_num + 1, col) {
807 continue;
808 }
809
810 let byte_end = byte_pos + (full_match.end() - full_match.start());
811
812 let mut is_covered = false;
814 for &(range_start, range_end) in &covered_ranges {
815 if range_start <= byte_pos && byte_end <= range_end {
816 is_covered = true;
818 break;
819 }
820 if range_start > byte_end {
821 break;
823 }
824 }
825
826 if is_covered {
827 continue;
828 }
829
830 let line_chars: Vec<char> = line.chars().collect();
835 if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
836 let mut bracket_count = 1; let mut check_pos = col.saturating_sub(2);
839 let mut found_opening = false;
840
841 while check_pos > 0 && check_pos < line_chars.len() {
842 match line_chars.get(check_pos) {
843 Some(&']') => bracket_count += 1,
844 Some(&'[') => {
845 bracket_count -= 1;
846 if bracket_count == 0 {
847 if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
849 found_opening = true;
850 }
851 break;
852 }
853 }
854 _ => {}
855 }
856 if check_pos == 0 {
857 break;
858 }
859 check_pos = check_pos.saturating_sub(1);
860 }
861
862 if found_opening {
863 continue;
865 }
866 }
867
868 let before_text = &line[..col];
871 if before_text.contains("\\]") {
872 if let Some(escaped_close_pos) = before_text.rfind("\\]") {
874 let search_text = &before_text[..escaped_close_pos];
875 if search_text.contains("\\[") {
876 continue;
878 }
879 }
880 }
881
882 let match_len = full_match.end() - full_match.start();
883 undefined.push((line_num, col, match_len, reference.to_string()));
884 reported_refs.insert(reference_lower, true);
885 }
886 }
887 }
888 }
889 }
890
891 undefined
892 }
893}
894
895impl Rule for MD052ReferenceLinkImages {
896 fn name(&self) -> &'static str {
897 "MD052"
898 }
899
900 fn description(&self) -> &'static str {
901 "Reference links and images should use a reference that exists"
902 }
903
904 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
905 let content = ctx.content;
906 let mut warnings = Vec::new();
907
908 if !content.contains('[') {
910 return Ok(warnings);
911 }
912
913 let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
915
916 let references = self.extract_references(content, mkdocs_mode);
917
918 for (line_num, col, match_len, reference) in
920 self.find_undefined_references(content, &references, ctx, mkdocs_mode)
921 {
922 let lines: Vec<&str> = content.lines().collect();
923 let line_content = lines.get(line_num).unwrap_or(&"");
924
925 let (start_line, start_col, end_line, end_col) =
927 calculate_match_range(line_num + 1, line_content, col, match_len);
928
929 warnings.push(LintWarning {
930 rule_name: Some(self.name().to_string()),
931 line: start_line,
932 column: start_col,
933 end_line,
934 end_column: end_col,
935 message: format!("Reference '{reference}' not found"),
936 severity: Severity::Warning,
937 fix: None,
938 });
939 }
940
941 Ok(warnings)
942 }
943
944 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
946 ctx.content.is_empty() || !ctx.likely_has_links_or_images()
948 }
949
950 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
951 let content = ctx.content;
952 Ok(content.to_string())
954 }
955
956 fn as_any(&self) -> &dyn std::any::Any {
957 self
958 }
959
960 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
961 where
962 Self: Sized,
963 {
964 let rule_config = crate::rule_config_serde::load_rule_config::<MD052Config>(config);
965 Box::new(Self::from_config_struct(rule_config))
966 }
967}
968
969#[cfg(test)]
970mod tests {
971 use super::*;
972 use crate::lint_context::LintContext;
973
/// A full reference link whose label is defined produces no warnings.
#[test]
fn test_valid_reference_link() {
    let linter = MD052ReferenceLinkImages::new();
    let markdown = "[text][ref]\n\n[ref]: https://example.com";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    assert!(warnings.is_empty());
}
983
/// A full reference link with an undefined label is reported once.
#[test]
fn test_undefined_reference_link() {
    let linter = MD052ReferenceLinkImages::new();
    let markdown = "[text][undefined]";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    assert_eq!(warnings.len(), 1);
    let first = &warnings[0];
    assert!(first.message.contains("Reference 'undefined' not found"));
}
994
/// A reference image whose label is defined produces no warnings.
#[test]
fn test_valid_reference_image() {
    let linter = MD052ReferenceLinkImages::new();
    let markdown = "![alt][img]\n\n[img]: image.jpg";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    assert!(warnings.is_empty());
}
1004
/// A reference image with an undefined label is reported once.
#[test]
fn test_undefined_reference_image() {
    let linter = MD052ReferenceLinkImages::new();
    let markdown = "![alt][missing]";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    assert_eq!(warnings.len(), 1);
    let first = &warnings[0];
    assert!(first.message.contains("Reference 'missing' not found"));
}
1015
/// Labels are matched case-insensitively against their definitions.
#[test]
fn test_case_insensitive_references() {
    let linter = MD052ReferenceLinkImages::new();
    let markdown = "[Text][REF]\n\n[ref]: https://example.com";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    assert!(warnings.is_empty());
}
1025
/// A shortcut reference with a matching definition produces no warnings.
#[test]
fn test_shortcut_reference_valid() {
    let linter = MD052ReferenceLinkImages::new();
    let markdown = "[ref]\n\n[ref]: https://example.com";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    assert!(warnings.is_empty());
}
1035
/// With `shortcut_syntax` enabled, an undefined shortcut reference is reported.
#[test]
fn test_shortcut_reference_undefined_with_shortcut_syntax_enabled() {
    let config = MD052Config { shortcut_syntax: true };
    let linter = MD052ReferenceLinkImages::from_config_struct(config);
    let markdown = "[undefined]";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    assert_eq!(warnings.len(), 1);
    let first = &warnings[0];
    assert!(first.message.contains("Reference 'undefined' not found"));
}
1048
/// With the default configuration, shortcut references are not checked.
#[test]
fn test_shortcut_reference_not_checked_by_default() {
    let linter = MD052ReferenceLinkImages::new();
    let markdown = "[undefined]";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    assert!(warnings.is_empty());
}
1060
/// Inline links carry their own target and are never reported.
#[test]
fn test_inline_links_ignored() {
    let linter = MD052ReferenceLinkImages::new();
    let markdown = "[text](https://example.com)";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    assert!(warnings.is_empty());
}
1070
/// Inline images carry their own target and are never reported.
#[test]
fn test_inline_images_ignored() {
    let rule = MD052ReferenceLinkImages::new();
    // The input must actually contain an inline image; with an empty string
    // `check` returns before any image handling is exercised.
    let content = "![alt](image.jpg)";
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
    let result = rule.check(&ctx).unwrap();

    assert_eq!(result.len(), 0);
}
1080
/// Bracketed text inside a fenced code block is not reported.
#[test]
fn test_references_in_code_blocks_ignored() {
    let linter = MD052ReferenceLinkImages::new();
    let markdown = "```\n[undefined]\n```\n\n[ref]: https://example.com";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    assert!(warnings.is_empty());
}
1090
/// Bracketed text inside an inline code span is not reported.
#[test]
fn test_references_in_inline_code_ignored() {
    let linter = MD052ReferenceLinkImages::new();
    let markdown = "`[undefined]`";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    assert!(warnings.is_empty());
}
1101
/// Mixes references inside and outside code spans/blocks: only the four
/// references outside code are reported.
#[test]
fn test_comprehensive_inline_code_detection() {
    let config = MD052Config { shortcut_syntax: true };
    let linter = MD052ReferenceLinkImages::from_config_struct(config);
    let markdown = r#"# Test

This `[inside]` should be ignored.
This [outside] should be flagged.
Reference links `[text][ref]` in code are ignored.
Regular reference [text][missing] should be flagged.
Images `![alt][img]` in code are ignored.
Regular image ![alt][badimg] should be flagged.

Multiple `[one]` and `[two]` in code ignored, but [three] is not.

```
[code block content] should be ignored
```

`Multiple [refs] in [same] code span` ignored."#;

    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    assert_eq!(warnings.len(), 4);

    // True when any warning message mentions `needle`.
    let mentions = |needle: &str| warnings.iter().any(|w| w.message.contains(needle));

    for expected in ["outside", "missing", "badimg", "three"] {
        assert!(mentions(expected));
    }

    for unexpected in ["inside", "one", "two", "refs", "same"] {
        assert!(!mentions(unexpected));
    }
}
1142
/// Each distinct undefined label is reported, in document order.
#[test]
fn test_multiple_undefined_references() {
    let linter = MD052ReferenceLinkImages::new();
    let markdown = "[link1][ref1] [link2][ref2] [link3][ref3]";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    assert_eq!(warnings.len(), 3);
    for (warning, label) in warnings.iter().zip(["ref1", "ref2", "ref3"]) {
        assert!(warning.message.contains(label));
    }
}
1155
/// Only the undefined label is reported when defined and undefined labels mix.
#[test]
fn test_mixed_valid_and_undefined() {
    let linter = MD052ReferenceLinkImages::new();
    let markdown = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    assert_eq!(warnings.len(), 1);
    let first = &warnings[0];
    assert!(first.message.contains("missing"));
}
1166
/// A collapsed reference (`[text][]`) without a matching definition yields
/// exactly one warning.
#[test]
fn test_empty_reference() {
    let linter = MD052ReferenceLinkImages::new();
    let markdown = "[text][]\n\n[ref]: https://example.com";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    assert_eq!(warnings.len(), 1);
}
1177
/// Backslash-escaped brackets are not treated as a reference.
#[test]
fn test_escaped_brackets_ignored() {
    let linter = MD052ReferenceLinkImages::new();
    let markdown = "\\[not a link\\]";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    assert!(warnings.is_empty());
}
1187
/// Bracketed text on list-item lines produces no warnings with the default
/// configuration.
#[test]
fn test_list_items_ignored() {
    let linter = MD052ReferenceLinkImages::new();
    let markdown = "- [undefined]\n* [another]\n+ [third]";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    assert!(warnings.is_empty());
}
1198
/// Shortcut references under an `## Output` heading are skipped; those under
/// the next ordinary heading are reported.
#[test]
fn test_output_example_section_ignored() {
    let config = MD052Config { shortcut_syntax: true };
    let linter = MD052ReferenceLinkImages::from_config_struct(config);
    let markdown = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    assert_eq!(warnings.len(), 1);
    let first = &warnings[0];
    assert!(first.message.contains("missing"));
}
1211
/// A definition that appears only inside a fenced code block does not count,
/// so the link that uses it is still reported.
#[test]
fn test_reference_definitions_in_code_blocks_ignored() {
    let linter = MD052ReferenceLinkImages::new();
    let markdown = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    assert_eq!(warnings.len(), 1);
    let first = &warnings[0];
    assert!(first.message.contains("ref"));
}
1223
/// Repeated uses of the same undefined label are reported only once.
#[test]
fn test_multiple_references_to_same_undefined() {
    let linter = MD052ReferenceLinkImages::new();
    let markdown = "[first][missing] [second][missing] [third][missing]";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    assert_eq!(warnings.len(), 1);
    let first = &warnings[0];
    assert!(first.message.contains("missing"));
}
1235
/// Labels containing hyphens resolve against their definitions.
#[test]
fn test_reference_with_special_characters() {
    let linter = MD052ReferenceLinkImages::new();
    let markdown = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    assert!(warnings.is_empty());
}
1245
/// Square brackets inside an HTML attribute value (`name="fields[email]"`)
/// must not be reported as an undefined reference.
#[test]
fn test_issue_51_html_attribute_not_reference() {
    let linter = MD052ReferenceLinkImages::new();
    let markdown = r#"# Example

## Test

Want to fill out this form?

<form method="post">
 <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
</form>"#;
    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let warnings = linter.check(&ctx).unwrap();

    assert!(
        warnings.is_empty(),
        "HTML attributes with square brackets should not be flagged as undefined references"
    );
}
1268
/// `extract_references` returns every defined label, lowercased.
#[test]
fn test_extract_references() {
    let linter = MD052ReferenceLinkImages::new();
    let markdown = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
    let labels = linter.extract_references(markdown, false);

    assert_eq!(labels.len(), 3);
    for expected in ["ref1", "ref2", "ref3"] {
        assert!(labels.contains(expected));
    }
}
1280
/// Bracketed text inside inline code is ignored; the one shortcut reference
/// in plain text is reported.
#[test]
fn test_inline_code_not_flagged() {
    let config = MD052Config { shortcut_syntax: true };
    let linter = MD052ReferenceLinkImages::from_config_struct(config);

    let markdown = r#"# Test

Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.

Also, `[todo]` is not a reference link.

But this [reference] should be flagged.

And this `[inline code]` should not be flagged.
"#;

    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let found = linter.check(&ctx).unwrap();

    assert_eq!(found.len(), 1, "Should only flag one undefined reference");
    let first = &found[0];
    assert!(first.message.contains("'reference'"));
}
1305
/// References inside a fenced code block with an info string are ignored;
/// the shortcut reference after the block is reported.
#[test]
fn test_code_block_references_ignored() {
    let config = MD052Config { shortcut_syntax: true };
    let linter = MD052ReferenceLinkImages::from_config_struct(config);

    let markdown = r#"# Test

```markdown
[undefined] reference in code block
![undefined] image in code block
```

[real-undefined] reference outside
"#;

    let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
    let found = linter.check(&ctx).unwrap();

    assert_eq!(found.len(), 1);
    let first = &found[0];
    assert!(first.message.contains("'real-undefined'"));
}
1328
/// Bracketed text inside HTML comments (single-line and multi-line) is never
/// reported, while references outside comments still are.
#[test]
fn test_html_comments_ignored() {
    let linter = MD052ReferenceLinkImages::new();
    let flavor = || crate::config::MarkdownFlavor::Standard;

    // Case 1: `[1:]` inside a comment, followed by a code block.
    let markdown = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
<!--- set_env EDITOR 'python3 fake_editor.py' -->

```bash
$ python3 vote.py
3 votes for: 2
2 votes for: 3, 4
```"#;
    let ctx = LintContext::new(markdown, flavor());
    let warnings = linter.check(&ctx).unwrap();
    assert!(warnings.is_empty(), "Should not flag [1:] inside HTML comments");

    // Case 2: references in single-line comments around a real undefined link.
    let markdown = r#"<!-- This is [ref1] and [ref2][ref3] -->
Normal [text][undefined]
<!-- Another [comment][with] references -->"#;
    let ctx = LintContext::new(markdown, flavor());
    let warnings = linter.check(&ctx).unwrap();
    assert_eq!(
        warnings.len(),
        1,
        "Should only flag the undefined reference outside comments"
    );
    assert!(warnings[0].message.contains("undefined"));

    // Case 3: references inside a multi-line comment.
    let markdown = r#"<!--
[ref1]
[ref2][ref3]
-->
[actual][undefined]"#;
    let ctx = LintContext::new(markdown, flavor());
    let warnings = linter.check(&ctx).unwrap();
    assert_eq!(
        warnings.len(),
        1,
        "Should not flag references in multi-line HTML comments"
    );
    assert!(warnings[0].message.contains("undefined"));

    // Case 4: comments mixed with a valid link and an undefined image.
    let markdown = r#"<!-- Comment with [1:] pattern -->
Valid [link][ref]
<!-- More [refs][in][comments] -->
![image][missing]

[ref]: https://example.com"#;
    let ctx = LintContext::new(markdown, flavor());
    let warnings = linter.check(&ctx).unwrap();
    assert_eq!(warnings.len(), 1, "Should only flag missing image reference");
    assert!(warnings[0].message.contains("missing"));
}
1387
/// Bracketed values inside document frontmatter must not be reported, while
/// an undefined shortcut reference in the body still is.
///
/// Covers a `---`-delimited block and a `+++`-delimited block.
#[test]
fn test_frontmatter_ignored() {
    let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config { shortcut_syntax: true });

    // `---` frontmatter: the `tags: ["test", "example"]` array contains
    // brackets that must not be mistaken for references.
    let content = r#"---
layout: post
title: "My Jekyll Post"
date: 2023-01-01
categories: blog
tags: ["test", "example"]
author: John Doe
---

# My Blog Post

This is the actual markdown content that should be linted.

[undefined] reference should be flagged.

## Section 1

Some content here."#;
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
    let result = rule.check(&ctx).unwrap();

    assert_eq!(
        result.len(),
        1,
        "Should only flag the undefined reference outside frontmatter"
    );
    assert!(result[0].message.contains("undefined"));

    // `+++` frontmatter: same expectation with a TOML-style `tags = [...]`.
    let content = r#"+++
title = "My Post"
tags = ["example", "test"]
+++

# Content

[missing] reference should be flagged."#;
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
    let result = rule.check(&ctx).unwrap();
    assert_eq!(
        result.len(),
        1,
        "Should only flag the undefined reference outside TOML frontmatter"
    );
    assert!(result[0].message.contains("missing"));
}
1442
/// Snippet section markers such as `# -8<- [start:name]` must not be reported
/// under the MkDocs flavor, but are reported under the Standard flavor.
#[test]
fn test_mkdocs_snippet_markers_not_flagged() {
    let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config { shortcut_syntax: true });

    // MkDocs flavor, markers only (both the `# -8<-` form and the form wrapped
    // in an HTML comment): no warnings expected.
    let content = r#"# Document with MkDocs Snippets

Some content here.

# -8<- [start:remote-content]

This is the remote content section.

# -8<- [end:remote-content]

More content here.

<!-- --8<-- [start:another-section] -->
Content in another section
<!-- --8<-- [end:another-section] -->"#;
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs);
    let result = rule.check(&ctx).unwrap();

    assert_eq!(
        result.len(),
        0,
        "Should not flag MkDocs snippet markers as undefined references"
    );

    // MkDocs flavor, markers plus two genuine undefined references: the
    // references are reported in document order, the marker lines are not.
    let content = r#"# Document

# -8<- [start:section]
Content with [reference] inside snippet section
# -8<- [end:section]

Regular [undefined] reference outside snippet markers."#;
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs);
    let result = rule.check(&ctx).unwrap();

    assert_eq!(
        result.len(),
        2,
        "Should flag undefined references but skip snippet marker lines"
    );
    assert!(result[0].message.contains("reference"));
    assert!(result[1].message.contains("undefined"));

    // Standard flavor: the same marker lines get no special treatment, so
    // both `[start:section]` and `[end:section]` are reported.
    let content = r#"# Document

# -8<- [start:section]
# -8<- [end:section]"#;
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
    let result = rule.check(&ctx).unwrap();

    assert_eq!(
        result.len(),
        2,
        "In standard mode, snippet markers should be flagged as undefined references"
    );
}
1510
/// Pandoc-style citations (`[@key]`, including `;`-separated lists) must not
/// be reported; a plain undefined shortcut reference in the same text is.
#[test]
fn test_pandoc_citations_not_flagged() {
    let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config { shortcut_syntax: true });

    let content = r#"# Research Paper

We are using the **bookdown** package [@R-bookdown] in this sample book.
This was built on top of R Markdown and **knitr** [@xie2015].

Multiple citations [@citation1; @citation2; @citation3] are also supported.

Regular [undefined] reference should still be flagged.
"#;
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
    let result = rule.check(&ctx).unwrap();

    // Only the non-citation `[undefined]` is expected to produce a warning.
    assert_eq!(
        result.len(),
        1,
        "Should only flag the undefined reference, not Pandoc citations"
    );
    assert!(result[0].message.contains("undefined"));
}
1537
/// Pandoc inline footnotes (`^[...]`) must not be reported, even when the
/// footnote body contains math or an inline link; a bracketed word without
/// the leading `^` still is.
#[test]
fn test_pandoc_inline_footnotes_not_flagged() {
    let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config { shortcut_syntax: true });

    let content = r#"# Math Document

You can use math in footnotes like this^[where we mention $p = \frac{a}{b}$].

Another footnote^[with some text and a [link](https://example.com)].

But this [reference] without ^ should be flagged.
"#;
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
    let result = rule.check(&ctx).unwrap();

    // Only the plain `[reference]` on the last line is expected to be reported.
    assert_eq!(
        result.len(),
        1,
        "Should only flag the regular reference, not inline footnotes"
    );
    assert!(result[0].message.contains("reference"));
}
1563
/// GitHub alert markers (`> [!NOTE]`, `> [!TIP]`, ...) must not be reported
/// as undefined references, while ordinary references around them are still
/// checked.
#[test]
fn test_github_alerts_not_flagged() {
    let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config { shortcut_syntax: true });

    // All five alert kinds followed by one genuine undefined reference.
    let content = r#"# Document with GitHub Alerts

> [!NOTE]
> This is a note alert.

> [!TIP]
> This is a tip alert.

> [!IMPORTANT]
> This is an important alert.

> [!WARNING]
> This is a warning alert.

> [!CAUTION]
> This is a caution alert.

Regular content with [undefined] reference."#;
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
    let result = rule.check(&ctx).unwrap();

    assert_eq!(
        result.len(),
        1,
        "Should only flag the undefined reference, not GitHub alerts"
    );
    assert!(result[0].message.contains("undefined"));
    // Line 18 of the fixture is the "Regular content with [undefined] reference." line.
    assert_eq!(result[0].line, 18);

    // `[something]` occurs in the alert body and again in the closing
    // paragraph; exactly one warning is expected for it.
    let content = r#"> [!TIP]
> Here's a useful tip about [something].
> Multiple lines are allowed.

[something] is mentioned but not defined."#;
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
    let result = rule.check(&ctx).unwrap();

    assert_eq!(result.len(), 1, "Should flag undefined reference");
    assert!(result[0].message.contains("something"));

    // A reference used inside an alert body that does have a definition:
    // neither the alert marker nor the reference may be reported.
    let content = r#"> [!NOTE]
> See [reference] for more details.

[reference]: https://example.com"#;
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
    let result = rule.check(&ctx).unwrap();

    assert_eq!(result.len(), 0, "Should not flag GitHub alerts or defined references");
}
1627}