1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::SHORTCUT_REF_REGEX;
5use crate::utils::skip_context::{is_in_math_context, is_in_table_cell};
6use regex::Regex;
7use std::collections::{HashMap, HashSet};
8use std::sync::LazyLock;
9
10mod md052_config;
11use md052_config::MD052Config;
12
/// Matches a reference definition line: optional leading whitespace, `[label]:`,
/// then anything. Capture group 1 is the label, which may contain backslash
/// escapes and one level of nested `[...]`.
static REF_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap());

/// Matches the start of a bullet list item (`-`, `*` or `+` followed by
/// whitespace), optionally followed by a checkbox such as `[x]`, `[X]` or `[ ]`.
static LIST_ITEM_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap());

/// Matches a heading line whose entire text is one of `Output`, `Example`,
/// `Output Style` or `Output Format`.
static OUTPUT_EXAMPLE_START: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap());

/// Matches a blockquote line that starts with an alert marker such as
/// `> [!NOTE]` or `> [!WARNING]`.
static GITHUB_ALERT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]")
        .unwrap()
});

/// Matches `http(s)` URLs that legitimately contain square brackets: either a
/// bracketed host made of hex digits, `:`, `.` and `%` (presumably an IPv6
/// literal), or a path that ends in a bracketed number such as `.../items[0]`.
static URL_WITH_BRACKETS: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])").unwrap());
42
/// Rule MD052: reference links and images should use a reference that exists.
///
/// Holds the rule configuration, which supplies the `ignore` list and the
/// `shortcut_syntax` switch read by the checks below.
#[derive(Clone, Default)]
pub struct MD052ReferenceLinkImages {
    // Rule settings; `MD052Config::default()` is used when none are supplied.
    config: MD052Config,
}
59
60impl MD052ReferenceLinkImages {
61 pub fn new() -> Self {
62 Self {
63 config: MD052Config::default(),
64 }
65 }
66
    /// Creates the rule from an already-built configuration value.
    pub fn from_config_struct(config: MD052Config) -> Self {
        Self { config }
    }
70
71 fn strip_backticks(s: &str) -> &str {
74 s.trim_start_matches('`').trim_end_matches('`')
75 }
76
77 fn is_valid_python_identifier(s: &str) -> bool {
81 if s.is_empty() {
82 return false;
83 }
84 let first_char = s.chars().next().unwrap();
85 if !first_char.is_ascii_alphabetic() && first_char != '_' {
86 return false;
87 }
88 s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
89 }
90
91 fn is_known_non_reference_pattern(&self, text: &str) -> bool {
100 if self.config.ignore.iter().any(|p| p.eq_ignore_ascii_case(text)) {
104 return true;
105 }
106 if text.chars().all(|c| c.is_ascii_digit()) {
108 return true;
109 }
110
111 if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
113 return true;
114 }
115
116 if text.contains('.')
120 && !text.contains(' ')
121 && !text.contains('-')
122 && !text.contains('_')
123 && !text.contains('`')
124 {
125 return true;
127 }
128
129 if text == "*" || text == "..." || text == "**" {
131 return true;
132 }
133
134 if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
136 return true;
137 }
138
139 if text.contains(',') || text.contains('[') || text.contains(']') {
142 return true;
144 }
145
146 if !text.contains('`')
153 && text.contains('.')
154 && !text.contains(' ')
155 && !text.contains('-')
156 && !text.contains('_')
157 {
158 return true;
159 }
160
161 if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
168 return true;
169 }
170
171 if text.len() <= 2 && !text.chars().all(|c| c.is_alphabetic()) {
173 return true;
174 }
175
176 if (text.starts_with('"') && text.ends_with('"'))
178 || (text.starts_with('\'') && text.ends_with('\''))
179 || text.contains('"')
180 || text.contains('\'')
181 {
182 return true;
183 }
184
185 if text.contains(':') && text.contains(' ') {
188 return true;
189 }
190
191 if text.starts_with('!') {
193 return true;
194 }
195
196 if text.starts_with('^') {
199 return true;
200 }
201
202 if text.starts_with('@') {
205 return true;
206 }
207
208 if text == "TOC" {
211 return true;
212 }
213
214 if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
216 return true;
217 }
218
219 let common_non_refs = [
222 "object",
224 "Object",
225 "any",
226 "Any",
227 "inv",
228 "void",
229 "bool",
230 "int",
231 "float",
232 "str",
233 "char",
234 "i8",
235 "i16",
236 "i32",
237 "i64",
238 "i128",
239 "isize",
240 "u8",
241 "u16",
242 "u32",
243 "u64",
244 "u128",
245 "usize",
246 "f32",
247 "f64",
248 "null",
250 "true",
251 "false",
252 "NaN",
253 "Infinity",
254 "object Object",
256 ];
257
258 if common_non_refs.contains(&text) {
259 return true;
260 }
261
262 false
263 }
264
265 fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
267 code_spans
268 .iter()
269 .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
270 }
271
272 fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
274 for html_tag in ctx.html_tags().iter() {
276 if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
277 return true;
278 }
279 }
280 false
281 }
282
283 fn extract_references(&self, ctx: &crate::lint_context::LintContext) -> HashSet<String> {
284 use crate::utils::skip_context::is_mkdocs_snippet_line;
285
286 let mut references = HashSet::new();
287
288 for (line_num, line) in ctx.content.lines().enumerate() {
289 if let Some(line_info) = ctx.line_info(line_num + 1)
291 && line_info.in_code_block
292 {
293 continue;
294 }
295
296 if is_mkdocs_snippet_line(line, ctx.flavor) {
298 continue;
299 }
300
301 if line.trim_start().starts_with("*[") {
304 continue;
305 }
306
307 if let Some(cap) = REF_REGEX.captures(line) {
308 if let Some(reference) = cap.get(1) {
310 references.insert(reference.as_str().to_lowercase());
311 }
312 }
313 }
314
315 references
316 }
317
318 fn find_undefined_references(
319 &self,
320 references: &HashSet<String>,
321 ctx: &crate::lint_context::LintContext,
322 mkdocs_mode: bool,
323 ) -> Vec<(usize, usize, usize, String)> {
324 let mut undefined = Vec::new();
325 let mut reported_refs = HashMap::new();
326 let mut in_example_section = false;
327
328 let code_spans = ctx.code_spans();
330
331 for link in &ctx.links {
333 if !link.is_reference {
334 continue; }
336
337 if ctx.is_in_jinja_range(link.byte_offset) {
339 continue;
340 }
341
342 if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
344 continue;
345 }
346
347 if ctx.is_in_html_comment(link.byte_offset) {
349 continue;
350 }
351
352 if Self::is_in_html_tag(ctx, link.byte_offset) {
354 continue;
355 }
356
357 if is_in_math_context(ctx, link.byte_offset) {
359 continue;
360 }
361
362 if is_in_table_cell(ctx, link.line, link.start_col) {
364 continue;
365 }
366
367 if ctx.line_info(link.line).is_some_and(|info| info.in_front_matter) {
369 continue;
370 }
371
372 if ctx.flavor == crate::config::MarkdownFlavor::Quarto && ctx.is_in_citation(link.byte_offset) {
375 continue;
376 }
377
378 if ctx.is_in_shortcode(link.byte_offset) {
381 continue;
382 }
383
384 if let Some(ref_id) = &link.reference_id {
385 let reference_lower = ref_id.to_lowercase();
386
387 if self.is_known_non_reference_pattern(ref_id) {
389 continue;
390 }
391
392 let stripped_ref = Self::strip_backticks(ref_id);
396 let stripped_text = Self::strip_backticks(&link.text);
397 if mkdocs_mode
398 && (is_mkdocs_auto_reference(stripped_ref)
399 || is_mkdocs_auto_reference(stripped_text)
400 || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
401 || (link.text.as_ref() != stripped_text && Self::is_valid_python_identifier(stripped_text)))
402 {
403 continue;
404 }
405
406 if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
408 if let Some(line_info) = ctx.line_info(link.line) {
410 if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
411 in_example_section = true;
412 continue;
413 }
414
415 if in_example_section {
416 continue;
417 }
418
419 if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
421 continue;
422 }
423
424 let trimmed = line_info.content(ctx.content).trim_start();
426 if trimmed.starts_with('<') {
427 continue;
428 }
429 }
430
431 let match_len = link.byte_end - link.byte_offset;
432 undefined.push((link.line - 1, link.start_col, match_len, ref_id.to_string()));
433 reported_refs.insert(reference_lower, true);
434 }
435 }
436 }
437
438 for image in &ctx.images {
440 if !image.is_reference {
441 continue; }
443
444 if ctx.is_in_jinja_range(image.byte_offset) {
446 continue;
447 }
448
449 if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
451 continue;
452 }
453
454 if ctx.is_in_html_comment(image.byte_offset) {
456 continue;
457 }
458
459 if Self::is_in_html_tag(ctx, image.byte_offset) {
461 continue;
462 }
463
464 if is_in_math_context(ctx, image.byte_offset) {
466 continue;
467 }
468
469 if is_in_table_cell(ctx, image.line, image.start_col) {
471 continue;
472 }
473
474 if ctx.line_info(image.line).is_some_and(|info| info.in_front_matter) {
476 continue;
477 }
478
479 if let Some(ref_id) = &image.reference_id {
480 let reference_lower = ref_id.to_lowercase();
481
482 if self.is_known_non_reference_pattern(ref_id) {
484 continue;
485 }
486
487 let stripped_ref = Self::strip_backticks(ref_id);
491 let stripped_alt = Self::strip_backticks(&image.alt_text);
492 if mkdocs_mode
493 && (is_mkdocs_auto_reference(stripped_ref)
494 || is_mkdocs_auto_reference(stripped_alt)
495 || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
496 || (image.alt_text.as_ref() != stripped_alt && Self::is_valid_python_identifier(stripped_alt)))
497 {
498 continue;
499 }
500
501 if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
503 if let Some(line_info) = ctx.line_info(image.line) {
505 if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
506 in_example_section = true;
507 continue;
508 }
509
510 if in_example_section {
511 continue;
512 }
513
514 if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
516 continue;
517 }
518
519 let trimmed = line_info.content(ctx.content).trim_start();
521 if trimmed.starts_with('<') {
522 continue;
523 }
524 }
525
526 let match_len = image.byte_end - image.byte_offset;
527 undefined.push((image.line - 1, image.start_col, match_len, ref_id.to_string()));
528 reported_refs.insert(reference_lower, true);
529 }
530 }
531 }
532
533 let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
535
536 for link in &ctx.links {
538 covered_ranges.push((link.byte_offset, link.byte_end));
539 }
540
541 for image in &ctx.images {
543 covered_ranges.push((image.byte_offset, image.byte_end));
544 }
545
546 covered_ranges.sort_by_key(|&(start, _)| start);
548
549 if !self.config.shortcut_syntax {
554 return undefined;
555 }
556
557 let lines = ctx.raw_lines();
559 in_example_section = false; for (line_num, line) in lines.iter().enumerate() {
562 if let Some(line_info) = ctx.line_info(line_num + 1)
564 && (line_info.in_front_matter || line_info.in_code_block)
565 {
566 continue;
567 }
568
569 if OUTPUT_EXAMPLE_START.is_match(line) {
571 in_example_section = true;
572 continue;
573 }
574
575 if in_example_section {
576 if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
578 in_example_section = false;
579 } else {
580 continue;
581 }
582 }
583
584 if LIST_ITEM_REGEX.is_match(line) {
586 continue;
587 }
588
589 let trimmed_line = line.trim_start();
591 if trimmed_line.starts_with('<') {
592 continue;
593 }
594
595 if GITHUB_ALERT_REGEX.is_match(line) {
597 continue;
598 }
599
600 if trimmed_line.starts_with("*[") {
603 continue;
604 }
605
606 let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
609 for mat in URL_WITH_BRACKETS.find_iter(line) {
610 let url_str = mat.as_str();
612 let url_start = mat.start();
613
614 let mut idx = 0;
616 while idx < url_str.len() {
617 if let Some(bracket_start) = url_str[idx..].find('[') {
618 let bracket_start_abs = url_start + idx + bracket_start;
619 if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
620 let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
621 url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
622 idx += bracket_start + bracket_end + 2;
623 } else {
624 break;
625 }
626 } else {
627 break;
628 }
629 }
630 }
631
632 if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
634 for cap in captures {
635 if let Some(ref_match) = cap.get(1) {
636 let bracket_start = cap.get(0).unwrap().start();
638 let bracket_end = cap.get(0).unwrap().end();
639
640 let is_in_url = url_bracket_ranges
642 .iter()
643 .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
644
645 if is_in_url {
646 continue;
647 }
648
649 if bracket_start > 0 {
652 if let Some(byte) = line.as_bytes().get(bracket_start.saturating_sub(1))
654 && *byte == b'^'
655 {
656 continue; }
658 }
659
660 let reference = ref_match.as_str();
661 let reference_lower = reference.to_lowercase();
662
663 if self.is_known_non_reference_pattern(reference) {
665 continue;
666 }
667
668 if let Some(alert_type) = reference.strip_prefix('!')
670 && matches!(
671 alert_type,
672 "NOTE"
673 | "TIP"
674 | "WARNING"
675 | "IMPORTANT"
676 | "CAUTION"
677 | "INFO"
678 | "SUCCESS"
679 | "FAILURE"
680 | "DANGER"
681 | "BUG"
682 | "EXAMPLE"
683 | "QUOTE"
684 )
685 {
686 continue;
687 }
688
689 if mkdocs_mode
692 && (reference.starts_with("start:") || reference.starts_with("end:"))
693 && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
694 || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
695 {
696 continue;
697 }
698
699 let stripped_ref = Self::strip_backticks(reference);
702 if mkdocs_mode
703 && (is_mkdocs_auto_reference(stripped_ref)
704 || (reference != stripped_ref && Self::is_valid_python_identifier(stripped_ref)))
705 {
706 continue;
707 }
708
709 if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
710 let full_match = cap.get(0).unwrap();
711 let col = full_match.start();
712
713 let code_spans = ctx.code_spans();
715 if Self::is_in_code_span(line_num + 1, col, &code_spans) {
716 continue;
717 }
718
719 let line_start_byte = ctx.line_offsets[line_num];
721 let byte_pos = line_start_byte + col;
722
723 if ctx.is_in_jinja_range(byte_pos) {
725 continue;
726 }
727
728 if crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block(
730 &ctx.code_blocks,
731 byte_pos,
732 ) {
733 continue;
734 }
735
736 if ctx.is_in_html_comment(byte_pos) {
738 continue;
739 }
740
741 if Self::is_in_html_tag(ctx, byte_pos) {
743 continue;
744 }
745
746 if is_in_math_context(ctx, byte_pos) {
748 continue;
749 }
750
751 if is_in_table_cell(ctx, line_num + 1, col) {
753 continue;
754 }
755
756 let byte_end = byte_pos + (full_match.end() - full_match.start());
757
758 let mut is_covered = false;
760 for &(range_start, range_end) in &covered_ranges {
761 if range_start <= byte_pos && byte_end <= range_end {
762 is_covered = true;
764 break;
765 }
766 if range_start > byte_end {
767 break;
769 }
770 }
771
772 if is_covered {
773 continue;
774 }
775
776 let line_chars: Vec<char> = line.chars().collect();
781 if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
782 let mut bracket_count = 1; let mut check_pos = col.saturating_sub(2);
785 let mut found_opening = false;
786
787 while check_pos > 0 && check_pos < line_chars.len() {
788 match line_chars.get(check_pos) {
789 Some(&']') => bracket_count += 1,
790 Some(&'[') => {
791 bracket_count -= 1;
792 if bracket_count == 0 {
793 if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
795 found_opening = true;
796 }
797 break;
798 }
799 }
800 _ => {}
801 }
802 if check_pos == 0 {
803 break;
804 }
805 check_pos = check_pos.saturating_sub(1);
806 }
807
808 if found_opening {
809 continue;
811 }
812 }
813
814 let before_text = &line[..col];
817 if before_text.contains("\\]") {
818 if let Some(escaped_close_pos) = before_text.rfind("\\]") {
820 let search_text = &before_text[..escaped_close_pos];
821 if search_text.contains("\\[") {
822 continue;
824 }
825 }
826 }
827
828 let match_len = full_match.end() - full_match.start();
829 undefined.push((line_num, col, match_len, reference.to_string()));
830 reported_refs.insert(reference_lower, true);
831 }
832 }
833 }
834 }
835 }
836
837 undefined
838 }
839}
840
841impl Rule for MD052ReferenceLinkImages {
842 fn name(&self) -> &'static str {
843 "MD052"
844 }
845
846 fn description(&self) -> &'static str {
847 "Reference links and images should use a reference that exists"
848 }
849
850 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
851 let content = ctx.content;
852 let mut warnings = Vec::new();
853
854 if !content.contains('[') {
856 return Ok(warnings);
857 }
858
859 let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
861
862 let references = self.extract_references(ctx);
863
864 let lines = ctx.raw_lines();
866 for (line_num, col, match_len, reference) in self.find_undefined_references(&references, ctx, mkdocs_mode) {
867 let line_content = lines.get(line_num).unwrap_or(&"");
868
869 let (start_line, start_col, end_line, end_col) =
871 calculate_match_range(line_num + 1, line_content, col, match_len);
872
873 warnings.push(LintWarning {
874 rule_name: Some(self.name().to_string()),
875 line: start_line,
876 column: start_col,
877 end_line,
878 end_column: end_col,
879 message: format!("Reference '{reference}' not found"),
880 severity: Severity::Warning,
881 fix: None,
882 });
883 }
884
885 Ok(warnings)
886 }
887
888 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
890 ctx.content.is_empty() || !ctx.likely_has_links_or_images()
892 }
893
894 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
895 let content = ctx.content;
896 Ok(content.to_string())
898 }
899
900 fn as_any(&self) -> &dyn std::any::Any {
901 self
902 }
903
904 fn default_config_section(&self) -> Option<(String, toml::Value)> {
905 let json_value = serde_json::to_value(&self.config).ok()?;
906 Some((
907 self.name().to_string(),
908 crate::rule_config_serde::json_to_toml_value(&json_value)?,
909 ))
910 }
911
912 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
913 where
914 Self: Sized,
915 {
916 let rule_config = crate::rule_config_serde::load_rule_config::<MD052Config>(config);
917 Box::new(Self::from_config_struct(rule_config))
918 }
919}
920
921#[cfg(test)]
922mod tests {
923 use super::*;
924 use crate::lint_context::LintContext;
925
926 #[test]
927 fn test_valid_reference_link() {
928 let rule = MD052ReferenceLinkImages::new();
929 let content = "[text][ref]\n\n[ref]: https://example.com";
930 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
931 let result = rule.check(&ctx).unwrap();
932
933 assert_eq!(result.len(), 0);
934 }
935
936 #[test]
937 fn test_undefined_reference_link() {
938 let rule = MD052ReferenceLinkImages::new();
939 let content = "[text][undefined]";
940 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
941 let result = rule.check(&ctx).unwrap();
942
943 assert_eq!(result.len(), 1);
944 assert!(result[0].message.contains("Reference 'undefined' not found"));
945 }
946
947 #[test]
948 fn test_valid_reference_image() {
949 let rule = MD052ReferenceLinkImages::new();
950 let content = "![alt][img]\n\n[img]: image.jpg";
951 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
952 let result = rule.check(&ctx).unwrap();
953
954 assert_eq!(result.len(), 0);
955 }
956
957 #[test]
958 fn test_undefined_reference_image() {
959 let rule = MD052ReferenceLinkImages::new();
960 let content = "![alt][missing]";
961 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
962 let result = rule.check(&ctx).unwrap();
963
964 assert_eq!(result.len(), 1);
965 assert!(result[0].message.contains("Reference 'missing' not found"));
966 }
967
968 #[test]
969 fn test_case_insensitive_references() {
970 let rule = MD052ReferenceLinkImages::new();
971 let content = "[Text][REF]\n\n[ref]: https://example.com";
972 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
973 let result = rule.check(&ctx).unwrap();
974
975 assert_eq!(result.len(), 0);
976 }
977
978 #[test]
979 fn test_shortcut_reference_valid() {
980 let rule = MD052ReferenceLinkImages::new();
981 let content = "[ref]\n\n[ref]: https://example.com";
982 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
983 let result = rule.check(&ctx).unwrap();
984
985 assert_eq!(result.len(), 0);
986 }
987
988 #[test]
989 fn test_shortcut_reference_undefined_with_shortcut_syntax_enabled() {
990 let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
993 shortcut_syntax: true,
994 ..Default::default()
995 });
996 let content = "[undefined]";
997 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
998 let result = rule.check(&ctx).unwrap();
999
1000 assert_eq!(result.len(), 1);
1001 assert!(result[0].message.contains("Reference 'undefined' not found"));
1002 }
1003
1004 #[test]
1005 fn test_shortcut_reference_not_checked_by_default() {
1006 let rule = MD052ReferenceLinkImages::new();
1008 let content = "[undefined]";
1009 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1010 let result = rule.check(&ctx).unwrap();
1011
1012 assert_eq!(result.len(), 0);
1014 }
1015
1016 #[test]
1017 fn test_inline_links_ignored() {
1018 let rule = MD052ReferenceLinkImages::new();
1019 let content = "[text](https://example.com)";
1020 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1021 let result = rule.check(&ctx).unwrap();
1022
1023 assert_eq!(result.len(), 0);
1024 }
1025
1026 #[test]
1027 fn test_inline_images_ignored() {
1028 let rule = MD052ReferenceLinkImages::new();
1029 let content = "";
1030 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1031 let result = rule.check(&ctx).unwrap();
1032
1033 assert_eq!(result.len(), 0);
1034 }
1035
1036 #[test]
1037 fn test_references_in_code_blocks_ignored() {
1038 let rule = MD052ReferenceLinkImages::new();
1039 let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
1040 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1041 let result = rule.check(&ctx).unwrap();
1042
1043 assert_eq!(result.len(), 0);
1044 }
1045
1046 #[test]
1047 fn test_references_in_inline_code_ignored() {
1048 let rule = MD052ReferenceLinkImages::new();
1049 let content = "`[undefined]`";
1050 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1051 let result = rule.check(&ctx).unwrap();
1052
1053 assert_eq!(result.len(), 0);
1055 }
1056
1057 #[test]
1058 fn test_comprehensive_inline_code_detection() {
1059 let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1061 shortcut_syntax: true,
1062 ..Default::default()
1063 });
1064 let content = r#"# Test
1065
1066This `[inside]` should be ignored.
1067This [outside] should be flagged.
1068Reference links `[text][ref]` in code are ignored.
1069Regular reference [text][missing] should be flagged.
1070Images `![alt][img]` in code are ignored.
1071Regular image ![alt][badimg] should be flagged.
1072
1073Multiple `[one]` and `[two]` in code ignored, but [three] is not.
1074
1075```
1076[code block content] should be ignored
1077```
1078
1079`Multiple [refs] in [same] code span` ignored."#;
1080
1081 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1082 let result = rule.check(&ctx).unwrap();
1083
1084 assert_eq!(result.len(), 4);
1086
1087 let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
1088 assert!(messages.iter().any(|m| m.contains("outside")));
1089 assert!(messages.iter().any(|m| m.contains("missing")));
1090 assert!(messages.iter().any(|m| m.contains("badimg")));
1091 assert!(messages.iter().any(|m| m.contains("three")));
1092
1093 assert!(!messages.iter().any(|m| m.contains("inside")));
1095 assert!(!messages.iter().any(|m| m.contains("one")));
1096 assert!(!messages.iter().any(|m| m.contains("two")));
1097 assert!(!messages.iter().any(|m| m.contains("refs")));
1098 assert!(!messages.iter().any(|m| m.contains("same")));
1099 }
1100
1101 #[test]
1102 fn test_multiple_undefined_references() {
1103 let rule = MD052ReferenceLinkImages::new();
1104 let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
1105 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1106 let result = rule.check(&ctx).unwrap();
1107
1108 assert_eq!(result.len(), 3);
1109 assert!(result[0].message.contains("ref1"));
1110 assert!(result[1].message.contains("ref2"));
1111 assert!(result[2].message.contains("ref3"));
1112 }
1113
1114 #[test]
1115 fn test_mixed_valid_and_undefined() {
1116 let rule = MD052ReferenceLinkImages::new();
1117 let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
1118 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1119 let result = rule.check(&ctx).unwrap();
1120
1121 assert_eq!(result.len(), 1);
1122 assert!(result[0].message.contains("missing"));
1123 }
1124
1125 #[test]
1126 fn test_empty_reference() {
1127 let rule = MD052ReferenceLinkImages::new();
1128 let content = "[text][]\n\n[ref]: https://example.com";
1129 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1130 let result = rule.check(&ctx).unwrap();
1131
1132 assert_eq!(result.len(), 1);
1134 }
1135
1136 #[test]
1137 fn test_escaped_brackets_ignored() {
1138 let rule = MD052ReferenceLinkImages::new();
1139 let content = "\\[not a link\\]";
1140 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1141 let result = rule.check(&ctx).unwrap();
1142
1143 assert_eq!(result.len(), 0);
1144 }
1145
1146 #[test]
1147 fn test_list_items_ignored() {
1148 let rule = MD052ReferenceLinkImages::new();
1149 let content = "- [undefined]\n* [another]\n+ [third]";
1150 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1151 let result = rule.check(&ctx).unwrap();
1152
1153 assert_eq!(result.len(), 0);
1155 }
1156
1157 #[test]
1158 fn test_output_example_section_ignored() {
1159 let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1161 shortcut_syntax: true,
1162 ..Default::default()
1163 });
1164 let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
1165 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1166 let result = rule.check(&ctx).unwrap();
1167
1168 assert_eq!(result.len(), 1);
1170 assert!(result[0].message.contains("missing"));
1171 }
1172
1173 #[test]
1174 fn test_reference_definitions_in_code_blocks_ignored() {
1175 let rule = MD052ReferenceLinkImages::new();
1176 let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
1177 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1178 let result = rule.check(&ctx).unwrap();
1179
1180 assert_eq!(result.len(), 1);
1182 assert!(result[0].message.contains("ref"));
1183 }
1184
1185 #[test]
1186 fn test_multiple_references_to_same_undefined() {
1187 let rule = MD052ReferenceLinkImages::new();
1188 let content = "[first][missing] [second][missing] [third][missing]";
1189 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1190 let result = rule.check(&ctx).unwrap();
1191
1192 assert_eq!(result.len(), 1);
1194 assert!(result[0].message.contains("missing"));
1195 }
1196
1197 #[test]
1198 fn test_reference_with_special_characters() {
1199 let rule = MD052ReferenceLinkImages::new();
1200 let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
1201 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1202 let result = rule.check(&ctx).unwrap();
1203
1204 assert_eq!(result.len(), 0);
1205 }
1206
1207 #[test]
1208 fn test_issue_51_html_attribute_not_reference() {
1209 let rule = MD052ReferenceLinkImages::new();
1211 let content = r#"# Example
1212
1213## Test
1214
1215Want to fill out this form?
1216
1217<form method="post">
1218 <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
1219</form>"#;
1220 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1221 let result = rule.check(&ctx).unwrap();
1222
1223 assert_eq!(
1224 result.len(),
1225 0,
1226 "HTML attributes with square brackets should not be flagged as undefined references"
1227 );
1228 }
1229
1230 #[test]
1231 fn test_extract_references() {
1232 let rule = MD052ReferenceLinkImages::new();
1233 let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
1234 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1235 let refs = rule.extract_references(&ctx);
1236
1237 assert_eq!(refs.len(), 3);
1238 assert!(refs.contains("ref1"));
1239 assert!(refs.contains("ref2"));
1240 assert!(refs.contains("ref3"));
1241 }
1242
1243 #[test]
1244 fn test_inline_code_not_flagged() {
1245 let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1247 shortcut_syntax: true,
1248 ..Default::default()
1249 });
1250
1251 let content = r#"# Test
1253
1254Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.
1255
1256Also, `[todo]` is not a reference link.
1257
1258But this [reference] should be flagged.
1259
1260And this `[inline code]` should not be flagged.
1261"#;
1262
1263 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1264 let warnings = rule.check(&ctx).unwrap();
1265
1266 assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
1268 assert!(warnings[0].message.contains("'reference'"));
1269 }
1270
1271 #[test]
1272 fn test_code_block_references_ignored() {
1273 let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1275 shortcut_syntax: true,
1276 ..Default::default()
1277 });
1278
1279 let content = r#"# Test
1280
1281```markdown
1282[undefined] reference in code block
1283![undefined] image in code block
1284```
1285
1286[real-undefined] reference outside
1287"#;
1288
1289 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1290 let warnings = rule.check(&ctx).unwrap();
1291
1292 assert_eq!(warnings.len(), 1);
1294 assert!(warnings[0].message.contains("'real-undefined'"));
1295 }
1296
/// Bracketed text that lives inside HTML comments must not be reported,
/// while undefined references outside of comments still are.
#[test]
fn test_html_comments_ignored() {
    let rule = MD052ReferenceLinkImages::new();
    // Lints one markdown snippet with the Standard flavor and hands back the warnings.
    let lint = |markdown: &str| {
        let context = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
        rule.check(&context).unwrap()
    };

    // `[1:]` only occurs inside a single-line HTML comment.
    let markdown = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
<!--- set_env EDITOR 'python3 fake_editor.py' -->

```bash
$ python3 vote.py
3 votes for: 2
2 votes for: 3, 4
```"#;
    let warnings = lint(markdown);
    assert_eq!(warnings.len(), 0, "Should not flag [1:] inside HTML comments");

    // Single-line comments surrounding one real undefined reference.
    let markdown = r#"<!-- This is [ref1] and [ref2][ref3] -->
Normal [text][undefined]
<!-- Another [comment][with] references -->"#;
    let warnings = lint(markdown);
    assert_eq!(
        warnings.len(),
        1,
        "Should only flag the undefined reference outside comments"
    );
    assert!(warnings[0].message.contains("undefined"));

    // A comment spanning several lines, followed by one real undefined reference.
    let markdown = r#"<!--
[ref1]
[ref2][ref3]
-->
[actual][undefined]"#;
    let warnings = lint(markdown);
    assert_eq!(
        warnings.len(),
        1,
        "Should not flag references in multi-line HTML comments"
    );
    assert!(warnings[0].message.contains("undefined"));

    // Comments mixed with a defined link and an undefined image reference.
    let markdown = r#"<!-- Comment with [1:] pattern -->
Valid [link][ref]
<!-- More [refs][in][comments] -->
![image][missing]

[ref]: https://example.com"#;
    let warnings = lint(markdown);
    assert_eq!(warnings.len(), 1, "Should only flag missing image reference");
    assert!(warnings[0].message.contains("missing"));
}
1355
/// Bracketed values inside YAML (`---`) or TOML (`+++`) frontmatter must not
/// be reported; an undefined shortcut reference in the body still is.
#[test]
fn test_frontmatter_ignored() {
    let config = MD052Config {
        shortcut_syntax: true,
        ..Default::default()
    };
    let rule = MD052ReferenceLinkImages::from_config_struct(config);
    // Lints one markdown snippet with the Standard flavor and hands back the warnings.
    let lint = |markdown: &str| {
        let context = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
        rule.check(&context).unwrap()
    };

    // YAML frontmatter containing a bracketed `tags` list.
    let yaml_doc = r#"---
layout: post
title: "My Jekyll Post"
date: 2023-01-01
categories: blog
tags: ["test", "example"]
author: John Doe
---

# My Blog Post

This is the actual markdown content that should be linted.

[undefined] reference should be flagged.

## Section 1

Some content here."#;
    let yaml_warnings = lint(yaml_doc);

    assert_eq!(
        yaml_warnings.len(),
        1,
        "Should only flag the undefined reference outside frontmatter"
    );
    assert!(yaml_warnings[0].message.contains("undefined"));

    // TOML frontmatter containing a bracketed `tags` array.
    let toml_doc = r#"+++
title = "My Post"
tags = ["example", "test"]
+++

# Content

[missing] reference should be flagged."#;
    let toml_warnings = lint(toml_doc);
    assert_eq!(
        toml_warnings.len(),
        1,
        "Should only flag the undefined reference outside TOML frontmatter"
    );
    assert!(toml_warnings[0].message.contains("missing"));
}
1413
/// With the MkDocs flavor, `-8<-` snippet marker lines produce no warnings;
/// with the Standard flavor the same markers are reported.
#[test]
fn test_mkdocs_snippet_markers_not_flagged() {
    let config = MD052Config {
        shortcut_syntax: true,
        ..Default::default()
    };
    let rule = MD052ReferenceLinkImages::from_config_struct(config);
    // Lints one markdown snippet under the given flavor and hands back the warnings.
    let lint = |markdown: &str, flavor: crate::config::MarkdownFlavor| {
        let context = LintContext::new(markdown, flavor, None);
        rule.check(&context).unwrap()
    };

    // Only snippet markers (heading-style and inside HTML comments), MkDocs flavor.
    let markers_only = r#"# Document with MkDocs Snippets

Some content here.

# -8<- [start:remote-content]

This is the remote content section.

# -8<- [end:remote-content]

More content here.

<!-- --8<-- [start:another-section] -->
Content in another section
<!-- --8<-- [end:another-section] -->"#;
    let warnings = lint(markers_only, crate::config::MarkdownFlavor::MkDocs);

    assert_eq!(
        warnings.len(),
        0,
        "Should not flag MkDocs snippet markers as undefined references"
    );

    // Undefined references between and after the markers are still reported.
    let markers_and_refs = r#"# Document

# -8<- [start:section]
Content with [reference] inside snippet section
# -8<- [end:section]

Regular [undefined] reference outside snippet markers."#;
    let warnings = lint(markers_and_refs, crate::config::MarkdownFlavor::MkDocs);

    assert_eq!(
        warnings.len(),
        2,
        "Should flag undefined references but skip snippet marker lines"
    );
    assert!(warnings[0].message.contains("reference"));
    assert!(warnings[1].message.contains("undefined"));

    // The same kind of marker lines linted with the Standard flavor.
    let standard_doc = r#"# Document

# -8<- [start:section]
# -8<- [end:section]"#;
    let warnings = lint(standard_doc, crate::config::MarkdownFlavor::Standard);

    assert_eq!(
        warnings.len(),
        2,
        "In standard mode, snippet markers should be flagged as undefined references"
    );
}
1484
/// Pandoc citations such as `[@key]` and `[@a; @b]` must not be reported,
/// while a plain undefined shortcut reference in the same document is.
#[test]
fn test_pandoc_citations_not_flagged() {
    let config = MD052Config {
        shortcut_syntax: true,
        ..Default::default()
    };
    let rule = MD052ReferenceLinkImages::from_config_struct(config);

    let markdown = r#"# Research Paper

We are using the **bookdown** package [@R-bookdown] in this sample book.
This was built on top of R Markdown and **knitr** [@xie2015].

Multiple citations [@citation1; @citation2; @citation3] are also supported.

Regular [undefined] reference should still be flagged.
"#;
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let warnings = rule.check(&context).unwrap();

    assert_eq!(
        warnings.len(),
        1,
        "Should only flag the undefined reference, not Pandoc citations"
    );
    let only_warning = &warnings[0];
    assert!(only_warning.message.contains("undefined"));
}
1514
/// Pandoc inline footnotes (`^[...]`) must not be reported, while a bracketed
/// reference without the leading `^` is.
#[test]
fn test_pandoc_inline_footnotes_not_flagged() {
    let config = MD052Config {
        shortcut_syntax: true,
        ..Default::default()
    };
    let rule = MD052ReferenceLinkImages::from_config_struct(config);

    let markdown = r#"# Math Document

You can use math in footnotes like this^[where we mention $p = \frac{a}{b}$].

Another footnote^[with some text and a [link](https://example.com)].

But this [reference] without ^ should be flagged.
"#;
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let warnings = rule.check(&context).unwrap();

    assert_eq!(
        warnings.len(),
        1,
        "Should only flag the regular reference, not inline footnotes"
    );
    let only_warning = &warnings[0];
    assert!(only_warning.message.contains("reference"));
}
1543
/// GitHub alert markers (`> [!NOTE]`, `> [!TIP]`, ...) must not be reported,
/// while ordinary references in or around alert blocks are still checked.
#[test]
fn test_github_alerts_not_flagged() {
    let config = MD052Config {
        shortcut_syntax: true,
        ..Default::default()
    };
    let rule = MD052ReferenceLinkImages::from_config_struct(config);
    // Lints one markdown snippet with the Standard flavor and hands back the warnings.
    let lint = |markdown: &str| {
        let context = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
        rule.check(&context).unwrap()
    };

    // Five alert kinds followed by one undefined reference on line 18.
    let all_alerts = r#"# Document with GitHub Alerts

> [!NOTE]
> This is a note alert.

> [!TIP]
> This is a tip alert.

> [!IMPORTANT]
> This is an important alert.

> [!WARNING]
> This is a warning alert.

> [!CAUTION]
> This is a caution alert.

Regular content with [undefined] reference."#;
    let warnings = lint(all_alerts);

    assert_eq!(
        warnings.len(),
        1,
        "Should only flag the undefined reference, not GitHub alerts"
    );
    assert!(warnings[0].message.contains("undefined"));
    assert_eq!(warnings[0].line, 18);

    // The same undefined label is used inside the alert body and after it;
    // exactly one warning is expected.
    let alert_with_undefined = r#"> [!TIP]
> Here's a useful tip about [something].
> Multiple lines are allowed.

[something] is mentioned but not defined."#;
    let warnings = lint(alert_with_undefined);

    assert_eq!(warnings.len(), 1, "Should flag undefined reference");
    assert!(warnings[0].message.contains("something"));

    // A reference used inside an alert body that has a definition.
    let alert_with_defined = r#"> [!NOTE]
> See [reference] for more details.

[reference]: https://example.com"#;
    let warnings = lint(alert_with_defined);

    assert_eq!(warnings.len(), 0, "Should not flag GitHub alerts or defined references");
}
1610
/// Names listed in `ignore` are not reported as undefined shortcut
/// references; a name missing from the list still is.
#[test]
fn test_ignore_config() {
    let ignored_names = ["Vec", "HashMap", "Option"];
    let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
        shortcut_syntax: true,
        ignore: ignored_names.iter().map(|name| name.to_string()).collect(),
    });

    let markdown = r#"# Document with Custom Types

Use [Vec] for dynamic arrays.
Use [HashMap] for key-value storage.
Use [Option] for nullable values.
Use [Result] for error handling.
"#;
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let warnings = rule.check(&context).unwrap();

    // `Result` is the only bracketed name absent from the ignore list.
    assert_eq!(warnings.len(), 1, "Should only flag names not in ignore");
    assert!(warnings[0].message.contains("Result"));
}
1634
/// An `ignore` entry matches regardless of letter case: `Vec`, `vec` and
/// `VEC` are all skipped when only `Vec` is configured.
#[test]
fn test_ignore_case_insensitive() {
    let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
        shortcut_syntax: true,
        ignore: vec![String::from("Vec")],
    });

    let markdown = r#"# Case Insensitivity Test

[Vec] should be ignored.
[vec] should also be ignored (different case, same match).
[VEC] should also be ignored (different case, same match).
[undefined] should be flagged (not in ignore list).
"#;
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let warnings = rule.check(&context).unwrap();

    assert_eq!(warnings.len(), 1, "Should only flag non-ignored reference");
    let only_warning = &warnings[0];
    assert!(only_warning.message.contains("undefined"));
}
1658
/// A rule built with `new()` reports an undefined full reference link,
/// i.e. nothing is ignored out of the box for this input.
#[test]
fn test_ignore_empty_by_default() {
    let rule = MD052ReferenceLinkImages::new();

    let markdown = "[text][undefined]";
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let warnings = rule.check(&context).unwrap();

    assert_eq!(warnings.len(), 1);
    let only_warning = &warnings[0];
    assert!(only_warning.message.contains("undefined"));
}
1672
/// The `ignore` list also applies to full reference links (`[text][label]`)
/// when `shortcut_syntax` is disabled: `CustomType` is skipped, while
/// `MissingRef` is reported.
#[test]
fn test_ignore_with_reference_links() {
    let config = MD052Config {
        shortcut_syntax: false,
        ignore: vec!["CustomType".to_string()],
    };
    let rule = MD052ReferenceLinkImages::from_config_struct(config);

    let content = r#"# Test

See [documentation][CustomType] for details.
See [other docs][MissingRef] for more.
"#;
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
    let result = rule.check(&ctx).unwrap();

    // Collect the messages so a failing count assertion shows what was
    // actually reported, instead of printing unconditionally on every run.
    let messages: Vec<_> = result.iter().map(|w| &w.message).collect();
    assert_eq!(result.len(), 1, "Expected exactly 1 warning, got: {:?}", messages);

    // The assertion expects the label in lowercase form in the message.
    assert!(
        result[0].message.contains("missingref"),
        "Expected 'missingref' in message: {}",
        result[0].message
    );
}
1704
/// Several `ignore` entries are honoured at once: of six bracketed names on
/// one line, only the one missing from the list (`Box`) is reported.
#[test]
fn test_ignore_multiple() {
    let ignored_names = ["i32", "u64", "String", "Arc", "Mutex"];
    let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
        shortcut_syntax: true,
        ignore: ignored_names.iter().map(|name| name.to_string()).collect(),
    });

    let markdown = r#"# Types

[i32] [u64] [String] [Arc] [Mutex] [Box]
"#;
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let warnings = rule.check(&context).unwrap();

    assert_eq!(warnings.len(), 1);
    let only_warning = &warnings[0];
    assert!(only_warning.message.contains("Box"));
}
1733
/// A reference definition that appears inside a three-backtick fence nested
/// in a four-backtick fence must not count as a definition, so a link using
/// that label outside the fence is reported.
#[test]
fn test_nested_code_fences_reference_extraction() {
    let rule = MD052ReferenceLinkImages::new();

    // Assembled line by line; the resulting string is identical to the
    // single-literal form with embedded `\n` escapes.
    let markdown = concat!(
        "````\n",
        "```\n",
        "[ref-inside]: https://example.com\n",
        "```\n",
        "````\n",
        "\n",
        "[Use this link][ref-inside]",
    );
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let warnings = rule.check(&context).unwrap();

    assert_eq!(
        warnings.len(),
        1,
        "Reference defined inside nested code fence should not count as a definition"
    );
    assert!(warnings[0].message.contains("ref-inside"));
}
1756}