1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::SHORTCUT_REF_REGEX;
5use crate::utils::skip_context::{is_in_math_context, is_in_table_cell};
6use regex::Regex;
7use std::collections::{HashMap, HashSet};
8use std::sync::LazyLock;
9
10mod md052_config;
11use md052_config::MD052Config;
12
/// Matches a reference definition line: optional leading whitespace, a bracketed label,
/// a colon, and the rest of the line. Capture group 1 is the label; it may contain
/// backslash escapes and one level of nested `[...]`.
static REF_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap());
18
/// Matches the start of an unordered list item (`-`, `*` or `+` followed by whitespace),
/// optionally followed by a task checkbox such as `[ ]`, `[x]` or `[X]`.
static LIST_ITEM_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap());
21
/// Matches a heading line whose entire text is `Output`, `Example`, `Output Style`
/// or `Output Format`. The match is case-sensitive and the heading must start at column 0.
static OUTPUT_EXAMPLE_START: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap());
25
/// Matches a blockquote line that opens with an alert marker such as `> [!NOTE]`.
/// Only the upper-case alert names listed in the pattern are recognised.
static GITHUB_ALERT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]")
        .unwrap()
});
32
/// Matches `http://` / `https://` URLs that legitimately contain square brackets:
/// either a bracketed host made of hex digits, `:`, `.` and `%` (the shape of an IPv6
/// literal), or a bracket-free prefix, a `/`, and a non-whitespace tail ending in `[digits]`.
static URL_WITH_BRACKETS: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])").unwrap());
42
/// Rule MD052: reference links and images should use a reference that exists.
///
/// Holds the rule's configuration; all checking logic lives in the `impl` blocks.
#[derive(Clone, Default)]
pub struct MD052ReferenceLinkImages {
    /// Rule options. This file reads `ignore` and `shortcut_syntax` from it.
    config: MD052Config,
}
59
60impl MD052ReferenceLinkImages {
61 pub fn new() -> Self {
62 Self {
63 config: MD052Config::default(),
64 }
65 }
66
    /// Creates the rule from an already-built configuration value.
    pub fn from_config_struct(config: MD052Config) -> Self {
        Self { config }
    }
70
71 fn strip_backticks(s: &str) -> &str {
74 s.trim_start_matches('`').trim_end_matches('`')
75 }
76
77 fn is_valid_python_identifier(s: &str) -> bool {
81 if s.is_empty() {
82 return false;
83 }
84 let first_char = s.chars().next().unwrap();
85 if !first_char.is_ascii_alphabetic() && first_char != '_' {
86 return false;
87 }
88 s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
89 }
90
91 fn is_known_non_reference_pattern(&self, text: &str) -> bool {
100 if self.config.ignore.iter().any(|p| p.eq_ignore_ascii_case(text)) {
104 return true;
105 }
106 if text.chars().all(|c| c.is_ascii_digit()) {
108 return true;
109 }
110
111 if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
113 return true;
114 }
115
116 if text.contains('.')
120 && !text.contains(' ')
121 && !text.contains('-')
122 && !text.contains('_')
123 && !text.contains('`')
124 {
125 return true;
127 }
128
129 if text == "*" || text == "..." || text == "**" {
131 return true;
132 }
133
134 if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
136 return true;
137 }
138
139 if text.contains(',') || text.contains('[') || text.contains(']') {
142 return true;
144 }
145
146 if !text.contains('`')
153 && text.contains('.')
154 && !text.contains(' ')
155 && !text.contains('-')
156 && !text.contains('_')
157 {
158 return true;
159 }
160
161 if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
168 return true;
169 }
170
171 if text.len() <= 2 && !text.chars().all(|c| c.is_alphabetic()) {
173 return true;
174 }
175
176 if (text.starts_with('"') && text.ends_with('"'))
178 || (text.starts_with('\'') && text.ends_with('\''))
179 || text.contains('"')
180 || text.contains('\'')
181 {
182 return true;
183 }
184
185 if text.contains(':') && text.contains(' ') {
188 return true;
189 }
190
191 if text.starts_with('!') {
193 return true;
194 }
195
196 if text.starts_with('^') {
199 return true;
200 }
201
202 if text.starts_with('@') {
205 return true;
206 }
207
208 if text == "TOC" {
211 return true;
212 }
213
214 if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
216 return true;
217 }
218
219 let common_non_refs = [
222 "object",
224 "Object",
225 "any",
226 "Any",
227 "inv",
228 "void",
229 "bool",
230 "int",
231 "float",
232 "str",
233 "char",
234 "i8",
235 "i16",
236 "i32",
237 "i64",
238 "i128",
239 "isize",
240 "u8",
241 "u16",
242 "u32",
243 "u64",
244 "u128",
245 "usize",
246 "f32",
247 "f64",
248 "null",
250 "true",
251 "false",
252 "NaN",
253 "Infinity",
254 "object Object",
256 ];
257
258 if common_non_refs.contains(&text) {
259 return true;
260 }
261
262 false
263 }
264
265 fn is_in_code_span(byte_pos: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
267 code_spans
268 .iter()
269 .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
270 }
271
272 fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
274 for html_tag in ctx.html_tags().iter() {
276 if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
277 return true;
278 }
279 }
280 false
281 }
282
283 fn extract_references(&self, ctx: &crate::lint_context::LintContext) -> HashSet<String> {
284 use crate::utils::skip_context::is_mkdocs_snippet_line;
285
286 let mut references = HashSet::new();
287
288 for (line_num, line) in ctx.content.lines().enumerate() {
289 if let Some(line_info) = ctx.line_info(line_num + 1)
291 && line_info.in_code_block
292 {
293 continue;
294 }
295
296 if is_mkdocs_snippet_line(line, ctx.flavor) {
298 continue;
299 }
300
301 if line.trim_start().starts_with("*[") {
304 continue;
305 }
306
307 if let Some(cap) = REF_REGEX.captures(line) {
308 if let Some(reference) = cap.get(1) {
310 references.insert(reference.as_str().to_lowercase());
311 }
312 }
313 }
314
315 references
316 }
317
318 fn find_undefined_references(
319 &self,
320 references: &HashSet<String>,
321 ctx: &crate::lint_context::LintContext,
322 mkdocs_mode: bool,
323 ) -> Vec<(usize, usize, usize, String)> {
324 let mut undefined = Vec::new();
325 let mut reported_refs = HashMap::new();
326 let mut in_example_section = false;
327
328 let code_spans = ctx.code_spans();
330
331 for link in &ctx.links {
333 if !link.is_reference {
334 continue; }
336
337 if ctx.is_in_jinja_range(link.byte_offset) {
339 continue;
340 }
341
342 if Self::is_in_code_span(link.byte_offset, &code_spans) {
344 continue;
345 }
346
347 if ctx.is_in_html_comment(link.byte_offset) {
349 continue;
350 }
351
352 if Self::is_in_html_tag(ctx, link.byte_offset) {
354 continue;
355 }
356
357 if is_in_math_context(ctx, link.byte_offset) {
359 continue;
360 }
361
362 if is_in_table_cell(ctx, link.line, link.start_col) {
364 continue;
365 }
366
367 if ctx.line_info(link.line).is_some_and(|info| info.in_front_matter) {
369 continue;
370 }
371
372 if ctx.flavor == crate::config::MarkdownFlavor::Quarto && ctx.is_in_citation(link.byte_offset) {
375 continue;
376 }
377
378 if ctx.is_in_shortcode(link.byte_offset) {
381 continue;
382 }
383
384 if let Some(ref_id) = &link.reference_id {
385 let reference_lower = ref_id.to_lowercase();
386
387 if self.is_known_non_reference_pattern(ref_id) {
389 continue;
390 }
391
392 let stripped_ref = Self::strip_backticks(ref_id);
396 let stripped_text = Self::strip_backticks(&link.text);
397 if mkdocs_mode
398 && (is_mkdocs_auto_reference(stripped_ref)
399 || is_mkdocs_auto_reference(stripped_text)
400 || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
401 || (link.text.as_ref() != stripped_text && Self::is_valid_python_identifier(stripped_text)))
402 {
403 continue;
404 }
405
406 if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
408 if let Some(line_info) = ctx.line_info(link.line) {
410 if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
411 in_example_section = true;
412 continue;
413 }
414
415 if in_example_section {
416 continue;
417 }
418
419 if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
421 continue;
422 }
423
424 let trimmed = line_info.content(ctx.content).trim_start();
426 if trimmed.starts_with('<') {
427 continue;
428 }
429 }
430
431 let match_len = link.byte_end - link.byte_offset;
432 undefined.push((link.line - 1, link.start_col, match_len, ref_id.to_string()));
433 reported_refs.insert(reference_lower, true);
434 }
435 }
436 }
437
438 for image in &ctx.images {
440 if !image.is_reference {
441 continue; }
443
444 if ctx.is_in_jinja_range(image.byte_offset) {
446 continue;
447 }
448
449 if Self::is_in_code_span(image.byte_offset, &code_spans) {
451 continue;
452 }
453
454 if ctx.is_in_html_comment(image.byte_offset) {
456 continue;
457 }
458
459 if Self::is_in_html_tag(ctx, image.byte_offset) {
461 continue;
462 }
463
464 if is_in_math_context(ctx, image.byte_offset) {
466 continue;
467 }
468
469 if is_in_table_cell(ctx, image.line, image.start_col) {
471 continue;
472 }
473
474 if ctx.line_info(image.line).is_some_and(|info| info.in_front_matter) {
476 continue;
477 }
478
479 if let Some(ref_id) = &image.reference_id {
480 let reference_lower = ref_id.to_lowercase();
481
482 if self.is_known_non_reference_pattern(ref_id) {
484 continue;
485 }
486
487 let stripped_ref = Self::strip_backticks(ref_id);
491 let stripped_alt = Self::strip_backticks(&image.alt_text);
492 if mkdocs_mode
493 && (is_mkdocs_auto_reference(stripped_ref)
494 || is_mkdocs_auto_reference(stripped_alt)
495 || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
496 || (image.alt_text.as_ref() != stripped_alt && Self::is_valid_python_identifier(stripped_alt)))
497 {
498 continue;
499 }
500
501 if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
503 if let Some(line_info) = ctx.line_info(image.line) {
505 if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
506 in_example_section = true;
507 continue;
508 }
509
510 if in_example_section {
511 continue;
512 }
513
514 if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
516 continue;
517 }
518
519 let trimmed = line_info.content(ctx.content).trim_start();
521 if trimmed.starts_with('<') {
522 continue;
523 }
524 }
525
526 let match_len = image.byte_end - image.byte_offset;
527 undefined.push((image.line - 1, image.start_col, match_len, ref_id.to_string()));
528 reported_refs.insert(reference_lower, true);
529 }
530 }
531 }
532
533 let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
535
536 for link in &ctx.links {
538 covered_ranges.push((link.byte_offset, link.byte_end));
539 }
540
541 for image in &ctx.images {
543 covered_ranges.push((image.byte_offset, image.byte_end));
544 }
545
546 covered_ranges.sort_by_key(|&(start, _)| start);
548
549 if !self.config.shortcut_syntax {
554 return undefined;
555 }
556
557 let lines = ctx.raw_lines();
559 in_example_section = false; for (line_num, line) in lines.iter().enumerate() {
562 if let Some(line_info) = ctx.line_info(line_num + 1)
564 && (line_info.in_front_matter || line_info.in_code_block)
565 {
566 continue;
567 }
568
569 if OUTPUT_EXAMPLE_START.is_match(line) {
571 in_example_section = true;
572 continue;
573 }
574
575 if in_example_section {
576 if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
578 in_example_section = false;
579 } else {
580 continue;
581 }
582 }
583
584 if LIST_ITEM_REGEX.is_match(line) {
586 continue;
587 }
588
589 let trimmed_line = line.trim_start();
591 if trimmed_line.starts_with('<') {
592 continue;
593 }
594
595 if GITHUB_ALERT_REGEX.is_match(line) {
597 continue;
598 }
599
600 if trimmed_line.starts_with("*[") {
603 continue;
604 }
605
606 let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
609 for mat in URL_WITH_BRACKETS.find_iter(line) {
610 let url_str = mat.as_str();
612 let url_start = mat.start();
613
614 let mut idx = 0;
616 while idx < url_str.len() {
617 if let Some(bracket_start) = url_str[idx..].find('[') {
618 let bracket_start_abs = url_start + idx + bracket_start;
619 if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
620 let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
621 url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
622 idx += bracket_start + bracket_end + 2;
623 } else {
624 break;
625 }
626 } else {
627 break;
628 }
629 }
630 }
631
632 if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
634 for cap in captures {
635 if let Some(ref_match) = cap.get(1) {
636 let bracket_start = cap.get(0).unwrap().start();
638 let bracket_end = cap.get(0).unwrap().end();
639
640 let is_in_url = url_bracket_ranges
642 .iter()
643 .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
644
645 if is_in_url {
646 continue;
647 }
648
649 if bracket_start > 0 {
652 if let Some(byte) = line.as_bytes().get(bracket_start.saturating_sub(1))
654 && *byte == b'^'
655 {
656 continue; }
658 }
659
660 let reference = ref_match.as_str();
661 let reference_lower = reference.to_lowercase();
662
663 if self.is_known_non_reference_pattern(reference) {
665 continue;
666 }
667
668 if let Some(alert_type) = reference.strip_prefix('!')
670 && matches!(
671 alert_type,
672 "NOTE"
673 | "TIP"
674 | "WARNING"
675 | "IMPORTANT"
676 | "CAUTION"
677 | "INFO"
678 | "SUCCESS"
679 | "FAILURE"
680 | "DANGER"
681 | "BUG"
682 | "EXAMPLE"
683 | "QUOTE"
684 )
685 {
686 continue;
687 }
688
689 if mkdocs_mode
692 && (reference.starts_with("start:") || reference.starts_with("end:"))
693 && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
694 || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
695 {
696 continue;
697 }
698
699 let stripped_ref = Self::strip_backticks(reference);
702 if mkdocs_mode
703 && (is_mkdocs_auto_reference(stripped_ref)
704 || (reference != stripped_ref && Self::is_valid_python_identifier(stripped_ref)))
705 {
706 continue;
707 }
708
709 if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
710 let full_match = cap.get(0).unwrap();
711 let col = full_match.start();
712 let line_start_byte = ctx.line_offsets[line_num];
713 let byte_pos = line_start_byte + col;
714
715 let code_spans = ctx.code_spans();
717 if Self::is_in_code_span(byte_pos, &code_spans) {
718 continue;
719 }
720
721 if ctx.is_in_jinja_range(byte_pos) {
723 continue;
724 }
725
726 if crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block(
728 &ctx.code_blocks,
729 byte_pos,
730 ) {
731 continue;
732 }
733
734 if ctx.is_in_html_comment(byte_pos) {
736 continue;
737 }
738
739 if Self::is_in_html_tag(ctx, byte_pos) {
741 continue;
742 }
743
744 if is_in_math_context(ctx, byte_pos) {
746 continue;
747 }
748
749 if is_in_table_cell(ctx, line_num + 1, col) {
751 continue;
752 }
753
754 let byte_end = byte_pos + (full_match.end() - full_match.start());
755
756 let mut is_covered = false;
758 for &(range_start, range_end) in &covered_ranges {
759 if range_start <= byte_pos && byte_end <= range_end {
760 is_covered = true;
762 break;
763 }
764 if range_start > byte_end {
765 break;
767 }
768 }
769
770 if is_covered {
771 continue;
772 }
773
774 let line_chars: Vec<char> = line.chars().collect();
779 if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
780 let mut bracket_count = 1; let mut check_pos = col.saturating_sub(2);
783 let mut found_opening = false;
784
785 while check_pos > 0 && check_pos < line_chars.len() {
786 match line_chars.get(check_pos) {
787 Some(&']') => bracket_count += 1,
788 Some(&'[') => {
789 bracket_count -= 1;
790 if bracket_count == 0 {
791 if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
793 found_opening = true;
794 }
795 break;
796 }
797 }
798 _ => {}
799 }
800 if check_pos == 0 {
801 break;
802 }
803 check_pos = check_pos.saturating_sub(1);
804 }
805
806 if found_opening {
807 continue;
809 }
810 }
811
812 let before_text = &line[..col];
815 if before_text.contains("\\]") {
816 if let Some(escaped_close_pos) = before_text.rfind("\\]") {
818 let search_text = &before_text[..escaped_close_pos];
819 if search_text.contains("\\[") {
820 continue;
822 }
823 }
824 }
825
826 let match_len = full_match.end() - full_match.start();
827 undefined.push((line_num, col, match_len, reference.to_string()));
828 reported_refs.insert(reference_lower, true);
829 }
830 }
831 }
832 }
833 }
834
835 undefined
836 }
837}
838
839impl Rule for MD052ReferenceLinkImages {
840 fn name(&self) -> &'static str {
841 "MD052"
842 }
843
844 fn description(&self) -> &'static str {
845 "Reference links and images should use a reference that exists"
846 }
847
848 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
849 let content = ctx.content;
850 let mut warnings = Vec::new();
851
852 if !content.contains('[') {
854 return Ok(warnings);
855 }
856
857 let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
859
860 let references = self.extract_references(ctx);
861
862 let lines = ctx.raw_lines();
864 for (line_num, col, match_len, reference) in self.find_undefined_references(&references, ctx, mkdocs_mode) {
865 let line_content = lines.get(line_num).unwrap_or(&"");
866
867 let (start_line, start_col, end_line, end_col) =
869 calculate_match_range(line_num + 1, line_content, col, match_len);
870
871 warnings.push(LintWarning {
872 rule_name: Some(self.name().to_string()),
873 line: start_line,
874 column: start_col,
875 end_line,
876 end_column: end_col,
877 message: format!("Reference '{reference}' not found"),
878 severity: Severity::Warning,
879 fix: None,
880 });
881 }
882
883 Ok(warnings)
884 }
885
886 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
888 ctx.content.is_empty() || !ctx.likely_has_links_or_images()
890 }
891
892 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
893 let content = ctx.content;
894 Ok(content.to_string())
896 }
897
898 fn as_any(&self) -> &dyn std::any::Any {
899 self
900 }
901
902 fn default_config_section(&self) -> Option<(String, toml::Value)> {
903 let json_value = serde_json::to_value(&self.config).ok()?;
904 Some((
905 self.name().to_string(),
906 crate::rule_config_serde::json_to_toml_value(&json_value)?,
907 ))
908 }
909
910 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
911 where
912 Self: Sized,
913 {
914 let rule_config = crate::rule_config_serde::load_rule_config::<MD052Config>(config);
915 Box::new(Self::from_config_struct(rule_config))
916 }
917}
918
919#[cfg(test)]
920mod tests {
921 use super::*;
922 use crate::lint_context::LintContext;
923
924 #[test]
925 fn test_valid_reference_link() {
926 let rule = MD052ReferenceLinkImages::new();
927 let content = "[text][ref]\n\n[ref]: https://example.com";
928 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
929 let result = rule.check(&ctx).unwrap();
930
931 assert_eq!(result.len(), 0);
932 }
933
934 #[test]
935 fn test_undefined_reference_link() {
936 let rule = MD052ReferenceLinkImages::new();
937 let content = "[text][undefined]";
938 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
939 let result = rule.check(&ctx).unwrap();
940
941 assert_eq!(result.len(), 1);
942 assert!(result[0].message.contains("Reference 'undefined' not found"));
943 }
944
945 #[test]
946 fn test_valid_reference_image() {
947 let rule = MD052ReferenceLinkImages::new();
948 let content = "![alt][img]\n\n[img]: image.jpg";
949 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
950 let result = rule.check(&ctx).unwrap();
951
952 assert_eq!(result.len(), 0);
953 }
954
955 #[test]
956 fn test_undefined_reference_image() {
957 let rule = MD052ReferenceLinkImages::new();
958 let content = "![alt][missing]";
959 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
960 let result = rule.check(&ctx).unwrap();
961
962 assert_eq!(result.len(), 1);
963 assert!(result[0].message.contains("Reference 'missing' not found"));
964 }
965
966 #[test]
967 fn test_case_insensitive_references() {
968 let rule = MD052ReferenceLinkImages::new();
969 let content = "[Text][REF]\n\n[ref]: https://example.com";
970 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
971 let result = rule.check(&ctx).unwrap();
972
973 assert_eq!(result.len(), 0);
974 }
975
976 #[test]
977 fn test_shortcut_reference_valid() {
978 let rule = MD052ReferenceLinkImages::new();
979 let content = "[ref]\n\n[ref]: https://example.com";
980 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
981 let result = rule.check(&ctx).unwrap();
982
983 assert_eq!(result.len(), 0);
984 }
985
986 #[test]
987 fn test_shortcut_reference_undefined_with_shortcut_syntax_enabled() {
988 let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
991 shortcut_syntax: true,
992 ..Default::default()
993 });
994 let content = "[undefined]";
995 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
996 let result = rule.check(&ctx).unwrap();
997
998 assert_eq!(result.len(), 1);
999 assert!(result[0].message.contains("Reference 'undefined' not found"));
1000 }
1001
1002 #[test]
1003 fn test_shortcut_reference_not_checked_by_default() {
1004 let rule = MD052ReferenceLinkImages::new();
1006 let content = "[undefined]";
1007 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1008 let result = rule.check(&ctx).unwrap();
1009
1010 assert_eq!(result.len(), 0);
1012 }
1013
1014 #[test]
1015 fn test_inline_links_ignored() {
1016 let rule = MD052ReferenceLinkImages::new();
1017 let content = "[text](https://example.com)";
1018 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1019 let result = rule.check(&ctx).unwrap();
1020
1021 assert_eq!(result.len(), 0);
1022 }
1023
1024 #[test]
1025 fn test_inline_images_ignored() {
1026 let rule = MD052ReferenceLinkImages::new();
1027 let content = "";
1028 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1029 let result = rule.check(&ctx).unwrap();
1030
1031 assert_eq!(result.len(), 0);
1032 }
1033
1034 #[test]
1035 fn test_references_in_code_blocks_ignored() {
1036 let rule = MD052ReferenceLinkImages::new();
1037 let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
1038 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1039 let result = rule.check(&ctx).unwrap();
1040
1041 assert_eq!(result.len(), 0);
1042 }
1043
1044 #[test]
1045 fn test_references_in_inline_code_ignored() {
1046 let rule = MD052ReferenceLinkImages::new();
1047 let content = "`[undefined]`";
1048 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1049 let result = rule.check(&ctx).unwrap();
1050
1051 assert_eq!(result.len(), 0);
1053 }
1054
    /// With shortcut checking enabled, references outside code are reported while the
    /// same syntax inside inline code spans or fenced code blocks is ignored.
    #[test]
    fn test_comprehensive_inline_code_detection() {
        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
            shortcut_syntax: true,
            ..Default::default()
        });
        let content = r#"# Test

This `[inside]` should be ignored.
This [outside] should be flagged.
Reference links `[text][ref]` in code are ignored.
Regular reference [text][missing] should be flagged.
Images `![alt][img]` in code are ignored.
Regular image ![alt][badimg] should be flagged.

Multiple `[one]` and `[two]` in code ignored, but [three] is not.

```
[code block content] should be ignored
```

`Multiple [refs] in [same] code span` ignored."#;

        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();

        // Exactly the four references written outside code: outside, missing, badimg, three.
        assert_eq!(result.len(), 4);

        let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
        assert!(messages.iter().any(|m| m.contains("outside")));
        assert!(messages.iter().any(|m| m.contains("missing")));
        assert!(messages.iter().any(|m| m.contains("badimg")));
        assert!(messages.iter().any(|m| m.contains("three")));

        // Nothing that only appears inside code may show up in a message.
        assert!(!messages.iter().any(|m| m.contains("inside")));
        assert!(!messages.iter().any(|m| m.contains("one")));
        assert!(!messages.iter().any(|m| m.contains("two")));
        assert!(!messages.iter().any(|m| m.contains("refs")));
        assert!(!messages.iter().any(|m| m.contains("same")));
    }
1098
1099 #[test]
1100 fn test_multiple_undefined_references() {
1101 let rule = MD052ReferenceLinkImages::new();
1102 let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
1103 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1104 let result = rule.check(&ctx).unwrap();
1105
1106 assert_eq!(result.len(), 3);
1107 assert!(result[0].message.contains("ref1"));
1108 assert!(result[1].message.contains("ref2"));
1109 assert!(result[2].message.contains("ref3"));
1110 }
1111
1112 #[test]
1113 fn test_mixed_valid_and_undefined() {
1114 let rule = MD052ReferenceLinkImages::new();
1115 let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
1116 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1117 let result = rule.check(&ctx).unwrap();
1118
1119 assert_eq!(result.len(), 1);
1120 assert!(result[0].message.contains("missing"));
1121 }
1122
1123 #[test]
1124 fn test_empty_reference() {
1125 let rule = MD052ReferenceLinkImages::new();
1126 let content = "[text][]\n\n[ref]: https://example.com";
1127 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1128 let result = rule.check(&ctx).unwrap();
1129
1130 assert_eq!(result.len(), 1);
1132 }
1133
1134 #[test]
1135 fn test_escaped_brackets_ignored() {
1136 let rule = MD052ReferenceLinkImages::new();
1137 let content = "\\[not a link\\]";
1138 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1139 let result = rule.check(&ctx).unwrap();
1140
1141 assert_eq!(result.len(), 0);
1142 }
1143
1144 #[test]
1145 fn test_list_items_ignored() {
1146 let rule = MD052ReferenceLinkImages::new();
1147 let content = "- [undefined]\n* [another]\n+ [third]";
1148 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1149 let result = rule.check(&ctx).unwrap();
1150
1151 assert_eq!(result.len(), 0);
1153 }
1154
1155 #[test]
1156 fn test_output_example_section_ignored() {
1157 let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1159 shortcut_syntax: true,
1160 ..Default::default()
1161 });
1162 let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
1163 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1164 let result = rule.check(&ctx).unwrap();
1165
1166 assert_eq!(result.len(), 1);
1168 assert!(result[0].message.contains("missing"));
1169 }
1170
1171 #[test]
1172 fn test_reference_definitions_in_code_blocks_ignored() {
1173 let rule = MD052ReferenceLinkImages::new();
1174 let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
1175 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1176 let result = rule.check(&ctx).unwrap();
1177
1178 assert_eq!(result.len(), 1);
1180 assert!(result[0].message.contains("ref"));
1181 }
1182
1183 #[test]
1184 fn test_multiple_references_to_same_undefined() {
1185 let rule = MD052ReferenceLinkImages::new();
1186 let content = "[first][missing] [second][missing] [third][missing]";
1187 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1188 let result = rule.check(&ctx).unwrap();
1189
1190 assert_eq!(result.len(), 1);
1192 assert!(result[0].message.contains("missing"));
1193 }
1194
1195 #[test]
1196 fn test_reference_with_special_characters() {
1197 let rule = MD052ReferenceLinkImages::new();
1198 let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
1199 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1200 let result = rule.check(&ctx).unwrap();
1201
1202 assert_eq!(result.len(), 0);
1203 }
1204
    /// Regression test for issue 51: square brackets inside an HTML attribute value
    /// (`name="fields[email]"`) must not be reported as an undefined reference.
    #[test]
    fn test_issue_51_html_attribute_not_reference() {
        let rule = MD052ReferenceLinkImages::new();
        let content = r#"# Example

## Test

Want to fill out this form?

<form method="post">
    <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
</form>"#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(
            result.len(),
            0,
            "HTML attributes with square brackets should not be flagged as undefined references"
        );
    }
1227
1228 #[test]
1229 fn test_extract_references() {
1230 let rule = MD052ReferenceLinkImages::new();
1231 let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
1232 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1233 let refs = rule.extract_references(&ctx);
1234
1235 assert_eq!(refs.len(), 3);
1236 assert!(refs.contains("ref1"));
1237 assert!(refs.contains("ref2"));
1238 assert!(refs.contains("ref3"));
1239 }
1240
    /// With shortcut checking enabled, bracketed text inside inline code (including a
    /// JSON-like array) is ignored and only the plain-text `[reference]` is reported.
    #[test]
    fn test_inline_code_not_flagged() {
        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
            shortcut_syntax: true,
            ..Default::default()
        });

        let content = r#"# Test

Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.

Also, `[todo]` is not a reference link.

But this [reference] should be flagged.

And this `[inline code]` should not be flagged.
"#;

        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();

        assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
        assert!(warnings[0].message.contains("'reference'"));
    }
1268
    /// With shortcut checking enabled, references inside a fenced code block are ignored
    /// while the one outside the block is reported.
    #[test]
    fn test_code_block_references_ignored() {
        let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
            shortcut_syntax: true,
            ..Default::default()
        });

        let content = r#"# Test

```markdown
[undefined] reference in code block
![undefined] image in code block
```

[real-undefined] reference outside
"#;

        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("'real-undefined'"));
    }
1294
    /// Bracketed text inside HTML comments (single-line and multi-line) is never
    /// reported; undefined references outside comments still are.
    #[test]
    fn test_html_comments_ignored() {
        let rule = MD052ReferenceLinkImages::new();

        // Case 1: a slice expression (`[1:]`) inside a comment.
        let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
<!--- set_env EDITOR 'python3 fake_editor.py' -->

```bash
$ python3 vote.py
3 votes for: 2
2 votes for: 3, 4
```"#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");

        // Case 2: references in single-line comments around a real undefined reference.
        let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
Normal [text][undefined]
<!-- Another [comment][with] references -->"#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(
            result.len(),
            1,
            "Should only flag the undefined reference outside comments"
        );
        assert!(result[0].message.contains("undefined"));

        // Case 3: references inside a multi-line comment.
        let content = r#"<!--
[ref1]
[ref2][ref3]
-->
[actual][undefined]"#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(
            result.len(),
            1,
            "Should not flag references in multi-line HTML comments"
        );
        assert!(result[0].message.contains("undefined"));

        // Case 4: comments mixed with a defined link and an undefined image.
        let content = r#"<!-- Comment with [1:] pattern -->
Valid [link][ref]
<!-- More [refs][in][comments] -->
![image][missing]

[ref]: https://example.com"#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1, "Should only flag missing image reference");
        assert!(result[0].message.contains("missing"));
    }
1353
1354 #[test]
1355 fn test_frontmatter_ignored() {
1356 let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1359 shortcut_syntax: true,
1360 ..Default::default()
1361 });
1362
1363 let content = r#"---
1365layout: post
1366title: "My Jekyll Post"
1367date: 2023-01-01
1368categories: blog
1369tags: ["test", "example"]
1370author: John Doe
1371---
1372
1373# My Blog Post
1374
1375This is the actual markdown content that should be linted.
1376
1377[undefined] reference should be flagged.
1378
1379## Section 1
1380
1381Some content here."#;
1382 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1383 let result = rule.check(&ctx).unwrap();
1384
1385 assert_eq!(
1387 result.len(),
1388 1,
1389 "Should only flag the undefined reference outside frontmatter"
1390 );
1391 assert!(result[0].message.contains("undefined"));
1392
1393 let content = r#"+++
1395title = "My Post"
1396tags = ["example", "test"]
1397+++
1398
1399# Content
1400
1401[missing] reference should be flagged."#;
1402 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1403 let result = rule.check(&ctx).unwrap();
1404 assert_eq!(
1405 result.len(),
1406 1,
1407 "Should only flag the undefined reference outside TOML frontmatter"
1408 );
1409 assert!(result[0].message.contains("missing"));
1410 }
1411
1412 #[test]
1413 fn test_mkdocs_snippet_markers_not_flagged() {
1414 let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1417 shortcut_syntax: true,
1418 ..Default::default()
1419 });
1420
1421 let content = r#"# Document with MkDocs Snippets
1423
1424Some content here.
1425
1426# -8<- [start:remote-content]
1427
1428This is the remote content section.
1429
1430# -8<- [end:remote-content]
1431
1432More content here.
1433
1434<!-- --8<-- [start:another-section] -->
1435Content in another section
1436<!-- --8<-- [end:another-section] -->"#;
1437 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
1438 let result = rule.check(&ctx).unwrap();
1439
1440 assert_eq!(
1442 result.len(),
1443 0,
1444 "Should not flag MkDocs snippet markers as undefined references"
1445 );
1446
1447 let content = r#"# Document
1450
1451# -8<- [start:section]
1452Content with [reference] inside snippet section
1453# -8<- [end:section]
1454
1455Regular [undefined] reference outside snippet markers."#;
1456 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs, None);
1457 let result = rule.check(&ctx).unwrap();
1458
1459 assert_eq!(
1460 result.len(),
1461 2,
1462 "Should flag undefined references but skip snippet marker lines"
1463 );
1464 assert!(result[0].message.contains("reference"));
1466 assert!(result[1].message.contains("undefined"));
1467
1468 let content = r#"# Document
1470
1471# -8<- [start:section]
1472# -8<- [end:section]"#;
1473 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1474 let result = rule.check(&ctx).unwrap();
1475
1476 assert_eq!(
1477 result.len(),
1478 2,
1479 "In standard mode, snippet markers should be flagged as undefined references"
1480 );
1481 }
1482
1483 #[test]
1484 fn test_pandoc_citations_not_flagged() {
1485 let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1488 shortcut_syntax: true,
1489 ..Default::default()
1490 });
1491
1492 let content = r#"# Research Paper
1493
1494We are using the **bookdown** package [@R-bookdown] in this sample book.
1495This was built on top of R Markdown and **knitr** [@xie2015].
1496
1497Multiple citations [@citation1; @citation2; @citation3] are also supported.
1498
1499Regular [undefined] reference should still be flagged.
1500"#;
1501 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1502 let result = rule.check(&ctx).unwrap();
1503
1504 assert_eq!(
1506 result.len(),
1507 1,
1508 "Should only flag the undefined reference, not Pandoc citations"
1509 );
1510 assert!(result[0].message.contains("undefined"));
1511 }
1512
1513 #[test]
1514 fn test_pandoc_inline_footnotes_not_flagged() {
1515 let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1518 shortcut_syntax: true,
1519 ..Default::default()
1520 });
1521
1522 let content = r#"# Math Document
1523
1524You can use math in footnotes like this^[where we mention $p = \frac{a}{b}$].
1525
1526Another footnote^[with some text and a [link](https://example.com)].
1527
1528But this [reference] without ^ should be flagged.
1529"#;
1530 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1531 let result = rule.check(&ctx).unwrap();
1532
1533 assert_eq!(
1535 result.len(),
1536 1,
1537 "Should only flag the regular reference, not inline footnotes"
1538 );
1539 assert!(result[0].message.contains("reference"));
1540 }
1541
1542 #[test]
1543 fn test_github_alerts_not_flagged() {
1544 let rule = MD052ReferenceLinkImages::from_config_struct(MD052Config {
1547 shortcut_syntax: true,
1548 ..Default::default()
1549 });
1550
1551 let content = r#"# Document with GitHub Alerts
1553
1554> [!NOTE]
1555> This is a note alert.
1556
1557> [!TIP]
1558> This is a tip alert.
1559
1560> [!IMPORTANT]
1561> This is an important alert.
1562
1563> [!WARNING]
1564> This is a warning alert.
1565
1566> [!CAUTION]
1567> This is a caution alert.
1568
1569Regular content with [undefined] reference."#;
1570 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1571 let result = rule.check(&ctx).unwrap();
1572
1573 assert_eq!(
1575 result.len(),
1576 1,
1577 "Should only flag the undefined reference, not GitHub alerts"
1578 );
1579 assert!(result[0].message.contains("undefined"));
1580 assert_eq!(result[0].line, 18); let content = r#"> [!TIP]
1584> Here's a useful tip about [something].
1585> Multiple lines are allowed.
1586
1587[something] is mentioned but not defined."#;
1588 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1589 let result = rule.check(&ctx).unwrap();
1590
1591 assert_eq!(result.len(), 1, "Should flag undefined reference");
1595 assert!(result[0].message.contains("something"));
1596
1597 let content = r#"> [!NOTE]
1599> See [reference] for more details.
1600
1601[reference]: https://example.com"#;
1602 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1603 let result = rule.check(&ctx).unwrap();
1604
1605 assert_eq!(result.len(), 0, "Should not flag GitHub alerts or defined references");
1607 }
1608
1609 #[test]
1610 fn test_ignore_config() {
1611 let config = MD052Config {
1613 shortcut_syntax: true,
1614 ignore: vec!["Vec".to_string(), "HashMap".to_string(), "Option".to_string()],
1615 };
1616 let rule = MD052ReferenceLinkImages::from_config_struct(config);
1617
1618 let content = r#"# Document with Custom Types
1619
1620Use [Vec] for dynamic arrays.
1621Use [HashMap] for key-value storage.
1622Use [Option] for nullable values.
1623Use [Result] for error handling.
1624"#;
1625 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1626 let result = rule.check(&ctx).unwrap();
1627
1628 assert_eq!(result.len(), 1, "Should only flag names not in ignore");
1630 assert!(result[0].message.contains("Result"));
1631 }
1632
1633 #[test]
1634 fn test_ignore_case_insensitive() {
1635 let config = MD052Config {
1637 shortcut_syntax: true,
1638 ignore: vec!["Vec".to_string()],
1639 };
1640 let rule = MD052ReferenceLinkImages::from_config_struct(config);
1641
1642 let content = r#"# Case Insensitivity Test
1643
1644[Vec] should be ignored.
1645[vec] should also be ignored (different case, same match).
1646[VEC] should also be ignored (different case, same match).
1647[undefined] should be flagged (not in ignore list).
1648"#;
1649 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1650 let result = rule.check(&ctx).unwrap();
1651
1652 assert_eq!(result.len(), 1, "Should only flag non-ignored reference");
1654 assert!(result[0].message.contains("undefined"));
1655 }
1656
1657 #[test]
1658 fn test_ignore_empty_by_default() {
1659 let rule = MD052ReferenceLinkImages::new();
1661
1662 let content = "[text][undefined]";
1663 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1664 let result = rule.check(&ctx).unwrap();
1665
1666 assert_eq!(result.len(), 1);
1668 assert!(result[0].message.contains("undefined"));
1669 }
1670
1671 #[test]
1672 fn test_ignore_with_reference_links() {
1673 let config = MD052Config {
1675 shortcut_syntax: false,
1676 ignore: vec!["CustomType".to_string()],
1677 };
1678 let rule = MD052ReferenceLinkImages::from_config_struct(config);
1679
1680 let content = r#"# Test
1681
1682See [documentation][CustomType] for details.
1683See [other docs][MissingRef] for more.
1684"#;
1685 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1686 let result = rule.check(&ctx).unwrap();
1687
1688 for (i, w) in result.iter().enumerate() {
1690 eprintln!("Warning {}: {}", i, w.message);
1691 }
1692
1693 assert_eq!(result.len(), 1, "Expected 1 warning, got {}", result.len());
1696 assert!(
1697 result[0].message.contains("missingref"),
1698 "Expected 'missingref' in message: {}",
1699 result[0].message
1700 );
1701 }
1702
1703 #[test]
1704 fn test_ignore_multiple() {
1705 let config = MD052Config {
1707 shortcut_syntax: true,
1708 ignore: vec![
1709 "i32".to_string(),
1710 "u64".to_string(),
1711 "String".to_string(),
1712 "Arc".to_string(),
1713 "Mutex".to_string(),
1714 ],
1715 };
1716 let rule = MD052ReferenceLinkImages::from_config_struct(config);
1717
1718 let content = r#"# Types
1719
1720[i32] [u64] [String] [Arc] [Mutex] [Box]
1721"#;
1722 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1723 let result = rule.check(&ctx).unwrap();
1724
1725 assert_eq!(result.len(), 1);
1729 assert!(result[0].message.contains("Box"));
1730 }
1731
1732 #[test]
1733 fn test_nested_code_fences_reference_extraction() {
1734 let rule = MD052ReferenceLinkImages::new();
1739
1740 let content = "````\n```\n[ref-inside]: https://example.com\n```\n````\n\n[Use this link][ref-inside]";
1741 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1742 let result = rule.check(&ctx).unwrap();
1743
1744 assert_eq!(
1748 result.len(),
1749 1,
1750 "Reference defined inside nested code fence should not count as a definition"
1751 );
1752 assert!(result[0].message.contains("ref-inside"));
1753 }
1754}