1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::{HTML_COMMENT_PATTERN, SHORTCUT_REF_REGEX};
5use crate::utils::skip_context::{is_in_math_context, is_in_table_cell};
6use regex::Regex;
7use std::collections::{HashMap, HashSet};
8use std::sync::LazyLock;
9
10static REF_REGEX: LazyLock<Regex> =
14 LazyLock::new(|| Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap());
15
16static LIST_ITEM_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap());
18
19static FENCED_CODE_START: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap());
21
22static OUTPUT_EXAMPLE_START: LazyLock<Regex> =
24 LazyLock::new(|| Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap());
25
26static GITHUB_ALERT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
29 Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]")
30 .unwrap()
31});
32
33static URL_WITH_BRACKETS: LazyLock<Regex> =
41 LazyLock::new(|| Regex::new(r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])").unwrap());
42
/// Lint rule MD052: reference-style links and images must use a label that is
/// defined somewhere in the document.
///
/// The rule carries no configuration or state.
#[derive(Debug, Clone, Default)]
pub struct MD052ReferenceLinkImages {}
50
51impl MD052ReferenceLinkImages {
52 pub fn new() -> Self {
53 Self {}
54 }
55
56 fn strip_backticks(s: &str) -> &str {
59 s.trim_start_matches('`').trim_end_matches('`')
60 }
61
62 fn is_valid_python_identifier(s: &str) -> bool {
66 if s.is_empty() {
67 return false;
68 }
69 let first_char = s.chars().next().unwrap();
70 if !first_char.is_ascii_alphabetic() && first_char != '_' {
71 return false;
72 }
73 s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
74 }
75
76 fn is_known_non_reference_pattern(text: &str) -> bool {
84 if text.chars().all(|c| c.is_ascii_digit()) {
86 return true;
87 }
88
89 if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
91 return true;
92 }
93
94 if text.contains('.')
98 && !text.contains(' ')
99 && !text.contains('-')
100 && !text.contains('_')
101 && !text.contains('`')
102 {
103 return true;
105 }
106
107 if text == "*" || text == "..." || text == "**" {
109 return true;
110 }
111
112 if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
114 return true;
115 }
116
117 if text.contains(',') || text.contains('[') || text.contains(']') {
120 return true;
122 }
123
124 if !text.contains('`')
131 && text.contains('.')
132 && !text.contains(' ')
133 && !text.contains('-')
134 && !text.contains('_')
135 {
136 return true;
137 }
138
139 if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
146 return true;
147 }
148
149 if text.len() <= 2 && !text.chars().all(|c| c.is_alphabetic()) {
151 return true;
152 }
153
154 if (text.starts_with('"') && text.ends_with('"'))
156 || (text.starts_with('\'') && text.ends_with('\''))
157 || text.contains('"')
158 || text.contains('\'')
159 {
160 return true;
161 }
162
163 if text.contains(':') && text.contains(' ') {
166 return true;
167 }
168
169 if text.starts_with('!') {
171 return true;
172 }
173
174 if text.starts_with('^') {
177 return true;
178 }
179
180 if text.starts_with('@') {
183 return true;
184 }
185
186 if text == "TOC" {
189 return true;
190 }
191
192 if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
194 return true;
195 }
196
197 let common_non_refs = [
200 "object",
202 "Object",
203 "any",
204 "Any",
205 "inv",
206 "void",
207 "bool",
208 "int",
209 "float",
210 "str",
211 "char",
212 "i8",
213 "i16",
214 "i32",
215 "i64",
216 "i128",
217 "isize",
218 "u8",
219 "u16",
220 "u32",
221 "u64",
222 "u128",
223 "usize",
224 "f32",
225 "f64",
226 "null",
228 "true",
229 "false",
230 "NaN",
231 "Infinity",
232 "object Object",
234 ];
235
236 if common_non_refs.contains(&text) {
237 return true;
238 }
239
240 false
241 }
242
243 fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
245 code_spans
246 .iter()
247 .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
248 }
249
250 fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
252 for m in HTML_COMMENT_PATTERN.find_iter(content) {
253 if m.start() <= byte_pos && byte_pos < m.end() {
254 return true;
255 }
256 }
257 false
258 }
259
260 fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
262 for html_tag in ctx.html_tags().iter() {
264 if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
265 return true;
266 }
267 }
268 false
269 }
270
271 fn extract_references(&self, content: &str, mkdocs_mode: bool) -> HashSet<String> {
272 use crate::config::MarkdownFlavor;
273 use crate::utils::skip_context::is_mkdocs_snippet_line;
274
275 let mut references = HashSet::new();
276 let mut in_code_block = false;
277 let mut code_fence_marker = String::new();
278
279 for line in content.lines() {
280 if is_mkdocs_snippet_line(
282 line,
283 if mkdocs_mode {
284 MarkdownFlavor::MkDocs
285 } else {
286 MarkdownFlavor::Standard
287 },
288 ) {
289 continue;
290 }
291 if let Some(cap) = FENCED_CODE_START.captures(line) {
293 if let Some(fence) = cap.get(2) {
294 let fence_str = fence.as_str();
296 if !in_code_block {
297 in_code_block = true;
298 code_fence_marker = fence_str.to_string();
299 } else if line.trim_start().starts_with(&code_fence_marker) {
300 let trimmed = line.trim_start();
302 if trimmed.starts_with(&code_fence_marker) {
304 let after_fence = &trimmed[code_fence_marker.len()..];
305 if after_fence.trim().is_empty() {
306 in_code_block = false;
307 code_fence_marker.clear();
308 }
309 }
310 }
311 }
312 continue;
313 }
314
315 if in_code_block {
317 continue;
318 }
319
320 if line.trim_start().starts_with("*[") {
323 continue;
324 }
325
326 if let Some(cap) = REF_REGEX.captures(line) {
327 if let Some(reference) = cap.get(1) {
329 references.insert(reference.as_str().to_lowercase());
330 }
331 }
332 }
333
334 references
335 }
336
337 fn find_undefined_references(
338 &self,
339 content: &str,
340 references: &HashSet<String>,
341 ctx: &crate::lint_context::LintContext,
342 mkdocs_mode: bool,
343 ) -> Vec<(usize, usize, usize, String)> {
344 let mut undefined = Vec::new();
345 let mut reported_refs = HashMap::new();
346 let mut in_code_block = false;
347 let mut code_fence_marker = String::new();
348 let mut in_example_section = false;
349
350 let code_spans = ctx.code_spans();
352
353 for link in &ctx.links {
355 if !link.is_reference {
356 continue; }
358
359 if ctx.is_in_jinja_range(link.byte_offset) {
361 continue;
362 }
363
364 if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
366 continue;
367 }
368
369 if Self::is_in_html_comment(content, link.byte_offset) {
371 continue;
372 }
373
374 if Self::is_in_html_tag(ctx, link.byte_offset) {
376 continue;
377 }
378
379 if is_in_math_context(ctx, link.byte_offset) {
381 continue;
382 }
383
384 if is_in_table_cell(ctx, link.line, link.start_col) {
386 continue;
387 }
388
389 if ctx.line_info(link.line).is_some_and(|info| info.in_front_matter) {
391 continue;
392 }
393
394 if let Some(ref_id) = &link.reference_id {
395 let reference_lower = ref_id.to_lowercase();
396
397 if Self::is_known_non_reference_pattern(ref_id) {
399 continue;
400 }
401
402 let stripped_ref = Self::strip_backticks(ref_id);
406 let stripped_text = Self::strip_backticks(&link.text);
407 if mkdocs_mode
408 && (is_mkdocs_auto_reference(stripped_ref)
409 || is_mkdocs_auto_reference(stripped_text)
410 || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
411 || (link.text.as_ref() != stripped_text && Self::is_valid_python_identifier(stripped_text)))
412 {
413 continue;
414 }
415
416 if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
418 if let Some(line_info) = ctx.line_info(link.line) {
420 if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
421 in_example_section = true;
422 continue;
423 }
424
425 if in_example_section {
426 continue;
427 }
428
429 if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
431 continue;
432 }
433
434 let trimmed = line_info.content(ctx.content).trim_start();
436 if trimmed.starts_with('<') {
437 continue;
438 }
439 }
440
441 let match_len = link.byte_end - link.byte_offset;
442 undefined.push((link.line - 1, link.start_col, match_len, ref_id.to_string()));
443 reported_refs.insert(reference_lower, true);
444 }
445 }
446 }
447
448 for image in &ctx.images {
450 if !image.is_reference {
451 continue; }
453
454 if ctx.is_in_jinja_range(image.byte_offset) {
456 continue;
457 }
458
459 if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
461 continue;
462 }
463
464 if Self::is_in_html_comment(content, image.byte_offset) {
466 continue;
467 }
468
469 if Self::is_in_html_tag(ctx, image.byte_offset) {
471 continue;
472 }
473
474 if is_in_math_context(ctx, image.byte_offset) {
476 continue;
477 }
478
479 if is_in_table_cell(ctx, image.line, image.start_col) {
481 continue;
482 }
483
484 if ctx.line_info(image.line).is_some_and(|info| info.in_front_matter) {
486 continue;
487 }
488
489 if let Some(ref_id) = &image.reference_id {
490 let reference_lower = ref_id.to_lowercase();
491
492 if Self::is_known_non_reference_pattern(ref_id) {
494 continue;
495 }
496
497 let stripped_ref = Self::strip_backticks(ref_id);
501 let stripped_alt = Self::strip_backticks(&image.alt_text);
502 if mkdocs_mode
503 && (is_mkdocs_auto_reference(stripped_ref)
504 || is_mkdocs_auto_reference(stripped_alt)
505 || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
506 || (image.alt_text.as_ref() != stripped_alt && Self::is_valid_python_identifier(stripped_alt)))
507 {
508 continue;
509 }
510
511 if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
513 if let Some(line_info) = ctx.line_info(image.line) {
515 if OUTPUT_EXAMPLE_START.is_match(line_info.content(ctx.content)) {
516 in_example_section = true;
517 continue;
518 }
519
520 if in_example_section {
521 continue;
522 }
523
524 if LIST_ITEM_REGEX.is_match(line_info.content(ctx.content)) {
526 continue;
527 }
528
529 let trimmed = line_info.content(ctx.content).trim_start();
531 if trimmed.starts_with('<') {
532 continue;
533 }
534 }
535
536 let match_len = image.byte_end - image.byte_offset;
537 undefined.push((image.line - 1, image.start_col, match_len, ref_id.to_string()));
538 reported_refs.insert(reference_lower, true);
539 }
540 }
541 }
542
543 let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
545
546 for link in &ctx.links {
548 covered_ranges.push((link.byte_offset, link.byte_end));
549 }
550
551 for image in &ctx.images {
553 covered_ranges.push((image.byte_offset, image.byte_end));
554 }
555
556 covered_ranges.sort_by_key(|&(start, _)| start);
558
559 let lines: Vec<&str> = content.lines().collect();
562 in_example_section = false; for (line_num, line) in lines.iter().enumerate() {
565 if ctx.line_info(line_num + 1).is_some_and(|info| info.in_front_matter) {
567 continue;
568 }
569
570 if let Some(cap) = FENCED_CODE_START.captures(line) {
572 if let Some(fence) = cap.get(2) {
573 let fence_str = fence.as_str();
575 if !in_code_block {
576 in_code_block = true;
577 code_fence_marker = fence_str.to_string();
578 } else if line.trim_start().starts_with(&code_fence_marker) {
579 let trimmed = line.trim_start();
581 if trimmed.starts_with(&code_fence_marker) {
583 let after_fence = &trimmed[code_fence_marker.len()..];
584 if after_fence.trim().is_empty() {
585 in_code_block = false;
586 code_fence_marker.clear();
587 }
588 }
589 }
590 }
591 continue;
592 }
593
594 if in_code_block {
595 continue;
596 }
597
598 if OUTPUT_EXAMPLE_START.is_match(line) {
600 in_example_section = true;
601 continue;
602 }
603
604 if in_example_section {
605 if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
607 in_example_section = false;
608 } else {
609 continue;
610 }
611 }
612
613 if LIST_ITEM_REGEX.is_match(line) {
615 continue;
616 }
617
618 let trimmed_line = line.trim_start();
620 if trimmed_line.starts_with('<') {
621 continue;
622 }
623
624 if GITHUB_ALERT_REGEX.is_match(line) {
626 continue;
627 }
628
629 if trimmed_line.starts_with("*[") {
632 continue;
633 }
634
635 let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
638 for mat in URL_WITH_BRACKETS.find_iter(line) {
639 let url_str = mat.as_str();
641 let url_start = mat.start();
642
643 let mut idx = 0;
645 while idx < url_str.len() {
646 if let Some(bracket_start) = url_str[idx..].find('[') {
647 let bracket_start_abs = url_start + idx + bracket_start;
648 if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
649 let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
650 url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
651 idx += bracket_start + bracket_end + 2;
652 } else {
653 break;
654 }
655 } else {
656 break;
657 }
658 }
659 }
660
661 if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
663 for cap in captures {
664 if let Some(ref_match) = cap.get(1) {
665 let bracket_start = cap.get(0).unwrap().start();
667 let bracket_end = cap.get(0).unwrap().end();
668
669 let is_in_url = url_bracket_ranges
671 .iter()
672 .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
673
674 if is_in_url {
675 continue;
676 }
677
678 if bracket_start > 0 {
681 if let Some(byte) = line.as_bytes().get(bracket_start.saturating_sub(1))
683 && *byte == b'^'
684 {
685 continue; }
687 }
688
689 let reference = ref_match.as_str();
690 let reference_lower = reference.to_lowercase();
691
692 if Self::is_known_non_reference_pattern(reference) {
694 continue;
695 }
696
697 if let Some(alert_type) = reference.strip_prefix('!')
699 && matches!(
700 alert_type,
701 "NOTE"
702 | "TIP"
703 | "WARNING"
704 | "IMPORTANT"
705 | "CAUTION"
706 | "INFO"
707 | "SUCCESS"
708 | "FAILURE"
709 | "DANGER"
710 | "BUG"
711 | "EXAMPLE"
712 | "QUOTE"
713 )
714 {
715 continue;
716 }
717
718 if mkdocs_mode
721 && (reference.starts_with("start:") || reference.starts_with("end:"))
722 && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
723 || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
724 {
725 continue;
726 }
727
728 let stripped_ref = Self::strip_backticks(reference);
731 if mkdocs_mode
732 && (is_mkdocs_auto_reference(stripped_ref)
733 || (reference != stripped_ref && Self::is_valid_python_identifier(stripped_ref)))
734 {
735 continue;
736 }
737
738 if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
739 let full_match = cap.get(0).unwrap();
740 let col = full_match.start();
741
742 let code_spans = ctx.code_spans();
744 if Self::is_in_code_span(line_num + 1, col, &code_spans) {
745 continue;
746 }
747
748 let line_start_byte = ctx.line_offsets[line_num];
750 let byte_pos = line_start_byte + col;
751
752 if ctx.is_in_jinja_range(byte_pos) {
754 continue;
755 }
756
757 if crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block(
759 &ctx.code_blocks,
760 byte_pos,
761 ) {
762 continue;
763 }
764
765 if Self::is_in_html_comment(content, byte_pos) {
767 continue;
768 }
769
770 if Self::is_in_html_tag(ctx, byte_pos) {
772 continue;
773 }
774
775 if is_in_math_context(ctx, byte_pos) {
777 continue;
778 }
779
780 if is_in_table_cell(ctx, line_num + 1, col) {
782 continue;
783 }
784
785 let byte_end = byte_pos + (full_match.end() - full_match.start());
786
787 let mut is_covered = false;
789 for &(range_start, range_end) in &covered_ranges {
790 if range_start <= byte_pos && byte_end <= range_end {
791 is_covered = true;
793 break;
794 }
795 if range_start > byte_end {
796 break;
798 }
799 }
800
801 if is_covered {
802 continue;
803 }
804
805 let line_chars: Vec<char> = line.chars().collect();
810 if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
811 let mut bracket_count = 1; let mut check_pos = col.saturating_sub(2);
814 let mut found_opening = false;
815
816 while check_pos > 0 && check_pos < line_chars.len() {
817 match line_chars.get(check_pos) {
818 Some(&']') => bracket_count += 1,
819 Some(&'[') => {
820 bracket_count -= 1;
821 if bracket_count == 0 {
822 if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
824 found_opening = true;
825 }
826 break;
827 }
828 }
829 _ => {}
830 }
831 if check_pos == 0 {
832 break;
833 }
834 check_pos = check_pos.saturating_sub(1);
835 }
836
837 if found_opening {
838 continue;
840 }
841 }
842
843 let before_text = &line[..col];
846 if before_text.contains("\\]") {
847 if let Some(escaped_close_pos) = before_text.rfind("\\]") {
849 let search_text = &before_text[..escaped_close_pos];
850 if search_text.contains("\\[") {
851 continue;
853 }
854 }
855 }
856
857 let match_len = full_match.end() - full_match.start();
858 undefined.push((line_num, col, match_len, reference.to_string()));
859 reported_refs.insert(reference_lower, true);
860 }
861 }
862 }
863 }
864 }
865
866 undefined
867 }
868}
869
870impl Rule for MD052ReferenceLinkImages {
871 fn name(&self) -> &'static str {
872 "MD052"
873 }
874
875 fn description(&self) -> &'static str {
876 "Reference links and images should use a reference that exists"
877 }
878
879 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
880 let content = ctx.content;
881 let mut warnings = Vec::new();
882
883 if !content.contains('[') {
885 return Ok(warnings);
886 }
887
888 let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
890
891 let references = self.extract_references(content, mkdocs_mode);
892
893 for (line_num, col, match_len, reference) in
895 self.find_undefined_references(content, &references, ctx, mkdocs_mode)
896 {
897 let lines: Vec<&str> = content.lines().collect();
898 let line_content = lines.get(line_num).unwrap_or(&"");
899
900 let (start_line, start_col, end_line, end_col) =
902 calculate_match_range(line_num + 1, line_content, col, match_len);
903
904 warnings.push(LintWarning {
905 rule_name: Some(self.name().to_string()),
906 line: start_line,
907 column: start_col,
908 end_line,
909 end_column: end_col,
910 message: format!("Reference '{reference}' not found"),
911 severity: Severity::Warning,
912 fix: None,
913 });
914 }
915
916 Ok(warnings)
917 }
918
919 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
921 ctx.content.is_empty() || !ctx.likely_has_links_or_images()
923 }
924
925 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
926 let content = ctx.content;
927 Ok(content.to_string())
929 }
930
931 fn as_any(&self) -> &dyn std::any::Any {
932 self
933 }
934
935 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
936 where
937 Self: Sized,
938 {
939 Box::new(MD052ReferenceLinkImages::new())
941 }
942}
943
944#[cfg(test)]
945mod tests {
946 use super::*;
947 use crate::lint_context::LintContext;
948
949 #[test]
950 fn test_valid_reference_link() {
951 let rule = MD052ReferenceLinkImages::new();
952 let content = "[text][ref]\n\n[ref]: https://example.com";
953 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
954 let result = rule.check(&ctx).unwrap();
955
956 assert_eq!(result.len(), 0);
957 }
958
959 #[test]
960 fn test_undefined_reference_link() {
961 let rule = MD052ReferenceLinkImages::new();
962 let content = "[text][undefined]";
963 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
964 let result = rule.check(&ctx).unwrap();
965
966 assert_eq!(result.len(), 1);
967 assert!(result[0].message.contains("Reference 'undefined' not found"));
968 }
969
970 #[test]
971 fn test_valid_reference_image() {
972 let rule = MD052ReferenceLinkImages::new();
973 let content = "![alt][img]\n\n[img]: image.jpg";
974 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
975 let result = rule.check(&ctx).unwrap();
976
977 assert_eq!(result.len(), 0);
978 }
979
980 #[test]
981 fn test_undefined_reference_image() {
982 let rule = MD052ReferenceLinkImages::new();
983 let content = "![alt][missing]";
984 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
985 let result = rule.check(&ctx).unwrap();
986
987 assert_eq!(result.len(), 1);
988 assert!(result[0].message.contains("Reference 'missing' not found"));
989 }
990
991 #[test]
992 fn test_case_insensitive_references() {
993 let rule = MD052ReferenceLinkImages::new();
994 let content = "[Text][REF]\n\n[ref]: https://example.com";
995 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
996 let result = rule.check(&ctx).unwrap();
997
998 assert_eq!(result.len(), 0);
999 }
1000
1001 #[test]
1002 fn test_shortcut_reference_valid() {
1003 let rule = MD052ReferenceLinkImages::new();
1004 let content = "[ref]\n\n[ref]: https://example.com";
1005 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1006 let result = rule.check(&ctx).unwrap();
1007
1008 assert_eq!(result.len(), 0);
1009 }
1010
1011 #[test]
1012 fn test_shortcut_reference_undefined() {
1013 let rule = MD052ReferenceLinkImages::new();
1014 let content = "[undefined]";
1015 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1016 let result = rule.check(&ctx).unwrap();
1017
1018 assert_eq!(result.len(), 1);
1019 assert!(result[0].message.contains("Reference 'undefined' not found"));
1020 }
1021
1022 #[test]
1023 fn test_inline_links_ignored() {
1024 let rule = MD052ReferenceLinkImages::new();
1025 let content = "[text](https://example.com)";
1026 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1027 let result = rule.check(&ctx).unwrap();
1028
1029 assert_eq!(result.len(), 0);
1030 }
1031
1032 #[test]
1033 fn test_inline_images_ignored() {
1034 let rule = MD052ReferenceLinkImages::new();
1035 let content = "";
1036 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1037 let result = rule.check(&ctx).unwrap();
1038
1039 assert_eq!(result.len(), 0);
1040 }
1041
1042 #[test]
1043 fn test_references_in_code_blocks_ignored() {
1044 let rule = MD052ReferenceLinkImages::new();
1045 let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
1046 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1047 let result = rule.check(&ctx).unwrap();
1048
1049 assert_eq!(result.len(), 0);
1050 }
1051
1052 #[test]
1053 fn test_references_in_inline_code_ignored() {
1054 let rule = MD052ReferenceLinkImages::new();
1055 let content = "`[undefined]`";
1056 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1057 let result = rule.check(&ctx).unwrap();
1058
1059 assert_eq!(result.len(), 0);
1061 }
1062
1063 #[test]
1064 fn test_comprehensive_inline_code_detection() {
1065 let rule = MD052ReferenceLinkImages::new();
1066 let content = r#"# Test
1067
1068This `[inside]` should be ignored.
1069This [outside] should be flagged.
1070Reference links `[text][ref]` in code are ignored.
1071Regular reference [text][missing] should be flagged.
1072Images `![alt][img]` in code are ignored.
1073Regular image ![alt][badimg] should be flagged.
1074
1075Multiple `[one]` and `[two]` in code ignored, but [three] is not.
1076
1077```
1078[code block content] should be ignored
1079```
1080
1081`Multiple [refs] in [same] code span` ignored."#;
1082
1083 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1084 let result = rule.check(&ctx).unwrap();
1085
1086 assert_eq!(result.len(), 4);
1088
1089 let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
1090 assert!(messages.iter().any(|m| m.contains("outside")));
1091 assert!(messages.iter().any(|m| m.contains("missing")));
1092 assert!(messages.iter().any(|m| m.contains("badimg")));
1093 assert!(messages.iter().any(|m| m.contains("three")));
1094
1095 assert!(!messages.iter().any(|m| m.contains("inside")));
1097 assert!(!messages.iter().any(|m| m.contains("one")));
1098 assert!(!messages.iter().any(|m| m.contains("two")));
1099 assert!(!messages.iter().any(|m| m.contains("refs")));
1100 assert!(!messages.iter().any(|m| m.contains("same")));
1101 }
1102
1103 #[test]
1104 fn test_multiple_undefined_references() {
1105 let rule = MD052ReferenceLinkImages::new();
1106 let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
1107 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1108 let result = rule.check(&ctx).unwrap();
1109
1110 assert_eq!(result.len(), 3);
1111 assert!(result[0].message.contains("ref1"));
1112 assert!(result[1].message.contains("ref2"));
1113 assert!(result[2].message.contains("ref3"));
1114 }
1115
1116 #[test]
1117 fn test_mixed_valid_and_undefined() {
1118 let rule = MD052ReferenceLinkImages::new();
1119 let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
1120 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1121 let result = rule.check(&ctx).unwrap();
1122
1123 assert_eq!(result.len(), 1);
1124 assert!(result[0].message.contains("missing"));
1125 }
1126
1127 #[test]
1128 fn test_empty_reference() {
1129 let rule = MD052ReferenceLinkImages::new();
1130 let content = "[text][]\n\n[ref]: https://example.com";
1131 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1132 let result = rule.check(&ctx).unwrap();
1133
1134 assert_eq!(result.len(), 1);
1136 }
1137
1138 #[test]
1139 fn test_escaped_brackets_ignored() {
1140 let rule = MD052ReferenceLinkImages::new();
1141 let content = "\\[not a link\\]";
1142 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1143 let result = rule.check(&ctx).unwrap();
1144
1145 assert_eq!(result.len(), 0);
1146 }
1147
1148 #[test]
1149 fn test_list_items_ignored() {
1150 let rule = MD052ReferenceLinkImages::new();
1151 let content = "- [undefined]\n* [another]\n+ [third]";
1152 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1153 let result = rule.check(&ctx).unwrap();
1154
1155 assert_eq!(result.len(), 0);
1157 }
1158
1159 #[test]
1160 fn test_output_example_section_ignored() {
1161 let rule = MD052ReferenceLinkImages::new();
1162 let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
1163 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1164 let result = rule.check(&ctx).unwrap();
1165
1166 assert_eq!(result.len(), 1);
1168 assert!(result[0].message.contains("missing"));
1169 }
1170
1171 #[test]
1172 fn test_reference_definitions_in_code_blocks_ignored() {
1173 let rule = MD052ReferenceLinkImages::new();
1174 let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
1175 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1176 let result = rule.check(&ctx).unwrap();
1177
1178 assert_eq!(result.len(), 1);
1180 assert!(result[0].message.contains("ref"));
1181 }
1182
1183 #[test]
1184 fn test_multiple_references_to_same_undefined() {
1185 let rule = MD052ReferenceLinkImages::new();
1186 let content = "[first][missing] [second][missing] [third][missing]";
1187 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1188 let result = rule.check(&ctx).unwrap();
1189
1190 assert_eq!(result.len(), 1);
1192 assert!(result[0].message.contains("missing"));
1193 }
1194
1195 #[test]
1196 fn test_reference_with_special_characters() {
1197 let rule = MD052ReferenceLinkImages::new();
1198 let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
1199 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1200 let result = rule.check(&ctx).unwrap();
1201
1202 assert_eq!(result.len(), 0);
1203 }
1204
1205 #[test]
1206 fn test_issue_51_html_attribute_not_reference() {
1207 let rule = MD052ReferenceLinkImages::new();
1209 let content = r#"# Example
1210
1211## Test
1212
1213Want to fill out this form?
1214
1215<form method="post">
1216 <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
1217</form>"#;
1218 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1219 let result = rule.check(&ctx).unwrap();
1220
1221 assert_eq!(
1222 result.len(),
1223 0,
1224 "HTML attributes with square brackets should not be flagged as undefined references"
1225 );
1226 }
1227
1228 #[test]
1229 fn test_extract_references() {
1230 let rule = MD052ReferenceLinkImages::new();
1231 let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
1232 let refs = rule.extract_references(content, false);
1233
1234 assert_eq!(refs.len(), 3);
1235 assert!(refs.contains("ref1"));
1236 assert!(refs.contains("ref2"));
1237 assert!(refs.contains("ref3"));
1238 }
1239
1240 #[test]
1241 fn test_inline_code_not_flagged() {
1242 let rule = MD052ReferenceLinkImages::new();
1243
1244 let content = r#"# Test
1246
1247Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.
1248
1249Also, `[todo]` is not a reference link.
1250
1251But this [reference] should be flagged.
1252
1253And this `[inline code]` should not be flagged.
1254"#;
1255
1256 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1257 let warnings = rule.check(&ctx).unwrap();
1258
1259 assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
1261 assert!(warnings[0].message.contains("'reference'"));
1262 }
1263
1264 #[test]
1265 fn test_code_block_references_ignored() {
1266 let rule = MD052ReferenceLinkImages::new();
1267
1268 let content = r#"# Test
1269
1270```markdown
1271[undefined] reference in code block
1272![undefined] image in code block
1273```
1274
1275[real-undefined] reference outside
1276"#;
1277
1278 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1279 let warnings = rule.check(&ctx).unwrap();
1280
1281 assert_eq!(warnings.len(), 1);
1283 assert!(warnings[0].message.contains("'real-undefined'"));
1284 }
1285
/// Bracketed text that lives inside HTML comments must not be reported as an
/// undefined reference; references in regular content are still checked.
#[test]
fn test_html_comments_ignored() {
    let rule = MD052ReferenceLinkImages::new();
    // Lints `markdown` with the Standard flavor and hands back the warnings.
    let lint = |markdown: &str| {
        let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    };

    // Single-line comments containing a `[1:]` slice, followed by a fenced block.
    let slice_in_comment = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
<!--- set_env EDITOR 'python3 fake_editor.py' -->

```bash
$ python3 vote.py
3 votes for: 2
2 votes for: 3, 4
```"#;
    let warnings = lint(slice_in_comment);
    assert_eq!(warnings.len(), 0, "Should not flag [1:] inside HTML comments");

    // Reference-looking text in comments on either side of a real undefined reference.
    let comments_around_reference = r#"<!-- This is [ref1] and [ref2][ref3] -->
Normal [text][undefined]
<!-- Another [comment][with] references -->"#;
    let warnings = lint(comments_around_reference);
    assert_eq!(
        warnings.len(),
        1,
        "Should only flag the undefined reference outside comments"
    );
    assert!(warnings[0].message.contains("undefined"));

    // A comment spanning several lines, then a real undefined reference.
    let multi_line_comment = r#"<!--
[ref1]
[ref2][ref3]
-->
[actual][undefined]"#;
    let warnings = lint(multi_line_comment);
    assert_eq!(
        warnings.len(),
        1,
        "Should not flag references in multi-line HTML comments"
    );
    assert!(warnings[0].message.contains("undefined"));

    // Comments mixed with a defined link and an image whose label has no definition.
    let mixed_document = r#"<!-- Comment with [1:] pattern -->
Valid [link][ref]
<!-- More [refs][in][comments] -->
![image][missing]

[ref]: https://example.com"#;
    let warnings = lint(mixed_document);
    assert_eq!(warnings.len(), 1, "Should only flag missing image reference");
    assert!(warnings[0].message.contains("missing"));
}
1344
/// Bracketed values inside a leading frontmatter block (YAML `---` or TOML
/// `+++`) must not be reported; the document body is still linted.
#[test]
fn test_frontmatter_ignored() {
    let rule = MD052ReferenceLinkImages::new();
    // Lints `markdown` with the Standard flavor and hands back the warnings.
    let lint = |markdown: &str| {
        let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    };

    // YAML frontmatter whose `tags` value looks like bracketed references.
    let yaml_document = r#"---
layout: post
title: "My Jekyll Post"
date: 2023-01-01
categories: blog
tags: ["test", "example"]
author: John Doe
---

# My Blog Post

This is the actual markdown content that should be linted.

[undefined] reference should be flagged.

## Section 1

Some content here."#;
    let warnings = lint(yaml_document);
    assert_eq!(
        warnings.len(),
        1,
        "Should only flag the undefined reference outside frontmatter"
    );
    assert!(warnings[0].message.contains("undefined"));

    // TOML frontmatter with an array value in brackets.
    let toml_document = r#"+++
title = "My Post"
tags = ["example", "test"]
+++

# Content

[missing] reference should be flagged."#;
    let warnings = lint(toml_document);
    assert_eq!(
        warnings.len(),
        1,
        "Should only flag the undefined reference outside TOML frontmatter"
    );
    assert!(warnings[0].message.contains("missing"));
}
1398
/// MkDocs snippet section markers (`[start:...]` / `[end:...]`) are expected
/// to be ignored under the MkDocs flavor and reported under the Standard one.
#[test]
fn test_mkdocs_snippet_markers_not_flagged() {
    let rule = MD052ReferenceLinkImages::new();
    // Lints `markdown` under the requested flavor and hands back the warnings.
    let lint = |markdown: &str, flavor: crate::config::MarkdownFlavor| {
        let ctx = LintContext::new(markdown, flavor);
        rule.check(&ctx).unwrap()
    };

    // Markers written both as `# -8<-` lines and inside HTML comments.
    let markers_only = r#"# Document with MkDocs Snippets

Some content here.

# -8<- [start:remote-content]

This is the remote content section.

# -8<- [end:remote-content]

More content here.

<!-- --8<-- [start:another-section] -->
Content in another section
<!-- --8<-- [end:another-section] -->"#;
    let warnings = lint(markers_only, crate::config::MarkdownFlavor::MkDocs);
    assert_eq!(
        warnings.len(),
        0,
        "Should not flag MkDocs snippet markers as undefined references"
    );

    // Real undefined references between and after the markers are still reported,
    // in document order.
    let markers_with_references = r#"# Document

# -8<- [start:section]
Content with [reference] inside snippet section
# -8<- [end:section]

Regular [undefined] reference outside snippet markers."#;
    let warnings = lint(markers_with_references, crate::config::MarkdownFlavor::MkDocs);
    assert_eq!(
        warnings.len(),
        2,
        "Should flag undefined references but skip snippet marker lines"
    );
    assert!(warnings[0].message.contains("reference"));
    assert!(warnings[1].message.contains("undefined"));

    // The same markers linted with the Standard flavor: both lines are reported.
    let markers_in_standard = r#"# Document

# -8<- [start:section]
# -8<- [end:section]"#;
    let warnings = lint(markers_in_standard, crate::config::MarkdownFlavor::Standard);
    assert_eq!(
        warnings.len(),
        2,
        "In standard mode, snippet markers should be flagged as undefined references"
    );
}
1465
/// Pandoc-style citations such as `[@key]` and `[@a; @b]` are not reference
/// links; an ordinary undefined reference in the same document still is.
#[test]
fn test_pandoc_citations_not_flagged() {
    let markdown = r#"# Research Paper

We are using the **bookdown** package [@R-bookdown] in this sample book.
This was built on top of R Markdown and **knitr** [@xie2015].

Multiple citations [@citation1; @citation2; @citation3] are also supported.

Regular [undefined] reference should still be flagged.
"#;

    let rule = MD052ReferenceLinkImages::new();
    let warnings = rule
        .check(&LintContext::new(markdown, crate::config::MarkdownFlavor::Standard))
        .unwrap();

    assert_eq!(
        warnings.len(),
        1,
        "Should only flag the undefined reference, not Pandoc citations"
    );
    assert!(warnings[0].message.contains("undefined"));
}
1491
/// Pandoc inline footnotes (`^[...]`) are not reference links, even when they
/// contain math or an inline link; a bare `[reference]` is still reported.
#[test]
fn test_pandoc_inline_footnotes_not_flagged() {
    let markdown = r#"# Math Document

You can use math in footnotes like this^[where we mention $p = \frac{a}{b}$].

Another footnote^[with some text and a [link](https://example.com)].

But this [reference] without ^ should be flagged.
"#;

    let rule = MD052ReferenceLinkImages::new();
    let warnings = rule
        .check(&LintContext::new(markdown, crate::config::MarkdownFlavor::Standard))
        .unwrap();

    assert_eq!(
        warnings.len(),
        1,
        "Should only flag the regular reference, not inline footnotes"
    );
    assert!(warnings[0].message.contains("reference"));
}
1516
/// GitHub alert markers (`> [!NOTE]`, `> [!TIP]`, ...) are not reference
/// links, while ordinary references around them are still validated.
#[test]
fn test_github_alerts_not_flagged() {
    let rule = MD052ReferenceLinkImages::new();
    // Lints `markdown` with the Standard flavor and hands back the warnings.
    let lint = |markdown: &str| {
        let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    };

    // Five alert kinds followed by one undefined reference on line 18 of the fixture.
    let all_alert_kinds = r#"# Document with GitHub Alerts

> [!NOTE]
> This is a note alert.

> [!TIP]
> This is a tip alert.

> [!IMPORTANT]
> This is an important alert.

> [!WARNING]
> This is a warning alert.

> [!CAUTION]
> This is a caution alert.

Regular content with [undefined] reference."#;
    let warnings = lint(all_alert_kinds);
    assert_eq!(
        warnings.len(),
        1,
        "Should only flag the undefined reference, not GitHub alerts"
    );
    assert!(warnings[0].message.contains("undefined"));
    assert_eq!(warnings[0].line, 18);

    // `[something]` appears in the alert body and again in regular text;
    // exactly one warning is expected for it.
    let undefined_near_alert = r#"> [!TIP]
> Here's a useful tip about [something].
> Multiple lines are allowed.

[something] is mentioned but not defined."#;
    let warnings = lint(undefined_near_alert);
    assert_eq!(warnings.len(), 1, "Should flag undefined reference");
    assert!(warnings[0].message.contains("something"));

    // A reference used inside an alert that does have a definition.
    let defined_inside_alert = r#"> [!NOTE]
> See [reference] for more details.

[reference]: https://example.com"#;
    let warnings = lint(defined_inside_alert);
    assert_eq!(warnings.len(), 0, "Should not flag GitHub alerts or defined references");
}
1579}