1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::{HTML_COMMENT_PATTERN, SHORTCUT_REF_REGEX};
5use crate::utils::skip_context::{is_in_math_context, is_in_table_cell};
6use lazy_static::lazy_static;
7use regex::Regex;
8use std::collections::{HashMap, HashSet};
9
lazy_static! {
    // Reference definition line such as `[label]: target`. Capture group 1 is the label,
    // which may contain backslash escapes and one level of nested brackets.
    static ref REF_REGEX: Regex = Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap();

    // Start of a bullet list item (`-`, `*` or `+`), optionally followed by a
    // bracketed checkbox such as `[ ]`, `[x]` or `[X]`.
    static ref LIST_ITEM_REGEX: Regex = Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap();

    // Line that begins (after optional whitespace, group 1) with a run of three or more
    // backticks or tildes; the run itself is capture group 2.
    static ref FENCED_CODE_START: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap();

    // Heading whose entire text is "Output", "Example", "Output Style" or "Output Format".
    static ref OUTPUT_EXAMPLE_START: Regex = Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap();

    // Blockquote line that opens with an alert marker such as `> [!NOTE]`.
    static ref GITHUB_ALERT_REGEX: Regex = Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]").unwrap();

    // http(s) URL that legitimately contains square brackets: either a bracketed host made
    // of hex digits, colons, dots and percent signs, or a path that ends in `[digits]`.
    static ref URL_WITH_BRACKETS: Regex = Regex::new(
        r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])"
    ).unwrap();
}
40
/// Lint rule MD052: reference links and images should use a reference that exists.
///
/// The struct has no fields; all behaviour lives in its methods.
#[derive(Clone, Default)]
pub struct MD052ReferenceLinkImages {}
48
49impl MD052ReferenceLinkImages {
50 pub fn new() -> Self {
51 Self {}
52 }
53
54 fn strip_backticks(s: &str) -> &str {
57 s.trim_start_matches('`').trim_end_matches('`')
58 }
59
60 fn is_valid_python_identifier(s: &str) -> bool {
64 if s.is_empty() {
65 return false;
66 }
67 let first_char = s.chars().next().unwrap();
68 if !first_char.is_ascii_alphabetic() && first_char != '_' {
69 return false;
70 }
71 s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
72 }
73
74 fn is_likely_not_reference(text: &str) -> bool {
77 if text.chars().all(|c| c.is_ascii_digit()) {
79 return true;
80 }
81
82 if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
84 return true;
85 }
86
87 if text.contains('.') && !text.contains(' ') && !text.contains('-') && !text.contains('_') {
90 return true;
92 }
93
94 if text == "*" || text == "..." || text == "**" {
96 return true;
97 }
98
99 if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
101 return true;
102 }
103
104 if text.contains(',') || text.contains('[') || text.contains(']') {
107 return true;
109 }
110
111 if !text.contains('`')
118 && text.contains('.')
119 && !text.contains(' ')
120 && !text.contains('-')
121 && !text.contains('_')
122 {
123 return true;
124 }
125
126 if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
133 return true;
134 }
135
136 if text.len() <= 2 && !text.chars().all(|c| c.is_alphabetic()) {
138 return true;
139 }
140
141 if (text.starts_with('"') && text.ends_with('"'))
143 || (text.starts_with('\'') && text.ends_with('\''))
144 || text.contains('"')
145 || text.contains('\'')
146 {
147 return true;
148 }
149
150 if text.contains(':') && text.contains(' ') {
153 return true;
154 }
155
156 if text.starts_with('!') {
158 return true;
159 }
160
161 if text.starts_with('^') {
164 return true;
165 }
166
167 if text == "TOC" {
170 return true;
171 }
172
173 if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
175 return true;
176 }
177
178 let common_non_refs = [
181 "object",
183 "Object",
184 "any",
185 "Any",
186 "inv",
187 "void",
188 "bool",
189 "int",
190 "float",
191 "str",
192 "char",
193 "i8",
194 "i16",
195 "i32",
196 "i64",
197 "i128",
198 "isize",
199 "u8",
200 "u16",
201 "u32",
202 "u64",
203 "u128",
204 "usize",
205 "f32",
206 "f64",
207 "null",
209 "true",
210 "false",
211 "NaN",
212 "Infinity",
213 "object Object",
215 ];
216
217 if common_non_refs.contains(&text) {
218 return true;
219 }
220
221 false
222 }
223
224 fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
226 code_spans
227 .iter()
228 .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
229 }
230
231 fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
233 for m in HTML_COMMENT_PATTERN.find_iter(content) {
234 if m.start() <= byte_pos && byte_pos < m.end() {
235 return true;
236 }
237 }
238 false
239 }
240
241 fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
243 for html_tag in ctx.html_tags().iter() {
245 if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
246 return true;
247 }
248 }
249 false
250 }
251
252 fn extract_references(&self, content: &str, mkdocs_mode: bool) -> HashSet<String> {
253 use crate::config::MarkdownFlavor;
254 use crate::utils::skip_context::is_mkdocs_snippet_line;
255
256 let mut references = HashSet::new();
257 let mut in_code_block = false;
258 let mut code_fence_marker = String::new();
259
260 for line in content.lines() {
261 if is_mkdocs_snippet_line(
263 line,
264 if mkdocs_mode {
265 MarkdownFlavor::MkDocs
266 } else {
267 MarkdownFlavor::Standard
268 },
269 ) {
270 continue;
271 }
272 if let Some(cap) = FENCED_CODE_START.captures(line) {
274 if let Some(fence) = cap.get(2) {
275 let fence_str = fence.as_str();
277 if !in_code_block {
278 in_code_block = true;
279 code_fence_marker = fence_str.to_string();
280 } else if line.trim_start().starts_with(&code_fence_marker) {
281 let trimmed = line.trim_start();
283 if trimmed.starts_with(&code_fence_marker) {
285 let after_fence = &trimmed[code_fence_marker.len()..];
286 if after_fence.trim().is_empty() {
287 in_code_block = false;
288 code_fence_marker.clear();
289 }
290 }
291 }
292 }
293 continue;
294 }
295
296 if in_code_block {
298 continue;
299 }
300
301 if line.trim_start().starts_with("*[") {
304 continue;
305 }
306
307 if let Some(cap) = REF_REGEX.captures(line) {
308 if let Some(reference) = cap.get(1) {
310 references.insert(reference.as_str().to_lowercase());
311 }
312 }
313 }
314
315 references
316 }
317
318 fn find_undefined_references(
319 &self,
320 content: &str,
321 references: &HashSet<String>,
322 ctx: &crate::lint_context::LintContext,
323 mkdocs_mode: bool,
324 ) -> Vec<(usize, usize, usize, String)> {
325 let mut undefined = Vec::new();
326 let mut reported_refs = HashMap::new();
327 let mut in_code_block = false;
328 let mut code_fence_marker = String::new();
329 let mut in_example_section = false;
330
331 let code_spans = ctx.code_spans();
333
334 for link in &ctx.links {
336 if !link.is_reference {
337 continue; }
339
340 if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
342 continue;
343 }
344
345 if Self::is_in_html_comment(content, link.byte_offset) {
347 continue;
348 }
349
350 if Self::is_in_html_tag(ctx, link.byte_offset) {
352 continue;
353 }
354
355 if is_in_math_context(ctx, link.byte_offset) {
357 continue;
358 }
359
360 if is_in_table_cell(ctx, link.line, link.start_col) {
362 continue;
363 }
364
365 if ctx.line_info(link.line).is_some_and(|info| info.in_front_matter) {
367 continue;
368 }
369
370 if let Some(ref_id) = &link.reference_id {
371 let reference_lower = ref_id.to_lowercase();
372
373 let stripped_ref = Self::strip_backticks(ref_id);
377 let stripped_text = Self::strip_backticks(&link.text);
378 if mkdocs_mode
379 && (is_mkdocs_auto_reference(stripped_ref)
380 || is_mkdocs_auto_reference(stripped_text)
381 || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
382 || (link.text.as_str() != stripped_text && Self::is_valid_python_identifier(stripped_text)))
383 {
384 continue;
385 }
386
387 if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
389 if let Some(line_info) = ctx.line_info(link.line) {
391 if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
392 in_example_section = true;
393 continue;
394 }
395
396 if in_example_section {
397 continue;
398 }
399
400 if LIST_ITEM_REGEX.is_match(&line_info.content) {
402 continue;
403 }
404
405 let trimmed = line_info.content.trim_start();
407 if trimmed.starts_with('<') {
408 continue;
409 }
410 }
411
412 let match_len = link.byte_end - link.byte_offset;
413 undefined.push((link.line - 1, link.start_col, match_len, ref_id.clone()));
414 reported_refs.insert(reference_lower, true);
415 }
416 }
417 }
418
419 for image in &ctx.images {
421 if !image.is_reference {
422 continue; }
424
425 if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
427 continue;
428 }
429
430 if Self::is_in_html_comment(content, image.byte_offset) {
432 continue;
433 }
434
435 if Self::is_in_html_tag(ctx, image.byte_offset) {
437 continue;
438 }
439
440 if is_in_math_context(ctx, image.byte_offset) {
442 continue;
443 }
444
445 if is_in_table_cell(ctx, image.line, image.start_col) {
447 continue;
448 }
449
450 if ctx.line_info(image.line).is_some_and(|info| info.in_front_matter) {
452 continue;
453 }
454
455 if let Some(ref_id) = &image.reference_id {
456 let reference_lower = ref_id.to_lowercase();
457
458 let stripped_ref = Self::strip_backticks(ref_id);
462 let stripped_alt = Self::strip_backticks(&image.alt_text);
463 if mkdocs_mode
464 && (is_mkdocs_auto_reference(stripped_ref)
465 || is_mkdocs_auto_reference(stripped_alt)
466 || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
467 || (image.alt_text.as_str() != stripped_alt && Self::is_valid_python_identifier(stripped_alt)))
468 {
469 continue;
470 }
471
472 if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
474 if let Some(line_info) = ctx.line_info(image.line) {
476 if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
477 in_example_section = true;
478 continue;
479 }
480
481 if in_example_section {
482 continue;
483 }
484
485 if LIST_ITEM_REGEX.is_match(&line_info.content) {
487 continue;
488 }
489
490 let trimmed = line_info.content.trim_start();
492 if trimmed.starts_with('<') {
493 continue;
494 }
495 }
496
497 let match_len = image.byte_end - image.byte_offset;
498 undefined.push((image.line - 1, image.start_col, match_len, ref_id.clone()));
499 reported_refs.insert(reference_lower, true);
500 }
501 }
502 }
503
504 let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
506
507 for link in &ctx.links {
509 covered_ranges.push((link.byte_offset, link.byte_end));
510 }
511
512 for image in &ctx.images {
514 covered_ranges.push((image.byte_offset, image.byte_end));
515 }
516
517 covered_ranges.sort_by_key(|&(start, _)| start);
519
520 let lines: Vec<&str> = content.lines().collect();
523 in_example_section = false; for (line_num, line) in lines.iter().enumerate() {
526 if ctx.line_info(line_num + 1).is_some_and(|info| info.in_front_matter) {
528 continue;
529 }
530
531 if let Some(cap) = FENCED_CODE_START.captures(line) {
533 if let Some(fence) = cap.get(2) {
534 let fence_str = fence.as_str();
536 if !in_code_block {
537 in_code_block = true;
538 code_fence_marker = fence_str.to_string();
539 } else if line.trim_start().starts_with(&code_fence_marker) {
540 let trimmed = line.trim_start();
542 if trimmed.starts_with(&code_fence_marker) {
544 let after_fence = &trimmed[code_fence_marker.len()..];
545 if after_fence.trim().is_empty() {
546 in_code_block = false;
547 code_fence_marker.clear();
548 }
549 }
550 }
551 }
552 continue;
553 }
554
555 if in_code_block {
556 continue;
557 }
558
559 if OUTPUT_EXAMPLE_START.is_match(line) {
561 in_example_section = true;
562 continue;
563 }
564
565 if in_example_section {
566 if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
568 in_example_section = false;
569 } else {
570 continue;
571 }
572 }
573
574 if LIST_ITEM_REGEX.is_match(line) {
576 continue;
577 }
578
579 let trimmed_line = line.trim_start();
581 if trimmed_line.starts_with('<') {
582 continue;
583 }
584
585 if GITHUB_ALERT_REGEX.is_match(line) {
587 continue;
588 }
589
590 if trimmed_line.starts_with("*[") {
593 continue;
594 }
595
596 let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
599 for mat in URL_WITH_BRACKETS.find_iter(line) {
600 let url_str = mat.as_str();
602 let url_start = mat.start();
603
604 let mut idx = 0;
606 while idx < url_str.len() {
607 if let Some(bracket_start) = url_str[idx..].find('[') {
608 let bracket_start_abs = url_start + idx + bracket_start;
609 if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
610 let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
611 url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
612 idx += bracket_start + bracket_end + 2;
613 } else {
614 break;
615 }
616 } else {
617 break;
618 }
619 }
620 }
621
622 if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
624 for cap in captures {
625 if let Some(ref_match) = cap.get(1) {
626 let bracket_start = cap.get(0).unwrap().start();
628 let bracket_end = cap.get(0).unwrap().end();
629
630 let is_in_url = url_bracket_ranges
632 .iter()
633 .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
634
635 if is_in_url {
636 continue;
637 }
638
639 let reference = ref_match.as_str();
640 let reference_lower = reference.to_lowercase();
641
642 if Self::is_likely_not_reference(reference) {
644 continue;
645 }
646
647 if let Some(alert_type) = reference.strip_prefix('!')
649 && matches!(
650 alert_type,
651 "NOTE"
652 | "TIP"
653 | "WARNING"
654 | "IMPORTANT"
655 | "CAUTION"
656 | "INFO"
657 | "SUCCESS"
658 | "FAILURE"
659 | "DANGER"
660 | "BUG"
661 | "EXAMPLE"
662 | "QUOTE"
663 )
664 {
665 continue;
666 }
667
668 if mkdocs_mode
671 && (reference.starts_with("start:") || reference.starts_with("end:"))
672 && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
673 || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
674 {
675 continue;
676 }
677
678 let stripped_ref = Self::strip_backticks(reference);
681 if mkdocs_mode
682 && (is_mkdocs_auto_reference(stripped_ref)
683 || (reference != stripped_ref && Self::is_valid_python_identifier(stripped_ref)))
684 {
685 continue;
686 }
687
688 if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
689 let full_match = cap.get(0).unwrap();
690 let col = full_match.start();
691
692 let code_spans = ctx.code_spans();
694 if Self::is_in_code_span(line_num + 1, col, &code_spans) {
695 continue;
696 }
697
698 let line_start_byte = ctx.line_offsets[line_num];
700 let byte_pos = line_start_byte + col;
701
702 if crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block(
704 &ctx.code_blocks,
705 byte_pos,
706 ) {
707 continue;
708 }
709
710 if Self::is_in_html_comment(content, byte_pos) {
712 continue;
713 }
714
715 if Self::is_in_html_tag(ctx, byte_pos) {
717 continue;
718 }
719
720 if is_in_math_context(ctx, byte_pos) {
722 continue;
723 }
724
725 if is_in_table_cell(ctx, line_num + 1, col) {
727 continue;
728 }
729
730 let byte_end = byte_pos + (full_match.end() - full_match.start());
731
732 let mut is_covered = false;
734 for &(range_start, range_end) in &covered_ranges {
735 if range_start <= byte_pos && byte_end <= range_end {
736 is_covered = true;
738 break;
739 }
740 if range_start > byte_end {
741 break;
743 }
744 }
745
746 if is_covered {
747 continue;
748 }
749
750 let line_chars: Vec<char> = line.chars().collect();
755 if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
756 let mut bracket_count = 1; let mut check_pos = col.saturating_sub(2);
759 let mut found_opening = false;
760
761 while check_pos > 0 && check_pos < line_chars.len() {
762 match line_chars.get(check_pos) {
763 Some(&']') => bracket_count += 1,
764 Some(&'[') => {
765 bracket_count -= 1;
766 if bracket_count == 0 {
767 if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
769 found_opening = true;
770 }
771 break;
772 }
773 }
774 _ => {}
775 }
776 if check_pos == 0 {
777 break;
778 }
779 check_pos = check_pos.saturating_sub(1);
780 }
781
782 if found_opening {
783 continue;
785 }
786 }
787
788 let before_text = &line[..col];
791 if before_text.contains("\\]") {
792 if let Some(escaped_close_pos) = before_text.rfind("\\]") {
794 let search_text = &before_text[..escaped_close_pos];
795 if search_text.contains("\\[") {
796 continue;
798 }
799 }
800 }
801
802 let match_len = full_match.end() - full_match.start();
803 undefined.push((line_num, col, match_len, reference.to_string()));
804 reported_refs.insert(reference_lower, true);
805 }
806 }
807 }
808 }
809 }
810
811 undefined
812 }
813}
814
815impl Rule for MD052ReferenceLinkImages {
816 fn name(&self) -> &'static str {
817 "MD052"
818 }
819
820 fn description(&self) -> &'static str {
821 "Reference links and images should use a reference that exists"
822 }
823
824 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
825 let content = ctx.content;
826 let mut warnings = Vec::new();
827
828 let has_reference_links = ctx.links.iter().any(|l| l.is_reference);
831 let has_reference_images = ctx.images.iter().any(|i| i.is_reference);
832
833 if !content.contains('[') {
835 return Ok(warnings);
836 }
837
838 let has_reference_definitions = content.contains("]:");
840
841 if !has_reference_links && !has_reference_images && !has_reference_definitions {
844 let all_brackets_are_inline = ctx.links.iter().all(|l| !l.is_reference)
847 && ctx.images.iter().all(|i| !i.is_reference)
848 && ctx.links.len() + ctx.images.len() > 0;
849
850 if all_brackets_are_inline {
851 return Ok(warnings); }
853 }
854
855 let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
857
858 let references = self.extract_references(content, mkdocs_mode);
859
860 for (line_num, col, match_len, reference) in
862 self.find_undefined_references(content, &references, ctx, mkdocs_mode)
863 {
864 let lines: Vec<&str> = content.lines().collect();
865 let line_content = lines.get(line_num).unwrap_or(&"");
866
867 let (start_line, start_col, end_line, end_col) =
869 calculate_match_range(line_num + 1, line_content, col, match_len);
870
871 warnings.push(LintWarning {
872 rule_name: Some(self.name()),
873 line: start_line,
874 column: start_col,
875 end_line,
876 end_column: end_col,
877 message: format!("Reference '{reference}' not found"),
878 severity: Severity::Warning,
879 fix: None,
880 });
881 }
882
883 Ok(warnings)
884 }
885
886 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
888 ctx.content.is_empty() || !ctx.likely_has_links_or_images()
890 }
891
892 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
893 let content = ctx.content;
894 Ok(content.to_string())
896 }
897
898 fn as_any(&self) -> &dyn std::any::Any {
899 self
900 }
901
902 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
903 where
904 Self: Sized,
905 {
906 Box::new(MD052ReferenceLinkImages::new())
908 }
909}
910
911#[cfg(test)]
912mod tests {
913 use super::*;
914 use crate::lint_context::LintContext;
915
916 #[test]
917 fn test_valid_reference_link() {
918 let rule = MD052ReferenceLinkImages::new();
919 let content = "[text][ref]\n\n[ref]: https://example.com";
920 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
921 let result = rule.check(&ctx).unwrap();
922
923 assert_eq!(result.len(), 0);
924 }
925
926 #[test]
927 fn test_undefined_reference_link() {
928 let rule = MD052ReferenceLinkImages::new();
929 let content = "[text][undefined]";
930 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
931 let result = rule.check(&ctx).unwrap();
932
933 assert_eq!(result.len(), 1);
934 assert!(result[0].message.contains("Reference 'undefined' not found"));
935 }
936
937 #[test]
938 fn test_valid_reference_image() {
939 let rule = MD052ReferenceLinkImages::new();
940 let content = "![alt][img]\n\n[img]: image.jpg";
941 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
942 let result = rule.check(&ctx).unwrap();
943
944 assert_eq!(result.len(), 0);
945 }
946
947 #[test]
948 fn test_undefined_reference_image() {
949 let rule = MD052ReferenceLinkImages::new();
950 let content = "![alt][missing]";
951 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
952 let result = rule.check(&ctx).unwrap();
953
954 assert_eq!(result.len(), 1);
955 assert!(result[0].message.contains("Reference 'missing' not found"));
956 }
957
958 #[test]
959 fn test_case_insensitive_references() {
960 let rule = MD052ReferenceLinkImages::new();
961 let content = "[Text][REF]\n\n[ref]: https://example.com";
962 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
963 let result = rule.check(&ctx).unwrap();
964
965 assert_eq!(result.len(), 0);
966 }
967
968 #[test]
969 fn test_shortcut_reference_valid() {
970 let rule = MD052ReferenceLinkImages::new();
971 let content = "[ref]\n\n[ref]: https://example.com";
972 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
973 let result = rule.check(&ctx).unwrap();
974
975 assert_eq!(result.len(), 0);
976 }
977
978 #[test]
979 fn test_shortcut_reference_undefined() {
980 let rule = MD052ReferenceLinkImages::new();
981 let content = "[undefined]";
982 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
983 let result = rule.check(&ctx).unwrap();
984
985 assert_eq!(result.len(), 1);
986 assert!(result[0].message.contains("Reference 'undefined' not found"));
987 }
988
989 #[test]
990 fn test_inline_links_ignored() {
991 let rule = MD052ReferenceLinkImages::new();
992 let content = "[text](https://example.com)";
993 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
994 let result = rule.check(&ctx).unwrap();
995
996 assert_eq!(result.len(), 0);
997 }
998
999 #[test]
1000 fn test_inline_images_ignored() {
1001 let rule = MD052ReferenceLinkImages::new();
1002 let content = "";
1003 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1004 let result = rule.check(&ctx).unwrap();
1005
1006 assert_eq!(result.len(), 0);
1007 }
1008
1009 #[test]
1010 fn test_references_in_code_blocks_ignored() {
1011 let rule = MD052ReferenceLinkImages::new();
1012 let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
1013 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1014 let result = rule.check(&ctx).unwrap();
1015
1016 assert_eq!(result.len(), 0);
1017 }
1018
1019 #[test]
1020 fn test_references_in_inline_code_ignored() {
1021 let rule = MD052ReferenceLinkImages::new();
1022 let content = "`[undefined]`";
1023 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1024 let result = rule.check(&ctx).unwrap();
1025
1026 assert_eq!(result.len(), 0);
1028 }
1029
1030 #[test]
1031 fn test_comprehensive_inline_code_detection() {
1032 let rule = MD052ReferenceLinkImages::new();
1033 let content = r#"# Test
1034
1035This `[inside]` should be ignored.
1036This [outside] should be flagged.
1037Reference links `[text][ref]` in code are ignored.
1038Regular reference [text][missing] should be flagged.
1039Images `![alt][img]` in code are ignored.
1040Regular image ![alt][badimg] should be flagged.
1041
1042Multiple `[one]` and `[two]` in code ignored, but [three] is not.
1043
1044```
1045[code block content] should be ignored
1046```
1047
1048`Multiple [refs] in [same] code span` ignored."#;
1049
1050 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1051 let result = rule.check(&ctx).unwrap();
1052
1053 assert_eq!(result.len(), 4);
1055
1056 let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
1057 assert!(messages.iter().any(|m| m.contains("outside")));
1058 assert!(messages.iter().any(|m| m.contains("missing")));
1059 assert!(messages.iter().any(|m| m.contains("badimg")));
1060 assert!(messages.iter().any(|m| m.contains("three")));
1061
1062 assert!(!messages.iter().any(|m| m.contains("inside")));
1064 assert!(!messages.iter().any(|m| m.contains("one")));
1065 assert!(!messages.iter().any(|m| m.contains("two")));
1066 assert!(!messages.iter().any(|m| m.contains("refs")));
1067 assert!(!messages.iter().any(|m| m.contains("same")));
1068 }
1069
1070 #[test]
1071 fn test_multiple_undefined_references() {
1072 let rule = MD052ReferenceLinkImages::new();
1073 let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
1074 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1075 let result = rule.check(&ctx).unwrap();
1076
1077 assert_eq!(result.len(), 3);
1078 assert!(result[0].message.contains("ref1"));
1079 assert!(result[1].message.contains("ref2"));
1080 assert!(result[2].message.contains("ref3"));
1081 }
1082
1083 #[test]
1084 fn test_mixed_valid_and_undefined() {
1085 let rule = MD052ReferenceLinkImages::new();
1086 let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
1087 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1088 let result = rule.check(&ctx).unwrap();
1089
1090 assert_eq!(result.len(), 1);
1091 assert!(result[0].message.contains("missing"));
1092 }
1093
1094 #[test]
1095 fn test_empty_reference() {
1096 let rule = MD052ReferenceLinkImages::new();
1097 let content = "[text][]\n\n[ref]: https://example.com";
1098 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1099 let result = rule.check(&ctx).unwrap();
1100
1101 assert_eq!(result.len(), 1);
1103 }
1104
1105 #[test]
1106 fn test_escaped_brackets_ignored() {
1107 let rule = MD052ReferenceLinkImages::new();
1108 let content = "\\[not a link\\]";
1109 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1110 let result = rule.check(&ctx).unwrap();
1111
1112 assert_eq!(result.len(), 0);
1113 }
1114
1115 #[test]
1116 fn test_list_items_ignored() {
1117 let rule = MD052ReferenceLinkImages::new();
1118 let content = "- [undefined]\n* [another]\n+ [third]";
1119 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1120 let result = rule.check(&ctx).unwrap();
1121
1122 assert_eq!(result.len(), 0);
1124 }
1125
1126 #[test]
1127 fn test_output_example_section_ignored() {
1128 let rule = MD052ReferenceLinkImages::new();
1129 let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
1130 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1131 let result = rule.check(&ctx).unwrap();
1132
1133 assert_eq!(result.len(), 1);
1135 assert!(result[0].message.contains("missing"));
1136 }
1137
1138 #[test]
1139 fn test_reference_definitions_in_code_blocks_ignored() {
1140 let rule = MD052ReferenceLinkImages::new();
1141 let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
1142 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1143 let result = rule.check(&ctx).unwrap();
1144
1145 assert_eq!(result.len(), 1);
1147 assert!(result[0].message.contains("ref"));
1148 }
1149
1150 #[test]
1151 fn test_multiple_references_to_same_undefined() {
1152 let rule = MD052ReferenceLinkImages::new();
1153 let content = "[first][missing] [second][missing] [third][missing]";
1154 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1155 let result = rule.check(&ctx).unwrap();
1156
1157 assert_eq!(result.len(), 1);
1159 assert!(result[0].message.contains("missing"));
1160 }
1161
1162 #[test]
1163 fn test_reference_with_special_characters() {
1164 let rule = MD052ReferenceLinkImages::new();
1165 let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
1166 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1167 let result = rule.check(&ctx).unwrap();
1168
1169 assert_eq!(result.len(), 0);
1170 }
1171
1172 #[test]
1173 fn test_issue_51_html_attribute_not_reference() {
1174 let rule = MD052ReferenceLinkImages::new();
1176 let content = r#"# Example
1177
1178## Test
1179
1180Want to fill out this form?
1181
1182<form method="post">
1183 <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
1184</form>"#;
1185 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1186 let result = rule.check(&ctx).unwrap();
1187
1188 assert_eq!(
1189 result.len(),
1190 0,
1191 "HTML attributes with square brackets should not be flagged as undefined references"
1192 );
1193 }
1194
1195 #[test]
1196 fn test_extract_references() {
1197 let rule = MD052ReferenceLinkImages::new();
1198 let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
1199 let refs = rule.extract_references(content, false);
1200
1201 assert_eq!(refs.len(), 3);
1202 assert!(refs.contains("ref1"));
1203 assert!(refs.contains("ref2"));
1204 assert!(refs.contains("ref3"));
1205 }
1206
1207 #[test]
1208 fn test_inline_code_not_flagged() {
1209 let rule = MD052ReferenceLinkImages::new();
1210
1211 let content = r#"# Test
1213
1214Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.
1215
1216Also, `[todo]` is not a reference link.
1217
1218But this [reference] should be flagged.
1219
1220And this `[inline code]` should not be flagged.
1221"#;
1222
1223 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1224 let warnings = rule.check(&ctx).unwrap();
1225
1226 assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
1228 assert!(warnings[0].message.contains("'reference'"));
1229 }
1230
1231 #[test]
1232 fn test_code_block_references_ignored() {
1233 let rule = MD052ReferenceLinkImages::new();
1234
1235 let content = r#"# Test
1236
1237```markdown
1238[undefined] reference in code block
1239![undefined] image in code block
1240```
1241
1242[real-undefined] reference outside
1243"#;
1244
1245 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1246 let warnings = rule.check(&ctx).unwrap();
1247
1248 assert_eq!(warnings.len(), 1);
1250 assert!(warnings[0].message.contains("'real-undefined'"));
1251 }
1252
1253 #[test]
1254 fn test_html_comments_ignored() {
1255 let rule = MD052ReferenceLinkImages::new();
1257
1258 let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
1260<!--- set_env EDITOR 'python3 fake_editor.py' -->
1261
1262```bash
1263$ python3 vote.py
12643 votes for: 2
12652 votes for: 3, 4
1266```"#;
1267 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1268 let result = rule.check(&ctx).unwrap();
1269 assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");
1270
1271 let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
1273Normal [text][undefined]
1274<!-- Another [comment][with] references -->"#;
1275 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1276 let result = rule.check(&ctx).unwrap();
1277 assert_eq!(
1278 result.len(),
1279 1,
1280 "Should only flag the undefined reference outside comments"
1281 );
1282 assert!(result[0].message.contains("undefined"));
1283
1284 let content = r#"<!--
1286[ref1]
1287[ref2][ref3]
1288-->
1289[actual][undefined]"#;
1290 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1291 let result = rule.check(&ctx).unwrap();
1292 assert_eq!(
1293 result.len(),
1294 1,
1295 "Should not flag references in multi-line HTML comments"
1296 );
1297 assert!(result[0].message.contains("undefined"));
1298
1299 let content = r#"<!-- Comment with [1:] pattern -->
1301Valid [link][ref]
1302<!-- More [refs][in][comments] -->
1303![image][missing]
1304
1305[ref]: https://example.com"#;
1306 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1307 let result = rule.check(&ctx).unwrap();
1308 assert_eq!(result.len(), 1, "Should only flag missing image reference");
1309 assert!(result[0].message.contains("missing"));
1310 }
1311
1312 #[test]
1313 fn test_frontmatter_ignored() {
1314 let rule = MD052ReferenceLinkImages::new();
1316
1317 let content = r#"---
1319layout: post
1320title: "My Jekyll Post"
1321date: 2023-01-01
1322categories: blog
1323tags: ["test", "example"]
1324author: John Doe
1325---
1326
1327# My Blog Post
1328
1329This is the actual markdown content that should be linted.
1330
1331[undefined] reference should be flagged.
1332
1333## Section 1
1334
1335Some content here."#;
1336 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1337 let result = rule.check(&ctx).unwrap();
1338
1339 assert_eq!(
1341 result.len(),
1342 1,
1343 "Should only flag the undefined reference outside frontmatter"
1344 );
1345 assert!(result[0].message.contains("undefined"));
1346
1347 let content = r#"+++
1349title = "My Post"
1350tags = ["example", "test"]
1351+++
1352
1353# Content
1354
1355[missing] reference should be flagged."#;
1356 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1357 let result = rule.check(&ctx).unwrap();
1358 assert_eq!(
1359 result.len(),
1360 1,
1361 "Should only flag the undefined reference outside TOML frontmatter"
1362 );
1363 assert!(result[0].message.contains("missing"));
1364 }
1365
1366 #[test]
1367 fn test_mkdocs_snippet_markers_not_flagged() {
1368 let rule = MD052ReferenceLinkImages::new();
1370
1371 let content = r#"# Document with MkDocs Snippets
1373
1374Some content here.
1375
1376# -8<- [start:remote-content]
1377
1378This is the remote content section.
1379
1380# -8<- [end:remote-content]
1381
1382More content here.
1383
1384<!-- --8<-- [start:another-section] -->
1385Content in another section
1386<!-- --8<-- [end:another-section] -->"#;
1387 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs);
1388 let result = rule.check(&ctx).unwrap();
1389
1390 assert_eq!(
1392 result.len(),
1393 0,
1394 "Should not flag MkDocs snippet markers as undefined references"
1395 );
1396
1397 let content = r#"# Document
1400
1401# -8<- [start:section]
1402Content with [reference] inside snippet section
1403# -8<- [end:section]
1404
1405Regular [undefined] reference outside snippet markers."#;
1406 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs);
1407 let result = rule.check(&ctx).unwrap();
1408
1409 assert_eq!(
1410 result.len(),
1411 2,
1412 "Should flag undefined references but skip snippet marker lines"
1413 );
1414 assert!(result[0].message.contains("reference"));
1416 assert!(result[1].message.contains("undefined"));
1417
1418 let content = r#"# Document
1420
1421# -8<- [start:section]
1422# -8<- [end:section]"#;
1423 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1424 let result = rule.check(&ctx).unwrap();
1425
1426 assert_eq!(
1427 result.len(),
1428 2,
1429 "In standard mode, snippet markers should be flagged as undefined references"
1430 );
1431 }
1432
1433 #[test]
1434 fn test_github_alerts_not_flagged() {
1435 let rule = MD052ReferenceLinkImages::new();
1437
1438 let content = r#"# Document with GitHub Alerts
1440
1441> [!NOTE]
1442> This is a note alert.
1443
1444> [!TIP]
1445> This is a tip alert.
1446
1447> [!IMPORTANT]
1448> This is an important alert.
1449
1450> [!WARNING]
1451> This is a warning alert.
1452
1453> [!CAUTION]
1454> This is a caution alert.
1455
1456Regular content with [undefined] reference."#;
1457 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1458 let result = rule.check(&ctx).unwrap();
1459
1460 assert_eq!(
1462 result.len(),
1463 1,
1464 "Should only flag the undefined reference, not GitHub alerts"
1465 );
1466 assert!(result[0].message.contains("undefined"));
1467 assert_eq!(result[0].line, 18); let content = r#"> [!TIP]
1471> Here's a useful tip about [something].
1472> Multiple lines are allowed.
1473
1474[something] is mentioned but not defined."#;
1475 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1476 let result = rule.check(&ctx).unwrap();
1477
1478 assert_eq!(result.len(), 1, "Should flag undefined reference");
1482 assert!(result[0].message.contains("something"));
1483
1484 let content = r#"> [!NOTE]
1486> See [reference] for more details.
1487
1488[reference]: https://example.com"#;
1489 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1490 let result = rule.check(&ctx).unwrap();
1491
1492 assert_eq!(result.len(), 0, "Should not flag GitHub alerts or defined references");
1494 }
1495}