1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3use crate::utils::range_utils::calculate_match_range;
4use crate::utils::regex_cache::{HTML_COMMENT_PATTERN, SHORTCUT_REF_REGEX};
5use crate::utils::skip_context::{is_in_math_context, is_in_table_cell};
6use lazy_static::lazy_static;
7use regex::Regex;
8use std::collections::{HashMap, HashSet};
9
lazy_static! {
    /// Matches a reference definition line (`[label]: destination`) and captures the label
    /// in group 1. The label may contain backslash escapes and one level of nested brackets.
    static ref REF_REGEX: Regex = Regex::new(r"^\s*\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]:\s*.*").unwrap();

    /// Matches the start of a `-`, `*` or `+` list item, including an optional task checkbox.
    static ref LIST_ITEM_REGEX: Regex = Regex::new(r"^\s*[-*+]\s+(?:\[[xX\s]\]\s+)?").unwrap();

    /// Matches a line that begins (after optional whitespace, group 1) with a run of three or
    /// more backticks or tildes (group 2).
    static ref FENCED_CODE_START: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap();

    /// Matches a heading whose entire text is "Output", "Example", "Output Style" or
    /// "Output Format".
    static ref OUTPUT_EXAMPLE_START: Regex = Regex::new(r"^#+\s*(?:Output|Example|Output Style|Output Format)\s*$").unwrap();

    /// Matches a blockquote line that opens with an alert marker such as `> [!NOTE]`.
    static ref GITHUB_ALERT_REGEX: Regex = Regex::new(r"^\s*>\s*\[!(NOTE|TIP|IMPORTANT|WARNING|CAUTION|INFO|SUCCESS|FAILURE|DANGER|BUG|EXAMPLE|QUOTE)\]").unwrap();

    /// Matches http(s) URLs that legitimately contain square brackets: a bracketed host made of
    /// hex digits, `:`, `.` and `%`, or a path that ends in a bracketed decimal index.
    static ref URL_WITH_BRACKETS: Regex = Regex::new(
        r"https?://(?:\[[0-9a-fA-F:.%]+\]|[^\s\[\]]+/[^\s]*\[\d+\])"
    ).unwrap();
}
40
/// Rule MD052: reference links and images should use a reference that exists.
///
/// The rule carries no state or configuration of its own.
#[derive(Clone, Default)]
pub struct MD052ReferenceLinkImages {}
48
49impl MD052ReferenceLinkImages {
50 pub fn new() -> Self {
51 Self {}
52 }
53
54 fn strip_backticks(s: &str) -> &str {
57 s.trim_start_matches('`').trim_end_matches('`')
58 }
59
60 fn is_valid_python_identifier(s: &str) -> bool {
64 if s.is_empty() {
65 return false;
66 }
67 let first_char = s.chars().next().unwrap();
68 if !first_char.is_ascii_alphabetic() && first_char != '_' {
69 return false;
70 }
71 s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
72 }
73
74 fn is_likely_not_reference(text: &str) -> bool {
77 if text.chars().all(|c| c.is_ascii_digit()) {
79 return true;
80 }
81
82 if text.contains(':') && text.chars().all(|c| c.is_ascii_digit() || c == ':') {
84 return true;
85 }
86
87 if text.contains('.') && !text.contains(' ') && !text.contains('-') && !text.contains('_') {
90 return true;
92 }
93
94 if text == "*" || text == "..." || text == "**" {
96 return true;
97 }
98
99 if text.contains('/') && !text.contains(' ') && !text.starts_with("http") {
101 return true;
102 }
103
104 if text.contains(',') || text.contains('[') || text.contains(']') {
107 return true;
109 }
110
111 if !text.contains('`')
118 && text.contains('.')
119 && !text.contains(' ')
120 && !text.contains('-')
121 && !text.contains('_')
122 {
123 return true;
124 }
125
126 if text.chars().all(|c| !c.is_alphanumeric() && c != ' ') {
133 return true;
134 }
135
136 if text.len() <= 2 && !text.chars().all(|c| c.is_alphabetic()) {
138 return true;
139 }
140
141 if (text.starts_with('"') && text.ends_with('"'))
143 || (text.starts_with('\'') && text.ends_with('\''))
144 || text.contains('"')
145 || text.contains('\'')
146 {
147 return true;
148 }
149
150 if text.contains(':') && text.contains(' ') {
153 return true;
154 }
155
156 if text.starts_with('!') {
158 return true;
159 }
160
161 if text.len() == 1 && text.chars().all(|c| c.is_ascii_uppercase()) {
163 return true;
164 }
165
166 let common_non_refs = [
169 "object", "Object", "any", "Any", "inv", "void", "bool", "int", "float", "str", "char", "i8", "i16", "i32",
170 "i64", "i128", "isize", "u8", "u16", "u32", "u64", "u128", "usize", "f32", "f64",
171 ];
172
173 if common_non_refs.contains(&text) {
174 return true;
175 }
176
177 false
178 }
179
180 fn is_in_code_span(line: usize, col: usize, code_spans: &[crate::lint_context::CodeSpan]) -> bool {
182 code_spans
183 .iter()
184 .any(|span| span.line == line && col >= span.start_col && col < span.end_col)
185 }
186
187 fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
189 for m in HTML_COMMENT_PATTERN.find_iter(content) {
190 if m.start() <= byte_pos && byte_pos < m.end() {
191 return true;
192 }
193 }
194 false
195 }
196
197 fn is_in_html_tag(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
199 for html_tag in ctx.html_tags().iter() {
201 if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
202 return true;
203 }
204 }
205 false
206 }
207
208 fn extract_references(&self, content: &str, mkdocs_mode: bool) -> HashSet<String> {
209 use crate::config::MarkdownFlavor;
210 use crate::utils::skip_context::is_mkdocs_snippet_line;
211
212 let mut references = HashSet::new();
213 let mut in_code_block = false;
214 let mut code_fence_marker = String::new();
215
216 for line in content.lines() {
217 if is_mkdocs_snippet_line(
219 line,
220 if mkdocs_mode {
221 MarkdownFlavor::MkDocs
222 } else {
223 MarkdownFlavor::Standard
224 },
225 ) {
226 continue;
227 }
228 if let Some(cap) = FENCED_CODE_START.captures(line) {
230 if let Some(fence) = cap.get(2) {
231 let fence_str = fence.as_str();
233 if !in_code_block {
234 in_code_block = true;
235 code_fence_marker = fence_str.to_string();
236 } else if line.trim_start().starts_with(&code_fence_marker) {
237 let trimmed = line.trim_start();
239 if trimmed.starts_with(&code_fence_marker) {
241 let after_fence = &trimmed[code_fence_marker.len()..];
242 if after_fence.trim().is_empty() {
243 in_code_block = false;
244 code_fence_marker.clear();
245 }
246 }
247 }
248 }
249 continue;
250 }
251
252 if in_code_block {
254 continue;
255 }
256
257 if line.trim_start().starts_with("*[") {
260 continue;
261 }
262
263 if let Some(cap) = REF_REGEX.captures(line) {
264 if let Some(reference) = cap.get(1) {
266 references.insert(reference.as_str().to_lowercase());
267 }
268 }
269 }
270
271 references
272 }
273
274 fn find_undefined_references(
275 &self,
276 content: &str,
277 references: &HashSet<String>,
278 ctx: &crate::lint_context::LintContext,
279 mkdocs_mode: bool,
280 ) -> Vec<(usize, usize, usize, String)> {
281 let mut undefined = Vec::new();
282 let mut reported_refs = HashMap::new();
283 let mut in_code_block = false;
284 let mut code_fence_marker = String::new();
285 let mut in_example_section = false;
286
287 let code_spans = ctx.code_spans();
289
290 for link in &ctx.links {
292 if !link.is_reference {
293 continue; }
295
296 if Self::is_in_code_span(link.line, link.start_col, &code_spans) {
298 continue;
299 }
300
301 if Self::is_in_html_comment(content, link.byte_offset) {
303 continue;
304 }
305
306 if Self::is_in_html_tag(ctx, link.byte_offset) {
308 continue;
309 }
310
311 if is_in_math_context(ctx, link.byte_offset) {
313 continue;
314 }
315
316 if is_in_table_cell(ctx, link.line, link.start_col) {
318 continue;
319 }
320
321 if ctx.line_info(link.line).is_some_and(|info| info.in_front_matter) {
323 continue;
324 }
325
326 if let Some(ref_id) = &link.reference_id {
327 let reference_lower = ref_id.to_lowercase();
328
329 let stripped_ref = Self::strip_backticks(ref_id);
333 let stripped_text = Self::strip_backticks(&link.text);
334 if mkdocs_mode
335 && (is_mkdocs_auto_reference(stripped_ref)
336 || is_mkdocs_auto_reference(stripped_text)
337 || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
338 || (link.text.as_str() != stripped_text && Self::is_valid_python_identifier(stripped_text)))
339 {
340 continue;
341 }
342
343 if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
345 if let Some(line_info) = ctx.line_info(link.line) {
347 if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
348 in_example_section = true;
349 continue;
350 }
351
352 if in_example_section {
353 continue;
354 }
355
356 if LIST_ITEM_REGEX.is_match(&line_info.content) {
358 continue;
359 }
360
361 let trimmed = line_info.content.trim_start();
363 if trimmed.starts_with('<') {
364 continue;
365 }
366 }
367
368 let match_len = link.byte_end - link.byte_offset;
369 undefined.push((link.line - 1, link.start_col, match_len, ref_id.clone()));
370 reported_refs.insert(reference_lower, true);
371 }
372 }
373 }
374
375 for image in &ctx.images {
377 if !image.is_reference {
378 continue; }
380
381 if Self::is_in_code_span(image.line, image.start_col, &code_spans) {
383 continue;
384 }
385
386 if Self::is_in_html_comment(content, image.byte_offset) {
388 continue;
389 }
390
391 if Self::is_in_html_tag(ctx, image.byte_offset) {
393 continue;
394 }
395
396 if is_in_math_context(ctx, image.byte_offset) {
398 continue;
399 }
400
401 if is_in_table_cell(ctx, image.line, image.start_col) {
403 continue;
404 }
405
406 if ctx.line_info(image.line).is_some_and(|info| info.in_front_matter) {
408 continue;
409 }
410
411 if let Some(ref_id) = &image.reference_id {
412 let reference_lower = ref_id.to_lowercase();
413
414 let stripped_ref = Self::strip_backticks(ref_id);
418 let stripped_alt = Self::strip_backticks(&image.alt_text);
419 if mkdocs_mode
420 && (is_mkdocs_auto_reference(stripped_ref)
421 || is_mkdocs_auto_reference(stripped_alt)
422 || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
423 || (image.alt_text.as_str() != stripped_alt && Self::is_valid_python_identifier(stripped_alt)))
424 {
425 continue;
426 }
427
428 if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
430 if let Some(line_info) = ctx.line_info(image.line) {
432 if OUTPUT_EXAMPLE_START.is_match(&line_info.content) {
433 in_example_section = true;
434 continue;
435 }
436
437 if in_example_section {
438 continue;
439 }
440
441 if LIST_ITEM_REGEX.is_match(&line_info.content) {
443 continue;
444 }
445
446 let trimmed = line_info.content.trim_start();
448 if trimmed.starts_with('<') {
449 continue;
450 }
451 }
452
453 let match_len = image.byte_end - image.byte_offset;
454 undefined.push((image.line - 1, image.start_col, match_len, ref_id.clone()));
455 reported_refs.insert(reference_lower, true);
456 }
457 }
458 }
459
460 let mut covered_ranges: Vec<(usize, usize)> = Vec::new();
462
463 for link in &ctx.links {
465 covered_ranges.push((link.byte_offset, link.byte_end));
466 }
467
468 for image in &ctx.images {
470 covered_ranges.push((image.byte_offset, image.byte_end));
471 }
472
473 covered_ranges.sort_by_key(|&(start, _)| start);
475
476 let lines: Vec<&str> = content.lines().collect();
479 in_example_section = false; for (line_num, line) in lines.iter().enumerate() {
482 if ctx.line_info(line_num + 1).is_some_and(|info| info.in_front_matter) {
484 continue;
485 }
486
487 if let Some(cap) = FENCED_CODE_START.captures(line) {
489 if let Some(fence) = cap.get(2) {
490 let fence_str = fence.as_str();
492 if !in_code_block {
493 in_code_block = true;
494 code_fence_marker = fence_str.to_string();
495 } else if line.trim_start().starts_with(&code_fence_marker) {
496 let trimmed = line.trim_start();
498 if trimmed.starts_with(&code_fence_marker) {
500 let after_fence = &trimmed[code_fence_marker.len()..];
501 if after_fence.trim().is_empty() {
502 in_code_block = false;
503 code_fence_marker.clear();
504 }
505 }
506 }
507 }
508 continue;
509 }
510
511 if in_code_block {
512 continue;
513 }
514
515 if OUTPUT_EXAMPLE_START.is_match(line) {
517 in_example_section = true;
518 continue;
519 }
520
521 if in_example_section {
522 if line.starts_with('#') && !OUTPUT_EXAMPLE_START.is_match(line) {
524 in_example_section = false;
525 } else {
526 continue;
527 }
528 }
529
530 if LIST_ITEM_REGEX.is_match(line) {
532 continue;
533 }
534
535 let trimmed_line = line.trim_start();
537 if trimmed_line.starts_with('<') {
538 continue;
539 }
540
541 if GITHUB_ALERT_REGEX.is_match(line) {
543 continue;
544 }
545
546 if trimmed_line.starts_with("*[") {
549 continue;
550 }
551
552 let mut url_bracket_ranges: Vec<(usize, usize)> = Vec::new();
555 for mat in URL_WITH_BRACKETS.find_iter(line) {
556 let url_str = mat.as_str();
558 let url_start = mat.start();
559
560 let mut idx = 0;
562 while idx < url_str.len() {
563 if let Some(bracket_start) = url_str[idx..].find('[') {
564 let bracket_start_abs = url_start + idx + bracket_start;
565 if let Some(bracket_end) = url_str[idx + bracket_start + 1..].find(']') {
566 let bracket_end_abs = url_start + idx + bracket_start + 1 + bracket_end + 1;
567 url_bracket_ranges.push((bracket_start_abs, bracket_end_abs));
568 idx += bracket_start + bracket_end + 2;
569 } else {
570 break;
571 }
572 } else {
573 break;
574 }
575 }
576 }
577
578 if let Ok(captures) = SHORTCUT_REF_REGEX.captures_iter(line).collect::<Result<Vec<_>, _>>() {
580 for cap in captures {
581 if let Some(ref_match) = cap.get(1) {
582 let bracket_start = cap.get(0).unwrap().start();
584 let bracket_end = cap.get(0).unwrap().end();
585
586 let is_in_url = url_bracket_ranges
588 .iter()
589 .any(|&(url_start, url_end)| bracket_start >= url_start && bracket_end <= url_end);
590
591 if is_in_url {
592 continue;
593 }
594
595 let reference = ref_match.as_str();
596 let reference_lower = reference.to_lowercase();
597
598 if Self::is_likely_not_reference(reference) {
600 continue;
601 }
602
603 if let Some(alert_type) = reference.strip_prefix('!')
605 && matches!(
606 alert_type,
607 "NOTE"
608 | "TIP"
609 | "WARNING"
610 | "IMPORTANT"
611 | "CAUTION"
612 | "INFO"
613 | "SUCCESS"
614 | "FAILURE"
615 | "DANGER"
616 | "BUG"
617 | "EXAMPLE"
618 | "QUOTE"
619 )
620 {
621 continue;
622 }
623
624 if mkdocs_mode
627 && (reference.starts_with("start:") || reference.starts_with("end:"))
628 && (crate::utils::mkdocs_snippets::is_snippet_section_start(line)
629 || crate::utils::mkdocs_snippets::is_snippet_section_end(line))
630 {
631 continue;
632 }
633
634 let stripped_ref = Self::strip_backticks(reference);
637 if mkdocs_mode
638 && (is_mkdocs_auto_reference(stripped_ref)
639 || (reference != stripped_ref && Self::is_valid_python_identifier(stripped_ref)))
640 {
641 continue;
642 }
643
644 if !references.contains(&reference_lower) && !reported_refs.contains_key(&reference_lower) {
645 let full_match = cap.get(0).unwrap();
646 let col = full_match.start();
647
648 let code_spans = ctx.code_spans();
650 if Self::is_in_code_span(line_num + 1, col, &code_spans) {
651 continue;
652 }
653
654 let line_start_byte = ctx.line_offsets[line_num];
656 let byte_pos = line_start_byte + col;
657
658 if crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block(
660 &ctx.code_blocks,
661 byte_pos,
662 ) {
663 continue;
664 }
665
666 if Self::is_in_html_comment(content, byte_pos) {
668 continue;
669 }
670
671 if Self::is_in_html_tag(ctx, byte_pos) {
673 continue;
674 }
675
676 if is_in_math_context(ctx, byte_pos) {
678 continue;
679 }
680
681 if is_in_table_cell(ctx, line_num + 1, col) {
683 continue;
684 }
685
686 let byte_end = byte_pos + (full_match.end() - full_match.start());
687
688 let mut is_covered = false;
690 for &(range_start, range_end) in &covered_ranges {
691 if range_start <= byte_pos && byte_end <= range_end {
692 is_covered = true;
694 break;
695 }
696 if range_start > byte_end {
697 break;
699 }
700 }
701
702 if is_covered {
703 continue;
704 }
705
706 let line_chars: Vec<char> = line.chars().collect();
711 if col > 0 && col <= line_chars.len() && line_chars.get(col - 1) == Some(&']') {
712 let mut bracket_count = 1; let mut check_pos = col.saturating_sub(2);
715 let mut found_opening = false;
716
717 while check_pos > 0 && check_pos < line_chars.len() {
718 match line_chars.get(check_pos) {
719 Some(&']') => bracket_count += 1,
720 Some(&'[') => {
721 bracket_count -= 1;
722 if bracket_count == 0 {
723 if check_pos == 0 || line_chars.get(check_pos - 1) != Some(&'\\') {
725 found_opening = true;
726 }
727 break;
728 }
729 }
730 _ => {}
731 }
732 if check_pos == 0 {
733 break;
734 }
735 check_pos = check_pos.saturating_sub(1);
736 }
737
738 if found_opening {
739 continue;
741 }
742 }
743
744 let before_text = &line[..col];
747 if before_text.contains("\\]") {
748 if let Some(escaped_close_pos) = before_text.rfind("\\]") {
750 let search_text = &before_text[..escaped_close_pos];
751 if search_text.contains("\\[") {
752 continue;
754 }
755 }
756 }
757
758 let match_len = full_match.end() - full_match.start();
759 undefined.push((line_num, col, match_len, reference.to_string()));
760 reported_refs.insert(reference_lower, true);
761 }
762 }
763 }
764 }
765 }
766
767 undefined
768 }
769}
770
771impl Rule for MD052ReferenceLinkImages {
772 fn name(&self) -> &'static str {
773 "MD052"
774 }
775
776 fn description(&self) -> &'static str {
777 "Reference links and images should use a reference that exists"
778 }
779
780 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
781 let content = ctx.content;
782 let mut warnings = Vec::new();
783
784 let has_reference_links = ctx.links.iter().any(|l| l.is_reference);
787 let has_reference_images = ctx.images.iter().any(|i| i.is_reference);
788
789 if !content.contains('[') {
791 return Ok(warnings);
792 }
793
794 let has_reference_definitions = content.contains("]:");
796
797 if !has_reference_links && !has_reference_images && !has_reference_definitions {
800 let all_brackets_are_inline = ctx.links.iter().all(|l| !l.is_reference)
803 && ctx.images.iter().all(|i| !i.is_reference)
804 && ctx.links.len() + ctx.images.len() > 0;
805
806 if all_brackets_are_inline {
807 return Ok(warnings); }
809 }
810
811 let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
813
814 let references = self.extract_references(content, mkdocs_mode);
815
816 for (line_num, col, match_len, reference) in
818 self.find_undefined_references(content, &references, ctx, mkdocs_mode)
819 {
820 let lines: Vec<&str> = content.lines().collect();
821 let line_content = lines.get(line_num).unwrap_or(&"");
822
823 let (start_line, start_col, end_line, end_col) =
825 calculate_match_range(line_num + 1, line_content, col, match_len);
826
827 warnings.push(LintWarning {
828 rule_name: Some(self.name()),
829 line: start_line,
830 column: start_col,
831 end_line,
832 end_column: end_col,
833 message: format!("Reference '{reference}' not found"),
834 severity: Severity::Warning,
835 fix: None,
836 });
837 }
838
839 Ok(warnings)
840 }
841
842 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
844 ctx.content.is_empty() || !ctx.likely_has_links_or_images()
846 }
847
848 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
849 let content = ctx.content;
850 Ok(content.to_string())
852 }
853
854 fn as_any(&self) -> &dyn std::any::Any {
855 self
856 }
857
858 fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
859 where
860 Self: Sized,
861 {
862 Box::new(MD052ReferenceLinkImages::new())
864 }
865}
866
867#[cfg(test)]
868mod tests {
869 use super::*;
870 use crate::lint_context::LintContext;
871
872 #[test]
873 fn test_valid_reference_link() {
874 let rule = MD052ReferenceLinkImages::new();
875 let content = "[text][ref]\n\n[ref]: https://example.com";
876 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
877 let result = rule.check(&ctx).unwrap();
878
879 assert_eq!(result.len(), 0);
880 }
881
882 #[test]
883 fn test_undefined_reference_link() {
884 let rule = MD052ReferenceLinkImages::new();
885 let content = "[text][undefined]";
886 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
887 let result = rule.check(&ctx).unwrap();
888
889 assert_eq!(result.len(), 1);
890 assert!(result[0].message.contains("Reference 'undefined' not found"));
891 }
892
893 #[test]
894 fn test_valid_reference_image() {
895 let rule = MD052ReferenceLinkImages::new();
896 let content = "![alt][img]\n\n[img]: image.jpg";
897 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
898 let result = rule.check(&ctx).unwrap();
899
900 assert_eq!(result.len(), 0);
901 }
902
903 #[test]
904 fn test_undefined_reference_image() {
905 let rule = MD052ReferenceLinkImages::new();
906 let content = "![alt][missing]";
907 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
908 let result = rule.check(&ctx).unwrap();
909
910 assert_eq!(result.len(), 1);
911 assert!(result[0].message.contains("Reference 'missing' not found"));
912 }
913
914 #[test]
915 fn test_case_insensitive_references() {
916 let rule = MD052ReferenceLinkImages::new();
917 let content = "[Text][REF]\n\n[ref]: https://example.com";
918 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
919 let result = rule.check(&ctx).unwrap();
920
921 assert_eq!(result.len(), 0);
922 }
923
924 #[test]
925 fn test_shortcut_reference_valid() {
926 let rule = MD052ReferenceLinkImages::new();
927 let content = "[ref]\n\n[ref]: https://example.com";
928 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
929 let result = rule.check(&ctx).unwrap();
930
931 assert_eq!(result.len(), 0);
932 }
933
934 #[test]
935 fn test_shortcut_reference_undefined() {
936 let rule = MD052ReferenceLinkImages::new();
937 let content = "[undefined]";
938 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
939 let result = rule.check(&ctx).unwrap();
940
941 assert_eq!(result.len(), 1);
942 assert!(result[0].message.contains("Reference 'undefined' not found"));
943 }
944
945 #[test]
946 fn test_inline_links_ignored() {
947 let rule = MD052ReferenceLinkImages::new();
948 let content = "[text](https://example.com)";
949 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
950 let result = rule.check(&ctx).unwrap();
951
952 assert_eq!(result.len(), 0);
953 }
954
955 #[test]
956 fn test_inline_images_ignored() {
957 let rule = MD052ReferenceLinkImages::new();
958 let content = "";
959 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
960 let result = rule.check(&ctx).unwrap();
961
962 assert_eq!(result.len(), 0);
963 }
964
965 #[test]
966 fn test_references_in_code_blocks_ignored() {
967 let rule = MD052ReferenceLinkImages::new();
968 let content = "```\n[undefined]\n```\n\n[ref]: https://example.com";
969 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
970 let result = rule.check(&ctx).unwrap();
971
972 assert_eq!(result.len(), 0);
973 }
974
975 #[test]
976 fn test_references_in_inline_code_ignored() {
977 let rule = MD052ReferenceLinkImages::new();
978 let content = "`[undefined]`";
979 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
980 let result = rule.check(&ctx).unwrap();
981
982 assert_eq!(result.len(), 0);
984 }
985
986 #[test]
987 fn test_comprehensive_inline_code_detection() {
988 let rule = MD052ReferenceLinkImages::new();
989 let content = r#"# Test
990
991This `[inside]` should be ignored.
992This [outside] should be flagged.
993Reference links `[text][ref]` in code are ignored.
994Regular reference [text][missing] should be flagged.
995Images `![alt][img]` in code are ignored.
996Regular image ![alt][badimg] should be flagged.
997
998Multiple `[one]` and `[two]` in code ignored, but [three] is not.
999
1000```
1001[code block content] should be ignored
1002```
1003
1004`Multiple [refs] in [same] code span` ignored."#;
1005
1006 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1007 let result = rule.check(&ctx).unwrap();
1008
1009 assert_eq!(result.len(), 4);
1011
1012 let messages: Vec<&str> = result.iter().map(|w| &*w.message).collect();
1013 assert!(messages.iter().any(|m| m.contains("outside")));
1014 assert!(messages.iter().any(|m| m.contains("missing")));
1015 assert!(messages.iter().any(|m| m.contains("badimg")));
1016 assert!(messages.iter().any(|m| m.contains("three")));
1017
1018 assert!(!messages.iter().any(|m| m.contains("inside")));
1020 assert!(!messages.iter().any(|m| m.contains("one")));
1021 assert!(!messages.iter().any(|m| m.contains("two")));
1022 assert!(!messages.iter().any(|m| m.contains("refs")));
1023 assert!(!messages.iter().any(|m| m.contains("same")));
1024 }
1025
1026 #[test]
1027 fn test_multiple_undefined_references() {
1028 let rule = MD052ReferenceLinkImages::new();
1029 let content = "[link1][ref1] [link2][ref2] [link3][ref3]";
1030 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1031 let result = rule.check(&ctx).unwrap();
1032
1033 assert_eq!(result.len(), 3);
1034 assert!(result[0].message.contains("ref1"));
1035 assert!(result[1].message.contains("ref2"));
1036 assert!(result[2].message.contains("ref3"));
1037 }
1038
1039 #[test]
1040 fn test_mixed_valid_and_undefined() {
1041 let rule = MD052ReferenceLinkImages::new();
1042 let content = "[valid][ref] [invalid][missing]\n\n[ref]: https://example.com";
1043 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1044 let result = rule.check(&ctx).unwrap();
1045
1046 assert_eq!(result.len(), 1);
1047 assert!(result[0].message.contains("missing"));
1048 }
1049
1050 #[test]
1051 fn test_empty_reference() {
1052 let rule = MD052ReferenceLinkImages::new();
1053 let content = "[text][]\n\n[ref]: https://example.com";
1054 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1055 let result = rule.check(&ctx).unwrap();
1056
1057 assert_eq!(result.len(), 1);
1059 }
1060
1061 #[test]
1062 fn test_escaped_brackets_ignored() {
1063 let rule = MD052ReferenceLinkImages::new();
1064 let content = "\\[not a link\\]";
1065 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1066 let result = rule.check(&ctx).unwrap();
1067
1068 assert_eq!(result.len(), 0);
1069 }
1070
1071 #[test]
1072 fn test_list_items_ignored() {
1073 let rule = MD052ReferenceLinkImages::new();
1074 let content = "- [undefined]\n* [another]\n+ [third]";
1075 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1076 let result = rule.check(&ctx).unwrap();
1077
1078 assert_eq!(result.len(), 0);
1080 }
1081
1082 #[test]
1083 fn test_output_example_section_ignored() {
1084 let rule = MD052ReferenceLinkImages::new();
1085 let content = "## Output\n\n[undefined]\n\n## Normal Section\n\n[missing]";
1086 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1087 let result = rule.check(&ctx).unwrap();
1088
1089 assert_eq!(result.len(), 1);
1091 assert!(result[0].message.contains("missing"));
1092 }
1093
1094 #[test]
1095 fn test_reference_definitions_in_code_blocks_ignored() {
1096 let rule = MD052ReferenceLinkImages::new();
1097 let content = "[link][ref]\n\n```\n[ref]: https://example.com\n```";
1098 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1099 let result = rule.check(&ctx).unwrap();
1100
1101 assert_eq!(result.len(), 1);
1103 assert!(result[0].message.contains("ref"));
1104 }
1105
1106 #[test]
1107 fn test_multiple_references_to_same_undefined() {
1108 let rule = MD052ReferenceLinkImages::new();
1109 let content = "[first][missing] [second][missing] [third][missing]";
1110 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1111 let result = rule.check(&ctx).unwrap();
1112
1113 assert_eq!(result.len(), 1);
1115 assert!(result[0].message.contains("missing"));
1116 }
1117
1118 #[test]
1119 fn test_reference_with_special_characters() {
1120 let rule = MD052ReferenceLinkImages::new();
1121 let content = "[text][ref-with-hyphens]\n\n[ref-with-hyphens]: https://example.com";
1122 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1123 let result = rule.check(&ctx).unwrap();
1124
1125 assert_eq!(result.len(), 0);
1126 }
1127
1128 #[test]
1129 fn test_issue_51_html_attribute_not_reference() {
1130 let rule = MD052ReferenceLinkImages::new();
1132 let content = r#"# Example
1133
1134## Test
1135
1136Want to fill out this form?
1137
1138<form method="post">
1139 <input type="email" name="fields[email]" id="drip-email" placeholder="email@domain.com">
1140</form>"#;
1141 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1142 let result = rule.check(&ctx).unwrap();
1143
1144 assert_eq!(
1145 result.len(),
1146 0,
1147 "HTML attributes with square brackets should not be flagged as undefined references"
1148 );
1149 }
1150
1151 #[test]
1152 fn test_extract_references() {
1153 let rule = MD052ReferenceLinkImages::new();
1154 let content = "[ref1]: url1\n[Ref2]: url2\n[REF3]: url3";
1155 let refs = rule.extract_references(content, false);
1156
1157 assert_eq!(refs.len(), 3);
1158 assert!(refs.contains("ref1"));
1159 assert!(refs.contains("ref2"));
1160 assert!(refs.contains("ref3"));
1161 }
1162
1163 #[test]
1164 fn test_inline_code_not_flagged() {
1165 let rule = MD052ReferenceLinkImages::new();
1166
1167 let content = r#"# Test
1169
1170Configure with `["JavaScript", "GitHub", "Node.js"]` in your settings.
1171
1172Also, `[todo]` is not a reference link.
1173
1174But this [reference] should be flagged.
1175
1176And this `[inline code]` should not be flagged.
1177"#;
1178
1179 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1180 let warnings = rule.check(&ctx).unwrap();
1181
1182 assert_eq!(warnings.len(), 1, "Should only flag one undefined reference");
1184 assert!(warnings[0].message.contains("'reference'"));
1185 }
1186
1187 #[test]
1188 fn test_code_block_references_ignored() {
1189 let rule = MD052ReferenceLinkImages::new();
1190
1191 let content = r#"# Test
1192
1193```markdown
1194[undefined] reference in code block
1195![undefined] image in code block
1196```
1197
1198[real-undefined] reference outside
1199"#;
1200
1201 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1202 let warnings = rule.check(&ctx).unwrap();
1203
1204 assert_eq!(warnings.len(), 1);
1206 assert!(warnings[0].message.contains("'real-undefined'"));
1207 }
1208
1209 #[test]
1210 fn test_html_comments_ignored() {
1211 let rule = MD052ReferenceLinkImages::new();
1213
1214 let content = r#"<!--- write fake_editor.py 'import sys\nopen(*sys.argv[1:], mode="wt").write("2 3 4 4 2 3 2")' -->
1216<!--- set_env EDITOR 'python3 fake_editor.py' -->
1217
1218```bash
1219$ python3 vote.py
12203 votes for: 2
12212 votes for: 3, 4
1222```"#;
1223 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1224 let result = rule.check(&ctx).unwrap();
1225 assert_eq!(result.len(), 0, "Should not flag [1:] inside HTML comments");
1226
1227 let content = r#"<!-- This is [ref1] and [ref2][ref3] -->
1229Normal [text][undefined]
1230<!-- Another [comment][with] references -->"#;
1231 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1232 let result = rule.check(&ctx).unwrap();
1233 assert_eq!(
1234 result.len(),
1235 1,
1236 "Should only flag the undefined reference outside comments"
1237 );
1238 assert!(result[0].message.contains("undefined"));
1239
1240 let content = r#"<!--
1242[ref1]
1243[ref2][ref3]
1244-->
1245[actual][undefined]"#;
1246 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1247 let result = rule.check(&ctx).unwrap();
1248 assert_eq!(
1249 result.len(),
1250 1,
1251 "Should not flag references in multi-line HTML comments"
1252 );
1253 assert!(result[0].message.contains("undefined"));
1254
1255 let content = r#"<!-- Comment with [1:] pattern -->
1257Valid [link][ref]
1258<!-- More [refs][in][comments] -->
1259![image][missing]
1260
1261[ref]: https://example.com"#;
1262 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1263 let result = rule.check(&ctx).unwrap();
1264 assert_eq!(result.len(), 1, "Should only flag missing image reference");
1265 assert!(result[0].message.contains("missing"));
1266 }
1267
1268 #[test]
1269 fn test_frontmatter_ignored() {
1270 let rule = MD052ReferenceLinkImages::new();
1272
1273 let content = r#"---
1275layout: post
1276title: "My Jekyll Post"
1277date: 2023-01-01
1278categories: blog
1279tags: ["test", "example"]
1280author: John Doe
1281---
1282
1283# My Blog Post
1284
1285This is the actual markdown content that should be linted.
1286
1287[undefined] reference should be flagged.
1288
1289## Section 1
1290
1291Some content here."#;
1292 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1293 let result = rule.check(&ctx).unwrap();
1294
1295 assert_eq!(
1297 result.len(),
1298 1,
1299 "Should only flag the undefined reference outside frontmatter"
1300 );
1301 assert!(result[0].message.contains("undefined"));
1302
1303 let content = r#"+++
1305title = "My Post"
1306tags = ["example", "test"]
1307+++
1308
1309# Content
1310
1311[missing] reference should be flagged."#;
1312 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1313 let result = rule.check(&ctx).unwrap();
1314 assert_eq!(
1315 result.len(),
1316 1,
1317 "Should only flag the undefined reference outside TOML frontmatter"
1318 );
1319 assert!(result[0].message.contains("missing"));
1320 }
1321
1322 #[test]
1323 fn test_mkdocs_snippet_markers_not_flagged() {
1324 let rule = MD052ReferenceLinkImages::new();
1326
1327 let content = r#"# Document with MkDocs Snippets
1329
1330Some content here.
1331
1332# -8<- [start:remote-content]
1333
1334This is the remote content section.
1335
1336# -8<- [end:remote-content]
1337
1338More content here.
1339
1340<!-- --8<-- [start:another-section] -->
1341Content in another section
1342<!-- --8<-- [end:another-section] -->"#;
1343 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs);
1344 let result = rule.check(&ctx).unwrap();
1345
1346 assert_eq!(
1348 result.len(),
1349 0,
1350 "Should not flag MkDocs snippet markers as undefined references"
1351 );
1352
1353 let content = r#"# Document
1356
1357# -8<- [start:section]
1358Content with [reference] inside snippet section
1359# -8<- [end:section]
1360
1361Regular [undefined] reference outside snippet markers."#;
1362 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MkDocs);
1363 let result = rule.check(&ctx).unwrap();
1364
1365 assert_eq!(
1366 result.len(),
1367 2,
1368 "Should flag undefined references but skip snippet marker lines"
1369 );
1370 assert!(result[0].message.contains("reference"));
1372 assert!(result[1].message.contains("undefined"));
1373
1374 let content = r#"# Document
1376
1377# -8<- [start:section]
1378# -8<- [end:section]"#;
1379 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1380 let result = rule.check(&ctx).unwrap();
1381
1382 assert_eq!(
1383 result.len(),
1384 2,
1385 "In standard mode, snippet markers should be flagged as undefined references"
1386 );
1387 }
1388
1389 #[test]
1390 fn test_github_alerts_not_flagged() {
1391 let rule = MD052ReferenceLinkImages::new();
1393
1394 let content = r#"# Document with GitHub Alerts
1396
1397> [!NOTE]
1398> This is a note alert.
1399
1400> [!TIP]
1401> This is a tip alert.
1402
1403> [!IMPORTANT]
1404> This is an important alert.
1405
1406> [!WARNING]
1407> This is a warning alert.
1408
1409> [!CAUTION]
1410> This is a caution alert.
1411
1412Regular content with [undefined] reference."#;
1413 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1414 let result = rule.check(&ctx).unwrap();
1415
1416 assert_eq!(
1418 result.len(),
1419 1,
1420 "Should only flag the undefined reference, not GitHub alerts"
1421 );
1422 assert!(result[0].message.contains("undefined"));
1423 assert_eq!(result[0].line, 18); let content = r#"> [!TIP]
1427> Here's a useful tip about [something].
1428> Multiple lines are allowed.
1429
1430[something] is mentioned but not defined."#;
1431 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1432 let result = rule.check(&ctx).unwrap();
1433
1434 assert_eq!(result.len(), 1, "Should flag undefined reference");
1438 assert!(result[0].message.contains("something"));
1439
1440 let content = r#"> [!NOTE]
1442> See [reference] for more details.
1443
1444[reference]: https://example.com"#;
1445 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1446 let result = rule.check(&ctx).unwrap();
1447
1448 assert_eq!(result.len(), 0, "Should not flag GitHub alerts or defined references");
1450 }
1451}