1use crate::rule::{CrossFileScope, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::element_cache::ElementCache;
8use crate::workspace_index::{CrossFileLinkIndex, FileIndex};
9use regex::Regex;
10use std::collections::HashMap;
11use std::env;
12use std::path::{Path, PathBuf};
13use std::sync::LazyLock;
14use std::sync::{Arc, Mutex};
15
16mod md057_config;
17use md057_config::MD057Config;
18
/// Process-wide cache of file-existence probes, keyed by the probed path.
///
/// Entries are added by `file_exists_with_cache` and removed in bulk by
/// `reset_file_existence_cache`.
static FILE_EXISTENCE_CACHE: LazyLock<Arc<Mutex<HashMap<PathBuf, bool>>>> = LazyLock::new(|| {
    let empty: HashMap<PathBuf, bool> = HashMap::new();
    Arc::new(Mutex::new(empty))
});
22
23fn reset_file_existence_cache() {
25 if let Ok(mut cache) = FILE_EXISTENCE_CACHE.lock() {
26 cache.clear();
27 }
28}
29
30fn file_exists_with_cache(path: &Path) -> bool {
32 match FILE_EXISTENCE_CACHE.lock() {
33 Ok(mut cache) => *cache.entry(path.to_path_buf()).or_insert_with(|| path.exists()),
34 Err(_) => path.exists(), }
36}
37
38fn file_exists_or_markdown_extension(path: &Path) -> bool {
41 if file_exists_with_cache(path) {
43 return true;
44 }
45
46 if path.extension().is_none() {
48 for ext in MARKDOWN_EXTENSIONS {
49 let path_with_ext = path.with_extension(&ext[1..]);
51 if file_exists_with_cache(&path_with_ext) {
52 return true;
53 }
54 }
55 }
56
57 false
58}
59
60static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());
62
63static URL_EXTRACT_ANGLE_BRACKET_REGEX: LazyLock<Regex> =
67 LazyLock::new(|| Regex::new(r#"\]\(\s*<([^>]+)>(#[^\)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());
68
69static URL_EXTRACT_REGEX: LazyLock<Regex> =
72 LazyLock::new(|| Regex::new("\\]\\(\\s*([^>\\)\\s#]+)(#[^)\\s]*)?\\s*(?:\"[^\"]*\")?\\s*\\)").unwrap());
73
74static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
78 LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());
79
/// The process working directory, captured on first use; `"."` if it cannot be determined.
static CURRENT_DIR: LazyLock<PathBuf> = LazyLock::new(|| match env::current_dir() {
    Ok(dir) => dir,
    Err(_) => PathBuf::from("."),
});
82
/// Converts one ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) to its value
/// in `0..=15`, or returns `None` for any other byte.
#[inline]
fn hex_digit_to_value(byte: u8) -> Option<u8> {
    char::from(byte)
        .to_digit(16)
        .and_then(|digit| u8::try_from(digit).ok())
}
94
/// File extensions treated as Markdown sources.
///
/// Each entry is lowercase and includes the leading dot.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md", ".markdown", ".mdx", ".mkd", ".mkdn", ".mdown", ".mdwn", ".qmd", ".rmd",
];
107
108#[inline]
110fn is_markdown_file(path: &str) -> bool {
111 let path_lower = path.to_lowercase();
112 MARKDOWN_EXTENSIONS.iter().any(|ext| path_lower.ends_with(ext))
113}
114
/// Rule MD057: relative links should point to existing files.
#[derive(Debug, Clone, Default)]
pub struct MD057ExistingRelativeLinks {
    /// Explicit base directory for resolving relative links, set via
    /// `with_path`. When `None`, `check` falls back to the directory of the
    /// file being linted.
    base_path: Arc<Mutex<Option<PathBuf>>>,
}
121
122impl MD057ExistingRelativeLinks {
123 pub fn new() -> Self {
125 Self::default()
126 }
127
128 pub fn with_path<P: AsRef<Path>>(self, path: P) -> Self {
130 let path = path.as_ref();
131 let dir_path = if path.is_file() {
132 path.parent().map(|p| p.to_path_buf())
133 } else {
134 Some(path.to_path_buf())
135 };
136
137 if let Ok(mut guard) = self.base_path.lock() {
138 *guard = dir_path;
139 }
140 self
141 }
142
143 #[allow(unused_variables)]
144 pub fn from_config_struct(config: MD057Config) -> Self {
145 Self::default()
146 }
147
148 #[inline]
159 fn is_external_url(&self, url: &str) -> bool {
160 if url.is_empty() {
161 return false;
162 }
163
164 if PROTOCOL_DOMAIN_REGEX.is_match(url) || url.starts_with("www.") {
166 return true;
167 }
168
169 if url.starts_with("{{") || url.starts_with("{%") {
172 return true;
173 }
174
175 if url.ends_with(".com") {
182 return true;
183 }
184
185 if url.starts_with('/') {
189 return true;
190 }
191
192 if url.starts_with('~') || url.starts_with('@') {
196 return true;
197 }
198
199 false
201 }
202
203 #[inline]
205 fn is_fragment_only_link(&self, url: &str) -> bool {
206 url.starts_with('#')
207 }
208
209 fn url_decode(path: &str) -> String {
213 if !path.contains('%') {
215 return path.to_string();
216 }
217
218 let bytes = path.as_bytes();
219 let mut result = Vec::with_capacity(bytes.len());
220 let mut i = 0;
221
222 while i < bytes.len() {
223 if bytes[i] == b'%' && i + 2 < bytes.len() {
224 let hex1 = bytes[i + 1];
226 let hex2 = bytes[i + 2];
227 if let (Some(d1), Some(d2)) = (hex_digit_to_value(hex1), hex_digit_to_value(hex2)) {
228 result.push(d1 * 16 + d2);
229 i += 3;
230 continue;
231 }
232 }
233 result.push(bytes[i]);
234 i += 1;
235 }
236
237 String::from_utf8(result).unwrap_or_else(|_| path.to_string())
239 }
240
241 fn strip_query_and_fragment(url: &str) -> &str {
249 let query_pos = url.find('?');
252 let fragment_pos = url.find('#');
253
254 match (query_pos, fragment_pos) {
255 (Some(q), Some(f)) => {
256 &url[..q.min(f)]
258 }
259 (Some(q), None) => &url[..q],
260 (None, Some(f)) => &url[..f],
261 (None, None) => url,
262 }
263 }
264
265 fn resolve_link_path_with_base(link: &str, base_path: &Path) -> PathBuf {
267 base_path.join(link)
268 }
269
270 fn process_link_with_base(
272 &self,
273 url: &str,
274 line_num: usize,
275 column: usize,
276 base_path: &Path,
277 warnings: &mut Vec<LintWarning>,
278 ) {
279 if url.is_empty() {
281 return;
282 }
283
284 if self.is_external_url(url) || self.is_fragment_only_link(url) {
286 return;
287 }
288
289 let file_path = Self::strip_query_and_fragment(url);
292
293 let decoded_path = Self::url_decode(file_path);
296
297 let resolved_path = Self::resolve_link_path_with_base(&decoded_path, base_path);
299
300 if file_exists_or_markdown_extension(&resolved_path) {
302 return; }
304
305 if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
308 && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
309 && let (Some(stem), Some(parent)) = (
310 resolved_path.file_stem().and_then(|s| s.to_str()),
311 resolved_path.parent(),
312 )
313 {
314 for md_ext in MARKDOWN_EXTENSIONS {
315 let source_path = parent.join(format!("{stem}{md_ext}"));
316 if file_exists_with_cache(&source_path) {
317 return; }
319 }
320 }
321
322 warnings.push(LintWarning {
324 rule_name: Some(self.name().to_string()),
325 line: line_num,
326 column,
327 end_line: line_num,
328 end_column: column + url.len(),
329 message: format!("Relative link '{url}' does not exist"),
330 severity: Severity::Warning,
331 fix: None,
332 });
333 }
334}
335
336impl Rule for MD057ExistingRelativeLinks {
    /// Returns the rule identifier, `"MD057"`.
    fn name(&self) -> &'static str {
        "MD057"
    }
340
    /// Returns the one-line description of the rule.
    fn description(&self) -> &'static str {
        "Relative links should point to existing files"
    }
344
    /// Returns `RuleCategory::Link` as this rule's category.
    fn category(&self) -> RuleCategory {
        RuleCategory::Link
    }
348
349 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
350 ctx.content.is_empty() || !ctx.likely_has_links_or_images()
351 }
352
353 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
354 let content = ctx.content;
355
356 if content.is_empty() || !content.contains('[') {
358 return Ok(Vec::new());
359 }
360
361 if !content.contains("](") {
363 return Ok(Vec::new());
364 }
365
366 reset_file_existence_cache();
368
369 let mut warnings = Vec::new();
370
371 let base_path: Option<PathBuf> = {
375 let explicit_base = self.base_path.lock().ok().and_then(|g| g.clone());
377 if explicit_base.is_some() {
378 explicit_base
379 } else if let Some(ref source_file) = ctx.source_file {
380 let resolved_file = source_file.canonicalize().unwrap_or_else(|_| source_file.clone());
384 resolved_file
385 .parent()
386 .map(|p| p.to_path_buf())
387 .or_else(|| Some(CURRENT_DIR.clone()))
388 } else {
389 None
391 }
392 };
393
394 let Some(base_path) = base_path else {
396 return Ok(warnings);
397 };
398
399 if !ctx.links.is_empty() {
401 let line_index = &ctx.line_index;
403
404 let element_cache = ElementCache::new(content);
406
407 let lines: Vec<&str> = content.lines().collect();
409
410 for link in &ctx.links {
411 let line_idx = link.line - 1;
412 if line_idx >= lines.len() {
413 continue;
414 }
415
416 let line = lines[line_idx];
417
418 if !line.contains("](") {
420 continue;
421 }
422
423 for link_match in LINK_START_REGEX.find_iter(line) {
425 let start_pos = link_match.start();
426 let end_pos = link_match.end();
427
428 let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
430 let absolute_start_pos = line_start_byte + start_pos;
431
432 if element_cache.is_in_code_span(absolute_start_pos) {
434 continue;
435 }
436
437 let caps_and_url = URL_EXTRACT_ANGLE_BRACKET_REGEX
441 .captures_at(line, end_pos - 1)
442 .and_then(|caps| caps.get(1).map(|g| (caps, g)))
443 .or_else(|| {
444 URL_EXTRACT_REGEX
445 .captures_at(line, end_pos - 1)
446 .and_then(|caps| caps.get(1).map(|g| (caps, g)))
447 });
448
449 if let Some((_caps, url_group)) = caps_and_url {
450 let url = url_group.as_str().trim();
451
452 let column = start_pos + 1;
454
455 self.process_link_with_base(url, link.line, column, &base_path, &mut warnings);
457 }
458 }
459 }
460 }
461
462 for image in &ctx.images {
464 let url = image.url.as_ref();
465 self.process_link_with_base(url, image.line, image.start_col + 1, &base_path, &mut warnings);
466 }
467
468 Ok(warnings)
469 }
470
471 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
472 Ok(ctx.content.to_string())
473 }
474
    /// Exposes the rule as `&dyn Any`.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
478
    /// Returns `None`: this rule provides no default configuration section.
    fn default_config_section(&self) -> Option<(String, toml::Value)> {
        None
    }
483
484 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
485 where
486 Self: Sized,
487 {
488 let rule_config = crate::rule_config_serde::load_rule_config::<MD057Config>(config);
489 Box::new(Self::from_config_struct(rule_config))
490 }
491
    /// Returns `CrossFileScope::Workspace` as this rule's cross-file scope.
    fn cross_file_scope(&self) -> CrossFileScope {
        CrossFileScope::Workspace
    }
495
496 fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, index: &mut FileIndex) {
497 let content = ctx.content;
498
499 if content.is_empty() || !content.contains("](") {
501 return;
502 }
503
504 let lines: Vec<&str> = content.lines().collect();
506 let element_cache = ElementCache::new(content);
507 let line_index = &ctx.line_index;
508
509 for link in &ctx.links {
510 let line_idx = link.line - 1;
511 if line_idx >= lines.len() {
512 continue;
513 }
514
515 let line = lines[line_idx];
516 if !line.contains("](") {
517 continue;
518 }
519
520 for link_match in LINK_START_REGEX.find_iter(line) {
522 let start_pos = link_match.start();
523 let end_pos = link_match.end();
524
525 let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
527 let absolute_start_pos = line_start_byte + start_pos;
528
529 if element_cache.is_in_code_span(absolute_start_pos) {
531 continue;
532 }
533
534 let caps_result = URL_EXTRACT_ANGLE_BRACKET_REGEX
538 .captures_at(line, end_pos - 1)
539 .or_else(|| URL_EXTRACT_REGEX.captures_at(line, end_pos - 1));
540
541 if let Some(caps) = caps_result
542 && let Some(url_group) = caps.get(1)
543 {
544 let file_path = url_group.as_str().trim();
545
546 if file_path.is_empty()
549 || PROTOCOL_DOMAIN_REGEX.is_match(file_path)
550 || file_path.starts_with("www.")
551 || file_path.starts_with('#')
552 || file_path.starts_with("{{")
553 || file_path.starts_with("{%")
554 || file_path.starts_with('/')
555 || file_path.starts_with('~')
556 || file_path.starts_with('@')
557 {
558 continue;
559 }
560
561 let file_path = Self::strip_query_and_fragment(file_path);
563
564 let fragment = caps.get(2).map(|m| m.as_str().trim_start_matches('#')).unwrap_or("");
566
567 if is_markdown_file(file_path) {
570 index.add_cross_file_link(CrossFileLinkIndex {
571 target_path: file_path.to_string(),
572 fragment: fragment.to_string(),
573 line: link.line,
574 column: start_pos + 1,
575 });
576 }
577 }
578 }
579 }
580 }
581
582 fn cross_file_check(
583 &self,
584 file_path: &Path,
585 file_index: &FileIndex,
586 workspace_index: &crate::workspace_index::WorkspaceIndex,
587 ) -> LintResult {
588 let mut warnings = Vec::new();
589
590 let file_dir = file_path.parent();
592
593 for cross_link in &file_index.cross_file_links {
594 let decoded_target = Self::url_decode(&cross_link.target_path);
597
598 let target_path = if decoded_target.starts_with('/') {
600 let stripped = decoded_target.trim_start_matches('/');
603 resolve_absolute_link(file_path, stripped)
604 } else if let Some(dir) = file_dir {
605 dir.join(&decoded_target)
606 } else {
607 Path::new(&decoded_target).to_path_buf()
608 };
609
610 let target_path = normalize_path(&target_path);
612
613 let file_exists = workspace_index.contains_file(&target_path) || target_path.exists();
615
616 if !file_exists {
617 let has_md_source = if let Some(ext) = target_path.extension().and_then(|e| e.to_str())
620 && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
621 && let (Some(stem), Some(parent)) =
622 (target_path.file_stem().and_then(|s| s.to_str()), target_path.parent())
623 {
624 MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
625 let source_path = parent.join(format!("{stem}{md_ext}"));
626 workspace_index.contains_file(&source_path) || source_path.exists()
627 })
628 } else {
629 false
630 };
631
632 if !has_md_source {
633 warnings.push(LintWarning {
634 rule_name: Some(self.name().to_string()),
635 line: cross_link.line,
636 column: cross_link.column,
637 end_line: cross_link.line,
638 end_column: cross_link.column + cross_link.target_path.len(),
639 message: format!("Relative link '{}' does not exist", cross_link.target_path),
640 severity: Severity::Warning,
641 fix: None,
642 });
643 }
644 }
645 }
646
647 Ok(warnings)
648 }
649}
650
/// Lexically normalizes `path` by removing `.` components and resolving
/// `..` against the preceding component. The filesystem is not consulted.
///
/// * `a/b/../c` becomes `a/c`.
/// * A `..` with nothing to cancel is kept, so `a/../../b` becomes `../b`
///   rather than silently pointing at `b`.
/// * A `..` directly under the root (or a path prefix) is dropped, so
///   `/../a` becomes `/a` and an absolute path never turns relative.
fn normalize_path(path: &Path) -> PathBuf {
    let mut components: Vec<std::path::Component> = Vec::new();

    for component in path.components() {
        match component {
            std::path::Component::CurDir => {}
            std::path::Component::ParentDir => match components.last() {
                // `dir/..` cancels out.
                Some(std::path::Component::Normal(_)) => {
                    components.pop();
                }
                // Nothing lies above the root; ignore the `..`.
                Some(std::path::Component::RootDir | std::path::Component::Prefix(_)) => {}
                // Leading `..` (possibly repeated) must be preserved.
                Some(std::path::Component::ParentDir | std::path::Component::CurDir) | None => {
                    components.push(component);
                }
            },
            _ => components.push(component),
        }
    }

    components.into_iter().collect()
}
674
/// Resolves a root-relative link by searching upward from the linking file.
///
/// Starting at the directory containing `file_path` and walking up through
/// each ancestor, returns the first `ancestor/stripped_path` that exists on
/// disk. If none exists, returns `stripped_path` joined onto the file's own
/// directory, or `stripped_path` alone when `file_path` has no parent.
fn resolve_absolute_link(file_path: &Path, stripped_path: &str) -> PathBuf {
    // `ancestors()` yields the path itself first; skip it to start at the parent.
    let found = file_path
        .ancestors()
        .skip(1)
        .map(|dir| dir.join(stripped_path))
        .find(|candidate| candidate.exists());

    match (found, file_path.parent()) {
        (Some(existing), _) => existing,
        (None, Some(dir)) => dir.join(stripped_path),
        (None, None) => PathBuf::from(stripped_path),
    }
}
698
699#[cfg(test)]
700mod tests {
701 use super::*;
702 use std::fs::File;
703 use std::io::Write;
704 use tempfile::tempdir;
705
706 #[test]
707 fn test_strip_query_and_fragment() {
708 assert_eq!(
710 MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true"),
711 "file.png"
712 );
713 assert_eq!(
714 MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true&version=1"),
715 "file.png"
716 );
717 assert_eq!(
718 MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?"),
719 "file.png"
720 );
721
722 assert_eq!(
724 MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section"),
725 "file.md"
726 );
727 assert_eq!(
728 MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#"),
729 "file.md"
730 );
731
732 assert_eq!(
734 MD057ExistingRelativeLinks::strip_query_and_fragment("file.md?raw=true#section"),
735 "file.md"
736 );
737
738 assert_eq!(
740 MD057ExistingRelativeLinks::strip_query_and_fragment("file.png"),
741 "file.png"
742 );
743
744 assert_eq!(
746 MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true"),
747 "path/to/image.png"
748 );
749 assert_eq!(
750 MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true#anchor"),
751 "path/to/image.png"
752 );
753
754 assert_eq!(
756 MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section?query"),
757 "file.md"
758 );
759 }
760
761 #[test]
762 fn test_url_decode() {
763 assert_eq!(
765 MD057ExistingRelativeLinks::url_decode("penguin%20with%20space.jpg"),
766 "penguin with space.jpg"
767 );
768
769 assert_eq!(
771 MD057ExistingRelativeLinks::url_decode("assets/my%20file%20name.png"),
772 "assets/my file name.png"
773 );
774
775 assert_eq!(
777 MD057ExistingRelativeLinks::url_decode("hello%20world%21.md"),
778 "hello world!.md"
779 );
780
781 assert_eq!(MD057ExistingRelativeLinks::url_decode("%2f%2e%2e"), "/..");
783
784 assert_eq!(MD057ExistingRelativeLinks::url_decode("%2F%2E%2E"), "/..");
786
787 assert_eq!(MD057ExistingRelativeLinks::url_decode("%2f%2E%2e"), "/..");
789
790 assert_eq!(
792 MD057ExistingRelativeLinks::url_decode("normal-file.md"),
793 "normal-file.md"
794 );
795
796 assert_eq!(MD057ExistingRelativeLinks::url_decode("file%2.txt"), "file%2.txt");
798
799 assert_eq!(MD057ExistingRelativeLinks::url_decode("file%"), "file%");
801
802 assert_eq!(MD057ExistingRelativeLinks::url_decode("file%GG.txt"), "file%GG.txt");
804
805 assert_eq!(MD057ExistingRelativeLinks::url_decode("file+name.txt"), "file+name.txt");
807
808 assert_eq!(MD057ExistingRelativeLinks::url_decode(""), "");
810
811 assert_eq!(MD057ExistingRelativeLinks::url_decode("caf%C3%A9.md"), "café.md");
813
814 assert_eq!(MD057ExistingRelativeLinks::url_decode("%20%20%20"), " ");
816
817 assert_eq!(
819 MD057ExistingRelativeLinks::url_decode("path%2Fto%2Ffile.md"),
820 "path/to/file.md"
821 );
822
823 assert_eq!(
825 MD057ExistingRelativeLinks::url_decode("hello%20world/foo%20bar.md"),
826 "hello world/foo bar.md"
827 );
828
829 assert_eq!(MD057ExistingRelativeLinks::url_decode("file%5B1%5D.md"), "file[1].md");
831
832 assert_eq!(MD057ExistingRelativeLinks::url_decode("100%pure.md"), "100%pure.md");
834 }
835
836 #[test]
837 fn test_url_encoded_filenames() {
838 let temp_dir = tempdir().unwrap();
840 let base_path = temp_dir.path();
841
842 let file_with_spaces = base_path.join("penguin with space.jpg");
844 File::create(&file_with_spaces)
845 .unwrap()
846 .write_all(b"image data")
847 .unwrap();
848
849 let subdir = base_path.join("my images");
851 std::fs::create_dir(&subdir).unwrap();
852 let nested_file = subdir.join("photo 1.png");
853 File::create(&nested_file).unwrap().write_all(b"photo data").unwrap();
854
855 let content = r#"
857# Test Document with URL-Encoded Links
858
859
860
861
862"#;
863
864 let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
865
866 let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
867 let result = rule.check(&ctx).unwrap();
868
869 assert_eq!(
871 result.len(),
872 1,
873 "Should only warn about missing%20file.jpg. Got: {result:?}"
874 );
875 assert!(
876 result[0].message.contains("missing%20file.jpg"),
877 "Warning should mention the URL-encoded filename"
878 );
879 }
880
881 #[test]
882 fn test_external_urls() {
883 let rule = MD057ExistingRelativeLinks::new();
884
885 assert!(rule.is_external_url("https://example.com"));
887 assert!(rule.is_external_url("http://example.com"));
888 assert!(rule.is_external_url("ftp://example.com"));
889 assert!(rule.is_external_url("www.example.com"));
890 assert!(rule.is_external_url("example.com"));
891
892 assert!(rule.is_external_url("file:///path/to/file"));
894 assert!(rule.is_external_url("smb://server/share"));
895 assert!(rule.is_external_url("macappstores://apps.apple.com/"));
896 assert!(rule.is_external_url("mailto:user@example.com"));
897 assert!(rule.is_external_url("tel:+1234567890"));
898 assert!(rule.is_external_url("data:text/plain;base64,SGVsbG8="));
899 assert!(rule.is_external_url("javascript:void(0)"));
900 assert!(rule.is_external_url("ssh://git@github.com/repo"));
901 assert!(rule.is_external_url("git://github.com/repo.git"));
902
903 assert!(rule.is_external_url("{{URL}}")); assert!(rule.is_external_url("{{#URL}}")); assert!(rule.is_external_url("{{> partial}}")); assert!(rule.is_external_url("{{ variable }}")); assert!(rule.is_external_url("{{% include %}}")); assert!(rule.is_external_url("{{")); assert!(rule.is_external_url("/api/v1/users"));
914 assert!(rule.is_external_url("/blog/2024/release.html"));
915 assert!(rule.is_external_url("/react/hooks/use-state.html"));
916 assert!(rule.is_external_url("/pkg/runtime"));
917 assert!(rule.is_external_url("/doc/go1compat"));
918 assert!(rule.is_external_url("/index.html"));
919 assert!(rule.is_external_url("/assets/logo.png"));
920
921 assert!(rule.is_external_url("~/assets/image.png"));
924 assert!(rule.is_external_url("~/components/Button.vue"));
925 assert!(rule.is_external_url("~assets/logo.svg")); assert!(rule.is_external_url("@/components/Header.vue"));
929 assert!(rule.is_external_url("@images/photo.jpg"));
930 assert!(rule.is_external_url("@assets/styles.css"));
931
932 assert!(!rule.is_external_url("./relative/path.md"));
934 assert!(!rule.is_external_url("relative/path.md"));
935 assert!(!rule.is_external_url("../parent/path.md"));
936 }
937
938 #[test]
939 fn test_framework_path_aliases() {
940 let temp_dir = tempdir().unwrap();
942 let base_path = temp_dir.path();
943
944 let content = r#"
946# Framework Path Aliases
947
948
949
950
951
952[Link](@/pages/about.md)
953
954This is a [real missing link](missing.md) that should be flagged.
955"#;
956
957 let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
958
959 let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
960 let result = rule.check(&ctx).unwrap();
961
962 assert_eq!(
964 result.len(),
965 1,
966 "Should only warn about missing.md, not framework aliases. Got: {result:?}"
967 );
968 assert!(
969 result[0].message.contains("missing.md"),
970 "Warning should be for missing.md"
971 );
972 }
973
974 #[test]
975 fn test_url_decode_security_path_traversal() {
976 let temp_dir = tempdir().unwrap();
979 let base_path = temp_dir.path();
980
981 let file_in_base = base_path.join("safe.md");
983 File::create(&file_in_base).unwrap().write_all(b"# Safe").unwrap();
984
985 let content = r#"
990[Traversal attempt](..%2F..%2Fnonexistent_dir_12345%2Fmissing.md)
991[Double encoded](..%252F..%252Fnonexistent%252Ffile.md)
992[Safe link](safe.md)
993"#;
994
995 let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
996
997 let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
998 let result = rule.check(&ctx).unwrap();
999
1000 assert_eq!(
1003 result.len(),
1004 2,
1005 "Should have warnings for traversal attempts. Got: {result:?}"
1006 );
1007 }
1008
1009 #[test]
1010 fn test_url_encoded_utf8_filenames() {
1011 let temp_dir = tempdir().unwrap();
1013 let base_path = temp_dir.path();
1014
1015 let cafe_file = base_path.join("café.md");
1017 File::create(&cafe_file).unwrap().write_all(b"# Cafe").unwrap();
1018
1019 let content = r#"
1020[Café link](caf%C3%A9.md)
1021[Missing unicode](r%C3%A9sum%C3%A9.md)
1022"#;
1023
1024 let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1025
1026 let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1027 let result = rule.check(&ctx).unwrap();
1028
1029 assert_eq!(
1031 result.len(),
1032 1,
1033 "Should only warn about missing résumé.md. Got: {result:?}"
1034 );
1035 assert!(
1036 result[0].message.contains("r%C3%A9sum%C3%A9.md"),
1037 "Warning should mention the URL-encoded filename"
1038 );
1039 }
1040
1041 #[test]
1042 fn test_url_encoded_emoji_filenames() {
1043 let temp_dir = tempdir().unwrap();
1046 let base_path = temp_dir.path();
1047
1048 let emoji_dir = base_path.join("👤 Personal");
1050 std::fs::create_dir(&emoji_dir).unwrap();
1051
1052 let file_path = emoji_dir.join("TV Shows.md");
1054 File::create(&file_path)
1055 .unwrap()
1056 .write_all(b"# TV Shows\n\nContent here.")
1057 .unwrap();
1058
1059 let content = r#"
1062# Test Document
1063
1064[TV Shows](./%F0%9F%91%A4%20Personal/TV%20Shows.md)
1065[Missing](./%F0%9F%91%A4%20Personal/Missing.md)
1066"#;
1067
1068 let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1069
1070 let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1071 let result = rule.check(&ctx).unwrap();
1072
1073 assert_eq!(result.len(), 1, "Should only warn about missing file. Got: {result:?}");
1075 assert!(
1076 result[0].message.contains("Missing.md"),
1077 "Warning should be for Missing.md, got: {}",
1078 result[0].message
1079 );
1080 }
1081
1082 #[test]
1083 fn test_no_warnings_without_base_path() {
1084 let rule = MD057ExistingRelativeLinks::new();
1085 let content = "[Link](missing.md)";
1086
1087 let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1088 let result = rule.check(&ctx).unwrap();
1089 assert!(result.is_empty(), "Should have no warnings without base path");
1090 }
1091
1092 #[test]
1093 fn test_existing_and_missing_links() {
1094 let temp_dir = tempdir().unwrap();
1096 let base_path = temp_dir.path();
1097
1098 let exists_path = base_path.join("exists.md");
1100 File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
1101
1102 assert!(exists_path.exists(), "exists.md should exist for this test");
1104
1105 let content = r#"
1107# Test Document
1108
1109[Valid Link](exists.md)
1110[Invalid Link](missing.md)
1111[External Link](https://example.com)
1112[Media Link](image.jpg)
1113 "#;
1114
1115 let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1117
1118 let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1120 let result = rule.check(&ctx).unwrap();
1121
1122 assert_eq!(result.len(), 2);
1124 let messages: Vec<_> = result.iter().map(|w| w.message.as_str()).collect();
1125 assert!(messages.iter().any(|m| m.contains("missing.md")));
1126 assert!(messages.iter().any(|m| m.contains("image.jpg")));
1127 }
1128
1129 #[test]
1130 fn test_angle_bracket_links() {
1131 let temp_dir = tempdir().unwrap();
1133 let base_path = temp_dir.path();
1134
1135 let exists_path = base_path.join("exists.md");
1137 File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
1138
1139 let content = r#"
1141# Test Document
1142
1143[Valid Link](<exists.md>)
1144[Invalid Link](<missing.md>)
1145[External Link](<https://example.com>)
1146 "#;
1147
1148 let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1150
1151 let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1152 let result = rule.check(&ctx).unwrap();
1153
1154 assert_eq!(result.len(), 1, "Should have exactly one warning");
1156 assert!(
1157 result[0].message.contains("missing.md"),
1158 "Warning should mention missing.md"
1159 );
1160 }
1161
1162 #[test]
1163 fn test_angle_bracket_links_with_parens() {
1164 let temp_dir = tempdir().unwrap();
1166 let base_path = temp_dir.path();
1167
1168 let app_dir = base_path.join("app");
1170 std::fs::create_dir(&app_dir).unwrap();
1171 let upload_dir = app_dir.join("(upload)");
1172 std::fs::create_dir(&upload_dir).unwrap();
1173 let page_file = upload_dir.join("page.tsx");
1174 File::create(&page_file)
1175 .unwrap()
1176 .write_all(b"export default function Page() {}")
1177 .unwrap();
1178
1179 let content = r#"
1181# Test Document with Paths Containing Parens
1182
1183[Upload Page](<app/(upload)/page.tsx>)
1184[Unix pipe](<https://en.wikipedia.org/wiki/Pipeline_(Unix)>)
1185[Missing](<app/(missing)/file.md>)
1186"#;
1187
1188 let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1189
1190 let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1191 let result = rule.check(&ctx).unwrap();
1192
1193 assert_eq!(
1195 result.len(),
1196 1,
1197 "Should have exactly one warning for missing file. Got: {result:?}"
1198 );
1199 assert!(
1200 result[0].message.contains("app/(missing)/file.md"),
1201 "Warning should mention app/(missing)/file.md"
1202 );
1203 }
1204
1205 #[test]
1206 fn test_all_file_types_checked() {
1207 let temp_dir = tempdir().unwrap();
1209 let base_path = temp_dir.path();
1210
1211 let content = r#"
1213[Image Link](image.jpg)
1214[Video Link](video.mp4)
1215[Markdown Link](document.md)
1216[PDF Link](file.pdf)
1217"#;
1218
1219 let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1220
1221 let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1222 let result = rule.check(&ctx).unwrap();
1223
1224 assert_eq!(result.len(), 4, "Should have warnings for all missing files");
1226 }
1227
1228 #[test]
1229 fn test_code_span_detection() {
1230 let rule = MD057ExistingRelativeLinks::new();
1231
1232 let temp_dir = tempdir().unwrap();
1234 let base_path = temp_dir.path();
1235
1236 let rule = rule.with_path(base_path);
1237
1238 let content = "This is a [link](nonexistent.md) and `[not a link](not-checked.md)` in code.";
1240
1241 let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1242 let result = rule.check(&ctx).unwrap();
1243
1244 assert_eq!(result.len(), 1, "Should only flag the real link");
1246 assert!(result[0].message.contains("nonexistent.md"));
1247 }
1248
1249 #[test]
1250 fn test_inline_code_spans() {
1251 let temp_dir = tempdir().unwrap();
1253 let base_path = temp_dir.path();
1254
1255 let content = r#"
1257# Test Document
1258
1259This is a normal link: [Link](missing.md)
1260
1261This is a code span with a link: `[Link](another-missing.md)`
1262
1263Some more text with `inline code [Link](yet-another-missing.md) embedded`.
1264
1265 "#;
1266
1267 let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1269
1270 let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1272 let result = rule.check(&ctx).unwrap();
1273
1274 assert_eq!(result.len(), 1, "Should have exactly one warning");
1276 assert!(
1277 result[0].message.contains("missing.md"),
1278 "Warning should be for missing.md"
1279 );
1280 assert!(
1281 !result.iter().any(|w| w.message.contains("another-missing.md")),
1282 "Should not warn about link in code span"
1283 );
1284 assert!(
1285 !result.iter().any(|w| w.message.contains("yet-another-missing.md")),
1286 "Should not warn about link in inline code"
1287 );
1288 }
1289
1290 #[test]
1291 fn test_extensionless_link_resolution() {
1292 let temp_dir = tempdir().unwrap();
1294 let base_path = temp_dir.path();
1295
1296 let page_path = base_path.join("page.md");
1298 File::create(&page_path).unwrap().write_all(b"# Page").unwrap();
1299
1300 let content = r#"
1302# Test Document
1303
1304[Link without extension](page)
1305[Link with extension](page.md)
1306[Missing link](nonexistent)
1307"#;
1308
1309 let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1310
1311 let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1312 let result = rule.check(&ctx).unwrap();
1313
1314 assert_eq!(result.len(), 1, "Should only warn about nonexistent link");
1317 assert!(
1318 result[0].message.contains("nonexistent"),
1319 "Warning should be for 'nonexistent' not 'page'"
1320 );
1321 }
1322
1323 #[test]
1325 fn test_cross_file_scope() {
1326 let rule = MD057ExistingRelativeLinks::new();
1327 assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
1328 }
1329
/// `contribute_to_index` is expected to record exactly two cross-file links
/// from the fixture: the two `.md` targets, with the fragment split off from
/// the path.
#[test]
fn test_contribute_to_index_extracts_markdown_links() {
    let markdown = r#"
# Document

[Link to docs](./docs/guide.md)
[Link with fragment](./other.md#section)
[External link](https://example.com)
[Image link](image.png)
[Media file](video.mp4)
"#;

    let flavor = crate::config::MarkdownFlavor::Standard;
    let ctx = crate::lint_context::LintContext::new(markdown, flavor, None);

    let linter = MD057ExistingRelativeLinks::new();
    let mut collected = FileIndex::new();
    linter.contribute_to_index(&ctx, &mut collected);

    let links = &collected.cross_file_links;
    assert_eq!(links.len(), 2);

    // First entry: plain path, empty fragment.
    assert_eq!(links[0].target_path, "./docs/guide.md");
    assert_eq!(links[0].fragment, "");

    // Second entry: path and fragment stored separately (no leading '#').
    assert_eq!(links[1].target_path, "./other.md");
    assert_eq!(links[1].fragment, "section");
}
1358
/// None of the links in this fixture (http/https/ftp/mailto/www targets and a
/// fragment-only link) may end up in the cross-file link index.
#[test]
fn test_contribute_to_index_skips_external_and_anchors() {
    let markdown = r#"
# Document

[External](https://example.com)
[Another external](http://example.org)
[Fragment only](#section)
[FTP link](ftp://files.example.com)
[Mail link](mailto:test@example.com)
[WWW link](www.example.com)
"#;

    let flavor = crate::config::MarkdownFlavor::Standard;
    let ctx = crate::lint_context::LintContext::new(markdown, flavor, None);

    let linter = MD057ExistingRelativeLinks::new();
    let mut collected = FileIndex::new();
    linter.contribute_to_index(&ctx, &mut collected);

    assert_eq!(collected.cross_file_links.len(), 0);
}
1380
/// A link to `guide.md` from `docs/index.md` must produce no warnings when the
/// workspace index contains `docs/guide.md`.
#[test]
fn test_cross_file_check_valid_link() {
    use crate::workspace_index::WorkspaceIndex;

    // Workspace index with the link target registered.
    let mut workspace = WorkspaceIndex::new();
    workspace.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());

    // Index of the linking file, holding a single outgoing link.
    let outgoing = CrossFileLinkIndex {
        target_path: String::from("guide.md"),
        fragment: String::new(),
        line: 5,
        column: 1,
    };
    let mut source_index = FileIndex::new();
    source_index.add_cross_file_link(outgoing);

    let linter = MD057ExistingRelativeLinks::new();
    let source_file = Path::new("docs/index.md");
    let warnings = linter
        .cross_file_check(source_file, &source_index, &workspace)
        .unwrap();

    assert!(warnings.is_empty());
}
1408
/// With an empty workspace index, a link to `missing.md` must yield exactly
/// one warning that names the target and says it does not exist.
#[test]
fn test_cross_file_check_missing_link() {
    use crate::workspace_index::WorkspaceIndex;

    // Nothing is registered in the workspace.
    let workspace = WorkspaceIndex::new();

    let outgoing = CrossFileLinkIndex {
        target_path: String::from("missing.md"),
        fragment: String::new(),
        line: 5,
        column: 1,
    };
    let mut source_index = FileIndex::new();
    source_index.add_cross_file_link(outgoing);

    let linter = MD057ExistingRelativeLinks::new();
    let source_file = Path::new("docs/index.md");
    let warnings = linter
        .cross_file_check(source_file, &source_index, &workspace)
        .unwrap();

    assert_eq!(warnings.len(), 1);
    let text = &warnings[0].message;
    assert!(text.contains("missing.md"));
    assert!(text.contains("does not exist"));
}
1437
/// A `../readme.md` link from `docs/guide.md` must produce no warnings when
/// the workspace index contains `readme.md`.
#[test]
fn test_cross_file_check_parent_path() {
    use crate::workspace_index::WorkspaceIndex;

    // Target sits one level above the linking file.
    let mut workspace = WorkspaceIndex::new();
    workspace.insert_file(PathBuf::from("readme.md"), FileIndex::new());

    let outgoing = CrossFileLinkIndex {
        target_path: String::from("../readme.md"),
        fragment: String::new(),
        line: 5,
        column: 1,
    };
    let mut source_index = FileIndex::new();
    source_index.add_cross_file_link(outgoing);

    let linter = MD057ExistingRelativeLinks::new();
    let source_file = Path::new("docs/guide.md");
    let warnings = linter
        .cross_file_check(source_file, &source_index, &workspace)
        .unwrap();

    assert!(warnings.is_empty());
}
1465
/// A link to `guide.html` (with a fragment) must produce no warnings when the
/// workspace index contains `docs/guide.md` next to the linking file.
#[test]
fn test_cross_file_check_html_link_with_md_source() {
    use crate::workspace_index::WorkspaceIndex;

    // Only the Markdown file is registered; no `.html` entry exists.
    let mut workspace = WorkspaceIndex::new();
    workspace.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());

    let outgoing = CrossFileLinkIndex {
        target_path: String::from("guide.html"),
        fragment: String::from("section"),
        line: 10,
        column: 5,
    };
    let mut source_index = FileIndex::new();
    source_index.add_cross_file_link(outgoing);

    let linter = MD057ExistingRelativeLinks::new();
    let source_file = Path::new("docs/index.md");
    let warnings = linter
        .cross_file_check(source_file, &source_index, &workspace)
        .unwrap();

    assert!(
        warnings.is_empty(),
        "Expected no warnings for .html link with .md source, got: {warnings:?}"
    );
}
1498
/// With an empty workspace index, a link to `missing.html` must yield exactly
/// one warning that names the target.
#[test]
fn test_cross_file_check_html_link_without_source() {
    use crate::workspace_index::WorkspaceIndex;

    // Nothing is registered in the workspace.
    let workspace = WorkspaceIndex::new();

    let outgoing = CrossFileLinkIndex {
        target_path: String::from("missing.html"),
        fragment: String::new(),
        line: 10,
        column: 5,
    };
    let mut source_index = FileIndex::new();
    source_index.add_cross_file_link(outgoing);

    let linter = MD057ExistingRelativeLinks::new();
    let source_file = Path::new("docs/index.md");
    let warnings = linter
        .cross_file_check(source_file, &source_index, &workspace)
        .unwrap();

    assert_eq!(warnings.len(), 1, "Expected 1 warning for .html link without source");
    let text = &warnings[0].message;
    assert!(text.contains("missing.html"));
}
1527
/// Table-driven check of `normalize_path`: plain paths are unchanged, a
/// leading `./` is dropped, and `..` components cancel the preceding one.
#[test]
fn test_normalize_path_function() {
    // (input, expected) pairs, checked in order.
    let cases = [
        ("docs/guide.md", "docs/guide.md"),
        ("./docs/guide.md", "docs/guide.md"),
        ("docs/sub/../guide.md", "docs/guide.md"),
        ("a/b/c/../../d.md", "a/d.md"),
    ];

    for (input, expected) in cases {
        assert_eq!(normalize_path(Path::new(input)), PathBuf::from(expected));
    }
}
1551
/// Builds `<tmp>/CONTRIBUTING.md` and `<tmp>/docs/README.md`, then checks that
/// `resolve_absolute_link` called from the README yields the existing
/// CONTRIBUTING path, and a non-existing path for an unknown file name.
#[test]
fn test_resolve_absolute_link() {
    let workspace = tempdir().expect("Failed to create temp dir");
    let workspace_root = workspace.path();

    // File at the top of the temporary tree.
    let contributing = workspace_root.join("CONTRIBUTING.md");
    File::create(&contributing).expect("Failed to create CONTRIBUTING.md");

    // Linking file one directory below.
    let docs_dir = workspace_root.join("docs");
    std::fs::create_dir(&docs_dir).expect("Failed to create docs dir");
    let readme = docs_dir.join("README.md");
    File::create(&readme).expect("Failed to create README.md");

    let found = resolve_absolute_link(&readme, "CONTRIBUTING.md");
    assert!(found.exists(), "Should find CONTRIBUTING.md at workspace root");
    assert_eq!(found, contributing);

    let missing = resolve_absolute_link(&readme, "NONEXISTENT.md");
    assert!(!missing.exists(), "Should not find nonexistent file");
}
1578
/// With only `guide.md` on disk, the `guide.html` link must be accepted and
/// the `getting-started.html` link must be the single reported warning.
#[test]
fn test_html_link_with_md_source() {
    let scratch = tempdir().unwrap();
    let root = scratch.path();

    // Only the Markdown file exists; no `.html` file is created.
    File::create(root.join("guide.md"))
        .unwrap()
        .write_all(b"# Guide")
        .unwrap();

    let markdown = r#"
[Read the guide](guide.html)
[Also here](getting-started.html)
"#;

    let flavor = crate::config::MarkdownFlavor::Standard;
    let ctx = crate::lint_context::LintContext::new(markdown, flavor, None);
    let linter = MD057ExistingRelativeLinks::new().with_path(root);
    let result = linter.check(&ctx).unwrap();

    assert_eq!(
        result.len(),
        1,
        "Should only warn about missing source. Got: {result:?}"
    );
    let only = &result[0];
    assert!(only.message.contains("getting-started.html"));
}
1606
/// With `page.md` on disk, a link to `page.htm` must produce no warnings.
#[test]
fn test_htm_link_with_md_source() {
    let scratch = tempdir().unwrap();
    let root = scratch.path();

    // Only the Markdown file exists; no `.htm` file is created.
    File::create(root.join("page.md"))
        .unwrap()
        .write_all(b"# Page")
        .unwrap();

    let markdown = "[Page](page.htm)";

    let flavor = crate::config::MarkdownFlavor::Standard;
    let ctx = crate::lint_context::LintContext::new(markdown, flavor, None);
    let linter = MD057ExistingRelativeLinks::new().with_path(root);
    let warnings = linter.check(&ctx).unwrap();

    assert!(
        warnings.is_empty(),
        "Should not warn when .md source exists for .htm link"
    );
}
1627
/// `.html` links must produce no warnings when a same-named file with a
/// `.md`, `.mdx` or `.markdown` extension exists on disk.
#[test]
fn test_html_link_finds_various_markdown_extensions() {
    let scratch = tempdir().unwrap();
    let root = scratch.path();

    // One empty source file per Markdown extension under test.
    for name in ["doc.md", "tutorial.mdx", "guide.markdown"] {
        File::create(root.join(name)).unwrap();
    }

    let markdown = r#"
[Doc](doc.html)
[Tutorial](tutorial.html)
[Guide](guide.html)
"#;

    let flavor = crate::config::MarkdownFlavor::Standard;
    let ctx = crate::lint_context::LintContext::new(markdown, flavor, None);
    let linter = MD057ExistingRelativeLinks::new().with_path(root);
    let result = linter.check(&ctx).unwrap();

    assert!(
        result.is_empty(),
        "Should find all markdown variants as source files. Got: {result:?}"
    );
}
1653
/// With `docs/guide.md` on disk, a link to `docs/guide.html` must produce no
/// warnings.
#[test]
fn test_html_link_in_subdirectory() {
    let scratch = tempdir().unwrap();
    let root = scratch.path();

    // Markdown source lives one directory below the base path.
    let subdir = root.join("docs");
    std::fs::create_dir(&subdir).unwrap();
    let guide = subdir.join("guide.md");
    File::create(guide).unwrap().write_all(b"# Guide").unwrap();

    let markdown = "[Guide](docs/guide.html)";

    let flavor = crate::config::MarkdownFlavor::Standard;
    let ctx = crate::lint_context::LintContext::new(markdown, flavor, None);
    let linter = MD057ExistingRelativeLinks::new().with_path(root);
    let warnings = linter.check(&ctx).unwrap();

    assert!(warnings.is_empty(), "Should find markdown source in subdirectory");
}
1675}