1use crate::rule::{CrossFileScope, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::element_cache::ElementCache;
8use crate::workspace_index::{CrossFileLinkIndex, FileIndex};
9use regex::Regex;
10use std::collections::HashMap;
11use std::env;
12use std::path::{Path, PathBuf};
13use std::sync::LazyLock;
14use std::sync::{Arc, Mutex};
15
16mod md057_config;
17use md057_config::MD057Config;
18
/// Process-wide memo of `Path::exists` answers, keyed by the exact path that was queried.
///
/// Entries are never refreshed on their own; call [`reset_file_existence_cache`] to drop them.
static FILE_EXISTENCE_CACHE: LazyLock<Arc<Mutex<HashMap<PathBuf, bool>>>> =
    LazyLock::new(|| Arc::new(Mutex::new(HashMap::new())));

/// Drops every memoized existence answer so later lookups hit the filesystem again.
///
/// # Panics
///
/// Panics if the cache mutex is poisoned.
fn reset_file_existence_cache() {
    FILE_EXISTENCE_CACHE
        .lock()
        .expect("File existence cache mutex poisoned")
        .clear();
}

/// Reports whether `path` exists, consulting the filesystem only on the first query per path.
///
/// The answer is stored in [`FILE_EXISTENCE_CACHE`] and returned as-is on later calls, even if
/// the file has been created or removed in the meantime.
///
/// # Panics
///
/// Panics if the cache mutex is poisoned.
fn file_exists_with_cache(path: &Path) -> bool {
    let mut known = FILE_EXISTENCE_CACHE
        .lock()
        .expect("File existence cache mutex poisoned");

    if let Some(&exists) = known.get(path) {
        return exists;
    }

    // The lock stays held across the filesystem probe, exactly one probe per distinct path.
    let exists = path.exists();
    known.insert(path.to_path_buf(), exists);
    exists
}
38
39static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());
41
42static URL_EXTRACT_REGEX: LazyLock<Regex> =
45 LazyLock::new(|| Regex::new("\\]\\(\\s*<?([^>\\)\\s#]+)(#[^)\\s]*)?\\s*(?:\"[^\"]*\")?\\s*>?\\s*\\)").unwrap());
46
47static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
49 LazyLock::new(|| Regex::new(r"^(https?://|ftp://|mailto:|www\.)").unwrap());
50
51static CURRENT_DIR: LazyLock<PathBuf> = LazyLock::new(|| env::current_dir().unwrap_or_else(|_| PathBuf::from(".")));
53
/// File-name suffixes (lowercase, including the dot) that identify a markdown document.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md",
    ".markdown",
    ".mdx",
    ".mkd",
    ".mkdn",
    ".mdown",
    ".mdwn",
    ".qmd",
    ".rmd",
];

/// Returns `true` when `path`, compared case-insensitively, ends with one of
/// [`MARKDOWN_EXTENSIONS`].
///
/// This is a plain suffix test on the whole string, so anything after the extension
/// (for example a `#fragment`) makes it return `false`.
#[inline]
fn is_markdown_file(path: &str) -> bool {
    let lowered = path.to_lowercase();
    for suffix in MARKDOWN_EXTENSIONS {
        if lowered.ends_with(*suffix) {
            return true;
        }
    }
    false
}
73
74#[derive(Debug, Default, Clone)]
76pub struct MD057ExistingRelativeLinks {
77 base_path: Arc<Mutex<Option<PathBuf>>>,
79}
80
81impl MD057ExistingRelativeLinks {
82 pub fn new() -> Self {
84 Self::default()
85 }
86
87 pub fn with_path<P: AsRef<Path>>(self, path: P) -> Self {
89 let path = path.as_ref();
90 let dir_path = if path.is_file() {
91 path.parent().map(|p| p.to_path_buf())
92 } else {
93 Some(path.to_path_buf())
94 };
95
96 *self.base_path.lock().expect("Base path mutex poisoned") = dir_path;
97 self
98 }
99
100 pub fn from_config_struct(_config: MD057Config) -> Self {
101 Self::default()
102 }
103
104 #[inline]
106 fn is_external_url(&self, url: &str) -> bool {
107 if url.is_empty() {
108 return false;
109 }
110
111 if PROTOCOL_DOMAIN_REGEX.is_match(url) || url.starts_with("www.") {
113 return true;
114 }
115
116 if url.ends_with(".com") {
118 return true;
119 }
120
121 if url.starts_with('/') {
123 return false;
124 }
125
126 false
128 }
129
130 #[inline]
132 fn is_fragment_only_link(&self, url: &str) -> bool {
133 url.starts_with('#')
134 }
135
136 fn resolve_link_path(&self, link: &str) -> Option<PathBuf> {
138 self.base_path
139 .lock()
140 .unwrap()
141 .as_ref()
142 .map(|base_path| base_path.join(link))
143 }
144
145 fn process_link(&self, url: &str, line_num: usize, column: usize, warnings: &mut Vec<LintWarning>) {
147 if url.is_empty() {
149 return;
150 }
151
152 if self.is_external_url(url) || self.is_fragment_only_link(url) {
154 return;
155 }
156
157 if let Some(resolved_path) = self.resolve_link_path(url) {
159 if !file_exists_with_cache(&resolved_path) {
161 warnings.push(LintWarning {
162 rule_name: Some(self.name().to_string()),
163 line: line_num,
164 column,
165 end_line: line_num,
166 end_column: column + url.len(),
167 message: format!("Relative link '{url}' does not exist"),
168 severity: Severity::Warning,
169 fix: None, });
171 }
172 }
173 }
174}
175
176impl Rule for MD057ExistingRelativeLinks {
177 fn name(&self) -> &'static str {
178 "MD057"
179 }
180
181 fn description(&self) -> &'static str {
182 "Relative links should point to existing files"
183 }
184
185 fn category(&self) -> RuleCategory {
186 RuleCategory::Link
187 }
188
189 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
190 ctx.content.is_empty() || !ctx.likely_has_links_or_images()
191 }
192
193 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
194 let content = ctx.content;
195
196 if content.is_empty() || !content.contains('[') {
198 return Ok(Vec::new());
199 }
200
201 if !content.contains("](") {
203 return Ok(Vec::new());
204 }
205
206 reset_file_existence_cache();
208
209 let mut warnings = Vec::new();
210
211 let base_path = {
213 let base_path_guard = self.base_path.lock().expect("Base path mutex poisoned");
214 if base_path_guard.is_some() {
215 base_path_guard.clone()
216 } else {
217 static CACHED_FILE_PATH: std::sync::OnceLock<Option<PathBuf>> = std::sync::OnceLock::new();
219 CACHED_FILE_PATH
220 .get_or_init(|| {
221 if let Ok(file_path) = env::var("RUMDL_FILE_PATH") {
222 let path = Path::new(&file_path);
223 if path.exists() {
224 path.parent()
225 .map(|p| p.to_path_buf())
226 .or_else(|| Some(CURRENT_DIR.clone()))
227 } else {
228 Some(CURRENT_DIR.clone())
229 }
230 } else {
231 Some(CURRENT_DIR.clone())
232 }
233 })
234 .clone()
235 }
236 };
237
238 if base_path.is_none() {
240 return Ok(warnings);
241 }
242
243 if !ctx.links.is_empty() {
245 let line_index = &ctx.line_index;
247
248 let element_cache = ElementCache::new(content);
250
251 let lines: Vec<&str> = content.lines().collect();
253
254 for link in &ctx.links {
255 let line_idx = link.line - 1;
256 if line_idx >= lines.len() {
257 continue;
258 }
259
260 let line = lines[line_idx];
261
262 if !line.contains("](") {
264 continue;
265 }
266
267 for link_match in LINK_START_REGEX.find_iter(line) {
269 let start_pos = link_match.start();
270 let end_pos = link_match.end();
271
272 let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
274 let absolute_start_pos = line_start_byte + start_pos;
275
276 if element_cache.is_in_code_span(absolute_start_pos) {
278 continue;
279 }
280
281 if let Some(caps) = URL_EXTRACT_REGEX.captures_at(line, end_pos - 1)
283 && let Some(url_group) = caps.get(1)
284 {
285 let url = url_group.as_str().trim();
286
287 let column = start_pos + 1;
289
290 self.process_link(url, link.line, column, &mut warnings);
292 }
293 }
294 }
295 }
296
297 Ok(warnings)
298 }
299
300 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
301 Ok(ctx.content.to_string())
302 }
303
304 fn as_any(&self) -> &dyn std::any::Any {
305 self
306 }
307
308 fn default_config_section(&self) -> Option<(String, toml::Value)> {
309 None
311 }
312
313 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
314 where
315 Self: Sized,
316 {
317 let rule_config = crate::rule_config_serde::load_rule_config::<MD057Config>(config);
318 Box::new(Self::from_config_struct(rule_config))
319 }
320
321 fn cross_file_scope(&self) -> CrossFileScope {
322 CrossFileScope::Workspace
323 }
324
325 fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, index: &mut FileIndex) {
326 let content = ctx.content;
327
328 if content.is_empty() || !content.contains("](") {
330 return;
331 }
332
333 let lines: Vec<&str> = content.lines().collect();
335 let element_cache = ElementCache::new(content);
336 let line_index = &ctx.line_index;
337
338 for link in &ctx.links {
339 let line_idx = link.line - 1;
340 if line_idx >= lines.len() {
341 continue;
342 }
343
344 let line = lines[line_idx];
345 if !line.contains("](") {
346 continue;
347 }
348
349 for link_match in LINK_START_REGEX.find_iter(line) {
351 let start_pos = link_match.start();
352 let end_pos = link_match.end();
353
354 let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
356 let absolute_start_pos = line_start_byte + start_pos;
357
358 if element_cache.is_in_code_span(absolute_start_pos) {
360 continue;
361 }
362
363 if let Some(caps) = URL_EXTRACT_REGEX.captures_at(line, end_pos - 1)
366 && let Some(url_group) = caps.get(1)
367 {
368 let file_path = url_group.as_str().trim();
369
370 if file_path.is_empty()
372 || PROTOCOL_DOMAIN_REGEX.is_match(file_path)
373 || file_path.starts_with("www.")
374 || file_path.starts_with('#')
375 {
376 continue;
377 }
378
379 let fragment = caps.get(2).map(|m| m.as_str().trim_start_matches('#')).unwrap_or("");
381
382 if is_markdown_file(file_path) {
384 index.add_cross_file_link(CrossFileLinkIndex {
385 target_path: file_path.to_string(),
386 fragment: fragment.to_string(),
387 line: link.line,
388 column: start_pos + 1,
389 });
390 }
391 }
392 }
393 }
394 }
395
396 fn cross_file_check(
397 &self,
398 file_path: &Path,
399 file_index: &FileIndex,
400 workspace_index: &crate::workspace_index::WorkspaceIndex,
401 ) -> LintResult {
402 let mut warnings = Vec::new();
403
404 let file_dir = file_path.parent();
406
407 for cross_link in &file_index.cross_file_links {
408 let target_path = if cross_link.target_path.starts_with('/') {
410 let stripped = cross_link.target_path.trim_start_matches('/');
413 resolve_absolute_link(file_path, stripped)
414 } else if let Some(dir) = file_dir {
415 dir.join(&cross_link.target_path)
416 } else {
417 Path::new(&cross_link.target_path).to_path_buf()
418 };
419
420 let target_path = normalize_path(&target_path);
422
423 if !workspace_index.contains_file(&target_path) {
425 if cross_link.target_path.ends_with(".md") || cross_link.target_path.ends_with(".markdown") {
428 if !target_path.exists() {
431 warnings.push(LintWarning {
432 rule_name: Some(self.name().to_string()),
433 line: cross_link.line,
434 column: cross_link.column,
435 end_line: cross_link.line,
436 end_column: cross_link.column + cross_link.target_path.len(),
437 message: format!(
438 "Relative link '{}' does not exist in the workspace",
439 cross_link.target_path
440 ),
441 severity: Severity::Warning,
442 fix: None,
443 });
444 }
445 }
446 }
447 }
448
449 Ok(warnings)
450 }
451}
452
/// Lexically normalizes `path` by removing `.` components and resolving `..` components.
///
/// The filesystem is not consulted, so symlinks are not followed.
///
/// * `..` after a normal component removes that component (`a/b/../c` → `a/c`).
/// * `..` directly below a root or prefix is dropped, since there is nothing above the root
///   (`/a/../../b` → `/b`).
/// * `..` that would climb above the start of a relative path is kept (`a/../../b` → `../b`),
///   so a target outside the starting directory is not mistaken for one inside it.
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut components: Vec<Component<'_>> = Vec::new();

    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => match components.last() {
                Some(Component::Normal(_)) => {
                    components.pop();
                }
                Some(Component::RootDir | Component::Prefix(_)) => {}
                Some(Component::ParentDir | Component::CurDir) | None => components.push(component),
            },
            Component::Normal(_) | Component::RootDir | Component::Prefix(_) => components.push(component),
        }
    }

    components.iter().collect()
}
476
/// Resolves a root-relative link (leading `/` already removed) by searching upwards.
///
/// Starting at the directory containing `file_path` and moving through each ancestor in turn,
/// the first directory in which `stripped_path` exists wins. If none matches, the result is
/// `stripped_path` joined onto the directory of `file_path`, or `stripped_path` on its own when
/// `file_path` has no parent.
fn resolve_absolute_link(file_path: &Path, stripped_path: &str) -> PathBuf {
    // `ancestors()` yields `file_path` itself first; skipping it leaves the chain of parents.
    let found = file_path
        .ancestors()
        .skip(1)
        .map(|dir| dir.join(stripped_path))
        .find(|candidate| candidate.exists());

    match (found, file_path.parent()) {
        (Some(candidate), _) => candidate,
        (None, Some(dir)) => dir.join(stripped_path),
        (None, None) => PathBuf::from(stripped_path),
    }
}
500
// Unit tests for MD057: URL classification, single-file checking against a temporary directory,
// code-span handling, cross-file index contribution and checking, and the path helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use tempfile::tempdir;

    // URLs with a known scheme, a `www.` prefix or a `.com` suffix are external;
    // plain relative paths are not.
    #[test]
    fn test_external_urls() {
        let rule = MD057ExistingRelativeLinks::new();

        assert!(rule.is_external_url("https://example.com"));
        assert!(rule.is_external_url("http://example.com"));
        assert!(rule.is_external_url("ftp://example.com"));
        assert!(rule.is_external_url("www.example.com"));
        assert!(rule.is_external_url("example.com"));

        assert!(!rule.is_external_url("./relative/path.md"));
        assert!(!rule.is_external_url("relative/path.md"));
        assert!(!rule.is_external_url("../parent/path.md"));
    }

    // Without `with_path`, links cannot be resolved, so nothing is reported.
    #[test]
    fn test_no_warnings_without_base_path() {
        let rule = MD057ExistingRelativeLinks::new();
        let content = "[Link](missing.md)";

        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty(), "Should have no warnings without base path");
    }

    // Only the two targets that are absent from the temp directory are reported.
    #[test]
    fn test_existing_and_missing_links() {
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        let exists_path = base_path.join("exists.md");
        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();

        assert!(exists_path.exists(), "exists.md should exist for this test");

        let content = r#"
# Test Document

[Valid Link](exists.md)
[Invalid Link](missing.md)
[External Link](https://example.com)
[Media Link](image.jpg)
        "#;

        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);

        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        // missing.md and image.jpg are absent; exists.md is present and the URL is external.
        assert_eq!(result.len(), 2);
        let messages: Vec<_> = result.iter().map(|w| w.message.as_str()).collect();
        assert!(messages.iter().any(|m| m.contains("missing.md")));
        assert!(messages.iter().any(|m| m.contains("image.jpg")));
    }

    // Targets wrapped in `<...>` are handled like plain targets.
    #[test]
    fn test_angle_bracket_links() {
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        let exists_path = base_path.join("exists.md");
        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();

        let content = r#"
# Test Document

[Valid Link](<exists.md>)
[Invalid Link](<missing.md>)
[External Link](<https://example.com>)
        "#;

        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);

        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 1, "Should have exactly one warning");
        assert!(
            result[0].message.contains("missing.md"),
            "Warning should mention missing.md"
        );
    }

    // The single-file check is not limited to markdown targets: every missing file is reported.
    #[test]
    fn test_all_file_types_checked() {
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        let content = r#"
[Image Link](image.jpg)
[Video Link](video.mp4)
[Markdown Link](document.md)
[PDF Link](file.pdf)
"#;

        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);

        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 4, "Should have warnings for all missing files");
    }

    // A link-shaped string inside backticks on the same line as a real link is ignored.
    #[test]
    fn test_code_span_detection() {
        let rule = MD057ExistingRelativeLinks::new();

        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        let rule = rule.with_path(base_path);

        let content = "This is a [link](nonexistent.md) and `[not a link](not-checked.md)` in code.";

        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 1, "Should only flag the real link");
        assert!(result[0].message.contains("nonexistent.md"));
    }

    // Links fully inside a code span, or embedded in a longer code span, are not reported.
    #[test]
    fn test_inline_code_spans() {
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        let content = r#"
# Test Document

This is a normal link: [Link](missing.md)

This is a code span with a link: `[Link](another-missing.md)`

Some more text with `inline code [Link](yet-another-missing.md) embedded`.

        "#;

        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);

        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 1, "Should have exactly one warning");
        assert!(
            result[0].message.contains("missing.md"),
            "Warning should be for missing.md"
        );
        assert!(
            !result.iter().any(|w| w.message.contains("another-missing.md")),
            "Should not warn about link in code span"
        );
        assert!(
            !result.iter().any(|w| w.message.contains("yet-another-missing.md")),
            "Should not warn about link in inline code"
        );
    }

    // The rule declares workspace-wide scope.
    #[test]
    fn test_cross_file_scope() {
        let rule = MD057ExistingRelativeLinks::new();
        assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
    }

    // Only links to markdown files are indexed; the fragment is stored without its `#`.
    #[test]
    fn test_contribute_to_index_extracts_markdown_links() {
        let rule = MD057ExistingRelativeLinks::new();
        let content = r#"
# Document

[Link to docs](./docs/guide.md)
[Link with fragment](./other.md#section)
[External link](https://example.com)
[Image link](image.png)
[Media file](video.mp4)
"#;

        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let mut index = FileIndex::new();
        rule.contribute_to_index(&ctx, &mut index);

        // The external URL, the image and the video are not markdown targets.
        assert_eq!(index.cross_file_links.len(), 2);

        assert_eq!(index.cross_file_links[0].target_path, "./docs/guide.md");
        assert_eq!(index.cross_file_links[0].fragment, "");

        assert_eq!(index.cross_file_links[1].target_path, "./other.md");
        assert_eq!(index.cross_file_links[1].fragment, "section");
    }

    // External URLs, `www.` targets and fragment-only links never reach the index.
    #[test]
    fn test_contribute_to_index_skips_external_and_anchors() {
        let rule = MD057ExistingRelativeLinks::new();
        let content = r#"
# Document

[External](https://example.com)
[Another external](http://example.org)
[Fragment only](#section)
[FTP link](ftp://files.example.com)
[Mail link](mailto:test@example.com)
[WWW link](www.example.com)
"#;

        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let mut index = FileIndex::new();
        rule.contribute_to_index(&ctx, &mut index);

        assert_eq!(index.cross_file_links.len(), 0);
    }

    // A link from docs/index.md to guide.md resolves to docs/guide.md, which is in the index.
    #[test]
    fn test_cross_file_check_valid_link() {
        use crate::workspace_index::WorkspaceIndex;

        let rule = MD057ExistingRelativeLinks::new();

        let mut workspace_index = WorkspaceIndex::new();
        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());

        let mut file_index = FileIndex::new();
        file_index.add_cross_file_link(CrossFileLinkIndex {
            target_path: "guide.md".to_string(),
            fragment: "".to_string(),
            line: 5,
            column: 1,
        });

        let warnings = rule
            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
            .unwrap();

        assert!(warnings.is_empty());
    }

    // With an empty index and no such file on disk, the link is reported.
    #[test]
    fn test_cross_file_check_missing_link() {
        use crate::workspace_index::WorkspaceIndex;

        let rule = MD057ExistingRelativeLinks::new();

        let workspace_index = WorkspaceIndex::new();

        let mut file_index = FileIndex::new();
        file_index.add_cross_file_link(CrossFileLinkIndex {
            target_path: "missing.md".to_string(),
            fragment: "".to_string(),
            line: 5,
            column: 1,
        });

        let warnings = rule
            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
            .unwrap();

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("missing.md"));
        assert!(warnings[0].message.contains("does not exist"));
    }

    // `../readme.md` from docs/guide.md normalizes to readme.md, which is in the index.
    #[test]
    fn test_cross_file_check_parent_path() {
        use crate::workspace_index::WorkspaceIndex;

        let rule = MD057ExistingRelativeLinks::new();

        let mut workspace_index = WorkspaceIndex::new();
        workspace_index.insert_file(PathBuf::from("readme.md"), FileIndex::new());

        let mut file_index = FileIndex::new();
        file_index.add_cross_file_link(CrossFileLinkIndex {
            target_path: "../readme.md".to_string(),
            fragment: "".to_string(),
            line: 5,
            column: 1,
        });

        let warnings = rule
            .cross_file_check(Path::new("docs/guide.md"), &file_index, &workspace_index)
            .unwrap();

        assert!(warnings.is_empty());
    }

    // Covers an already-normal path, a leading `./`, and one or two `..` components.
    #[test]
    fn test_normalize_path_function() {
        assert_eq!(
            normalize_path(Path::new("docs/guide.md")),
            PathBuf::from("docs/guide.md")
        );

        assert_eq!(
            normalize_path(Path::new("./docs/guide.md")),
            PathBuf::from("docs/guide.md")
        );

        assert_eq!(
            normalize_path(Path::new("docs/sub/../guide.md")),
            PathBuf::from("docs/guide.md")
        );

        assert_eq!(normalize_path(Path::new("a/b/c/../../d.md")), PathBuf::from("a/d.md"));
    }

    // A root-relative target is found by walking up from the linking file's directory.
    #[test]
    fn test_resolve_absolute_link() {
        let temp_dir = tempdir().expect("Failed to create temp dir");
        let root = temp_dir.path();

        let contributing = root.join("CONTRIBUTING.md");
        File::create(&contributing).expect("Failed to create CONTRIBUTING.md");

        let docs = root.join("docs");
        std::fs::create_dir(&docs).expect("Failed to create docs dir");
        let readme = docs.join("README.md");
        File::create(&readme).expect("Failed to create README.md");

        // CONTRIBUTING.md lives one directory above README.md.
        let resolved = resolve_absolute_link(&readme, "CONTRIBUTING.md");
        assert!(resolved.exists(), "Should find CONTRIBUTING.md at workspace root");
        assert_eq!(resolved, contributing);

        let nonexistent = resolve_absolute_link(&readme, "NONEXISTENT.md");
        assert!(!nonexistent.exists(), "Should not find nonexistent file");
    }
}