1use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::document_structure::{DocumentStructure, DocumentStructureExtensions};
8use crate::utils::element_cache::ElementCache;
9use lazy_static::lazy_static;
10use regex::Regex;
11use std::collections::HashMap;
12use std::env;
13use std::path::{Path, PathBuf};
14use std::sync::{Arc, Mutex};
15
16mod md057_config;
17use md057_config::MD057Config;
18
19lazy_static! {
21 static ref FILE_EXISTENCE_CACHE: Arc<Mutex<HashMap<PathBuf, bool>>> = Arc::new(Mutex::new(HashMap::new()));
22}
23
24fn reset_file_existence_cache() {
26 let mut cache = FILE_EXISTENCE_CACHE.lock().unwrap();
27 cache.clear();
28}
29
30fn file_exists_with_cache(path: &Path) -> bool {
32 let mut cache = FILE_EXISTENCE_CACHE.lock().unwrap();
33 *cache.entry(path.to_path_buf()).or_insert_with(|| path.exists())
34}
35
36lazy_static! {
37 static ref LINK_START_REGEX: Regex =
39 Regex::new(r"!?\[[^\]]*\]").unwrap();
40
41 static ref URL_EXTRACT_REGEX: Regex =
44 Regex::new("\\]\\(\\s*<?([^>\\)\\s#]+)(#[^)\\s]*)?\\s*(?:\"[^\"]*\")?\\s*>?\\s*\\)").unwrap();
45
46 static ref CODE_FENCE_REGEX: Regex =
48 Regex::new(r"^( {0,3})(`{3,}|~{3,})").unwrap();
49
50 static ref PROTOCOL_DOMAIN_REGEX: Regex =
52 Regex::new(r"^(https?://|ftp://|mailto:|www\.)").unwrap();
53
54 static ref MEDIA_FILE_REGEX: Regex =
56 Regex::new(r"\.(jpg|jpeg|png|gif|bmp|svg|webp|tiff|mp3|mp4|avi|mov|webm|wav|ogg|pdf)$").unwrap();
57
58 static ref FRAGMENT_ONLY_REGEX: Regex =
60 Regex::new(r"^#").unwrap();
61
62 static ref CURRENT_DIR: PathBuf = env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
64}
65
66#[derive(Debug, Default, Clone)]
68pub struct MD057ExistingRelativeLinks {
69 base_path: Arc<Mutex<Option<PathBuf>>>,
71 config: MD057Config,
73}
74
75impl MD057ExistingRelativeLinks {
76 pub fn new() -> Self {
78 Self::default()
79 }
80
81 pub fn with_path<P: AsRef<Path>>(self, path: P) -> Self {
83 let path = path.as_ref();
84 let dir_path = if path.is_file() {
85 path.parent().map(|p| p.to_path_buf())
86 } else {
87 Some(path.to_path_buf())
88 };
89
90 *self.base_path.lock().unwrap() = dir_path;
91 self
92 }
93
94 pub fn with_skip_media_files(mut self, skip_media_files: bool) -> Self {
96 self.config.skip_media_files = skip_media_files;
97 self
98 }
99
100 pub fn from_config_struct(config: MD057Config) -> Self {
101 Self {
102 base_path: Arc::new(Mutex::new(None)),
103 config,
104 }
105 }
106
107 #[inline]
109 fn is_external_url(&self, url: &str) -> bool {
110 if url.is_empty() {
111 return false;
112 }
113
114 if PROTOCOL_DOMAIN_REGEX.is_match(url) || url.starts_with("www.") {
116 return true;
117 }
118
119 if !self.is_media_file(url) && url.ends_with(".com") {
121 return true;
122 }
123
124 if url.starts_with('/') {
126 return false;
127 }
128
129 false
131 }
132
133 #[inline]
135 fn is_fragment_only_link(&self, url: &str) -> bool {
136 url.starts_with('#')
137 }
138
139 #[inline]
141 fn is_media_file(&self, url: &str) -> bool {
142 if !url.contains('.') {
144 return false;
145 }
146 MEDIA_FILE_REGEX.is_match(url)
147 }
148
149 #[inline]
151 fn should_skip_media_file(&self, url: &str) -> bool {
152 self.config.skip_media_files && self.is_media_file(url)
153 }
154
155 fn resolve_link_path(&self, link: &str) -> Option<PathBuf> {
157 self.base_path
158 .lock()
159 .unwrap()
160 .as_ref()
161 .map(|base_path| base_path.join(link))
162 }
163
164 fn process_link(&self, url: &str, line_num: usize, column: usize, warnings: &mut Vec<LintWarning>) {
166 if url.is_empty() {
168 return;
169 }
170
171 if self.is_external_url(url) || self.is_fragment_only_link(url) {
173 return;
174 }
175
176 if self.should_skip_media_file(url) {
178 return;
179 }
180
181 if let Some(resolved_path) = self.resolve_link_path(url) {
183 if !file_exists_with_cache(&resolved_path) {
185 warnings.push(LintWarning {
186 rule_name: Some(self.name()),
187 line: line_num,
188 column,
189 end_line: line_num,
190 end_column: column + url.len(),
191 message: format!("Relative link '{url}' does not exist"),
192 severity: Severity::Warning,
193 fix: None, });
195 }
196 }
197 }
198}
199
200impl Rule for MD057ExistingRelativeLinks {
201 fn name(&self) -> &'static str {
202 "MD057"
203 }
204
205 fn description(&self) -> &'static str {
206 "Relative links should point to existing files"
207 }
208
209 fn category(&self) -> RuleCategory {
210 RuleCategory::Link
211 }
212
213 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
214 let content = ctx.content;
215 content.is_empty() || !content.contains('[') || !content.contains("](")
216 }
217
218 fn check_with_structure(
220 &self,
221 ctx: &crate::lint_context::LintContext,
222 structure: &DocumentStructure,
223 ) -> LintResult {
224 let content = ctx.content;
225
226 if content.is_empty() || !content.contains('[') {
228 return Ok(Vec::new());
229 }
230
231 if !content.contains("](") {
233 return Ok(Vec::new());
234 }
235
236 reset_file_existence_cache();
238
239 let mut warnings = Vec::new();
240
241 let base_path = {
243 let base_path_guard = self.base_path.lock().unwrap();
244 if base_path_guard.is_some() {
245 base_path_guard.clone()
246 } else {
247 static CACHED_FILE_PATH: std::sync::OnceLock<Option<PathBuf>> = std::sync::OnceLock::new();
249 CACHED_FILE_PATH
250 .get_or_init(|| {
251 if let Ok(file_path) = env::var("RUMDL_FILE_PATH") {
252 let path = Path::new(&file_path);
253 if path.exists() {
254 path.parent()
255 .map(|p| p.to_path_buf())
256 .or_else(|| Some(CURRENT_DIR.clone()))
257 } else {
258 Some(CURRENT_DIR.clone())
259 }
260 } else {
261 Some(CURRENT_DIR.clone())
262 }
263 })
264 .clone()
265 }
266 };
267
268 if base_path.is_none() {
270 return Ok(warnings);
271 }
272
273 if !structure.links.is_empty() {
275 let mut line_positions = Vec::new();
277 let mut pos = 0;
278 line_positions.push(0);
279 for ch in content.chars() {
280 pos += ch.len_utf8();
281 if ch == '\n' {
282 line_positions.push(pos);
283 }
284 }
285
286 let element_cache = ElementCache::new(content);
288
289 let lines: Vec<&str> = content.lines().collect();
291
292 for link in &structure.links {
293 let line_idx = link.line - 1;
294 if line_idx >= lines.len() {
295 continue;
296 }
297
298 let line = lines[line_idx];
299
300 if !line.contains("](") {
302 continue;
303 }
304
305 for link_match in LINK_START_REGEX.find_iter(line) {
307 let start_pos = link_match.start();
308 let end_pos = link_match.end();
309
310 let absolute_start_pos = if line_idx < line_positions.len() {
312 line_positions[line_idx] + start_pos
313 } else {
314 content.lines().take(line_idx).map(|l| l.len() + 1).sum::<usize>() + start_pos
316 };
317
318 if element_cache.is_in_code_span(absolute_start_pos) {
320 continue;
321 }
322
323 if let Some(caps) = URL_EXTRACT_REGEX.captures_at(line, end_pos - 1)
325 && let Some(url_group) = caps.get(1)
326 {
327 let url = url_group.as_str().trim();
328
329 let column = start_pos + 1;
331
332 self.process_link(url, link.line, column, &mut warnings);
334 }
335 }
336 }
337 }
338
339 Ok(warnings)
340 }
341
342 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
343 let content = ctx.content;
344 let structure = DocumentStructure::new(content);
346 self.check_with_structure(ctx, &structure)
347
348 }
350
351 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
352 Ok(ctx.content.to_string())
353 }
354
355 fn as_any(&self) -> &dyn std::any::Any {
356 self
357 }
358
359 fn default_config_section(&self) -> Option<(String, toml::Value)> {
360 let json_value = serde_json::to_value(&self.config).ok()?;
361 Some((
362 self.name().to_string(),
363 crate::rule_config_serde::json_to_toml_value(&json_value)?,
364 ))
365 }
366
367 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
368 where
369 Self: Sized,
370 {
371 let rule_config = crate::rule_config_serde::load_rule_config::<MD057Config>(config);
372 Box::new(Self::from_config_struct(rule_config))
373 }
374}
375
376impl DocumentStructureExtensions for MD057ExistingRelativeLinks {
377 fn has_relevant_elements(
378 &self,
379 _ctx: &crate::lint_context::LintContext,
380 _doc_structure: &DocumentStructure,
381 ) -> bool {
382 true
383 }
384}
385
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use tempfile::tempdir;

    /// Runs `rule.check` over `content` with the standard Markdown flavor.
    fn lint(rule: &MD057ExistingRelativeLinks, content: &str) -> Vec<LintWarning> {
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    /// Creates `name` inside `dir` with a small Markdown body and returns its path.
    fn create_markdown_file(dir: &Path, name: &str) -> PathBuf {
        let path = dir.join(name);
        File::create(&path).unwrap().write_all(b"# Test File").unwrap();
        path
    }

    #[test]
    fn test_external_urls() {
        let rule = MD057ExistingRelativeLinks::new();

        let external = [
            "https://example.com",
            "http://example.com",
            "ftp://example.com",
            "www.example.com",
            "example.com",
        ];
        for url in external {
            assert!(rule.is_external_url(url));
        }

        let relative = ["./relative/path.md", "relative/path.md", "../parent/path.md"];
        for url in relative {
            assert!(!rule.is_external_url(url));
        }
    }

    #[test]
    fn test_media_files() {
        let rule_default = MD057ExistingRelativeLinks::new();

        // Recognised media extensions.
        for name in ["image.jpg", "video.mp4", "document.pdf", "path/to/audio.mp3"] {
            assert!(
                rule_default.is_media_file(name),
                "{name} should be identified as a media file"
            );
        }

        // Ordinary source/document extensions.
        for name in ["document.md", "code.rs"] {
            assert!(
                !rule_default.is_media_file(name),
                "{name} should not be identified as a media file"
            );
        }

        // The default configuration skips media files.
        assert!(
            rule_default.should_skip_media_file("image.jpg"),
            "image.jpg should be skipped with default settings"
        );
        assert!(
            !rule_default.should_skip_media_file("document.md"),
            "document.md should not be skipped"
        );

        // Skipping can be switched off.
        let rule_no_skip = MD057ExistingRelativeLinks::new().with_skip_media_files(false);
        assert!(
            !rule_no_skip.should_skip_media_file("image.jpg"),
            "image.jpg should not be skipped when skip_media_files is false"
        );
    }

    #[test]
    fn test_no_warnings_without_base_path() {
        let rule = MD057ExistingRelativeLinks::new();

        let result = lint(&rule, "[Link](missing.md)");

        assert!(result.is_empty(), "Should have no warnings without base path");
    }

    #[test]
    fn test_existing_and_missing_links() {
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        let exists_path = create_markdown_file(base_path, "exists.md");
        assert!(exists_path.exists(), "exists.md should exist for this test");

        let content = r#"
# Test Document

[Valid Link](exists.md)
[Invalid Link](missing.md)
[External Link](https://example.com)
[Media Link](image.jpg)
        "#;

        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        // Through the plain entry point.
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("missing.md"));

        // Through the structure-aware entry point; results must agree.
        let structure = DocumentStructure::new(content);
        let result_with_structure = rule.check_with_structure(&ctx, &structure).unwrap();
        assert_eq!(result.len(), result_with_structure.len());
        assert!(result_with_structure[0].message.contains("missing.md"));
    }

    #[test]
    fn test_angle_bracket_links() {
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();
        create_markdown_file(base_path, "exists.md");

        let content = r#"
# Test Document

[Valid Link](<exists.md>)
[Invalid Link](<missing.md>)
[External Link](<https://example.com>)
        "#;

        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
        let result = lint(&rule, content);

        assert_eq!(result.len(), 1, "Should have exactly one warning");
        assert!(
            result[0].message.contains("missing.md"),
            "Warning should mention missing.md"
        );
    }

    #[test]
    fn test_media_file_handling() {
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        assert!(
            !base_path.join("image.jpg").exists(),
            "Test precondition failed: image.jpg should not exist"
        );

        let content = "[Media Link](image.jpg)";

        // Default configuration: media links are not checked.
        let rule_skip_media = MD057ExistingRelativeLinks::new().with_path(base_path);
        let result_skip = lint(&rule_skip_media, content);
        assert_eq!(
            result_skip.len(),
            0,
            "Should have no warnings when skip_media_files is true"
        );

        // Media skipping disabled: the missing image is reported.
        let rule_check_all = MD057ExistingRelativeLinks::new()
            .with_path(base_path)
            .with_skip_media_files(false);
        let result_all = lint(&rule_check_all, content);
        assert_eq!(
            result_all.len(),
            1,
            "Should have one warning when skip_media_files is false"
        );
        assert!(
            result_all[0].message.contains("image.jpg"),
            "Warning should mention image.jpg"
        );
    }

    #[test]
    fn test_code_span_detection() {
        let temp_dir = tempdir().unwrap();
        let rule = MD057ExistingRelativeLinks::new().with_path(temp_dir.path());

        let content = "This is a [link](nonexistent.md) and `[not a link](not-checked.md)` in code.";
        let structure = DocumentStructure::new(content);
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let result = rule.check_with_structure(&ctx, &structure).unwrap();

        assert_eq!(result.len(), 1, "Should only flag the real link");
        assert!(result[0].message.contains("nonexistent.md"));
    }

    #[test]
    fn test_inline_code_spans() {
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        let content = r#"
# Test Document

This is a normal link: [Link](missing.md)

This is a code span with a link: `[Link](another-missing.md)`

Some more text with `inline code [Link](yet-another-missing.md) embedded`.

        "#;

        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
        let result = lint(&rule, content);

        assert_eq!(result.len(), 1, "Should have exactly one warning");
        assert!(
            result[0].message.contains("missing.md"),
            "Warning should be for missing.md"
        );

        let mentions = |needle: &str| result.iter().any(|w| w.message.contains(needle));
        assert!(
            !mentions("another-missing.md"),
            "Should not warn about link in code span"
        );
        assert!(
            !mentions("yet-another-missing.md"),
            "Should not warn about link in inline code"
        );
    }
}