// rumdl_lib/rules/md057_existing_relative_links.rs
use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::element_cache::ElementCache;
8use regex::Regex;
9use std::collections::HashMap;
10use std::env;
11use std::path::{Path, PathBuf};
12use std::sync::LazyLock;
13use std::sync::{Arc, Mutex};
14
15mod md057_config;
16use md057_config::MD057Config;
17
/// Process-wide memo of filesystem existence checks, keyed by the resolved path.
///
/// A `static` is already shared by every thread, so the map only needs a `Mutex`
/// for interior mutability; no reference counting is required.
static FILE_EXISTENCE_CACHE: LazyLock<Mutex<HashMap<PathBuf, bool>>> =
    LazyLock::new(|| Mutex::new(HashMap::new()));
21
22fn reset_file_existence_cache() {
24 let mut cache = FILE_EXISTENCE_CACHE
25 .lock()
26 .expect("File existence cache mutex poisoned");
27 cache.clear();
28}
29
30fn file_exists_with_cache(path: &Path) -> bool {
32 let mut cache = FILE_EXISTENCE_CACHE
33 .lock()
34 .expect("File existence cache mutex poisoned");
35 *cache.entry(path.to_path_buf()).or_insert_with(|| path.exists())
36}
37
38static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());
40
41static URL_EXTRACT_REGEX: LazyLock<Regex> =
44 LazyLock::new(|| Regex::new("\\]\\(\\s*<?([^>\\)\\s#]+)(#[^)\\s]*)?\\s*(?:\"[^\"]*\")?\\s*>?\\s*\\)").unwrap());
45
46static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
48 LazyLock::new(|| Regex::new(r"^(https?://|ftp://|mailto:|www\.)").unwrap());
49
50static MEDIA_FILE_REGEX: LazyLock<Regex> =
52 LazyLock::new(|| Regex::new(r"\.(jpg|jpeg|png|gif|bmp|svg|webp|tiff|mp3|mp4|avi|mov|webm|wav|ogg|pdf)$").unwrap());
53
/// The process working directory, looked up on first use; `"."` when it cannot be determined.
static CURRENT_DIR: LazyLock<PathBuf> = LazyLock::new(|| match env::current_dir() {
    Ok(dir) => dir,
    Err(_) => PathBuf::from("."),
});
56
57#[derive(Debug, Default, Clone)]
59pub struct MD057ExistingRelativeLinks {
60 base_path: Arc<Mutex<Option<PathBuf>>>,
62 config: MD057Config,
64}
65
66impl MD057ExistingRelativeLinks {
67 pub fn new() -> Self {
69 Self::default()
70 }
71
72 pub fn with_path<P: AsRef<Path>>(self, path: P) -> Self {
74 let path = path.as_ref();
75 let dir_path = if path.is_file() {
76 path.parent().map(|p| p.to_path_buf())
77 } else {
78 Some(path.to_path_buf())
79 };
80
81 *self.base_path.lock().expect("Base path mutex poisoned") = dir_path;
82 self
83 }
84
85 pub fn with_skip_media_files(mut self, skip_media_files: bool) -> Self {
87 self.config.skip_media_files = skip_media_files;
88 self
89 }
90
91 pub fn from_config_struct(config: MD057Config) -> Self {
92 Self {
93 base_path: Arc::new(Mutex::new(None)),
94 config,
95 }
96 }
97
98 #[inline]
100 fn is_external_url(&self, url: &str) -> bool {
101 if url.is_empty() {
102 return false;
103 }
104
105 if PROTOCOL_DOMAIN_REGEX.is_match(url) || url.starts_with("www.") {
107 return true;
108 }
109
110 if !self.is_media_file(url) && url.ends_with(".com") {
112 return true;
113 }
114
115 if url.starts_with('/') {
117 return false;
118 }
119
120 false
122 }
123
124 #[inline]
126 fn is_fragment_only_link(&self, url: &str) -> bool {
127 url.starts_with('#')
128 }
129
130 #[inline]
132 fn is_media_file(&self, url: &str) -> bool {
133 if !url.contains('.') {
135 return false;
136 }
137 MEDIA_FILE_REGEX.is_match(url)
138 }
139
140 #[inline]
142 fn should_skip_media_file(&self, url: &str) -> bool {
143 self.config.skip_media_files && self.is_media_file(url)
144 }
145
146 fn resolve_link_path(&self, link: &str) -> Option<PathBuf> {
148 self.base_path
149 .lock()
150 .unwrap()
151 .as_ref()
152 .map(|base_path| base_path.join(link))
153 }
154
155 fn process_link(&self, url: &str, line_num: usize, column: usize, warnings: &mut Vec<LintWarning>) {
157 if url.is_empty() {
159 return;
160 }
161
162 if self.is_external_url(url) || self.is_fragment_only_link(url) {
164 return;
165 }
166
167 if self.should_skip_media_file(url) {
169 return;
170 }
171
172 if let Some(resolved_path) = self.resolve_link_path(url) {
174 if !file_exists_with_cache(&resolved_path) {
176 warnings.push(LintWarning {
177 rule_name: Some(self.name().to_string()),
178 line: line_num,
179 column,
180 end_line: line_num,
181 end_column: column + url.len(),
182 message: format!("Relative link '{url}' does not exist"),
183 severity: Severity::Warning,
184 fix: None, });
186 }
187 }
188 }
189}
190
191impl Rule for MD057ExistingRelativeLinks {
192 fn name(&self) -> &'static str {
193 "MD057"
194 }
195
196 fn description(&self) -> &'static str {
197 "Relative links should point to existing files"
198 }
199
200 fn category(&self) -> RuleCategory {
201 RuleCategory::Link
202 }
203
204 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
205 ctx.content.is_empty() || !ctx.likely_has_links_or_images()
206 }
207
208 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
209 let content = ctx.content;
210
211 if content.is_empty() || !content.contains('[') {
213 return Ok(Vec::new());
214 }
215
216 if !content.contains("](") {
218 return Ok(Vec::new());
219 }
220
221 reset_file_existence_cache();
223
224 let mut warnings = Vec::new();
225
226 let base_path = {
228 let base_path_guard = self.base_path.lock().expect("Base path mutex poisoned");
229 if base_path_guard.is_some() {
230 base_path_guard.clone()
231 } else {
232 static CACHED_FILE_PATH: std::sync::OnceLock<Option<PathBuf>> = std::sync::OnceLock::new();
234 CACHED_FILE_PATH
235 .get_or_init(|| {
236 if let Ok(file_path) = env::var("RUMDL_FILE_PATH") {
237 let path = Path::new(&file_path);
238 if path.exists() {
239 path.parent()
240 .map(|p| p.to_path_buf())
241 .or_else(|| Some(CURRENT_DIR.clone()))
242 } else {
243 Some(CURRENT_DIR.clone())
244 }
245 } else {
246 Some(CURRENT_DIR.clone())
247 }
248 })
249 .clone()
250 }
251 };
252
253 if base_path.is_none() {
255 return Ok(warnings);
256 }
257
258 if !ctx.links.is_empty() {
260 let line_index = &ctx.line_index;
262
263 let element_cache = ElementCache::new(content);
265
266 let lines: Vec<&str> = content.lines().collect();
268
269 for link in &ctx.links {
270 let line_idx = link.line - 1;
271 if line_idx >= lines.len() {
272 continue;
273 }
274
275 let line = lines[line_idx];
276
277 if !line.contains("](") {
279 continue;
280 }
281
282 for link_match in LINK_START_REGEX.find_iter(line) {
284 let start_pos = link_match.start();
285 let end_pos = link_match.end();
286
287 let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
289 let absolute_start_pos = line_start_byte + start_pos;
290
291 if element_cache.is_in_code_span(absolute_start_pos) {
293 continue;
294 }
295
296 if let Some(caps) = URL_EXTRACT_REGEX.captures_at(line, end_pos - 1)
298 && let Some(url_group) = caps.get(1)
299 {
300 let url = url_group.as_str().trim();
301
302 let column = start_pos + 1;
304
305 self.process_link(url, link.line, column, &mut warnings);
307 }
308 }
309 }
310 }
311
312 Ok(warnings)
313 }
314
315 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
316 Ok(ctx.content.to_string())
317 }
318
319 fn as_any(&self) -> &dyn std::any::Any {
320 self
321 }
322
323 fn default_config_section(&self) -> Option<(String, toml::Value)> {
324 let json_value = serde_json::to_value(&self.config).ok()?;
325 Some((
326 self.name().to_string(),
327 crate::rule_config_serde::json_to_toml_value(&json_value)?,
328 ))
329 }
330
331 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
332 where
333 Self: Sized,
334 {
335 let rule_config = crate::rule_config_serde::load_rule_config::<MD057Config>(config);
336 Box::new(Self::from_config_struct(rule_config))
337 }
338}
339
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use tempfile::tempdir;

    /// Runs the rule over `content` with the standard Markdown flavor and returns its warnings.
    fn lint(rule: &MD057ExistingRelativeLinks, content: &str) -> Vec<LintWarning> {
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    /// Protocol-prefixed and bare-domain targets are external; path-like targets are not.
    #[test]
    fn test_external_urls() {
        let rule = MD057ExistingRelativeLinks::new();

        for external in [
            "https://example.com",
            "http://example.com",
            "ftp://example.com",
            "www.example.com",
            "example.com",
        ] {
            assert!(rule.is_external_url(external));
        }

        for relative in ["./relative/path.md", "relative/path.md", "../parent/path.md"] {
            assert!(!rule.is_external_url(relative));
        }
    }

    /// Media detection by extension, and the effect of the `skip_media_files` switch.
    #[test]
    fn test_media_files() {
        let rule_default = MD057ExistingRelativeLinks::new();

        for media in ["image.jpg", "video.mp4", "document.pdf", "path/to/audio.mp3"] {
            assert!(
                rule_default.is_media_file(media),
                "{media} should be identified as a media file"
            );
        }

        for other in ["document.md", "code.rs"] {
            assert!(
                !rule_default.is_media_file(other),
                "{other} should not be identified as a media file"
            );
        }

        assert!(
            rule_default.should_skip_media_file("image.jpg"),
            "image.jpg should be skipped with default settings"
        );
        assert!(
            !rule_default.should_skip_media_file("document.md"),
            "document.md should not be skipped"
        );

        let rule_no_skip = MD057ExistingRelativeLinks::new().with_skip_media_files(false);
        assert!(
            !rule_no_skip.should_skip_media_file("image.jpg"),
            "image.jpg should not be skipped when skip_media_files is false"
        );
    }

    /// Without a base path nothing can be resolved, so nothing is reported.
    #[test]
    fn test_no_warnings_without_base_path() {
        let rule = MD057ExistingRelativeLinks::new();
        let content = "[Link](missing.md)";

        let result = lint(&rule, content);
        assert!(result.is_empty(), "Should have no warnings without base path");
    }

    /// Only the missing relative target is reported; repeated checks agree.
    #[test]
    fn test_existing_and_missing_links() {
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        let exists_path = base_path.join("exists.md");
        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();

        assert!(exists_path.exists(), "exists.md should exist for this test");

        let content = r#"
# Test Document

[Valid Link](exists.md)
[Invalid Link](missing.md)
[External Link](https://example.com)
[Media Link](image.jpg)
        "#;

        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);

        // Both checks run against the same context.
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("missing.md"));

        let result_with_structure = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), result_with_structure.len());
        assert!(result_with_structure[0].message.contains("missing.md"));
    }

    /// Targets wrapped in `<...>` are resolved like plain targets.
    #[test]
    fn test_angle_bracket_links() {
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        let exists_path = base_path.join("exists.md");
        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();

        let content = r#"
# Test Document

[Valid Link](<exists.md>)
[Invalid Link](<missing.md>)
[External Link](<https://example.com>)
        "#;

        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
        let result = lint(&rule, content);

        assert_eq!(result.len(), 1, "Should have exactly one warning");
        assert!(
            result[0].message.contains("missing.md"),
            "Warning should mention missing.md"
        );
    }

    /// Missing media files are ignored by default and reported when skipping is disabled.
    #[test]
    fn test_media_file_handling() {
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        let image_path = base_path.join("image.jpg");
        assert!(
            !image_path.exists(),
            "Test precondition failed: image.jpg should not exist"
        );

        let content = "[Media Link](image.jpg)";

        let rule_skip_media = MD057ExistingRelativeLinks::new().with_path(base_path);
        let result_skip = lint(&rule_skip_media, content);

        assert_eq!(
            result_skip.len(),
            0,
            "Should have no warnings when skip_media_files is true"
        );

        let rule_check_all = MD057ExistingRelativeLinks::new()
            .with_path(base_path)
            .with_skip_media_files(false);
        let result_all = lint(&rule_check_all, content);

        assert_eq!(
            result_all.len(),
            1,
            "Should have one warning when skip_media_files is false"
        );
        assert!(
            result_all[0].message.contains("image.jpg"),
            "Warning should mention image.jpg"
        );
    }

    /// A link-shaped string inside a code span on the same line is not checked.
    #[test]
    fn test_code_span_detection() {
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);

        let content = "This is a [link](nonexistent.md) and `[not a link](not-checked.md)` in code.";
        let result = lint(&rule, content);

        assert_eq!(result.len(), 1, "Should only flag the real link");
        assert!(result[0].message.contains("nonexistent.md"));
    }

    /// Links inside inline code anywhere in the document are not checked.
    #[test]
    fn test_inline_code_spans() {
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        let content = r#"
# Test Document

This is a normal link: [Link](missing.md)

This is a code span with a link: `[Link](another-missing.md)`

Some more text with `inline code [Link](yet-another-missing.md) embedded`.

        "#;

        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
        let result = lint(&rule, content);

        assert_eq!(result.len(), 1, "Should have exactly one warning");
        assert!(
            result[0].message.contains("missing.md"),
            "Warning should be for missing.md"
        );
        assert!(
            !result.iter().any(|w| w.message.contains("another-missing.md")),
            "Should not warn about link in code span"
        );
        assert!(
            !result.iter().any(|w| w.message.contains("yet-another-missing.md")),
            "Should not warn about link in inline code"
        );
    }
}