rumdl_lib/rules/
md057_existing_relative_links.rs1use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::element_cache::ElementCache;
8use lazy_static::lazy_static;
9use regex::Regex;
10use std::collections::HashMap;
11use std::env;
12use std::path::{Path, PathBuf};
13use std::sync::{Arc, Mutex};
14
15mod md057_config;
16use md057_config::MD057Config;
17
18lazy_static! {
20 static ref FILE_EXISTENCE_CACHE: Arc<Mutex<HashMap<PathBuf, bool>>> = Arc::new(Mutex::new(HashMap::new()));
21}
22
23fn reset_file_existence_cache() {
25 let mut cache = FILE_EXISTENCE_CACHE.lock().unwrap();
26 cache.clear();
27}
28
29fn file_exists_with_cache(path: &Path) -> bool {
31 let mut cache = FILE_EXISTENCE_CACHE.lock().unwrap();
32 *cache.entry(path.to_path_buf()).or_insert_with(|| path.exists())
33}
34
35lazy_static! {
36 static ref LINK_START_REGEX: Regex =
38 Regex::new(r"!?\[[^\]]*\]").unwrap();
39
40 static ref URL_EXTRACT_REGEX: Regex =
43 Regex::new("\\]\\(\\s*<?([^>\\)\\s#]+)(#[^)\\s]*)?\\s*(?:\"[^\"]*\")?\\s*>?\\s*\\)").unwrap();
44
45 static ref CODE_FENCE_REGEX: Regex =
47 Regex::new(r"^( {0,3})(`{3,}|~{3,})").unwrap();
48
49 static ref PROTOCOL_DOMAIN_REGEX: Regex =
51 Regex::new(r"^(https?://|ftp://|mailto:|www\.)").unwrap();
52
53 static ref MEDIA_FILE_REGEX: Regex =
55 Regex::new(r"\.(jpg|jpeg|png|gif|bmp|svg|webp|tiff|mp3|mp4|avi|mov|webm|wav|ogg|pdf)$").unwrap();
56
57 static ref FRAGMENT_ONLY_REGEX: Regex =
59 Regex::new(r"^#").unwrap();
60
61 static ref CURRENT_DIR: PathBuf = env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
63}
64
/// Rule MD057: relative links should point to existing files.
#[derive(Debug, Default, Clone)]
pub struct MD057ExistingRelativeLinks {
    /// Directory that relative link targets are joined onto; `None` until
    /// `with_path` stores one. Held behind `Arc<Mutex<..>>`, so clones of the
    /// rule share the same slot.
    base_path: Arc<Mutex<Option<PathBuf>>>,
    /// Rule settings; `skip_media_files` is the option read in this file.
    config: MD057Config,
}
73
74impl MD057ExistingRelativeLinks {
75 pub fn new() -> Self {
77 Self::default()
78 }
79
80 pub fn with_path<P: AsRef<Path>>(self, path: P) -> Self {
82 let path = path.as_ref();
83 let dir_path = if path.is_file() {
84 path.parent().map(|p| p.to_path_buf())
85 } else {
86 Some(path.to_path_buf())
87 };
88
89 *self.base_path.lock().unwrap() = dir_path;
90 self
91 }
92
93 pub fn with_skip_media_files(mut self, skip_media_files: bool) -> Self {
95 self.config.skip_media_files = skip_media_files;
96 self
97 }
98
99 pub fn from_config_struct(config: MD057Config) -> Self {
100 Self {
101 base_path: Arc::new(Mutex::new(None)),
102 config,
103 }
104 }
105
106 #[inline]
108 fn is_external_url(&self, url: &str) -> bool {
109 if url.is_empty() {
110 return false;
111 }
112
113 if PROTOCOL_DOMAIN_REGEX.is_match(url) || url.starts_with("www.") {
115 return true;
116 }
117
118 if !self.is_media_file(url) && url.ends_with(".com") {
120 return true;
121 }
122
123 if url.starts_with('/') {
125 return false;
126 }
127
128 false
130 }
131
132 #[inline]
134 fn is_fragment_only_link(&self, url: &str) -> bool {
135 url.starts_with('#')
136 }
137
138 #[inline]
140 fn is_media_file(&self, url: &str) -> bool {
141 if !url.contains('.') {
143 return false;
144 }
145 MEDIA_FILE_REGEX.is_match(url)
146 }
147
148 #[inline]
150 fn should_skip_media_file(&self, url: &str) -> bool {
151 self.config.skip_media_files && self.is_media_file(url)
152 }
153
154 fn resolve_link_path(&self, link: &str) -> Option<PathBuf> {
156 self.base_path
157 .lock()
158 .unwrap()
159 .as_ref()
160 .map(|base_path| base_path.join(link))
161 }
162
163 fn process_link(&self, url: &str, line_num: usize, column: usize, warnings: &mut Vec<LintWarning>) {
165 if url.is_empty() {
167 return;
168 }
169
170 if self.is_external_url(url) || self.is_fragment_only_link(url) {
172 return;
173 }
174
175 if self.should_skip_media_file(url) {
177 return;
178 }
179
180 if let Some(resolved_path) = self.resolve_link_path(url) {
182 if !file_exists_with_cache(&resolved_path) {
184 warnings.push(LintWarning {
185 rule_name: Some(self.name()),
186 line: line_num,
187 column,
188 end_line: line_num,
189 end_column: column + url.len(),
190 message: format!("Relative link '{url}' does not exist"),
191 severity: Severity::Warning,
192 fix: None, });
194 }
195 }
196 }
197}
198
199impl Rule for MD057ExistingRelativeLinks {
200 fn name(&self) -> &'static str {
201 "MD057"
202 }
203
204 fn description(&self) -> &'static str {
205 "Relative links should point to existing files"
206 }
207
208 fn category(&self) -> RuleCategory {
209 RuleCategory::Link
210 }
211
212 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
213 let content = ctx.content;
214 content.is_empty() || !content.contains('[') || !content.contains("](")
215 }
216
217 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
218 let content = ctx.content;
219
220 if content.is_empty() || !content.contains('[') {
222 return Ok(Vec::new());
223 }
224
225 if !content.contains("](") {
227 return Ok(Vec::new());
228 }
229
230 reset_file_existence_cache();
232
233 let mut warnings = Vec::new();
234
235 let base_path = {
237 let base_path_guard = self.base_path.lock().unwrap();
238 if base_path_guard.is_some() {
239 base_path_guard.clone()
240 } else {
241 static CACHED_FILE_PATH: std::sync::OnceLock<Option<PathBuf>> = std::sync::OnceLock::new();
243 CACHED_FILE_PATH
244 .get_or_init(|| {
245 if let Ok(file_path) = env::var("RUMDL_FILE_PATH") {
246 let path = Path::new(&file_path);
247 if path.exists() {
248 path.parent()
249 .map(|p| p.to_path_buf())
250 .or_else(|| Some(CURRENT_DIR.clone()))
251 } else {
252 Some(CURRENT_DIR.clone())
253 }
254 } else {
255 Some(CURRENT_DIR.clone())
256 }
257 })
258 .clone()
259 }
260 };
261
262 if base_path.is_none() {
264 return Ok(warnings);
265 }
266
267 if !ctx.links.is_empty() {
269 let mut line_positions = Vec::new();
271 let mut pos = 0;
272 line_positions.push(0);
273 for ch in content.chars() {
274 pos += ch.len_utf8();
275 if ch == '\n' {
276 line_positions.push(pos);
277 }
278 }
279
280 let element_cache = ElementCache::new(content);
282
283 let lines: Vec<&str> = content.lines().collect();
285
286 for link in &ctx.links {
287 let line_idx = link.line - 1;
288 if line_idx >= lines.len() {
289 continue;
290 }
291
292 let line = lines[line_idx];
293
294 if !line.contains("](") {
296 continue;
297 }
298
299 for link_match in LINK_START_REGEX.find_iter(line) {
301 let start_pos = link_match.start();
302 let end_pos = link_match.end();
303
304 let absolute_start_pos = if line_idx < line_positions.len() {
306 line_positions[line_idx] + start_pos
307 } else {
308 content.lines().take(line_idx).map(|l| l.len() + 1).sum::<usize>() + start_pos
310 };
311
312 if element_cache.is_in_code_span(absolute_start_pos) {
314 continue;
315 }
316
317 if let Some(caps) = URL_EXTRACT_REGEX.captures_at(line, end_pos - 1)
319 && let Some(url_group) = caps.get(1)
320 {
321 let url = url_group.as_str().trim();
322
323 let column = start_pos + 1;
325
326 self.process_link(url, link.line, column, &mut warnings);
328 }
329 }
330 }
331 }
332
333 Ok(warnings)
334 }
335
336 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
337 Ok(ctx.content.to_string())
338 }
339
340 fn as_any(&self) -> &dyn std::any::Any {
341 self
342 }
343
344 fn default_config_section(&self) -> Option<(String, toml::Value)> {
345 let json_value = serde_json::to_value(&self.config).ok()?;
346 Some((
347 self.name().to_string(),
348 crate::rule_config_serde::json_to_toml_value(&json_value)?,
349 ))
350 }
351
352 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
353 where
354 Self: Sized,
355 {
356 let rule_config = crate::rule_config_serde::load_rule_config::<MD057Config>(config);
357 Box::new(Self::from_config_struct(rule_config))
358 }
359}
360
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use tempfile::tempdir;

    /// Lints `markdown` with `rule` under the standard flavor and returns the warnings.
    fn run_check(rule: &MD057ExistingRelativeLinks, markdown: &str) -> Vec<LintWarning> {
        let ctx = crate::lint_context::LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    /// Writes a small markdown file called `name` into `dir` and returns its path.
    fn create_fixture(dir: &Path, name: &str) -> PathBuf {
        let target = dir.join(name);
        File::create(&target).unwrap().write_all(b"# Test File").unwrap();
        target
    }

    #[test]
    fn test_external_urls() {
        let rule = MD057ExistingRelativeLinks::new();

        let external = [
            "https://example.com",
            "http://example.com",
            "ftp://example.com",
            "www.example.com",
            "example.com",
        ];
        for url in external {
            assert!(rule.is_external_url(url), "{url} should be external");
        }

        let relative = ["./relative/path.md", "relative/path.md", "../parent/path.md"];
        for url in relative {
            assert!(!rule.is_external_url(url), "{url} should not be external");
        }
    }

    #[test]
    fn test_media_files() {
        let rule_default = MD057ExistingRelativeLinks::new();

        // Extension-based classification.
        for name in ["image.jpg", "video.mp4", "document.pdf", "path/to/audio.mp3"] {
            assert!(
                rule_default.is_media_file(name),
                "{name} should be identified as a media file"
            );
        }
        for name in ["document.md", "code.rs"] {
            assert!(
                !rule_default.is_media_file(name),
                "{name} should not be identified as a media file"
            );
        }

        // Skipping depends on the configuration flag as well as the extension.
        assert!(
            rule_default.should_skip_media_file("image.jpg"),
            "image.jpg should be skipped with default settings"
        );
        assert!(
            !rule_default.should_skip_media_file("document.md"),
            "document.md should not be skipped"
        );

        let rule_no_skip = MD057ExistingRelativeLinks::new().with_skip_media_files(false);
        assert!(
            !rule_no_skip.should_skip_media_file("image.jpg"),
            "image.jpg should not be skipped when skip_media_files is false"
        );
    }

    #[test]
    fn test_no_warnings_without_base_path() {
        let rule = MD057ExistingRelativeLinks::new();

        let result = run_check(&rule, "[Link](missing.md)");

        assert!(result.is_empty(), "Should have no warnings without base path");
    }

    #[test]
    fn test_existing_and_missing_links() {
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        let exists_path = create_fixture(base_path, "exists.md");
        assert!(exists_path.exists(), "exists.md should exist for this test");

        let content = r#"
# Test Document

[Valid Link](exists.md)
[Invalid Link](missing.md)
[External Link](https://example.com)
[Media Link](image.jpg)
        "#;

        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);

        let result = run_check(&rule, content);
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("missing.md"));

        // A second run over the same content must report the same thing.
        let result_with_structure = run_check(&rule, content);
        assert_eq!(result.len(), result_with_structure.len());
        assert!(result_with_structure[0].message.contains("missing.md"));
    }

    #[test]
    fn test_angle_bracket_links() {
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();
        create_fixture(base_path, "exists.md");

        let content = r#"
# Test Document

[Valid Link](<exists.md>)
[Invalid Link](<missing.md>)
[External Link](<https://example.com>)
        "#;

        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
        let result = run_check(&rule, content);

        assert_eq!(result.len(), 1, "Should have exactly one warning");
        assert!(
            result[0].message.contains("missing.md"),
            "Warning should mention missing.md"
        );
    }

    #[test]
    fn test_media_file_handling() {
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        assert!(
            !base_path.join("image.jpg").exists(),
            "Test precondition failed: image.jpg should not exist"
        );

        let content = "[Media Link](image.jpg)";

        // Default configuration: media links are exempt.
        let rule_skip_media = MD057ExistingRelativeLinks::new().with_path(base_path);
        let result_skip = run_check(&rule_skip_media, content);
        assert_eq!(
            result_skip.len(),
            0,
            "Should have no warnings when skip_media_files is true"
        );

        // With the exemption disabled the missing image is reported.
        let rule_check_all = MD057ExistingRelativeLinks::new()
            .with_path(base_path)
            .with_skip_media_files(false);
        let result_all = run_check(&rule_check_all, content);
        assert_eq!(
            result_all.len(),
            1,
            "Should have one warning when skip_media_files is false"
        );
        assert!(
            result_all[0].message.contains("image.jpg"),
            "Warning should mention image.jpg"
        );
    }

    #[test]
    fn test_code_span_detection() {
        let temp_dir = tempdir().unwrap();
        let rule = MD057ExistingRelativeLinks::new().with_path(temp_dir.path());

        let content = "This is a [link](nonexistent.md) and `[not a link](not-checked.md)` in code.";
        let result = run_check(&rule, content);

        assert_eq!(result.len(), 1, "Should only flag the real link");
        assert!(result[0].message.contains("nonexistent.md"));
    }

    #[test]
    fn test_inline_code_spans() {
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        let content = r#"
# Test Document

This is a normal link: [Link](missing.md)

This is a code span with a link: `[Link](another-missing.md)`

Some more text with `inline code [Link](yet-another-missing.md) embedded`.

        "#;

        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
        let result = run_check(&rule, content);

        assert_eq!(result.len(), 1, "Should have exactly one warning");
        assert!(
            result[0].message.contains("missing.md"),
            "Warning should be for missing.md"
        );

        let mentions = |needle: &str| result.iter().any(|w| w.message.contains(needle));
        assert!(
            !mentions("another-missing.md"),
            "Should not warn about link in code span"
        );
        assert!(
            !mentions("yet-another-missing.md"),
            "Should not warn about link in inline code"
        );
    }
}