rumdl_lib/rules/
md057_existing_relative_links.rs1use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::element_cache::ElementCache;
8use crate::utils::range_utils::LineIndex;
9use lazy_static::lazy_static;
10use regex::Regex;
11use std::collections::HashMap;
12use std::env;
13use std::path::{Path, PathBuf};
14use std::sync::{Arc, Mutex};
15
16mod md057_config;
17use md057_config::MD057Config;
18
19lazy_static! {
21 static ref FILE_EXISTENCE_CACHE: Arc<Mutex<HashMap<PathBuf, bool>>> = Arc::new(Mutex::new(HashMap::new()));
22}
23
24fn reset_file_existence_cache() {
26 let mut cache = FILE_EXISTENCE_CACHE.lock().unwrap();
27 cache.clear();
28}
29
30fn file_exists_with_cache(path: &Path) -> bool {
32 let mut cache = FILE_EXISTENCE_CACHE.lock().unwrap();
33 *cache.entry(path.to_path_buf()).or_insert_with(|| path.exists())
34}
35
36lazy_static! {
37 static ref LINK_START_REGEX: Regex =
39 Regex::new(r"!?\[[^\]]*\]").unwrap();
40
41 static ref URL_EXTRACT_REGEX: Regex =
44 Regex::new("\\]\\(\\s*<?([^>\\)\\s#]+)(#[^)\\s]*)?\\s*(?:\"[^\"]*\")?\\s*>?\\s*\\)").unwrap();
45
46 static ref CODE_FENCE_REGEX: Regex =
48 Regex::new(r"^( {0,3})(`{3,}|~{3,})").unwrap();
49
50 static ref PROTOCOL_DOMAIN_REGEX: Regex =
52 Regex::new(r"^(https?://|ftp://|mailto:|www\.)").unwrap();
53
54 static ref MEDIA_FILE_REGEX: Regex =
56 Regex::new(r"\.(jpg|jpeg|png|gif|bmp|svg|webp|tiff|mp3|mp4|avi|mov|webm|wav|ogg|pdf)$").unwrap();
57
58 static ref FRAGMENT_ONLY_REGEX: Regex =
60 Regex::new(r"^#").unwrap();
61
62 static ref CURRENT_DIR: PathBuf = env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
64}
65
66#[derive(Debug, Default, Clone)]
68pub struct MD057ExistingRelativeLinks {
69 base_path: Arc<Mutex<Option<PathBuf>>>,
71 config: MD057Config,
73}
74
75impl MD057ExistingRelativeLinks {
76 pub fn new() -> Self {
78 Self::default()
79 }
80
81 pub fn with_path<P: AsRef<Path>>(self, path: P) -> Self {
83 let path = path.as_ref();
84 let dir_path = if path.is_file() {
85 path.parent().map(|p| p.to_path_buf())
86 } else {
87 Some(path.to_path_buf())
88 };
89
90 *self.base_path.lock().unwrap() = dir_path;
91 self
92 }
93
94 pub fn with_skip_media_files(mut self, skip_media_files: bool) -> Self {
96 self.config.skip_media_files = skip_media_files;
97 self
98 }
99
100 pub fn from_config_struct(config: MD057Config) -> Self {
101 Self {
102 base_path: Arc::new(Mutex::new(None)),
103 config,
104 }
105 }
106
107 #[inline]
109 fn is_external_url(&self, url: &str) -> bool {
110 if url.is_empty() {
111 return false;
112 }
113
114 if PROTOCOL_DOMAIN_REGEX.is_match(url) || url.starts_with("www.") {
116 return true;
117 }
118
119 if !self.is_media_file(url) && url.ends_with(".com") {
121 return true;
122 }
123
124 if url.starts_with('/') {
126 return false;
127 }
128
129 false
131 }
132
133 #[inline]
135 fn is_fragment_only_link(&self, url: &str) -> bool {
136 url.starts_with('#')
137 }
138
139 #[inline]
141 fn is_media_file(&self, url: &str) -> bool {
142 if !url.contains('.') {
144 return false;
145 }
146 MEDIA_FILE_REGEX.is_match(url)
147 }
148
149 #[inline]
151 fn should_skip_media_file(&self, url: &str) -> bool {
152 self.config.skip_media_files && self.is_media_file(url)
153 }
154
155 fn resolve_link_path(&self, link: &str) -> Option<PathBuf> {
157 self.base_path
158 .lock()
159 .unwrap()
160 .as_ref()
161 .map(|base_path| base_path.join(link))
162 }
163
164 fn process_link(&self, url: &str, line_num: usize, column: usize, warnings: &mut Vec<LintWarning>) {
166 if url.is_empty() {
168 return;
169 }
170
171 if self.is_external_url(url) || self.is_fragment_only_link(url) {
173 return;
174 }
175
176 if self.should_skip_media_file(url) {
178 return;
179 }
180
181 if let Some(resolved_path) = self.resolve_link_path(url) {
183 if !file_exists_with_cache(&resolved_path) {
185 warnings.push(LintWarning {
186 rule_name: Some(self.name()),
187 line: line_num,
188 column,
189 end_line: line_num,
190 end_column: column + url.len(),
191 message: format!("Relative link '{url}' does not exist"),
192 severity: Severity::Warning,
193 fix: None, });
195 }
196 }
197 }
198}
199
200impl Rule for MD057ExistingRelativeLinks {
201 fn name(&self) -> &'static str {
202 "MD057"
203 }
204
205 fn description(&self) -> &'static str {
206 "Relative links should point to existing files"
207 }
208
209 fn category(&self) -> RuleCategory {
210 RuleCategory::Link
211 }
212
213 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
214 ctx.content.is_empty() || !ctx.likely_has_links_or_images()
215 }
216
217 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
218 let content = ctx.content;
219
220 if content.is_empty() || !content.contains('[') {
222 return Ok(Vec::new());
223 }
224
225 if !content.contains("](") {
227 return Ok(Vec::new());
228 }
229
230 reset_file_existence_cache();
232
233 let mut warnings = Vec::new();
234
235 let base_path = {
237 let base_path_guard = self.base_path.lock().unwrap();
238 if base_path_guard.is_some() {
239 base_path_guard.clone()
240 } else {
241 static CACHED_FILE_PATH: std::sync::OnceLock<Option<PathBuf>> = std::sync::OnceLock::new();
243 CACHED_FILE_PATH
244 .get_or_init(|| {
245 if let Ok(file_path) = env::var("RUMDL_FILE_PATH") {
246 let path = Path::new(&file_path);
247 if path.exists() {
248 path.parent()
249 .map(|p| p.to_path_buf())
250 .or_else(|| Some(CURRENT_DIR.clone()))
251 } else {
252 Some(CURRENT_DIR.clone())
253 }
254 } else {
255 Some(CURRENT_DIR.clone())
256 }
257 })
258 .clone()
259 }
260 };
261
262 if base_path.is_none() {
264 return Ok(warnings);
265 }
266
267 if !ctx.links.is_empty() {
269 let line_index = LineIndex::new(content.to_string());
271
272 let element_cache = ElementCache::new(content);
274
275 let lines: Vec<&str> = content.lines().collect();
277
278 for link in &ctx.links {
279 let line_idx = link.line - 1;
280 if line_idx >= lines.len() {
281 continue;
282 }
283
284 let line = lines[line_idx];
285
286 if !line.contains("](") {
288 continue;
289 }
290
291 for link_match in LINK_START_REGEX.find_iter(line) {
293 let start_pos = link_match.start();
294 let end_pos = link_match.end();
295
296 let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
298 let absolute_start_pos = line_start_byte + start_pos;
299
300 if element_cache.is_in_code_span(absolute_start_pos) {
302 continue;
303 }
304
305 if let Some(caps) = URL_EXTRACT_REGEX.captures_at(line, end_pos - 1)
307 && let Some(url_group) = caps.get(1)
308 {
309 let url = url_group.as_str().trim();
310
311 let column = start_pos + 1;
313
314 self.process_link(url, link.line, column, &mut warnings);
316 }
317 }
318 }
319 }
320
321 Ok(warnings)
322 }
323
324 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
325 Ok(ctx.content.to_string())
326 }
327
328 fn as_any(&self) -> &dyn std::any::Any {
329 self
330 }
331
332 fn default_config_section(&self) -> Option<(String, toml::Value)> {
333 let json_value = serde_json::to_value(&self.config).ok()?;
334 Some((
335 self.name().to_string(),
336 crate::rule_config_serde::json_to_toml_value(&json_value)?,
337 ))
338 }
339
340 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
341 where
342 Self: Sized,
343 {
344 let rule_config = crate::rule_config_serde::load_rule_config::<MD057Config>(config);
345 Box::new(Self::from_config_struct(rule_config))
346 }
347}
348
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Lints `content` with `rule` using the standard Markdown flavor.
    fn run_check(rule: &MD057ExistingRelativeLinks, content: &str) -> Vec<crate::rule::LintWarning> {
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    /// Creates a small Markdown file named `name` inside `dir` and returns its path.
    fn create_markdown_file(dir: &Path, name: &str) -> PathBuf {
        let file_path = dir.join(name);
        std::fs::write(&file_path, b"# Test File").unwrap();
        file_path
    }

    #[test]
    fn test_external_urls() {
        let rule = MD057ExistingRelativeLinks::new();

        let external = [
            "https://example.com",
            "http://example.com",
            "ftp://example.com",
            "www.example.com",
            "example.com",
        ];
        for url in external {
            assert!(rule.is_external_url(url));
        }

        let relative = ["./relative/path.md", "relative/path.md", "../parent/path.md"];
        for url in relative {
            assert!(!rule.is_external_url(url));
        }
    }

    #[test]
    fn test_media_files() {
        let rule_default = MD057ExistingRelativeLinks::new();

        for name in ["image.jpg", "video.mp4", "document.pdf", "path/to/audio.mp3"] {
            assert!(
                rule_default.is_media_file(name),
                "{name} should be identified as a media file"
            );
        }

        for name in ["document.md", "code.rs"] {
            assert!(
                !rule_default.is_media_file(name),
                "{name} should not be identified as a media file"
            );
        }

        // Default settings skip media files but nothing else.
        assert!(
            rule_default.should_skip_media_file("image.jpg"),
            "image.jpg should be skipped with default settings"
        );
        assert!(
            !rule_default.should_skip_media_file("document.md"),
            "document.md should not be skipped"
        );

        let rule_no_skip = MD057ExistingRelativeLinks::new().with_skip_media_files(false);
        assert!(
            !rule_no_skip.should_skip_media_file("image.jpg"),
            "image.jpg should not be skipped when skip_media_files is false"
        );
    }

    #[test]
    fn test_no_warnings_without_base_path() {
        let rule = MD057ExistingRelativeLinks::new();

        let result = run_check(&rule, "[Link](missing.md)");

        assert!(result.is_empty(), "Should have no warnings without base path");
    }

    #[test]
    fn test_existing_and_missing_links() {
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        let exists_path = create_markdown_file(base_path, "exists.md");
        assert!(exists_path.exists(), "exists.md should exist for this test");

        let content = r#"
# Test Document

[Valid Link](exists.md)
[Invalid Link](missing.md)
[External Link](https://example.com)
[Media Link](image.jpg)
        "#;

        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);

        let result = run_check(&rule, content);
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("missing.md"));

        // A second run over the same content must report the same thing.
        let result_with_structure = run_check(&rule, content);
        assert_eq!(result.len(), result_with_structure.len());
        assert!(result_with_structure[0].message.contains("missing.md"));
    }

    #[test]
    fn test_angle_bracket_links() {
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        create_markdown_file(base_path, "exists.md");

        let content = r#"
# Test Document

[Valid Link](<exists.md>)
[Invalid Link](<missing.md>)
[External Link](<https://example.com>)
        "#;

        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
        let result = run_check(&rule, content);

        assert_eq!(result.len(), 1, "Should have exactly one warning");
        assert!(
            result[0].message.contains("missing.md"),
            "Warning should mention missing.md"
        );
    }

    #[test]
    fn test_media_file_handling() {
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        assert!(
            !base_path.join("image.jpg").exists(),
            "Test precondition failed: image.jpg should not exist"
        );

        let content = "[Media Link](image.jpg)";

        // Default configuration: media links are not checked.
        let rule_skip_media = MD057ExistingRelativeLinks::new().with_path(base_path);
        let result_skip = run_check(&rule_skip_media, content);
        assert_eq!(
            result_skip.len(),
            0,
            "Should have no warnings when skip_media_files is true"
        );

        // With skipping disabled the missing image is reported.
        let rule_check_all = MD057ExistingRelativeLinks::new()
            .with_path(base_path)
            .with_skip_media_files(false);
        let result_all = run_check(&rule_check_all, content);
        assert_eq!(
            result_all.len(),
            1,
            "Should have one warning when skip_media_files is false"
        );
        assert!(
            result_all[0].message.contains("image.jpg"),
            "Warning should mention image.jpg"
        );
    }

    #[test]
    fn test_code_span_detection() {
        let temp_dir = tempdir().unwrap();
        let rule = MD057ExistingRelativeLinks::new().with_path(temp_dir.path());

        let content = "This is a [link](nonexistent.md) and `[not a link](not-checked.md)` in code.";
        let result = run_check(&rule, content);

        assert_eq!(result.len(), 1, "Should only flag the real link");
        assert!(result[0].message.contains("nonexistent.md"));
    }

    #[test]
    fn test_inline_code_spans() {
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        let content = r#"
# Test Document

This is a normal link: [Link](missing.md)

This is a code span with a link: `[Link](another-missing.md)`

Some more text with `inline code [Link](yet-another-missing.md) embedded`.

        "#;

        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
        let result = run_check(&rule, content);

        assert_eq!(result.len(), 1, "Should have exactly one warning");
        assert!(
            result[0].message.contains("missing.md"),
            "Warning should be for missing.md"
        );

        let mentions = |needle: &str| result.iter().any(|w| w.message.contains(needle));
        assert!(
            !mentions("another-missing.md"),
            "Should not warn about link in code span"
        );
        assert!(
            !mentions("yet-another-missing.md"),
            "Should not warn about link in inline code"
        );
    }
}