rumdl_lib/rules/
md057_existing_relative_links.rs

1//!
2//! Rule MD057: Existing relative links
3//!
4//! See [docs/md057.md](../../docs/md057.md) for full documentation, configuration, and examples.
5
6use crate::rule::{CrossFileScope, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::element_cache::ElementCache;
8use crate::workspace_index::{CrossFileLinkIndex, FileIndex};
9use regex::Regex;
10use std::collections::HashMap;
11use std::env;
12use std::path::{Path, PathBuf};
13use std::sync::LazyLock;
14use std::sync::{Arc, Mutex};
15
16mod md057_config;
17use md057_config::MD057Config;
18
/// Process-wide memo of filesystem existence probes, keyed by the probed path.
///
/// Guarded by a mutex so it can be read and written from any thread; it starts out empty.
static FILE_EXISTENCE_CACHE: LazyLock<Arc<Mutex<HashMap<PathBuf, bool>>>> = LazyLock::new(Arc::default);
22
23// Reset the file existence cache (typically between rule runs)
24fn reset_file_existence_cache() {
25    let mut cache = FILE_EXISTENCE_CACHE
26        .lock()
27        .expect("File existence cache mutex poisoned");
28    cache.clear();
29}
30
31// Check if a file exists with caching
32fn file_exists_with_cache(path: &Path) -> bool {
33    let mut cache = FILE_EXISTENCE_CACHE
34        .lock()
35        .expect("File existence cache mutex poisoned");
36    *cache.entry(path.to_path_buf()).or_insert_with(|| path.exists())
37}
38
39// Regex to match the start of a link - simplified for performance
40static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());
41
42/// Regex to extract the URL from a markdown link
43/// Format: `](URL)` or `](URL "title")`
44static URL_EXTRACT_REGEX: LazyLock<Regex> =
45    LazyLock::new(|| Regex::new("\\]\\(\\s*<?([^>\\)\\s#]+)(#[^)\\s]*)?\\s*(?:\"[^\"]*\")?\\s*>?\\s*\\)").unwrap());
46
47/// Regex to detect protocol and domain for external links
48static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
49    LazyLock::new(|| Regex::new(r"^(https?://|ftp://|mailto:|www\.)").unwrap());
50
51/// Regex to detect media file types
52static MEDIA_FILE_REGEX: LazyLock<Regex> =
53    LazyLock::new(|| Regex::new(r"\.(jpg|jpeg|png|gif|bmp|svg|webp|tiff|mp3|mp4|avi|mov|webm|wav|ogg|pdf)$").unwrap());
54
55// Current working directory
56static CURRENT_DIR: LazyLock<PathBuf> = LazyLock::new(|| env::current_dir().unwrap_or_else(|_| PathBuf::from(".")));
57
/// Supported markdown file extensions
///
/// Entries are lowercase and include the leading dot: `is_markdown_file` lowercases the
/// candidate path and then tests it against each entry as a suffix.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md",
    ".markdown",
    ".mdx",
    ".mkd",
    ".mkdn",
    ".mdown",
    ".mdwn",
    ".qmd",
    ".rmd",
];
70
71/// Check if a path has a markdown extension (case-insensitive)
72#[inline]
73fn is_markdown_file(path: &str) -> bool {
74    let path_lower = path.to_lowercase();
75    MARKDOWN_EXTENSIONS.iter().any(|ext| path_lower.ends_with(ext))
76}
77
/// Rule MD057: Existing relative links should point to valid files or directories.
///
/// `Clone` is derived and `base_path` lives behind an `Arc`, so a cloned rule shares the
/// same base-path slot as the value it was cloned from.
#[derive(Debug, Default, Clone)]
pub struct MD057ExistingRelativeLinks {
    /// Base directory for resolving relative links; `None` when no base path has been set
    base_path: Arc<Mutex<Option<PathBuf>>>,
    /// Configuration (for example whether media files are skipped)
    config: MD057Config,
}
86
87impl MD057ExistingRelativeLinks {
88    /// Create a new instance with default settings
89    pub fn new() -> Self {
90        Self::default()
91    }
92
93    /// Set the base path for resolving relative links
94    pub fn with_path<P: AsRef<Path>>(self, path: P) -> Self {
95        let path = path.as_ref();
96        let dir_path = if path.is_file() {
97            path.parent().map(|p| p.to_path_buf())
98        } else {
99            Some(path.to_path_buf())
100        };
101
102        *self.base_path.lock().expect("Base path mutex poisoned") = dir_path;
103        self
104    }
105
106    /// Configure whether to skip checking media files
107    pub fn with_skip_media_files(mut self, skip_media_files: bool) -> Self {
108        self.config.skip_media_files = skip_media_files;
109        self
110    }
111
112    pub fn from_config_struct(config: MD057Config) -> Self {
113        Self {
114            base_path: Arc::new(Mutex::new(None)),
115            config,
116        }
117    }
118
119    /// Check if a URL is external (optimized version)
120    #[inline]
121    fn is_external_url(&self, url: &str) -> bool {
122        if url.is_empty() {
123            return false;
124        }
125
126        // Quick checks for common external URL patterns
127        if PROTOCOL_DOMAIN_REGEX.is_match(url) || url.starts_with("www.") {
128            return true;
129        }
130
131        // More restrictive domain check using a simpler pattern
132        if !self.is_media_file(url) && url.ends_with(".com") {
133            return true;
134        }
135
136        // Absolute paths within the site are not external
137        if url.starts_with('/') {
138            return false;
139        }
140
141        // All other cases (relative paths, etc.) are not external
142        false
143    }
144
145    /// Check if the URL is a fragment-only link (internal document link)
146    #[inline]
147    fn is_fragment_only_link(&self, url: &str) -> bool {
148        url.starts_with('#')
149    }
150
151    /// Check if the URL has a media file extension (optimized with early returns)
152    #[inline]
153    fn is_media_file(&self, url: &str) -> bool {
154        // Quick check before using regex
155        if !url.contains('.') {
156            return false;
157        }
158        MEDIA_FILE_REGEX.is_match(url)
159    }
160
161    /// Determine if we should skip checking this media file
162    #[inline]
163    fn should_skip_media_file(&self, url: &str) -> bool {
164        self.config.skip_media_files && self.is_media_file(url)
165    }
166
167    /// Resolve a relative link against the base path
168    fn resolve_link_path(&self, link: &str) -> Option<PathBuf> {
169        self.base_path
170            .lock()
171            .unwrap()
172            .as_ref()
173            .map(|base_path| base_path.join(link))
174    }
175
176    /// Process a single link and check if it exists
177    fn process_link(&self, url: &str, line_num: usize, column: usize, warnings: &mut Vec<LintWarning>) {
178        // Skip empty URLs
179        if url.is_empty() {
180            return;
181        }
182
183        // Skip external URLs and fragment-only links (optimized order)
184        if self.is_external_url(url) || self.is_fragment_only_link(url) {
185            return;
186        }
187
188        // Skip media files if configured to do so
189        if self.should_skip_media_file(url) {
190            return;
191        }
192
193        // Resolve the relative link against the base path
194        if let Some(resolved_path) = self.resolve_link_path(url) {
195            // Check if the file exists (with caching to avoid filesystem calls)
196            if !file_exists_with_cache(&resolved_path) {
197                warnings.push(LintWarning {
198                    rule_name: Some(self.name().to_string()),
199                    line: line_num,
200                    column,
201                    end_line: line_num,
202                    end_column: column + url.len(),
203                    message: format!("Relative link '{url}' does not exist"),
204                    severity: Severity::Warning,
205                    fix: None, // No automatic fix for missing files
206                });
207            }
208        }
209    }
210}
211
212impl Rule for MD057ExistingRelativeLinks {
213    fn name(&self) -> &'static str {
214        "MD057"
215    }
216
217    fn description(&self) -> &'static str {
218        "Relative links should point to existing files"
219    }
220
221    fn category(&self) -> RuleCategory {
222        RuleCategory::Link
223    }
224
225    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
226        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
227    }
228
229    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
230        let content = ctx.content;
231
232        // Early returns for performance
233        if content.is_empty() || !content.contains('[') {
234            return Ok(Vec::new());
235        }
236
237        // Quick check for any potential links before expensive operations
238        if !content.contains("](") {
239            return Ok(Vec::new());
240        }
241
242        // Reset the file existence cache for a fresh run
243        reset_file_existence_cache();
244
245        let mut warnings = Vec::new();
246
247        // Cache base path lookup to avoid repeated mutex operations
248        let base_path = {
249            let base_path_guard = self.base_path.lock().expect("Base path mutex poisoned");
250            if base_path_guard.is_some() {
251                base_path_guard.clone()
252            } else {
253                // Try to determine the base path from the file being processed (cached)
254                static CACHED_FILE_PATH: std::sync::OnceLock<Option<PathBuf>> = std::sync::OnceLock::new();
255                CACHED_FILE_PATH
256                    .get_or_init(|| {
257                        if let Ok(file_path) = env::var("RUMDL_FILE_PATH") {
258                            let path = Path::new(&file_path);
259                            if path.exists() {
260                                path.parent()
261                                    .map(|p| p.to_path_buf())
262                                    .or_else(|| Some(CURRENT_DIR.clone()))
263                            } else {
264                                Some(CURRENT_DIR.clone())
265                            }
266                        } else {
267                            Some(CURRENT_DIR.clone())
268                        }
269                    })
270                    .clone()
271            }
272        };
273
274        // If we still don't have a base path, we can't validate relative links
275        if base_path.is_none() {
276            return Ok(warnings);
277        }
278
279        // Use LintContext links instead of expensive regex parsing
280        if !ctx.links.is_empty() {
281            // Use LineIndex for correct position calculation across all line ending types
282            let line_index = &ctx.line_index;
283
284            // Create element cache once for all links
285            let element_cache = ElementCache::new(content);
286
287            // Pre-collect lines to avoid repeated line iteration
288            let lines: Vec<&str> = content.lines().collect();
289
290            for link in &ctx.links {
291                let line_idx = link.line - 1;
292                if line_idx >= lines.len() {
293                    continue;
294                }
295
296                let line = lines[line_idx];
297
298                // Quick check for link pattern in this line
299                if !line.contains("](") {
300                    continue;
301                }
302
303                // Find all links in this line using optimized regex
304                for link_match in LINK_START_REGEX.find_iter(line) {
305                    let start_pos = link_match.start();
306                    let end_pos = link_match.end();
307
308                    // Calculate absolute position using LineIndex
309                    let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
310                    let absolute_start_pos = line_start_byte + start_pos;
311
312                    // Skip if this link is in a code span
313                    if element_cache.is_in_code_span(absolute_start_pos) {
314                        continue;
315                    }
316
317                    // Find the URL part after the link text
318                    if let Some(caps) = URL_EXTRACT_REGEX.captures_at(line, end_pos - 1)
319                        && let Some(url_group) = caps.get(1)
320                    {
321                        let url = url_group.as_str().trim();
322
323                        // Calculate column position
324                        let column = start_pos + 1;
325
326                        // Process and validate the link
327                        self.process_link(url, link.line, column, &mut warnings);
328                    }
329                }
330            }
331        }
332
333        Ok(warnings)
334    }
335
336    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
337        Ok(ctx.content.to_string())
338    }
339
340    fn as_any(&self) -> &dyn std::any::Any {
341        self
342    }
343
344    fn default_config_section(&self) -> Option<(String, toml::Value)> {
345        let json_value = serde_json::to_value(&self.config).ok()?;
346        Some((
347            self.name().to_string(),
348            crate::rule_config_serde::json_to_toml_value(&json_value)?,
349        ))
350    }
351
352    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
353    where
354        Self: Sized,
355    {
356        let rule_config = crate::rule_config_serde::load_rule_config::<MD057Config>(config);
357        Box::new(Self::from_config_struct(rule_config))
358    }
359
360    fn cross_file_scope(&self) -> CrossFileScope {
361        CrossFileScope::Workspace
362    }
363
364    fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, index: &mut FileIndex) {
365        let content = ctx.content;
366
367        // Early returns for performance
368        if content.is_empty() || !content.contains("](") {
369            return;
370        }
371
372        // Pre-collect lines to avoid repeated line iteration
373        let lines: Vec<&str> = content.lines().collect();
374        let element_cache = ElementCache::new(content);
375        let line_index = &ctx.line_index;
376
377        for link in &ctx.links {
378            let line_idx = link.line - 1;
379            if line_idx >= lines.len() {
380                continue;
381            }
382
383            let line = lines[line_idx];
384            if !line.contains("](") {
385                continue;
386            }
387
388            // Find all links in this line
389            for link_match in LINK_START_REGEX.find_iter(line) {
390                let start_pos = link_match.start();
391                let end_pos = link_match.end();
392
393                // Calculate absolute position for code span detection
394                let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
395                let absolute_start_pos = line_start_byte + start_pos;
396
397                // Skip if in code span
398                if element_cache.is_in_code_span(absolute_start_pos) {
399                    continue;
400                }
401
402                // Extract the URL (group 1) and fragment (group 2)
403                // The regex separates URL and fragment: group 1 excludes #, group 2 captures #fragment
404                if let Some(caps) = URL_EXTRACT_REGEX.captures_at(line, end_pos - 1)
405                    && let Some(url_group) = caps.get(1)
406                {
407                    let file_path = url_group.as_str().trim();
408
409                    // Skip empty, external, or fragment-only URLs
410                    if file_path.is_empty()
411                        || PROTOCOL_DOMAIN_REGEX.is_match(file_path)
412                        || file_path.starts_with("www.")
413                        || file_path.starts_with('#')
414                    {
415                        continue;
416                    }
417
418                    // Get fragment from capture group 2 (includes # prefix)
419                    let fragment = caps.get(2).map(|m| m.as_str().trim_start_matches('#')).unwrap_or("");
420
421                    // Only index links to markdown files
422                    if is_markdown_file(file_path) {
423                        index.add_cross_file_link(CrossFileLinkIndex {
424                            target_path: file_path.to_string(),
425                            fragment: fragment.to_string(),
426                            line: link.line,
427                            column: start_pos + 1,
428                        });
429                    }
430                }
431            }
432        }
433    }
434
435    fn cross_file_check(
436        &self,
437        file_path: &Path,
438        file_index: &FileIndex,
439        workspace_index: &crate::workspace_index::WorkspaceIndex,
440    ) -> LintResult {
441        let mut warnings = Vec::new();
442
443        // Get the directory containing this file for resolving relative links
444        let file_dir = file_path.parent();
445
446        for cross_link in &file_index.cross_file_links {
447            // Resolve the relative path
448            let target_path = if let Some(dir) = file_dir {
449                dir.join(&cross_link.target_path)
450            } else {
451                Path::new(&cross_link.target_path).to_path_buf()
452            };
453
454            // Normalize the path (handle .., ., etc.)
455            let target_path = normalize_path(&target_path);
456
457            // Check if the target file exists in the workspace index
458            if !workspace_index.contains_file(&target_path) {
459                // File not in index - it might not exist or might not be a markdown file
460                // For markdown files, if they're not indexed, they don't exist in the workspace
461                if cross_link.target_path.ends_with(".md") || cross_link.target_path.ends_with(".markdown") {
462                    warnings.push(LintWarning {
463                        rule_name: Some(self.name().to_string()),
464                        line: cross_link.line,
465                        column: cross_link.column,
466                        end_line: cross_link.line,
467                        end_column: cross_link.column + cross_link.target_path.len(),
468                        message: format!(
469                            "Relative link '{}' does not exist in the workspace",
470                            cross_link.target_path
471                        ),
472                        severity: Severity::Warning,
473                        fix: None,
474                    });
475                }
476            }
477        }
478
479        Ok(warnings)
480    }
481}
482
/// Normalize a path lexically by resolving `.` and `..` components.
///
/// The filesystem is never consulted. `.` components are dropped and `..` removes the
/// preceding normal component. A `..` that cannot be resolved is handled explicitly:
///
/// * directly after the root (or a prefix) it is ignored, since the root is its own parent;
/// * at the start of a relative path (or after another kept `..`) it is preserved, so a
///   path that escapes its starting directory stays distinguishable from one that does not
///   (`../readme.md` is not the same file as `readme.md`).
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut components: Vec<Component<'_>> = Vec::new();

    for component in path.components() {
        match component {
            // Skip current directory markers
            Component::CurDir => {}
            Component::ParentDir => match components.last() {
                // Go up one level
                Some(Component::Normal(_)) => {
                    components.pop();
                }
                // Cannot go above the root
                Some(Component::RootDir | Component::Prefix(_)) => {}
                // Nothing left to cancel out: keep the `..`
                Some(Component::ParentDir | Component::CurDir) | None => components.push(component),
            },
            Component::Prefix(_) | Component::RootDir | Component::Normal(_) => components.push(component),
        }
    }

    components.iter().collect()
}
506
// Unit tests for MD057: URL classification helpers, filesystem-backed link checks
// (run against temporary directories), code span handling, and cross-file validation.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use tempfile::tempdir;

    /// Protocol-prefixed URLs, `www.` hosts and bare `.com` domains are external;
    /// relative paths are not.
    #[test]
    fn test_external_urls() {
        let rule = MD057ExistingRelativeLinks::new();

        assert!(rule.is_external_url("https://example.com"));
        assert!(rule.is_external_url("http://example.com"));
        assert!(rule.is_external_url("ftp://example.com"));
        assert!(rule.is_external_url("www.example.com"));
        assert!(rule.is_external_url("example.com"));

        assert!(!rule.is_external_url("./relative/path.md"));
        assert!(!rule.is_external_url("relative/path.md"));
        assert!(!rule.is_external_url("../parent/path.md"));
    }

    /// Media extensions are recognised, and skipping follows the `skip_media_files` setting.
    #[test]
    fn test_media_files() {
        // Test with default settings (skip_media_files = true)
        let rule_default = MD057ExistingRelativeLinks::new();

        // Test media file identification
        assert!(
            rule_default.is_media_file("image.jpg"),
            "image.jpg should be identified as a media file"
        );
        assert!(
            rule_default.is_media_file("video.mp4"),
            "video.mp4 should be identified as a media file"
        );
        assert!(
            rule_default.is_media_file("document.pdf"),
            "document.pdf should be identified as a media file"
        );
        assert!(
            rule_default.is_media_file("path/to/audio.mp3"),
            "path/to/audio.mp3 should be identified as a media file"
        );

        assert!(
            !rule_default.is_media_file("document.md"),
            "document.md should not be identified as a media file"
        );
        assert!(
            !rule_default.is_media_file("code.rs"),
            "code.rs should not be identified as a media file"
        );

        // Test media file skipping with default settings (skip_media_files = true)
        assert!(
            rule_default.should_skip_media_file("image.jpg"),
            "image.jpg should be skipped with default settings"
        );
        assert!(
            !rule_default.should_skip_media_file("document.md"),
            "document.md should not be skipped"
        );

        // Test media file skipping with skip_media_files = false
        let rule_no_skip = MD057ExistingRelativeLinks::new().with_skip_media_files(false);
        assert!(
            !rule_no_skip.should_skip_media_file("image.jpg"),
            "image.jpg should not be skipped when skip_media_files is false"
        );
    }

    /// A rule without a configured base path reports nothing, even for a missing target.
    #[test]
    fn test_no_warnings_without_base_path() {
        let rule = MD057ExistingRelativeLinks::new();
        let content = "[Link](missing.md)";

        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty(), "Should have no warnings without base path");
    }

    /// Only the missing relative target is reported; existing, external and media links
    /// are not.
    #[test]
    fn test_existing_and_missing_links() {
        // Create a temporary directory for test files
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        // Create an existing file
        let exists_path = base_path.join("exists.md");
        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();

        // Verify the file exists
        assert!(exists_path.exists(), "exists.md should exist for this test");

        // Create test content with both existing and missing links
        let content = r#"
# Test Document

[Valid Link](exists.md)
[Invalid Link](missing.md)
[External Link](https://example.com)
[Media Link](image.jpg)
        "#;

        // Initialize rule with the base path
        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);

        // Test the rule
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        // Should have one warning for the missing.md link but not for the media file
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("missing.md"));

        // Test with check method
        let result_with_structure = rule.check(&ctx).unwrap();

        // Results should be the same
        assert_eq!(result.len(), result_with_structure.len());
        assert!(result_with_structure[0].message.contains("missing.md"));
    }

    /// Destinations wrapped in `<...>` are checked like plain destinations.
    #[test]
    fn test_angle_bracket_links() {
        // Create a temporary directory for test files
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        // Create an existing file
        let exists_path = base_path.join("exists.md");
        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();

        // Create test content with angle bracket links
        let content = r#"
# Test Document

[Valid Link](<exists.md>)
[Invalid Link](<missing.md>)
[External Link](<https://example.com>)
    "#;

        // Test with default settings
        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);

        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        // Should have one warning for missing.md
        assert_eq!(result.len(), 1, "Should have exactly one warning");
        assert!(
            result[0].message.contains("missing.md"),
            "Warning should mention missing.md"
        );
    }

    /// A missing media file is ignored by a default rule and reported once
    /// `with_skip_media_files(false)` is applied.
    #[test]
    fn test_media_file_handling() {
        // Create a temporary directory for test files
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        // Explicitly check that image.jpg doesn't exist in the test directory
        let image_path = base_path.join("image.jpg");
        assert!(
            !image_path.exists(),
            "Test precondition failed: image.jpg should not exist"
        );

        // Create a test content with a media link - make sure it's very explicit
        let content = "[Media Link](image.jpg)";

        // Test with skip_media_files = true (default)
        let rule_skip_media = MD057ExistingRelativeLinks::new().with_path(base_path);

        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result_skip = rule_skip_media.check(&ctx).unwrap();

        // Should have no warnings when media files are skipped
        assert_eq!(
            result_skip.len(),
            0,
            "Should have no warnings when skip_media_files is true"
        );

        // Test with skip_media_files = false
        let rule_check_all = MD057ExistingRelativeLinks::new()
            .with_path(base_path)
            .with_skip_media_files(false);

        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result_all = rule_check_all.check(&ctx).unwrap();

        // Should warn about the missing media file
        assert_eq!(
            result_all.len(),
            1,
            "Should have one warning when skip_media_files is false"
        );
        assert!(
            result_all[0].message.contains("image.jpg"),
            "Warning should mention image.jpg"
        );
    }

    /// A link-shaped string inside a code span on the same line as a real link is not
    /// checked.
    #[test]
    fn test_code_span_detection() {
        let rule = MD057ExistingRelativeLinks::new();

        // Create a temporary directory for test files
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        let rule = rule.with_path(base_path);

        // Test with document structure
        let content = "This is a [link](nonexistent.md) and `[not a link](not-checked.md)` in code.";

        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        // Should only find the real link, not the one in code
        assert_eq!(result.len(), 1, "Should only flag the real link");
        assert!(result[0].message.contains("nonexistent.md"));
    }

    /// Link-shaped strings inside inline code spans on their own lines are not checked.
    #[test]
    fn test_inline_code_spans() {
        // Create a temporary directory for test files
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        // Create test content with links in inline code spans
        let content = r#"
# Test Document

This is a normal link: [Link](missing.md)

This is a code span with a link: `[Link](another-missing.md)`

Some more text with `inline code [Link](yet-another-missing.md) embedded`.

    "#;

        // Initialize rule with the base path
        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);

        // Test the rule
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        // Should only have warning for the normal link, not for links in code spans
        assert_eq!(result.len(), 1, "Should have exactly one warning");
        assert!(
            result[0].message.contains("missing.md"),
            "Warning should be for missing.md"
        );
        assert!(
            !result.iter().any(|w| w.message.contains("another-missing.md")),
            "Should not warn about link in code span"
        );
        assert!(
            !result.iter().any(|w| w.message.contains("yet-another-missing.md")),
            "Should not warn about link in inline code"
        );
    }

    // Cross-file validation tests
    /// The rule declares workspace scope for cross-file checks.
    #[test]
    fn test_cross_file_scope() {
        let rule = MD057ExistingRelativeLinks::new();
        assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
    }

    /// Only links to markdown files are indexed, with the fragment stored separately
    /// from the path.
    #[test]
    fn test_contribute_to_index_extracts_markdown_links() {
        let rule = MD057ExistingRelativeLinks::new();
        let content = r#"
# Document

[Link to docs](./docs/guide.md)
[Link with fragment](./other.md#section)
[External link](https://example.com)
[Image link](image.png)
[Media file](video.mp4)
"#;

        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let mut index = FileIndex::new();
        rule.contribute_to_index(&ctx, &mut index);

        // Should only index markdown file links
        assert_eq!(index.cross_file_links.len(), 2);

        // Check first link
        assert_eq!(index.cross_file_links[0].target_path, "./docs/guide.md");
        assert_eq!(index.cross_file_links[0].fragment, "");

        // Check second link (with fragment)
        assert_eq!(index.cross_file_links[1].target_path, "./other.md");
        assert_eq!(index.cross_file_links[1].fragment, "section");
    }

    /// External URLs and fragment-only links contribute nothing to the index.
    #[test]
    fn test_contribute_to_index_skips_external_and_anchors() {
        let rule = MD057ExistingRelativeLinks::new();
        let content = r#"
# Document

[External](https://example.com)
[Another external](http://example.org)
[Fragment only](#section)
[FTP link](ftp://files.example.com)
[Mail link](mailto:test@example.com)
[WWW link](www.example.com)
"#;

        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let mut index = FileIndex::new();
        rule.contribute_to_index(&ctx, &mut index);

        // Should not index any of these
        assert_eq!(index.cross_file_links.len(), 0);
    }

    /// A link whose resolved target is present in the workspace index produces no warning.
    #[test]
    fn test_cross_file_check_valid_link() {
        use crate::workspace_index::WorkspaceIndex;

        let rule = MD057ExistingRelativeLinks::new();

        // Create a workspace index with the target file
        let mut workspace_index = WorkspaceIndex::new();
        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());

        // Create file index with a link to an existing file
        let mut file_index = FileIndex::new();
        file_index.add_cross_file_link(CrossFileLinkIndex {
            target_path: "guide.md".to_string(),
            fragment: "".to_string(),
            line: 5,
            column: 1,
        });

        // Run cross-file check from docs/index.md
        let warnings = rule
            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
            .unwrap();

        // Should have no warnings - file exists
        assert!(warnings.is_empty());
    }

    /// A markdown link whose target is absent from the workspace index is reported.
    #[test]
    fn test_cross_file_check_missing_link() {
        use crate::workspace_index::WorkspaceIndex;

        let rule = MD057ExistingRelativeLinks::new();

        // Create an empty workspace index
        let workspace_index = WorkspaceIndex::new();

        // Create file index with a link to a missing file
        let mut file_index = FileIndex::new();
        file_index.add_cross_file_link(CrossFileLinkIndex {
            target_path: "missing.md".to_string(),
            fragment: "".to_string(),
            line: 5,
            column: 1,
        });

        // Run cross-file check
        let warnings = rule
            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
            .unwrap();

        // Should have one warning for the missing file
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("missing.md"));
        assert!(warnings[0].message.contains("does not exist"));
    }

    /// A `../` target is normalized against the linking file's directory before lookup.
    #[test]
    fn test_cross_file_check_parent_path() {
        use crate::workspace_index::WorkspaceIndex;

        let rule = MD057ExistingRelativeLinks::new();

        // Create a workspace index with the target file at the root
        let mut workspace_index = WorkspaceIndex::new();
        workspace_index.insert_file(PathBuf::from("readme.md"), FileIndex::new());

        // Create file index with a parent path link
        let mut file_index = FileIndex::new();
        file_index.add_cross_file_link(CrossFileLinkIndex {
            target_path: "../readme.md".to_string(),
            fragment: "".to_string(),
            line: 5,
            column: 1,
        });

        // Run cross-file check from docs/guide.md
        let warnings = rule
            .cross_file_check(Path::new("docs/guide.md"), &file_index, &workspace_index)
            .unwrap();

        // Should have no warnings - file exists at normalized path
        assert!(warnings.is_empty());
    }

    /// `normalize_path` removes `.` components and resolves `..` against preceding ones.
    #[test]
    fn test_normalize_path_function() {
        // Test simple cases
        assert_eq!(
            normalize_path(Path::new("docs/guide.md")),
            PathBuf::from("docs/guide.md")
        );

        // Test current directory removal
        assert_eq!(
            normalize_path(Path::new("./docs/guide.md")),
            PathBuf::from("docs/guide.md")
        );

        // Test parent directory resolution
        assert_eq!(
            normalize_path(Path::new("docs/sub/../guide.md")),
            PathBuf::from("docs/guide.md")
        );

        // Test multiple parent directories
        assert_eq!(normalize_path(Path::new("a/b/c/../../d.md")), PathBuf::from("a/d.md"));
    }
}