rumdl_lib/rules/
md057_existing_relative_links.rs

1//!
2//! Rule MD057: Existing relative links
3//!
4//! See [docs/md057.md](../../docs/md057.md) for full documentation, configuration, and examples.
5
6use crate::rule::{CrossFileScope, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::element_cache::ElementCache;
8use crate::workspace_index::{CrossFileLinkIndex, FileIndex};
9use regex::Regex;
10use std::collections::HashMap;
11use std::env;
12use std::path::{Path, PathBuf};
13use std::sync::LazyLock;
14use std::sync::{Arc, Mutex};
15
16mod md057_config;
17use md057_config::MD057Config;
18
// Process-wide, thread-safe memo of file existence checks, so a path that is linked
// several times only hits the filesystem once per run.
// A `static` is already reachable from every thread, so the `Mutex` needs no `Arc` around it.
static FILE_EXISTENCE_CACHE: LazyLock<Mutex<HashMap<PathBuf, bool>>> = LazyLock::new(|| Mutex::new(HashMap::new()));
22
23// Reset the file existence cache (typically between rule runs)
24fn reset_file_existence_cache() {
25    if let Ok(mut cache) = FILE_EXISTENCE_CACHE.lock() {
26        cache.clear();
27    }
28}
29
30// Check if a file exists with caching
31fn file_exists_with_cache(path: &Path) -> bool {
32    match FILE_EXISTENCE_CACHE.lock() {
33        Ok(mut cache) => *cache.entry(path.to_path_buf()).or_insert_with(|| path.exists()),
34        Err(_) => path.exists(), // Fallback to uncached check on mutex poison
35    }
36}
37
38/// Check if a file exists, also trying markdown extensions for extensionless links.
39/// This supports wiki-style links like `[Link](page)` that resolve to `page.md`.
40fn file_exists_or_markdown_extension(path: &Path) -> bool {
41    // First, check exact path
42    if file_exists_with_cache(path) {
43        return true;
44    }
45
46    // If the path has no extension, try adding markdown extensions
47    if path.extension().is_none() {
48        for ext in MARKDOWN_EXTENSIONS {
49            // MARKDOWN_EXTENSIONS includes the dot, e.g., ".md"
50            let path_with_ext = path.with_extension(&ext[1..]);
51            if file_exists_with_cache(&path_with_ext) {
52                return true;
53            }
54        }
55    }
56
57    false
58}
59
// Regex to match the start of a link or image: an optional `!` followed by `[text]`.
// Simplified for performance: the text may not contain `]`, so link text with nested
// brackets ends the match at the first closing bracket.
static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());
62
/// Regex to extract the URL from a markdown link
/// Format: `](URL)`, `](URL "title")` or `](<URL>)`, each optionally carrying a `#fragment`
///
/// - Group 1: the URL without its fragment (stops at `>`, `)`, whitespace or `#`)
/// - Group 2: the optional fragment, including the leading `#`
///
/// Only double-quoted titles are recognized.
static URL_EXTRACT_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new("\\]\\(\\s*<?([^>\\)\\s#]+)(#[^)\\s]*)?\\s*(?:\"[^\"]*\")?\\s*>?\\s*\\)").unwrap());
67
/// Regex to detect URLs that should not be checked as relative links
/// Matches a leading `scheme://` or `scheme:` (scheme syntax per RFC 3986), or a leading `www.`
/// This covers http, https, ftp, file, smb, mailto, tel, data, macappstores, etc.
static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());
73
// Current working directory, resolved once on first use.
// Falls back to "." when the working directory cannot be determined.
static CURRENT_DIR: LazyLock<PathBuf> = LazyLock::new(|| env::current_dir().unwrap_or_else(|_| PathBuf::from(".")));
76
/// Supported markdown file extensions
///
/// Every entry is lowercase and includes the leading dot. Code in this file relies on
/// both: `is_markdown_file` compares against a lowercased path, and
/// `file_exists_or_markdown_extension` slices the dot off with `&ext[1..]`.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md",
    ".markdown",
    ".mdx",
    ".mkd",
    ".mkdn",
    ".mdown",
    ".mdwn",
    ".qmd",
    ".rmd",
];
89
90/// Check if a path has a markdown extension (case-insensitive)
91#[inline]
92fn is_markdown_file(path: &str) -> bool {
93    let path_lower = path.to_lowercase();
94    MARKDOWN_EXTENSIONS.iter().any(|ext| path_lower.ends_with(ext))
95}
96
/// Rule MD057: Existing relative links should point to valid files or directories.
///
/// Clones share the same `base_path`, because the field is reference-counted.
#[derive(Debug, Default, Clone)]
pub struct MD057ExistingRelativeLinks {
    /// Base directory for resolving relative links; `None` until set via `with_path`
    base_path: Arc<Mutex<Option<PathBuf>>>,
}
103
104impl MD057ExistingRelativeLinks {
105    /// Create a new instance with default settings
106    pub fn new() -> Self {
107        Self::default()
108    }
109
110    /// Set the base path for resolving relative links
111    pub fn with_path<P: AsRef<Path>>(self, path: P) -> Self {
112        let path = path.as_ref();
113        let dir_path = if path.is_file() {
114            path.parent().map(|p| p.to_path_buf())
115        } else {
116            Some(path.to_path_buf())
117        };
118
119        if let Ok(mut guard) = self.base_path.lock() {
120            *guard = dir_path;
121        }
122        self
123    }
124
125    pub fn from_config_struct(_config: MD057Config) -> Self {
126        Self::default()
127    }
128
129    /// Check if a URL is external or should be skipped for validation
130    #[inline]
131    fn is_external_url(&self, url: &str) -> bool {
132        if url.is_empty() {
133            return false;
134        }
135
136        // Quick checks for common external URL patterns
137        if PROTOCOL_DOMAIN_REGEX.is_match(url) || url.starts_with("www.") {
138            return true;
139        }
140
141        // Skip template variables (Handlebars/Mustache/Jinja2 syntax)
142        // Examples: {{URL}}, {{#URL}}, {{> partial}}, {{% include %}}, {{ variable }}
143        if url.starts_with("{{") || url.starts_with("{%") {
144            return true;
145        }
146
147        // Bare domain check (e.g., "example.com")
148        if url.ends_with(".com") {
149            return true;
150        }
151
152        // Absolute paths within the site are not external
153        if url.starts_with('/') {
154            return false;
155        }
156
157        // All other cases (relative paths, etc.) are not external
158        false
159    }
160
161    /// Check if the URL is a fragment-only link (internal document link)
162    #[inline]
163    fn is_fragment_only_link(&self, url: &str) -> bool {
164        url.starts_with('#')
165    }
166
167    /// Resolve a relative link against a provided base path
168    fn resolve_link_path_with_base(link: &str, base_path: &Path) -> PathBuf {
169        base_path.join(link)
170    }
171
172    /// Process a single link and check if it exists
173    fn process_link_with_base(
174        &self,
175        url: &str,
176        line_num: usize,
177        column: usize,
178        base_path: &Path,
179        warnings: &mut Vec<LintWarning>,
180    ) {
181        // Skip empty URLs
182        if url.is_empty() {
183            return;
184        }
185
186        // Skip external URLs and fragment-only links (optimized order)
187        if self.is_external_url(url) || self.is_fragment_only_link(url) {
188            return;
189        }
190
191        // Resolve the relative link against the base path
192        let resolved_path = Self::resolve_link_path_with_base(url, base_path);
193        // Check if the file exists, also trying markdown extensions for extensionless links
194        if !file_exists_or_markdown_extension(&resolved_path) {
195            warnings.push(LintWarning {
196                rule_name: Some(self.name().to_string()),
197                line: line_num,
198                column,
199                end_line: line_num,
200                end_column: column + url.len(),
201                message: format!("Relative link '{url}' does not exist"),
202                severity: Severity::Warning,
203                fix: None, // No automatic fix for missing files
204            });
205        }
206    }
207}
208
209impl Rule for MD057ExistingRelativeLinks {
210    fn name(&self) -> &'static str {
211        "MD057"
212    }
213
214    fn description(&self) -> &'static str {
215        "Relative links should point to existing files"
216    }
217
218    fn category(&self) -> RuleCategory {
219        RuleCategory::Link
220    }
221
222    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
223        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
224    }
225
226    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
227        let content = ctx.content;
228
229        // Early returns for performance
230        if content.is_empty() || !content.contains('[') {
231            return Ok(Vec::new());
232        }
233
234        // Quick check for any potential links before expensive operations
235        if !content.contains("](") {
236            return Ok(Vec::new());
237        }
238
239        // Reset the file existence cache for a fresh run
240        reset_file_existence_cache();
241
242        let mut warnings = Vec::new();
243
244        // Determine base path for resolving relative links
245        // ALWAYS compute from ctx.source_file for each file - do not reuse cached base_path
246        // This ensures each file resolves links relative to its own directory
247        let base_path: Option<PathBuf> = {
248            // First check if base_path was explicitly set via with_path() (for tests)
249            let explicit_base = self.base_path.lock().ok().and_then(|g| g.clone());
250            if explicit_base.is_some() {
251                explicit_base
252            } else if let Some(ref source_file) = ctx.source_file {
253                // Compute base path from the source file being processed
254                source_file
255                    .parent()
256                    .map(|p| p.to_path_buf())
257                    .or_else(|| Some(CURRENT_DIR.clone()))
258            } else {
259                // No source file available - cannot validate relative links
260                None
261            }
262        };
263
264        // If we still don't have a base path, we can't validate relative links
265        let Some(base_path) = base_path else {
266            return Ok(warnings);
267        };
268
269        // Use LintContext links instead of expensive regex parsing
270        if !ctx.links.is_empty() {
271            // Use LineIndex for correct position calculation across all line ending types
272            let line_index = &ctx.line_index;
273
274            // Create element cache once for all links
275            let element_cache = ElementCache::new(content);
276
277            // Pre-collect lines to avoid repeated line iteration
278            let lines: Vec<&str> = content.lines().collect();
279
280            for link in &ctx.links {
281                let line_idx = link.line - 1;
282                if line_idx >= lines.len() {
283                    continue;
284                }
285
286                let line = lines[line_idx];
287
288                // Quick check for link pattern in this line
289                if !line.contains("](") {
290                    continue;
291                }
292
293                // Find all links in this line using optimized regex
294                for link_match in LINK_START_REGEX.find_iter(line) {
295                    let start_pos = link_match.start();
296                    let end_pos = link_match.end();
297
298                    // Calculate absolute position using LineIndex
299                    let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
300                    let absolute_start_pos = line_start_byte + start_pos;
301
302                    // Skip if this link is in a code span
303                    if element_cache.is_in_code_span(absolute_start_pos) {
304                        continue;
305                    }
306
307                    // Find the URL part after the link text
308                    if let Some(caps) = URL_EXTRACT_REGEX.captures_at(line, end_pos - 1)
309                        && let Some(url_group) = caps.get(1)
310                    {
311                        let url = url_group.as_str().trim();
312
313                        // Calculate column position
314                        let column = start_pos + 1;
315
316                        // Process and validate the link
317                        self.process_link_with_base(url, link.line, column, &base_path, &mut warnings);
318                    }
319                }
320            }
321        }
322
323        // Also process images - they have URLs already parsed
324        for image in &ctx.images {
325            let url = image.url.as_ref();
326            self.process_link_with_base(url, image.line, image.start_col + 1, &base_path, &mut warnings);
327        }
328
329        Ok(warnings)
330    }
331
332    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
333        Ok(ctx.content.to_string())
334    }
335
336    fn as_any(&self) -> &dyn std::any::Any {
337        self
338    }
339
340    fn default_config_section(&self) -> Option<(String, toml::Value)> {
341        // No configurable options for this rule
342        None
343    }
344
345    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
346    where
347        Self: Sized,
348    {
349        let rule_config = crate::rule_config_serde::load_rule_config::<MD057Config>(config);
350        Box::new(Self::from_config_struct(rule_config))
351    }
352
353    fn cross_file_scope(&self) -> CrossFileScope {
354        CrossFileScope::Workspace
355    }
356
357    fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, index: &mut FileIndex) {
358        let content = ctx.content;
359
360        // Early returns for performance
361        if content.is_empty() || !content.contains("](") {
362            return;
363        }
364
365        // Pre-collect lines to avoid repeated line iteration
366        let lines: Vec<&str> = content.lines().collect();
367        let element_cache = ElementCache::new(content);
368        let line_index = &ctx.line_index;
369
370        for link in &ctx.links {
371            let line_idx = link.line - 1;
372            if line_idx >= lines.len() {
373                continue;
374            }
375
376            let line = lines[line_idx];
377            if !line.contains("](") {
378                continue;
379            }
380
381            // Find all links in this line
382            for link_match in LINK_START_REGEX.find_iter(line) {
383                let start_pos = link_match.start();
384                let end_pos = link_match.end();
385
386                // Calculate absolute position for code span detection
387                let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
388                let absolute_start_pos = line_start_byte + start_pos;
389
390                // Skip if in code span
391                if element_cache.is_in_code_span(absolute_start_pos) {
392                    continue;
393                }
394
395                // Extract the URL (group 1) and fragment (group 2)
396                // The regex separates URL and fragment: group 1 excludes #, group 2 captures #fragment
397                if let Some(caps) = URL_EXTRACT_REGEX.captures_at(line, end_pos - 1)
398                    && let Some(url_group) = caps.get(1)
399                {
400                    let file_path = url_group.as_str().trim();
401
402                    // Skip empty, external, template variables, or fragment-only URLs
403                    if file_path.is_empty()
404                        || PROTOCOL_DOMAIN_REGEX.is_match(file_path)
405                        || file_path.starts_with("www.")
406                        || file_path.starts_with('#')
407                        || file_path.starts_with("{{")
408                        || file_path.starts_with("{%")
409                    {
410                        continue;
411                    }
412
413                    // Get fragment from capture group 2 (includes # prefix)
414                    let fragment = caps.get(2).map(|m| m.as_str().trim_start_matches('#')).unwrap_or("");
415
416                    // Only index markdown file links for cross-file validation
417                    // Non-markdown files (images, media) are validated via filesystem in check()
418                    if is_markdown_file(file_path) {
419                        index.add_cross_file_link(CrossFileLinkIndex {
420                            target_path: file_path.to_string(),
421                            fragment: fragment.to_string(),
422                            line: link.line,
423                            column: start_pos + 1,
424                        });
425                    }
426                }
427            }
428        }
429    }
430
431    fn cross_file_check(
432        &self,
433        file_path: &Path,
434        file_index: &FileIndex,
435        workspace_index: &crate::workspace_index::WorkspaceIndex,
436    ) -> LintResult {
437        let mut warnings = Vec::new();
438
439        // Get the directory containing this file for resolving relative links
440        let file_dir = file_path.parent();
441
442        for cross_link in &file_index.cross_file_links {
443            // Resolve the relative path
444            let target_path = if cross_link.target_path.starts_with('/') {
445                // Absolute path from workspace root (e.g., "/CONTRIBUTING.md")
446                // Walk up from the current file's directory to find the workspace root
447                let stripped = cross_link.target_path.trim_start_matches('/');
448                resolve_absolute_link(file_path, stripped)
449            } else if let Some(dir) = file_dir {
450                dir.join(&cross_link.target_path)
451            } else {
452                Path::new(&cross_link.target_path).to_path_buf()
453            };
454
455            // Normalize the path (handle .., ., etc.)
456            let target_path = normalize_path(&target_path);
457
458            // Check if the target markdown file exists in the workspace index
459            if !workspace_index.contains_file(&target_path) {
460                // File not in index - check filesystem directly for case-insensitive filesystems
461                if !target_path.exists() {
462                    warnings.push(LintWarning {
463                        rule_name: Some(self.name().to_string()),
464                        line: cross_link.line,
465                        column: cross_link.column,
466                        end_line: cross_link.line,
467                        end_column: cross_link.column + cross_link.target_path.len(),
468                        message: format!("Relative link '{}' does not exist", cross_link.target_path),
469                        severity: Severity::Warning,
470                        fix: None,
471                    });
472                }
473            }
474        }
475
476        Ok(warnings)
477    }
478}
479
/// Normalize a path lexically by resolving `.` and `..` components.
///
/// The filesystem is not consulted, so symlinks are not followed.
/// - `.` components are dropped.
/// - `..` removes the preceding normal component.
/// - `..` directly under a root or prefix is dropped (the root has no parent), so an
///   absolute path stays absolute.
/// - `..` that would climb above the start of a relative path is kept, so
///   `docs/../../README.md` becomes `../README.md` rather than `README.md`.
///
/// A path that cancels out completely (e.g. `a/..`) yields an empty path.
fn normalize_path(path: &Path) -> PathBuf {
    let mut components: Vec<std::path::Component> = Vec::new();

    for component in path.components() {
        match component {
            // Skip current directory markers
            std::path::Component::CurDir => {}
            std::path::Component::ParentDir => match components.last() {
                // Go up one level
                Some(std::path::Component::Normal(_)) => {
                    components.pop();
                }
                // Already at the root: there is nothing above it
                Some(std::path::Component::RootDir | std::path::Component::Prefix(_)) => {}
                // Nothing left to cancel out: the `..` is part of the result
                _ => components.push(component),
            },
            _ => components.push(component),
        }
    }

    components.iter().collect()
}
503
/// Resolve an absolute link (e.g., "/CONTRIBUTING.md") against the ancestors of `file_path`.
///
/// A leading "/" in a markdown link means "from the workspace/repo root", not the
/// filesystem root. The root is not known here, so every directory from the file's
/// own directory upwards is tried and the first one that contains `stripped_path`
/// (the link without its leading "/") wins.
///
/// When no ancestor contains the target, the path inside the file's own directory is
/// returned, or `stripped_path` alone if `file_path` has no parent. That path is
/// expected to fail the caller's existence check.
fn resolve_absolute_link(file_path: &Path, stripped_path: &str) -> PathBuf {
    let Some(start_dir) = file_path.parent() else {
        return PathBuf::from(stripped_path);
    };

    start_dir
        .ancestors()
        .map(|dir| dir.join(stripped_path))
        .find(|candidate| candidate.exists())
        .unwrap_or_else(|| start_dir.join(stripped_path))
}
527
528#[cfg(test)]
529mod tests {
530    use super::*;
531    use std::fs::File;
532    use std::io::Write;
533    use tempfile::tempdir;
534
535    #[test]
536    fn test_external_urls() {
537        let rule = MD057ExistingRelativeLinks::new();
538
539        // Common web protocols
540        assert!(rule.is_external_url("https://example.com"));
541        assert!(rule.is_external_url("http://example.com"));
542        assert!(rule.is_external_url("ftp://example.com"));
543        assert!(rule.is_external_url("www.example.com"));
544        assert!(rule.is_external_url("example.com"));
545
546        // Special URI schemes (issue #192)
547        assert!(rule.is_external_url("file:///path/to/file"));
548        assert!(rule.is_external_url("smb://server/share"));
549        assert!(rule.is_external_url("macappstores://apps.apple.com/"));
550        assert!(rule.is_external_url("mailto:user@example.com"));
551        assert!(rule.is_external_url("tel:+1234567890"));
552        assert!(rule.is_external_url("data:text/plain;base64,SGVsbG8="));
553        assert!(rule.is_external_url("javascript:void(0)"));
554        assert!(rule.is_external_url("ssh://git@github.com/repo"));
555        assert!(rule.is_external_url("git://github.com/repo.git"));
556
557        // Template variables should be skipped (not checked as relative links)
558        assert!(rule.is_external_url("{{URL}}")); // Handlebars/Mustache
559        assert!(rule.is_external_url("{{#URL}}")); // Handlebars block helper
560        assert!(rule.is_external_url("{{> partial}}")); // Handlebars partial
561        assert!(rule.is_external_url("{{ variable }}")); // Mustache with spaces
562        assert!(rule.is_external_url("{{% include %}}")); // Jinja2/Hugo shortcode
563        assert!(rule.is_external_url("{{")); // Even partial matches (regex edge case)
564
565        // Relative paths should NOT be external
566        assert!(!rule.is_external_url("./relative/path.md"));
567        assert!(!rule.is_external_url("relative/path.md"));
568        assert!(!rule.is_external_url("../parent/path.md"));
569    }
570
571    #[test]
572    fn test_no_warnings_without_base_path() {
573        let rule = MD057ExistingRelativeLinks::new();
574        let content = "[Link](missing.md)";
575
576        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
577        let result = rule.check(&ctx).unwrap();
578        assert!(result.is_empty(), "Should have no warnings without base path");
579    }
580
581    #[test]
582    fn test_existing_and_missing_links() {
583        // Create a temporary directory for test files
584        let temp_dir = tempdir().unwrap();
585        let base_path = temp_dir.path();
586
587        // Create an existing file
588        let exists_path = base_path.join("exists.md");
589        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
590
591        // Verify the file exists
592        assert!(exists_path.exists(), "exists.md should exist for this test");
593
594        // Create test content with both existing and missing links
595        let content = r#"
596# Test Document
597
598[Valid Link](exists.md)
599[Invalid Link](missing.md)
600[External Link](https://example.com)
601[Media Link](image.jpg)
602        "#;
603
604        // Initialize rule with the base path (default: check all files including media)
605        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
606
607        // Test the rule
608        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
609        let result = rule.check(&ctx).unwrap();
610
611        // Should have two warnings: missing.md and image.jpg (both don't exist)
612        assert_eq!(result.len(), 2);
613        let messages: Vec<_> = result.iter().map(|w| w.message.as_str()).collect();
614        assert!(messages.iter().any(|m| m.contains("missing.md")));
615        assert!(messages.iter().any(|m| m.contains("image.jpg")));
616    }
617
618    #[test]
619    fn test_angle_bracket_links() {
620        // Create a temporary directory for test files
621        let temp_dir = tempdir().unwrap();
622        let base_path = temp_dir.path();
623
624        // Create an existing file
625        let exists_path = base_path.join("exists.md");
626        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
627
628        // Create test content with angle bracket links
629        let content = r#"
630# Test Document
631
632[Valid Link](<exists.md>)
633[Invalid Link](<missing.md>)
634[External Link](<https://example.com>)
635    "#;
636
637        // Test with default settings
638        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
639
640        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
641        let result = rule.check(&ctx).unwrap();
642
643        // Should have one warning for missing.md
644        assert_eq!(result.len(), 1, "Should have exactly one warning");
645        assert!(
646            result[0].message.contains("missing.md"),
647            "Warning should mention missing.md"
648        );
649    }
650
651    #[test]
652    fn test_all_file_types_checked() {
653        // Create a temporary directory for test files
654        let temp_dir = tempdir().unwrap();
655        let base_path = temp_dir.path();
656
657        // Create a test with various file types - all should be checked
658        let content = r#"
659[Image Link](image.jpg)
660[Video Link](video.mp4)
661[Markdown Link](document.md)
662[PDF Link](file.pdf)
663"#;
664
665        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
666
667        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
668        let result = rule.check(&ctx).unwrap();
669
670        // Should warn about all missing files regardless of extension
671        assert_eq!(result.len(), 4, "Should have warnings for all missing files");
672    }
673
674    #[test]
675    fn test_code_span_detection() {
676        let rule = MD057ExistingRelativeLinks::new();
677
678        // Create a temporary directory for test files
679        let temp_dir = tempdir().unwrap();
680        let base_path = temp_dir.path();
681
682        let rule = rule.with_path(base_path);
683
684        // Test with document structure
685        let content = "This is a [link](nonexistent.md) and `[not a link](not-checked.md)` in code.";
686
687        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
688        let result = rule.check(&ctx).unwrap();
689
690        // Should only find the real link, not the one in code
691        assert_eq!(result.len(), 1, "Should only flag the real link");
692        assert!(result[0].message.contains("nonexistent.md"));
693    }
694
695    #[test]
696    fn test_inline_code_spans() {
697        // Create a temporary directory for test files
698        let temp_dir = tempdir().unwrap();
699        let base_path = temp_dir.path();
700
701        // Create test content with links in inline code spans
702        let content = r#"
703# Test Document
704
705This is a normal link: [Link](missing.md)
706
707This is a code span with a link: `[Link](another-missing.md)`
708
709Some more text with `inline code [Link](yet-another-missing.md) embedded`.
710
711    "#;
712
713        // Initialize rule with the base path
714        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
715
716        // Test the rule
717        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
718        let result = rule.check(&ctx).unwrap();
719
720        // Should only have warning for the normal link, not for links in code spans
721        assert_eq!(result.len(), 1, "Should have exactly one warning");
722        assert!(
723            result[0].message.contains("missing.md"),
724            "Warning should be for missing.md"
725        );
726        assert!(
727            !result.iter().any(|w| w.message.contains("another-missing.md")),
728            "Should not warn about link in code span"
729        );
730        assert!(
731            !result.iter().any(|w| w.message.contains("yet-another-missing.md")),
732            "Should not warn about link in inline code"
733        );
734    }
735
736    #[test]
737    fn test_extensionless_link_resolution() {
738        // Create a temporary directory for test files
739        let temp_dir = tempdir().unwrap();
740        let base_path = temp_dir.path();
741
742        // Create a markdown file WITHOUT specifying .md extension in the link
743        let page_path = base_path.join("page.md");
744        File::create(&page_path).unwrap().write_all(b"# Page").unwrap();
745
746        // Test content with extensionless link that should resolve to page.md
747        let content = r#"
748# Test Document
749
750[Link without extension](page)
751[Link with extension](page.md)
752[Missing link](nonexistent)
753"#;
754
755        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
756
757        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
758        let result = rule.check(&ctx).unwrap();
759
760        // Should only have warning for nonexistent link
761        // Both "page" and "page.md" should resolve to the same file
762        assert_eq!(result.len(), 1, "Should only warn about nonexistent link");
763        assert!(
764            result[0].message.contains("nonexistent"),
765            "Warning should be for 'nonexistent' not 'page'"
766        );
767    }
768
769    // Cross-file validation tests
770    #[test]
771    fn test_cross_file_scope() {
772        let rule = MD057ExistingRelativeLinks::new();
773        assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
774    }
775
776    #[test]
777    fn test_contribute_to_index_extracts_markdown_links() {
778        let rule = MD057ExistingRelativeLinks::new();
779        let content = r#"
780# Document
781
782[Link to docs](./docs/guide.md)
783[Link with fragment](./other.md#section)
784[External link](https://example.com)
785[Image link](image.png)
786[Media file](video.mp4)
787"#;
788
789        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
790        let mut index = FileIndex::new();
791        rule.contribute_to_index(&ctx, &mut index);
792
793        // Should only index markdown file links
794        assert_eq!(index.cross_file_links.len(), 2);
795
796        // Check first link
797        assert_eq!(index.cross_file_links[0].target_path, "./docs/guide.md");
798        assert_eq!(index.cross_file_links[0].fragment, "");
799
800        // Check second link (with fragment)
801        assert_eq!(index.cross_file_links[1].target_path, "./other.md");
802        assert_eq!(index.cross_file_links[1].fragment, "section");
803    }
804
805    #[test]
806    fn test_contribute_to_index_skips_external_and_anchors() {
807        let rule = MD057ExistingRelativeLinks::new();
808        let content = r#"
809# Document
810
811[External](https://example.com)
812[Another external](http://example.org)
813[Fragment only](#section)
814[FTP link](ftp://files.example.com)
815[Mail link](mailto:test@example.com)
816[WWW link](www.example.com)
817"#;
818
819        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
820        let mut index = FileIndex::new();
821        rule.contribute_to_index(&ctx, &mut index);
822
823        // Should not index any of these
824        assert_eq!(index.cross_file_links.len(), 0);
825    }
826
827    #[test]
828    fn test_cross_file_check_valid_link() {
829        use crate::workspace_index::WorkspaceIndex;
830
831        let rule = MD057ExistingRelativeLinks::new();
832
833        // Create a workspace index with the target file
834        let mut workspace_index = WorkspaceIndex::new();
835        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());
836
837        // Create file index with a link to an existing file
838        let mut file_index = FileIndex::new();
839        file_index.add_cross_file_link(CrossFileLinkIndex {
840            target_path: "guide.md".to_string(),
841            fragment: "".to_string(),
842            line: 5,
843            column: 1,
844        });
845
846        // Run cross-file check from docs/index.md
847        let warnings = rule
848            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
849            .unwrap();
850
851        // Should have no warnings - file exists
852        assert!(warnings.is_empty());
853    }
854
855    #[test]
856    fn test_cross_file_check_missing_link() {
857        use crate::workspace_index::WorkspaceIndex;
858
859        let rule = MD057ExistingRelativeLinks::new();
860
861        // Create an empty workspace index
862        let workspace_index = WorkspaceIndex::new();
863
864        // Create file index with a link to a missing file
865        let mut file_index = FileIndex::new();
866        file_index.add_cross_file_link(CrossFileLinkIndex {
867            target_path: "missing.md".to_string(),
868            fragment: "".to_string(),
869            line: 5,
870            column: 1,
871        });
872
873        // Run cross-file check
874        let warnings = rule
875            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
876            .unwrap();
877
878        // Should have one warning for the missing file
879        assert_eq!(warnings.len(), 1);
880        assert!(warnings[0].message.contains("missing.md"));
881        assert!(warnings[0].message.contains("does not exist"));
882    }
883
884    #[test]
885    fn test_cross_file_check_parent_path() {
886        use crate::workspace_index::WorkspaceIndex;
887
888        let rule = MD057ExistingRelativeLinks::new();
889
890        // Create a workspace index with the target file at the root
891        let mut workspace_index = WorkspaceIndex::new();
892        workspace_index.insert_file(PathBuf::from("readme.md"), FileIndex::new());
893
894        // Create file index with a parent path link
895        let mut file_index = FileIndex::new();
896        file_index.add_cross_file_link(CrossFileLinkIndex {
897            target_path: "../readme.md".to_string(),
898            fragment: "".to_string(),
899            line: 5,
900            column: 1,
901        });
902
903        // Run cross-file check from docs/guide.md
904        let warnings = rule
905            .cross_file_check(Path::new("docs/guide.md"), &file_index, &workspace_index)
906            .unwrap();
907
908        // Should have no warnings - file exists at normalized path
909        assert!(warnings.is_empty());
910    }
911
912    #[test]
913    fn test_normalize_path_function() {
914        // Test simple cases
915        assert_eq!(
916            normalize_path(Path::new("docs/guide.md")),
917            PathBuf::from("docs/guide.md")
918        );
919
920        // Test current directory removal
921        assert_eq!(
922            normalize_path(Path::new("./docs/guide.md")),
923            PathBuf::from("docs/guide.md")
924        );
925
926        // Test parent directory resolution
927        assert_eq!(
928            normalize_path(Path::new("docs/sub/../guide.md")),
929            PathBuf::from("docs/guide.md")
930        );
931
932        // Test multiple parent directories
933        assert_eq!(normalize_path(Path::new("a/b/c/../../d.md")), PathBuf::from("a/d.md"));
934    }
935
936    #[test]
937    fn test_resolve_absolute_link() {
938        // Create a temporary directory structure for testing
939        let temp_dir = tempdir().expect("Failed to create temp dir");
940        let root = temp_dir.path();
941
942        // Create root-level file
943        let contributing = root.join("CONTRIBUTING.md");
944        File::create(&contributing).expect("Failed to create CONTRIBUTING.md");
945
946        // Create nested directory with a markdown file
947        let docs = root.join("docs");
948        std::fs::create_dir(&docs).expect("Failed to create docs dir");
949        let readme = docs.join("README.md");
950        File::create(&readme).expect("Failed to create README.md");
951
952        // Test: absolute link from nested file to root file
953        // From docs/README.md, link to /CONTRIBUTING.md should resolve to root/CONTRIBUTING.md
954        let resolved = resolve_absolute_link(&readme, "CONTRIBUTING.md");
955        assert!(resolved.exists(), "Should find CONTRIBUTING.md at workspace root");
956        assert_eq!(resolved, contributing);
957
958        // Test: file that doesn't exist should not resolve (returns path relative to file's dir)
959        let nonexistent = resolve_absolute_link(&readme, "NONEXISTENT.md");
960        assert!(!nonexistent.exists(), "Should not find nonexistent file");
961    }
962}