rumdl_lib/rules/
md057_existing_relative_links.rs

1//!
2//! Rule MD057: Existing relative links
3//!
4//! See [docs/md057.md](../../docs/md057.md) for full documentation, configuration, and examples.
5
6use crate::rule::{CrossFileScope, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::element_cache::ElementCache;
8use crate::workspace_index::{CrossFileLinkIndex, FileIndex};
9use regex::Regex;
10use std::collections::HashMap;
11use std::env;
12use std::path::{Path, PathBuf};
13use std::sync::LazyLock;
14use std::sync::{Arc, Mutex};
15
16mod md057_config;
17use md057_config::MD057Config;
18
/// Process-wide cache of file existence checks, keyed by the resolved path.
///
/// A `static` is already shared for the lifetime of the program, so the `Mutex` alone
/// provides the required thread safety; no reference counting is needed.
static FILE_EXISTENCE_CACHE: LazyLock<Mutex<HashMap<PathBuf, bool>>> =
    LazyLock::new(|| Mutex::new(HashMap::new()));
22
23// Reset the file existence cache (typically between rule runs)
24fn reset_file_existence_cache() {
25    let mut cache = FILE_EXISTENCE_CACHE
26        .lock()
27        .expect("File existence cache mutex poisoned");
28    cache.clear();
29}
30
31// Check if a file exists with caching
32fn file_exists_with_cache(path: &Path) -> bool {
33    let mut cache = FILE_EXISTENCE_CACHE
34        .lock()
35        .expect("File existence cache mutex poisoned");
36    *cache.entry(path.to_path_buf()).or_insert_with(|| path.exists())
37}
38
/// Matches the bracketed text part of an inline link or image: `[text]` or `![alt]`.
/// It does not look at what follows the closing bracket; the destination is extracted
/// separately with `URL_EXTRACT_REGEX`.
static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());

/// Regex to extract the URL from a markdown link
/// Format: `](URL)` or `](URL "title")`
///
/// Capture group 1 is the destination up to (not including) any `#`; capture group 2 is
/// the optional `#fragment`. An opening `<` before the destination is tolerated.
static URL_EXTRACT_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new("\\]\\(\\s*<?([^>\\)\\s#]+)(#[^)\\s]*)?\\s*(?:\"[^\"]*\")?\\s*>?\\s*\\)").unwrap());

/// Regex to detect protocol and domain for external links
/// (prefixes `http://`, `https://`, `ftp://`, `mailto:` and `www.`).
static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^(https?://|ftp://|mailto:|www\.)").unwrap());

/// Current working directory, resolved on first use; falls back to `.` when it cannot be read.
static CURRENT_DIR: LazyLock<PathBuf> = LazyLock::new(|| env::current_dir().unwrap_or_else(|_| PathBuf::from(".")));
53
/// Supported markdown file extensions.
///
/// Entries are lowercase and include the leading dot; `is_markdown_file` lowercases
/// the candidate path before comparing its suffix against this list.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md",
    ".markdown",
    ".mdx",
    ".mkd",
    ".mkdn",
    ".mdown",
    ".mdwn",
    ".qmd",
    ".rmd",
];
66
67/// Check if a path has a markdown extension (case-insensitive)
68#[inline]
69fn is_markdown_file(path: &str) -> bool {
70    let path_lower = path.to_lowercase();
71    MARKDOWN_EXTENSIONS.iter().any(|ext| path_lower.ends_with(ext))
72}
73
/// Rule MD057: Existing relative links should point to valid files or directories.
///
/// The base directory lives behind an `Arc<Mutex<..>>`, so clones of a rule instance
/// share the same base path.
#[derive(Debug, Default, Clone)]
pub struct MD057ExistingRelativeLinks {
    /// Base directory for resolving relative links; `None` until set through `with_path`.
    base_path: Arc<Mutex<Option<PathBuf>>>,
}
80
81impl MD057ExistingRelativeLinks {
82    /// Create a new instance with default settings
83    pub fn new() -> Self {
84        Self::default()
85    }
86
87    /// Set the base path for resolving relative links
88    pub fn with_path<P: AsRef<Path>>(self, path: P) -> Self {
89        let path = path.as_ref();
90        let dir_path = if path.is_file() {
91            path.parent().map(|p| p.to_path_buf())
92        } else {
93            Some(path.to_path_buf())
94        };
95
96        *self.base_path.lock().expect("Base path mutex poisoned") = dir_path;
97        self
98    }
99
100    pub fn from_config_struct(_config: MD057Config) -> Self {
101        Self::default()
102    }
103
104    /// Check if a URL is external (optimized version)
105    #[inline]
106    fn is_external_url(&self, url: &str) -> bool {
107        if url.is_empty() {
108            return false;
109        }
110
111        // Quick checks for common external URL patterns
112        if PROTOCOL_DOMAIN_REGEX.is_match(url) || url.starts_with("www.") {
113            return true;
114        }
115
116        // Bare domain check (e.g., "example.com")
117        if url.ends_with(".com") {
118            return true;
119        }
120
121        // Absolute paths within the site are not external
122        if url.starts_with('/') {
123            return false;
124        }
125
126        // All other cases (relative paths, etc.) are not external
127        false
128    }
129
130    /// Check if the URL is a fragment-only link (internal document link)
131    #[inline]
132    fn is_fragment_only_link(&self, url: &str) -> bool {
133        url.starts_with('#')
134    }
135
136    /// Resolve a relative link against the base path
137    fn resolve_link_path(&self, link: &str) -> Option<PathBuf> {
138        self.base_path
139            .lock()
140            .unwrap()
141            .as_ref()
142            .map(|base_path| base_path.join(link))
143    }
144
145    /// Process a single link and check if it exists
146    fn process_link(&self, url: &str, line_num: usize, column: usize, warnings: &mut Vec<LintWarning>) {
147        // Skip empty URLs
148        if url.is_empty() {
149            return;
150        }
151
152        // Skip external URLs and fragment-only links (optimized order)
153        if self.is_external_url(url) || self.is_fragment_only_link(url) {
154            return;
155        }
156
157        // Resolve the relative link against the base path
158        if let Some(resolved_path) = self.resolve_link_path(url) {
159            // Check if the file exists (with caching to avoid filesystem calls)
160            if !file_exists_with_cache(&resolved_path) {
161                warnings.push(LintWarning {
162                    rule_name: Some(self.name().to_string()),
163                    line: line_num,
164                    column,
165                    end_line: line_num,
166                    end_column: column + url.len(),
167                    message: format!("Relative link '{url}' does not exist"),
168                    severity: Severity::Warning,
169                    fix: None, // No automatic fix for missing files
170                });
171            }
172        }
173    }
174}
175
176impl Rule for MD057ExistingRelativeLinks {
177    fn name(&self) -> &'static str {
178        "MD057"
179    }
180
181    fn description(&self) -> &'static str {
182        "Relative links should point to existing files"
183    }
184
185    fn category(&self) -> RuleCategory {
186        RuleCategory::Link
187    }
188
189    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
190        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
191    }
192
193    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
194        let content = ctx.content;
195
196        // Early returns for performance
197        if content.is_empty() || !content.contains('[') {
198            return Ok(Vec::new());
199        }
200
201        // Quick check for any potential links before expensive operations
202        if !content.contains("](") {
203            return Ok(Vec::new());
204        }
205
206        // Reset the file existence cache for a fresh run
207        reset_file_existence_cache();
208
209        let mut warnings = Vec::new();
210
211        // Cache base path lookup to avoid repeated mutex operations
212        let base_path = {
213            let base_path_guard = self.base_path.lock().expect("Base path mutex poisoned");
214            if base_path_guard.is_some() {
215                base_path_guard.clone()
216            } else {
217                // Try to determine the base path from the file being processed (cached)
218                static CACHED_FILE_PATH: std::sync::OnceLock<Option<PathBuf>> = std::sync::OnceLock::new();
219                CACHED_FILE_PATH
220                    .get_or_init(|| {
221                        if let Ok(file_path) = env::var("RUMDL_FILE_PATH") {
222                            let path = Path::new(&file_path);
223                            if path.exists() {
224                                path.parent()
225                                    .map(|p| p.to_path_buf())
226                                    .or_else(|| Some(CURRENT_DIR.clone()))
227                            } else {
228                                Some(CURRENT_DIR.clone())
229                            }
230                        } else {
231                            Some(CURRENT_DIR.clone())
232                        }
233                    })
234                    .clone()
235            }
236        };
237
238        // If we still don't have a base path, we can't validate relative links
239        if base_path.is_none() {
240            return Ok(warnings);
241        }
242
243        // Use LintContext links instead of expensive regex parsing
244        if !ctx.links.is_empty() {
245            // Use LineIndex for correct position calculation across all line ending types
246            let line_index = &ctx.line_index;
247
248            // Create element cache once for all links
249            let element_cache = ElementCache::new(content);
250
251            // Pre-collect lines to avoid repeated line iteration
252            let lines: Vec<&str> = content.lines().collect();
253
254            for link in &ctx.links {
255                let line_idx = link.line - 1;
256                if line_idx >= lines.len() {
257                    continue;
258                }
259
260                let line = lines[line_idx];
261
262                // Quick check for link pattern in this line
263                if !line.contains("](") {
264                    continue;
265                }
266
267                // Find all links in this line using optimized regex
268                for link_match in LINK_START_REGEX.find_iter(line) {
269                    let start_pos = link_match.start();
270                    let end_pos = link_match.end();
271
272                    // Calculate absolute position using LineIndex
273                    let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
274                    let absolute_start_pos = line_start_byte + start_pos;
275
276                    // Skip if this link is in a code span
277                    if element_cache.is_in_code_span(absolute_start_pos) {
278                        continue;
279                    }
280
281                    // Find the URL part after the link text
282                    if let Some(caps) = URL_EXTRACT_REGEX.captures_at(line, end_pos - 1)
283                        && let Some(url_group) = caps.get(1)
284                    {
285                        let url = url_group.as_str().trim();
286
287                        // Calculate column position
288                        let column = start_pos + 1;
289
290                        // Process and validate the link
291                        self.process_link(url, link.line, column, &mut warnings);
292                    }
293                }
294            }
295        }
296
297        Ok(warnings)
298    }
299
300    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
301        Ok(ctx.content.to_string())
302    }
303
304    fn as_any(&self) -> &dyn std::any::Any {
305        self
306    }
307
308    fn default_config_section(&self) -> Option<(String, toml::Value)> {
309        // No configurable options for this rule
310        None
311    }
312
313    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
314    where
315        Self: Sized,
316    {
317        let rule_config = crate::rule_config_serde::load_rule_config::<MD057Config>(config);
318        Box::new(Self::from_config_struct(rule_config))
319    }
320
321    fn cross_file_scope(&self) -> CrossFileScope {
322        CrossFileScope::Workspace
323    }
324
325    fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, index: &mut FileIndex) {
326        let content = ctx.content;
327
328        // Early returns for performance
329        if content.is_empty() || !content.contains("](") {
330            return;
331        }
332
333        // Pre-collect lines to avoid repeated line iteration
334        let lines: Vec<&str> = content.lines().collect();
335        let element_cache = ElementCache::new(content);
336        let line_index = &ctx.line_index;
337
338        for link in &ctx.links {
339            let line_idx = link.line - 1;
340            if line_idx >= lines.len() {
341                continue;
342            }
343
344            let line = lines[line_idx];
345            if !line.contains("](") {
346                continue;
347            }
348
349            // Find all links in this line
350            for link_match in LINK_START_REGEX.find_iter(line) {
351                let start_pos = link_match.start();
352                let end_pos = link_match.end();
353
354                // Calculate absolute position for code span detection
355                let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
356                let absolute_start_pos = line_start_byte + start_pos;
357
358                // Skip if in code span
359                if element_cache.is_in_code_span(absolute_start_pos) {
360                    continue;
361                }
362
363                // Extract the URL (group 1) and fragment (group 2)
364                // The regex separates URL and fragment: group 1 excludes #, group 2 captures #fragment
365                if let Some(caps) = URL_EXTRACT_REGEX.captures_at(line, end_pos - 1)
366                    && let Some(url_group) = caps.get(1)
367                {
368                    let file_path = url_group.as_str().trim();
369
370                    // Skip empty, external, or fragment-only URLs
371                    if file_path.is_empty()
372                        || PROTOCOL_DOMAIN_REGEX.is_match(file_path)
373                        || file_path.starts_with("www.")
374                        || file_path.starts_with('#')
375                    {
376                        continue;
377                    }
378
379                    // Get fragment from capture group 2 (includes # prefix)
380                    let fragment = caps.get(2).map(|m| m.as_str().trim_start_matches('#')).unwrap_or("");
381
382                    // Only index links to markdown files
383                    if is_markdown_file(file_path) {
384                        index.add_cross_file_link(CrossFileLinkIndex {
385                            target_path: file_path.to_string(),
386                            fragment: fragment.to_string(),
387                            line: link.line,
388                            column: start_pos + 1,
389                        });
390                    }
391                }
392            }
393        }
394    }
395
396    fn cross_file_check(
397        &self,
398        file_path: &Path,
399        file_index: &FileIndex,
400        workspace_index: &crate::workspace_index::WorkspaceIndex,
401    ) -> LintResult {
402        let mut warnings = Vec::new();
403
404        // Get the directory containing this file for resolving relative links
405        let file_dir = file_path.parent();
406
407        for cross_link in &file_index.cross_file_links {
408            // Resolve the relative path
409            let target_path = if cross_link.target_path.starts_with('/') {
410                // Absolute path from workspace root (e.g., "/CONTRIBUTING.md")
411                // Walk up from the current file's directory to find the workspace root
412                let stripped = cross_link.target_path.trim_start_matches('/');
413                resolve_absolute_link(file_path, stripped)
414            } else if let Some(dir) = file_dir {
415                dir.join(&cross_link.target_path)
416            } else {
417                Path::new(&cross_link.target_path).to_path_buf()
418            };
419
420            // Normalize the path (handle .., ., etc.)
421            let target_path = normalize_path(&target_path);
422
423            // Check if the target file exists in the workspace index
424            if !workspace_index.contains_file(&target_path) {
425                // File not in index - it might not exist or might not be a markdown file
426                // For markdown files, if they're not indexed, they don't exist in the workspace
427                if cross_link.target_path.ends_with(".md") || cross_link.target_path.ends_with(".markdown") {
428                    // Fallback: check the filesystem directly to handle case-insensitive
429                    // filesystems (macOS, Windows) where "README.pt-BR.md" matches "README.pt-br.md"
430                    if !target_path.exists() {
431                        warnings.push(LintWarning {
432                            rule_name: Some(self.name().to_string()),
433                            line: cross_link.line,
434                            column: cross_link.column,
435                            end_line: cross_link.line,
436                            end_column: cross_link.column + cross_link.target_path.len(),
437                            message: format!(
438                                "Relative link '{}' does not exist in the workspace",
439                                cross_link.target_path
440                            ),
441                            severity: Severity::Warning,
442                            fix: None,
443                        });
444                    }
445                }
446            }
447        }
448
449        Ok(warnings)
450    }
451}
452
/// Normalize a path lexically by resolving `.` and `..` components.
///
/// The filesystem is not consulted. Rules applied to each `..`:
/// - it cancels a preceding normal component (`a/b/../c` becomes `a/c`);
/// - directly after the root it is dropped, because the root has no parent
///   (`/../a` becomes `/a`);
/// - when nothing is left to cancel it is kept, so a path that escapes its starting
///   directory stays distinguishable (`../a` stays `../a`).
///
/// `.` components are removed. A path consisting only of `.` yields an empty path.
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut components: Vec<Component> = Vec::new();

    for component in path.components() {
        match component {
            // Skip current directory markers
            Component::CurDir => {}
            Component::ParentDir => match components.last() {
                // Go up one level
                Some(Component::Normal(_)) => {
                    components.pop();
                }
                // The root is its own parent; never pop it
                Some(Component::RootDir) => {}
                // Nothing to cancel against: keep the `..`
                Some(Component::ParentDir | Component::Prefix(_) | Component::CurDir) | None => {
                    components.push(component);
                }
            },
            Component::Prefix(_) | Component::RootDir | Component::Normal(_) => {
                components.push(component);
            }
        }
    }

    components.iter().collect()
}
476
/// Resolve a root-relative markdown link (e.g. "/CONTRIBUTING.md") to a filesystem path.
///
/// A leading "/" in a markdown link refers to the workspace/repo root rather than the
/// filesystem root. `stripped_path` is the link with that leading slash removed. Every
/// ancestor directory of `file_path`, nearest first, is joined with `stripped_path`, and
/// the first candidate that exists on disk is returned.
///
/// When no candidate exists, the path relative to the file's own directory is returned
/// (or `stripped_path` alone when `file_path` has no parent), so that a later existence
/// check fails.
fn resolve_absolute_link(file_path: &Path, stripped_path: &str) -> PathBuf {
    file_path
        .ancestors()
        // `ancestors` yields `file_path` itself first; only its directories are wanted
        .skip(1)
        .map(|ancestor| ancestor.join(stripped_path))
        .find(|candidate| candidate.exists())
        .unwrap_or_else(|| match file_path.parent() {
            Some(dir) => dir.join(stripped_path),
            None => PathBuf::from(stripped_path),
        })
}
500
// Unit tests for MD057. Filesystem-backed tests create their fixtures in a temporary
// directory; cross-file tests build `FileIndex` / `WorkspaceIndex` values by hand.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use tempfile::tempdir;

    // URL classification: known protocols, `www.` and bare `.com` are external;
    // relative paths are not.
    #[test]
    fn test_external_urls() {
        let rule = MD057ExistingRelativeLinks::new();

        assert!(rule.is_external_url("https://example.com"));
        assert!(rule.is_external_url("http://example.com"));
        assert!(rule.is_external_url("ftp://example.com"));
        assert!(rule.is_external_url("www.example.com"));
        assert!(rule.is_external_url("example.com"));

        assert!(!rule.is_external_url("./relative/path.md"));
        assert!(!rule.is_external_url("relative/path.md"));
        assert!(!rule.is_external_url("../parent/path.md"));
    }

    // Without `with_path`, links cannot be resolved and nothing is reported.
    #[test]
    fn test_no_warnings_without_base_path() {
        let rule = MD057ExistingRelativeLinks::new();
        let content = "[Link](missing.md)";

        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty(), "Should have no warnings without base path");
    }

    #[test]
    fn test_existing_and_missing_links() {
        // Create a temporary directory for test files
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        // Create an existing file
        let exists_path = base_path.join("exists.md");
        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();

        // Verify the file exists
        assert!(exists_path.exists(), "exists.md should exist for this test");

        // Create test content with both existing and missing links
        let content = r#"
# Test Document

[Valid Link](exists.md)
[Invalid Link](missing.md)
[External Link](https://example.com)
[Media Link](image.jpg)
        "#;

        // Initialize rule with the base path (default: check all files including media)
        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);

        // Test the rule
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        // Should have two warnings: missing.md and image.jpg (both don't exist)
        assert_eq!(result.len(), 2);
        let messages: Vec<_> = result.iter().map(|w| w.message.as_str()).collect();
        assert!(messages.iter().any(|m| m.contains("missing.md")));
        assert!(messages.iter().any(|m| m.contains("image.jpg")));
    }

    #[test]
    fn test_angle_bracket_links() {
        // Create a temporary directory for test files
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        // Create an existing file
        let exists_path = base_path.join("exists.md");
        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();

        // Create test content with angle bracket links
        let content = r#"
# Test Document

[Valid Link](<exists.md>)
[Invalid Link](<missing.md>)
[External Link](<https://example.com>)
    "#;

        // Test with default settings
        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);

        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        // Should have one warning for missing.md
        assert_eq!(result.len(), 1, "Should have exactly one warning");
        assert!(
            result[0].message.contains("missing.md"),
            "Warning should mention missing.md"
        );
    }

    #[test]
    fn test_all_file_types_checked() {
        // Create a temporary directory for test files
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        // Create a test with various file types - all should be checked
        let content = r#"
[Image Link](image.jpg)
[Video Link](video.mp4)
[Markdown Link](document.md)
[PDF Link](file.pdf)
"#;

        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);

        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        // Should warn about all missing files regardless of extension
        assert_eq!(result.len(), 4, "Should have warnings for all missing files");
    }

    #[test]
    fn test_code_span_detection() {
        let rule = MD057ExistingRelativeLinks::new();

        // Create a temporary directory for test files
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        let rule = rule.with_path(base_path);

        // One real link and one link-like text inside a code span on the same line
        let content = "This is a [link](nonexistent.md) and `[not a link](not-checked.md)` in code.";

        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        // Should only find the real link, not the one in code
        assert_eq!(result.len(), 1, "Should only flag the real link");
        assert!(result[0].message.contains("nonexistent.md"));
    }

    #[test]
    fn test_inline_code_spans() {
        // Create a temporary directory for test files
        let temp_dir = tempdir().unwrap();
        let base_path = temp_dir.path();

        // Create test content with links in inline code spans
        let content = r#"
# Test Document

This is a normal link: [Link](missing.md)

This is a code span with a link: `[Link](another-missing.md)`

Some more text with `inline code [Link](yet-another-missing.md) embedded`.

    "#;

        // Initialize rule with the base path
        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);

        // Test the rule
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        // Should only have warning for the normal link, not for links in code spans
        assert_eq!(result.len(), 1, "Should have exactly one warning");
        assert!(
            result[0].message.contains("missing.md"),
            "Warning should be for missing.md"
        );
        assert!(
            !result.iter().any(|w| w.message.contains("another-missing.md")),
            "Should not warn about link in code span"
        );
        assert!(
            !result.iter().any(|w| w.message.contains("yet-another-missing.md")),
            "Should not warn about link in inline code"
        );
    }

    // Cross-file validation tests
    #[test]
    fn test_cross_file_scope() {
        let rule = MD057ExistingRelativeLinks::new();
        assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
    }

    #[test]
    fn test_contribute_to_index_extracts_markdown_links() {
        let rule = MD057ExistingRelativeLinks::new();
        let content = r#"
# Document

[Link to docs](./docs/guide.md)
[Link with fragment](./other.md#section)
[External link](https://example.com)
[Image link](image.png)
[Media file](video.mp4)
"#;

        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let mut index = FileIndex::new();
        rule.contribute_to_index(&ctx, &mut index);

        // Should only index markdown file links
        assert_eq!(index.cross_file_links.len(), 2);

        // Check first link
        assert_eq!(index.cross_file_links[0].target_path, "./docs/guide.md");
        assert_eq!(index.cross_file_links[0].fragment, "");

        // Check second link (with fragment)
        assert_eq!(index.cross_file_links[1].target_path, "./other.md");
        assert_eq!(index.cross_file_links[1].fragment, "section");
    }

    #[test]
    fn test_contribute_to_index_skips_external_and_anchors() {
        let rule = MD057ExistingRelativeLinks::new();
        let content = r#"
# Document

[External](https://example.com)
[Another external](http://example.org)
[Fragment only](#section)
[FTP link](ftp://files.example.com)
[Mail link](mailto:test@example.com)
[WWW link](www.example.com)
"#;

        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let mut index = FileIndex::new();
        rule.contribute_to_index(&ctx, &mut index);

        // Should not index any of these
        assert_eq!(index.cross_file_links.len(), 0);
    }

    #[test]
    fn test_cross_file_check_valid_link() {
        use crate::workspace_index::WorkspaceIndex;

        let rule = MD057ExistingRelativeLinks::new();

        // Create a workspace index with the target file
        let mut workspace_index = WorkspaceIndex::new();
        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());

        // Create file index with a link to an existing file
        let mut file_index = FileIndex::new();
        file_index.add_cross_file_link(CrossFileLinkIndex {
            target_path: "guide.md".to_string(),
            fragment: "".to_string(),
            line: 5,
            column: 1,
        });

        // Run cross-file check from docs/index.md
        let warnings = rule
            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
            .unwrap();

        // Should have no warnings - file exists
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_cross_file_check_missing_link() {
        use crate::workspace_index::WorkspaceIndex;

        let rule = MD057ExistingRelativeLinks::new();

        // Create an empty workspace index
        let workspace_index = WorkspaceIndex::new();

        // Create file index with a link to a missing file
        let mut file_index = FileIndex::new();
        file_index.add_cross_file_link(CrossFileLinkIndex {
            target_path: "missing.md".to_string(),
            fragment: "".to_string(),
            line: 5,
            column: 1,
        });

        // Run cross-file check
        let warnings = rule
            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
            .unwrap();

        // Should have one warning for the missing file
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("missing.md"));
        assert!(warnings[0].message.contains("does not exist"));
    }

    #[test]
    fn test_cross_file_check_parent_path() {
        use crate::workspace_index::WorkspaceIndex;

        let rule = MD057ExistingRelativeLinks::new();

        // Create a workspace index with the target file at the root
        let mut workspace_index = WorkspaceIndex::new();
        workspace_index.insert_file(PathBuf::from("readme.md"), FileIndex::new());

        // Create file index with a parent path link
        let mut file_index = FileIndex::new();
        file_index.add_cross_file_link(CrossFileLinkIndex {
            target_path: "../readme.md".to_string(),
            fragment: "".to_string(),
            line: 5,
            column: 1,
        });

        // Run cross-file check from docs/guide.md
        let warnings = rule
            .cross_file_check(Path::new("docs/guide.md"), &file_index, &workspace_index)
            .unwrap();

        // Should have no warnings - file exists at normalized path
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_normalize_path_function() {
        // Test simple cases
        assert_eq!(
            normalize_path(Path::new("docs/guide.md")),
            PathBuf::from("docs/guide.md")
        );

        // Test current directory removal
        assert_eq!(
            normalize_path(Path::new("./docs/guide.md")),
            PathBuf::from("docs/guide.md")
        );

        // Test parent directory resolution
        assert_eq!(
            normalize_path(Path::new("docs/sub/../guide.md")),
            PathBuf::from("docs/guide.md")
        );

        // Test multiple parent directories
        assert_eq!(normalize_path(Path::new("a/b/c/../../d.md")), PathBuf::from("a/d.md"));
    }

    #[test]
    fn test_resolve_absolute_link() {
        // Create a temporary directory structure for testing
        let temp_dir = tempdir().expect("Failed to create temp dir");
        let root = temp_dir.path();

        // Create root-level file
        let contributing = root.join("CONTRIBUTING.md");
        File::create(&contributing).expect("Failed to create CONTRIBUTING.md");

        // Create nested directory with a markdown file
        let docs = root.join("docs");
        std::fs::create_dir(&docs).expect("Failed to create docs dir");
        let readme = docs.join("README.md");
        File::create(&readme).expect("Failed to create README.md");

        // Test: absolute link from nested file to root file
        // From docs/README.md, link to /CONTRIBUTING.md should resolve to root/CONTRIBUTING.md
        let resolved = resolve_absolute_link(&readme, "CONTRIBUTING.md");
        assert!(resolved.exists(), "Should find CONTRIBUTING.md at workspace root");
        assert_eq!(resolved, contributing);

        // Test: file that doesn't exist should not resolve (returns path relative to file's dir)
        let nonexistent = resolve_absolute_link(&readme, "NONEXISTENT.md");
        assert!(!nonexistent.exists(), "Should not find nonexistent file");
    }
}