rumdl_lib/rules/
md057_existing_relative_links.rs

1//!
2//! Rule MD057: Existing relative links
3//!
4//! See [docs/md057.md](../../docs/md057.md) for full documentation, configuration, and examples.
5
6use crate::rule::{CrossFileScope, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::element_cache::ElementCache;
8use crate::workspace_index::{CrossFileLinkIndex, FileIndex};
9use regex::Regex;
10use std::collections::HashMap;
11use std::env;
12use std::path::{Path, PathBuf};
13use std::sync::LazyLock;
14use std::sync::{Arc, Mutex};
15
16mod md057_config;
17use md057_config::MD057Config;
18
// Thread-safe cache for file existence checks to avoid redundant filesystem operations.
// Maps a path to the result of the last existence check performed for it.
// A `static` is already shared by every thread, so the `Mutex` needs no `Arc` wrapper.
static FILE_EXISTENCE_CACHE: LazyLock<Mutex<HashMap<PathBuf, bool>>> =
    LazyLock::new(|| Mutex::new(HashMap::new()));
22
23// Reset the file existence cache (typically between rule runs)
24fn reset_file_existence_cache() {
25    if let Ok(mut cache) = FILE_EXISTENCE_CACHE.lock() {
26        cache.clear();
27    }
28}
29
30// Check if a file exists with caching
31fn file_exists_with_cache(path: &Path) -> bool {
32    match FILE_EXISTENCE_CACHE.lock() {
33        Ok(mut cache) => *cache.entry(path.to_path_buf()).or_insert_with(|| path.exists()),
34        Err(_) => path.exists(), // Fallback to uncached check on mutex poison
35    }
36}
37
38/// Check if a file exists, also trying markdown extensions for extensionless links.
39/// This supports wiki-style links like `[Link](page)` that resolve to `page.md`.
40fn file_exists_or_markdown_extension(path: &Path) -> bool {
41    // First, check exact path
42    if file_exists_with_cache(path) {
43        return true;
44    }
45
46    // If the path has no extension, try adding markdown extensions
47    if path.extension().is_none() {
48        for ext in MARKDOWN_EXTENSIONS {
49            // MARKDOWN_EXTENSIONS includes the dot, e.g., ".md"
50            let path_with_ext = path.with_extension(&ext[1..]);
51            if file_exists_with_cache(&path_with_ext) {
52                return true;
53            }
54        }
55    }
56
57    false
58}
59
// Regex to match the start of a link - simplified for performance.
// Matches an optional leading `!` followed by `[...]` whose text contains no `]`,
// so it fires on the bracket part of both link and image syntax.
static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());

/// Regex to extract the URL from an angle-bracketed markdown link
/// Format: `](<URL>)` or `](<URL> "title")`
/// This handles URLs with parentheses like `](<path/(with)/parens.md>)`
///
/// Group 1 is everything between `<` and `>`; group 2 is an optional `#fragment`
/// that follows the closing `>`.
static URL_EXTRACT_ANGLE_BRACKET_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\]\(\s*<([^>]+)>(#[^\)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());

/// Regex to extract the URL from a normal markdown link (without angle brackets)
/// Format: `](URL)` or `](URL "title")`
///
/// Group 1 is the URL up to (not including) any `#`; group 2 is the optional
/// `#fragment`, including the leading `#`.
static URL_EXTRACT_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new("\\]\\(\\s*([^>\\)\\s#]+)(#[^)\\s]*)?\\s*(?:\"[^\"]*\")?\\s*\\)").unwrap());

/// Regex to detect URLs with explicit schemes (should not be checked as relative links)
/// Matches: scheme:// or scheme: (per RFC 3986)
/// This covers http, https, ftp, file, smb, mailto, tel, data, macappstores, etc.
/// It also matches a leading `www.` even when no scheme is present.
static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());

// Current working directory, resolved on first access.
// Falls back to `.` when the working directory cannot be determined.
static CURRENT_DIR: LazyLock<PathBuf> = LazyLock::new(|| env::current_dir().unwrap_or_else(|_| PathBuf::from(".")));
82
/// Map an ASCII hex digit (`0-9`, `a-f`, `A-F`) to its value in `0..=15`.
/// Any other byte yields `None`.
#[inline]
fn hex_digit_to_value(byte: u8) -> Option<u8> {
    // `to_digit(16)` accepts exactly the ASCII hex digits; the result is at most 15,
    // so narrowing to `u8` cannot truncate.
    char::from(byte).to_digit(16).map(|digit| digit as u8)
}
94
/// Supported markdown file extensions.
///
/// Every entry is lowercase and includes the leading dot, so it can be used
/// directly as a suffix test or appended to a file stem.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md",
    ".markdown",
    ".mdx",
    ".mkd",
    ".mkdn",
    ".mdown",
    ".mdwn",
    ".qmd",
    ".rmd",
];
107
108/// Check if a path has a markdown extension (case-insensitive)
109#[inline]
110fn is_markdown_file(path: &str) -> bool {
111    let path_lower = path.to_lowercase();
112    MARKDOWN_EXTENSIONS.iter().any(|ext| path_lower.ends_with(ext))
113}
114
/// Rule MD057: Existing relative links should point to valid files or directories.
///
/// The rule is `Clone`; because the base path lives behind an `Arc`, clones share
/// the same base-path slot rather than getting an independent copy.
#[derive(Debug, Clone, Default)]
pub struct MD057ExistingRelativeLinks {
    /// Base directory for resolving relative links.
    /// `None` until set through `with_path`.
    base_path: Arc<Mutex<Option<PathBuf>>>,
}
121
122impl MD057ExistingRelativeLinks {
123    /// Create a new instance with default settings
124    pub fn new() -> Self {
125        Self::default()
126    }
127
128    /// Set the base path for resolving relative links
129    pub fn with_path<P: AsRef<Path>>(self, path: P) -> Self {
130        let path = path.as_ref();
131        let dir_path = if path.is_file() {
132            path.parent().map(|p| p.to_path_buf())
133        } else {
134            Some(path.to_path_buf())
135        };
136
137        if let Ok(mut guard) = self.base_path.lock() {
138            *guard = dir_path;
139        }
140        self
141    }
142
143    #[allow(unused_variables)]
144    pub fn from_config_struct(config: MD057Config) -> Self {
145        Self::default()
146    }
147
148    /// Check if a URL is external or should be skipped for validation.
149    ///
150    /// Returns `true` (skip validation) for:
151    /// - URLs with protocols: `https://`, `http://`, `ftp://`, `mailto:`, etc.
152    /// - Bare domains: `www.example.com`, `example.com`
153    /// - Template variables: `{{URL}}`, `{{% include %}}`
154    /// - Absolute web URL paths: `/api/docs`, `/blog/post.html`
155    ///
156    /// Returns `false` (validate) for:
157    /// - Relative filesystem paths: `./file.md`, `../parent/file.md`, `file.md`
158    #[inline]
159    fn is_external_url(&self, url: &str) -> bool {
160        if url.is_empty() {
161            return false;
162        }
163
164        // Quick checks for common external URL patterns
165        if PROTOCOL_DOMAIN_REGEX.is_match(url) || url.starts_with("www.") {
166            return true;
167        }
168
169        // Skip template variables (Handlebars/Mustache/Jinja2 syntax)
170        // Examples: {{URL}}, {{#URL}}, {{> partial}}, {{% include %}}, {{ variable }}
171        if url.starts_with("{{") || url.starts_with("{%") {
172            return true;
173        }
174
175        // Bare domain check (e.g., "example.com")
176        // Note: We intentionally DON'T skip all TLDs like .org, .net, etc.
177        // Links like [text](nodejs.org/path) without a protocol are broken -
178        // they'll be treated as relative paths by markdown renderers.
179        // Flagging them helps users find missing protocols.
180        // We only skip .com as a minimal safety net for the most common case.
181        if url.ends_with(".com") {
182            return true;
183        }
184
185        // Absolute URL paths (e.g., /api/docs, /blog/post.html) are treated as web paths
186        // and skipped. These are typically routes for published documentation sites,
187        // not filesystem paths that can be validated locally.
188        if url.starts_with('/') {
189            return true;
190        }
191
192        // Framework path aliases (resolved by build tools like Vite, webpack, etc.)
193        // These are not filesystem paths but module/asset aliases
194        // Examples: ~/assets/image.png, @images/photo.jpg, @/components/Button.vue
195        if url.starts_with('~') || url.starts_with('@') {
196            return true;
197        }
198
199        // All other cases (relative paths, etc.) are not external
200        false
201    }
202
203    /// Check if the URL is a fragment-only link (internal document link)
204    #[inline]
205    fn is_fragment_only_link(&self, url: &str) -> bool {
206        url.starts_with('#')
207    }
208
209    /// Decode URL percent-encoded sequences in a path.
210    /// Converts `%20` to space, `%2F` to `/`, etc.
211    /// Returns the original string if decoding fails or produces invalid UTF-8.
212    fn url_decode(path: &str) -> String {
213        // Quick check: if no percent sign, return as-is
214        if !path.contains('%') {
215            return path.to_string();
216        }
217
218        let bytes = path.as_bytes();
219        let mut result = Vec::with_capacity(bytes.len());
220        let mut i = 0;
221
222        while i < bytes.len() {
223            if bytes[i] == b'%' && i + 2 < bytes.len() {
224                // Try to parse the two hex digits following %
225                let hex1 = bytes[i + 1];
226                let hex2 = bytes[i + 2];
227                if let (Some(d1), Some(d2)) = (hex_digit_to_value(hex1), hex_digit_to_value(hex2)) {
228                    result.push(d1 * 16 + d2);
229                    i += 3;
230                    continue;
231                }
232            }
233            result.push(bytes[i]);
234            i += 1;
235        }
236
237        // Convert to UTF-8, falling back to original if invalid
238        String::from_utf8(result).unwrap_or_else(|_| path.to_string())
239    }
240
241    /// Strip query parameters and fragments from a URL for file existence checking.
242    /// URLs like `path/to/image.png?raw=true` or `file.md#section` should check
243    /// for `path/to/image.png` or `file.md` respectively.
244    ///
245    /// Note: In standard URLs, query parameters (`?`) come before fragments (`#`),
246    /// so we check for `?` first. If a URL has both, only the query is stripped here
247    /// (fragments are handled separately by the regex in `contribute_to_index`).
248    fn strip_query_and_fragment(url: &str) -> &str {
249        // Find the first occurrence of '?' or '#', whichever comes first
250        // This handles both standard URLs (? before #) and edge cases (# before ?)
251        let query_pos = url.find('?');
252        let fragment_pos = url.find('#');
253
254        match (query_pos, fragment_pos) {
255            (Some(q), Some(f)) => {
256                // Both exist - strip at whichever comes first
257                &url[..q.min(f)]
258            }
259            (Some(q), None) => &url[..q],
260            (None, Some(f)) => &url[..f],
261            (None, None) => url,
262        }
263    }
264
265    /// Resolve a relative link against a provided base path
266    fn resolve_link_path_with_base(link: &str, base_path: &Path) -> PathBuf {
267        base_path.join(link)
268    }
269
270    /// Process a single link and check if it exists
271    fn process_link_with_base(
272        &self,
273        url: &str,
274        line_num: usize,
275        column: usize,
276        base_path: &Path,
277        warnings: &mut Vec<LintWarning>,
278    ) {
279        // Skip empty URLs
280        if url.is_empty() {
281            return;
282        }
283
284        // Skip external URLs and fragment-only links (optimized order)
285        if self.is_external_url(url) || self.is_fragment_only_link(url) {
286            return;
287        }
288
289        // Strip query parameters and fragments before checking file existence
290        // URLs like `path/to/image.png?raw=true` should check for `path/to/image.png`
291        let file_path = Self::strip_query_and_fragment(url);
292
293        // URL-decode the path to handle percent-encoded characters
294        // e.g., `penguin%20with%20space.jpg` -> `penguin with space.jpg`
295        let decoded_path = Self::url_decode(file_path);
296
297        // Resolve the relative link against the base path
298        let resolved_path = Self::resolve_link_path_with_base(&decoded_path, base_path);
299
300        // Check if the file exists, also trying markdown extensions for extensionless links
301        if file_exists_or_markdown_extension(&resolved_path) {
302            return; // File exists, no warning needed
303        }
304
305        // For .html/.htm links, check if a corresponding markdown source exists
306        // This handles doc sites where .md is compiled to .html at build time
307        if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
308            && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
309            && let (Some(stem), Some(parent)) = (
310                resolved_path.file_stem().and_then(|s| s.to_str()),
311                resolved_path.parent(),
312            )
313        {
314            for md_ext in MARKDOWN_EXTENSIONS {
315                let source_path = parent.join(format!("{stem}{md_ext}"));
316                if file_exists_with_cache(&source_path) {
317                    return; // Markdown source exists, link is valid
318                }
319            }
320        }
321
322        // File doesn't exist and no source file found
323        warnings.push(LintWarning {
324            rule_name: Some(self.name().to_string()),
325            line: line_num,
326            column,
327            end_line: line_num,
328            end_column: column + url.len(),
329            message: format!("Relative link '{url}' does not exist"),
330            severity: Severity::Warning,
331            fix: None,
332        });
333    }
334}
335
336impl Rule for MD057ExistingRelativeLinks {
337    fn name(&self) -> &'static str {
338        "MD057"
339    }
340
341    fn description(&self) -> &'static str {
342        "Relative links should point to existing files"
343    }
344
    /// This rule belongs to the link category.
    fn category(&self) -> RuleCategory {
        RuleCategory::Link
    }
348
349    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
350        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
351    }
352
353    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
354        let content = ctx.content;
355
356        // Early returns for performance
357        if content.is_empty() || !content.contains('[') {
358            return Ok(Vec::new());
359        }
360
361        // Quick check for any potential links before expensive operations
362        if !content.contains("](") {
363            return Ok(Vec::new());
364        }
365
366        // Reset the file existence cache for a fresh run
367        reset_file_existence_cache();
368
369        let mut warnings = Vec::new();
370
371        // Determine base path for resolving relative links
372        // ALWAYS compute from ctx.source_file for each file - do not reuse cached base_path
373        // This ensures each file resolves links relative to its own directory
374        let base_path: Option<PathBuf> = {
375            // First check if base_path was explicitly set via with_path() (for tests)
376            let explicit_base = self.base_path.lock().ok().and_then(|g| g.clone());
377            if explicit_base.is_some() {
378                explicit_base
379            } else if let Some(ref source_file) = ctx.source_file {
380                // Resolve symlinks to get the actual file location
381                // This ensures relative links are resolved from the target's directory,
382                // not the symlink's directory
383                let resolved_file = source_file.canonicalize().unwrap_or_else(|_| source_file.clone());
384                resolved_file
385                    .parent()
386                    .map(|p| p.to_path_buf())
387                    .or_else(|| Some(CURRENT_DIR.clone()))
388            } else {
389                // No source file available - cannot validate relative links
390                None
391            }
392        };
393
394        // If we still don't have a base path, we can't validate relative links
395        let Some(base_path) = base_path else {
396            return Ok(warnings);
397        };
398
399        // Use LintContext links instead of expensive regex parsing
400        if !ctx.links.is_empty() {
401            // Use LineIndex for correct position calculation across all line ending types
402            let line_index = &ctx.line_index;
403
404            // Create element cache once for all links
405            let element_cache = ElementCache::new(content);
406
407            // Pre-collect lines to avoid repeated line iteration
408            let lines: Vec<&str> = content.lines().collect();
409
410            for link in &ctx.links {
411                let line_idx = link.line - 1;
412                if line_idx >= lines.len() {
413                    continue;
414                }
415
416                let line = lines[line_idx];
417
418                // Quick check for link pattern in this line
419                if !line.contains("](") {
420                    continue;
421                }
422
423                // Find all links in this line using optimized regex
424                for link_match in LINK_START_REGEX.find_iter(line) {
425                    let start_pos = link_match.start();
426                    let end_pos = link_match.end();
427
428                    // Calculate absolute position using LineIndex
429                    let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
430                    let absolute_start_pos = line_start_byte + start_pos;
431
432                    // Skip if this link is in a code span
433                    if element_cache.is_in_code_span(absolute_start_pos) {
434                        continue;
435                    }
436
437                    // Find the URL part after the link text
438                    // Try angle-bracket regex first (handles URLs with parens like `<path/(with)/parens.md>`)
439                    // Then fall back to normal URL regex
440                    let caps_and_url = URL_EXTRACT_ANGLE_BRACKET_REGEX
441                        .captures_at(line, end_pos - 1)
442                        .and_then(|caps| caps.get(1).map(|g| (caps, g)))
443                        .or_else(|| {
444                            URL_EXTRACT_REGEX
445                                .captures_at(line, end_pos - 1)
446                                .and_then(|caps| caps.get(1).map(|g| (caps, g)))
447                        });
448
449                    if let Some((_caps, url_group)) = caps_and_url {
450                        let url = url_group.as_str().trim();
451
452                        // Calculate column position
453                        let column = start_pos + 1;
454
455                        // Process and validate the link
456                        self.process_link_with_base(url, link.line, column, &base_path, &mut warnings);
457                    }
458                }
459            }
460        }
461
462        // Also process images - they have URLs already parsed
463        for image in &ctx.images {
464            let url = image.url.as_ref();
465            self.process_link_with_base(url, image.line, image.start_col + 1, &base_path, &mut warnings);
466        }
467
468        Ok(warnings)
469    }
470
471    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
472        Ok(ctx.content.to_string())
473    }
474
    /// Expose the rule as `Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
478
    /// Default configuration section for this rule; always `None`.
    fn default_config_section(&self) -> Option<(String, toml::Value)> {
        // No configurable options for this rule
        None
    }
483
484    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
485    where
486        Self: Sized,
487    {
488        let rule_config = crate::rule_config_serde::load_rule_config::<MD057Config>(config);
489        Box::new(Self::from_config_struct(rule_config))
490    }
491
    /// Declares that this rule takes part in workspace-wide cross-file checks.
    fn cross_file_scope(&self) -> CrossFileScope {
        CrossFileScope::Workspace
    }
495
496    fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, index: &mut FileIndex) {
497        let content = ctx.content;
498
499        // Early returns for performance
500        if content.is_empty() || !content.contains("](") {
501            return;
502        }
503
504        // Pre-collect lines to avoid repeated line iteration
505        let lines: Vec<&str> = content.lines().collect();
506        let element_cache = ElementCache::new(content);
507        let line_index = &ctx.line_index;
508
509        for link in &ctx.links {
510            let line_idx = link.line - 1;
511            if line_idx >= lines.len() {
512                continue;
513            }
514
515            let line = lines[line_idx];
516            if !line.contains("](") {
517                continue;
518            }
519
520            // Find all links in this line
521            for link_match in LINK_START_REGEX.find_iter(line) {
522                let start_pos = link_match.start();
523                let end_pos = link_match.end();
524
525                // Calculate absolute position for code span detection
526                let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
527                let absolute_start_pos = line_start_byte + start_pos;
528
529                // Skip if in code span
530                if element_cache.is_in_code_span(absolute_start_pos) {
531                    continue;
532                }
533
534                // Extract the URL (group 1) and fragment (group 2)
535                // The regex separates URL and fragment: group 1 excludes #, group 2 captures #fragment
536                // Try angle-bracket regex first (handles URLs with parens)
537                let caps_result = URL_EXTRACT_ANGLE_BRACKET_REGEX
538                    .captures_at(line, end_pos - 1)
539                    .or_else(|| URL_EXTRACT_REGEX.captures_at(line, end_pos - 1));
540
541                if let Some(caps) = caps_result
542                    && let Some(url_group) = caps.get(1)
543                {
544                    let file_path = url_group.as_str().trim();
545
546                    // Skip empty, external, template variables, absolute URL paths,
547                    // framework aliases, or fragment-only URLs
548                    if file_path.is_empty()
549                        || PROTOCOL_DOMAIN_REGEX.is_match(file_path)
550                        || file_path.starts_with("www.")
551                        || file_path.starts_with('#')
552                        || file_path.starts_with("{{")
553                        || file_path.starts_with("{%")
554                        || file_path.starts_with('/')
555                        || file_path.starts_with('~')
556                        || file_path.starts_with('@')
557                    {
558                        continue;
559                    }
560
561                    // Strip query parameters before indexing (e.g., `file.md?raw=true` -> `file.md`)
562                    let file_path = Self::strip_query_and_fragment(file_path);
563
564                    // Get fragment from capture group 2 (includes # prefix)
565                    let fragment = caps.get(2).map(|m| m.as_str().trim_start_matches('#')).unwrap_or("");
566
567                    // Only index markdown file links for cross-file validation
568                    // Non-markdown files (images, media) are validated via filesystem in check()
569                    if is_markdown_file(file_path) {
570                        index.add_cross_file_link(CrossFileLinkIndex {
571                            target_path: file_path.to_string(),
572                            fragment: fragment.to_string(),
573                            line: link.line,
574                            column: start_pos + 1,
575                        });
576                    }
577                }
578            }
579        }
580    }
581
582    fn cross_file_check(
583        &self,
584        file_path: &Path,
585        file_index: &FileIndex,
586        workspace_index: &crate::workspace_index::WorkspaceIndex,
587    ) -> LintResult {
588        let mut warnings = Vec::new();
589
590        // Get the directory containing this file for resolving relative links
591        let file_dir = file_path.parent();
592
593        for cross_link in &file_index.cross_file_links {
594            // URL-decode the path for filesystem operations
595            // The stored path is URL-encoded (e.g., "%F0%9F%91%A4" for emoji 👤)
596            let decoded_target = Self::url_decode(&cross_link.target_path);
597
598            // Resolve the relative path using the decoded path
599            let target_path = if decoded_target.starts_with('/') {
600                // Absolute path from workspace root (e.g., "/CONTRIBUTING.md")
601                // Walk up from the current file's directory to find the workspace root
602                let stripped = decoded_target.trim_start_matches('/');
603                resolve_absolute_link(file_path, stripped)
604            } else if let Some(dir) = file_dir {
605                dir.join(&decoded_target)
606            } else {
607                Path::new(&decoded_target).to_path_buf()
608            };
609
610            // Normalize the path (handle .., ., etc.)
611            let target_path = normalize_path(&target_path);
612
613            // Check if the target file exists
614            let file_exists = workspace_index.contains_file(&target_path) || target_path.exists();
615
616            if !file_exists {
617                // For .html/.htm links, check if a corresponding markdown source exists
618                // This handles doc sites (mdBook, etc.) where .md is compiled to .html
619                let has_md_source = if let Some(ext) = target_path.extension().and_then(|e| e.to_str())
620                    && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
621                    && let (Some(stem), Some(parent)) =
622                        (target_path.file_stem().and_then(|s| s.to_str()), target_path.parent())
623                {
624                    MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
625                        let source_path = parent.join(format!("{stem}{md_ext}"));
626                        workspace_index.contains_file(&source_path) || source_path.exists()
627                    })
628                } else {
629                    false
630                };
631
632                if !has_md_source {
633                    warnings.push(LintWarning {
634                        rule_name: Some(self.name().to_string()),
635                        line: cross_link.line,
636                        column: cross_link.column,
637                        end_line: cross_link.line,
638                        end_column: cross_link.column + cross_link.target_path.len(),
639                        message: format!("Relative link '{}' does not exist", cross_link.target_path),
640                        severity: Severity::Warning,
641                        fix: None,
642                    });
643                }
644            }
645        }
646
647        Ok(warnings)
648    }
649}
650
/// Lexically normalize a path by resolving `.` and `..` components.
///
/// The filesystem is not consulted. `.` components are dropped. A `..` removes the
/// preceding normal component; at the root (or a Windows prefix) it is ignored,
/// because the parent of the root is the root; and when nothing is left to remove
/// it is kept, so a relative path that climbs above its start stays correct
/// (`docs/../../README.md` becomes `../README.md`, not `README.md`).
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut components: Vec<Component<'_>> = Vec::new();

    for component in path.components() {
        match component {
            // Skip current directory markers
            Component::CurDir => {}
            Component::ParentDir => match components.last().copied() {
                // Go up one level
                Some(Component::Normal(_)) => {
                    components.pop();
                }
                // Already at the top of an absolute path: `..` has no effect
                Some(Component::RootDir) | Some(Component::Prefix(_)) => {}
                // Nothing to cancel out: the `..` is significant and must be kept
                Some(Component::ParentDir) | Some(Component::CurDir) | None => components.push(component),
            },
            Component::Normal(_) | Component::RootDir | Component::Prefix(_) => components.push(component),
        }
    }

    components.iter().collect()
}
674
/// Resolve a root-relative link (e.g. "/CONTRIBUTING.md", passed here without its
/// leading slash) against the directories above `file_path`.
///
/// Such links are relative to the workspace/repo root rather than the filesystem
/// root. Each ancestor directory of the file is tried in turn, nearest first, and
/// the first one that contains the target wins. If none does, the target is
/// returned relative to the file's own directory (or as-is when the file has no
/// parent), which the caller's existence check will then reject.
fn resolve_absolute_link(file_path: &Path, stripped_path: &str) -> PathBuf {
    // `ancestors()` starts with the path itself; the directories begin one step up.
    let found = file_path
        .ancestors()
        .skip(1)
        .map(|dir| dir.join(stripped_path))
        .find(|candidate| candidate.exists());

    found.unwrap_or_else(|| match file_path.parent() {
        Some(dir) => dir.join(stripped_path),
        None => PathBuf::from(stripped_path),
    })
}
698
699#[cfg(test)]
700mod tests {
701    use super::*;
702    use std::fs::File;
703    use std::io::Write;
704    use tempfile::tempdir;
705
706    #[test]
707    fn test_strip_query_and_fragment() {
708        // Test query parameter stripping
709        assert_eq!(
710            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true"),
711            "file.png"
712        );
713        assert_eq!(
714            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true&version=1"),
715            "file.png"
716        );
717        assert_eq!(
718            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?"),
719            "file.png"
720        );
721
722        // Test fragment stripping
723        assert_eq!(
724            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section"),
725            "file.md"
726        );
727        assert_eq!(
728            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#"),
729            "file.md"
730        );
731
732        // Test both query and fragment (query comes first, per RFC 3986)
733        assert_eq!(
734            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md?raw=true#section"),
735            "file.md"
736        );
737
738        // Test no query or fragment
739        assert_eq!(
740            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png"),
741            "file.png"
742        );
743
744        // Test with path
745        assert_eq!(
746            MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true"),
747            "path/to/image.png"
748        );
749        assert_eq!(
750            MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true#anchor"),
751            "path/to/image.png"
752        );
753
754        // Edge case: fragment before query (non-standard but possible)
755        assert_eq!(
756            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section?query"),
757            "file.md"
758        );
759    }
760
#[test]
fn test_url_decode() {
    // Each entry pairs a raw link path with the string `url_decode` is
    // expected to produce for it. The cases are checked in order.
    let cases = [
        // Simple space encoding
        ("penguin%20with%20space.jpg", "penguin with space.jpg"),
        // Path with encoded spaces
        ("assets/my%20file%20name.png", "assets/my file name.png"),
        // Multiple encoded characters
        ("hello%20world%21.md", "hello world!.md"),
        // Hex digits in lowercase, uppercase, and mixed case
        ("%2f%2e%2e", "/.."),
        ("%2F%2E%2E", "/.."),
        ("%2f%2E%2e", "/.."),
        // Nothing to decode: input comes back unchanged
        ("normal-file.md", "normal-file.md"),
        // Incomplete percent sequence is left untouched
        ("file%2.txt", "file%2.txt"),
        // A trailing percent sign is left untouched
        ("file%", "file%"),
        // Non-hex digits after the percent sign are left untouched
        ("file%GG.txt", "file%GG.txt"),
        // Plus is form encoding, not URL encoding, so it must survive as-is
        ("file+name.txt", "file+name.txt"),
        // Empty input
        ("", ""),
        // UTF-8 multi-byte character (é is C3 A9 in UTF-8)
        ("caf%C3%A9.md", "café.md"),
        // Several encoded characters back to back
        ("%20%20%20", "   "),
        // Encoded path separators
        ("path%2Fto%2Ffile.md", "path/to/file.md"),
        // Encoded and plain segments mixed together
        ("hello%20world/foo%20bar.md", "hello world/foo bar.md"),
        // Characters that are commonly percent-encoded
        ("file%5B1%5D.md", "file[1].md"),
        // A percent sign that merely resembles an escape
        ("100%pure.md", "100%pure.md"),
    ];

    for (encoded, decoded) in cases {
        assert_eq!(MD057ExistingRelativeLinks::url_decode(encoded), decoded);
    }
}
835
#[test]
fn test_url_encoded_filenames() {
    // Links written with %20 point at files whose names contain real spaces.
    // The test expects the two existing targets to be accepted and only the
    // third link to be reported.
    let temp_dir = tempdir().unwrap();
    let base_path = temp_dir.path();

    // Target with spaces in the file name
    std::fs::write(base_path.join("penguin with space.jpg"), b"image data").unwrap();

    // Target inside a directory whose name also contains a space
    let images_dir = base_path.join("my images");
    std::fs::create_dir(&images_dir).unwrap();
    std::fs::write(images_dir.join("photo 1.png"), b"photo data").unwrap();

    let content = r#"
# Test Document with URL-Encoded Links

![Penguin](penguin%20with%20space.jpg)
![Photo](my%20images/photo%201.png)
![Missing](missing%20file.jpg)
"#;

    let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
    let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
    let result = rule.check(&ctx).unwrap();

    // Exactly one warning, and it quotes the link as written (still encoded)
    assert_eq!(
        result.len(),
        1,
        "Should only warn about missing%20file.jpg. Got: {result:?}"
    );
    let only = &result[0];
    assert!(
        only.message.contains("missing%20file.jpg"),
        "Warning should mention the URL-encoded filename"
    );
}
880
#[test]
fn test_external_urls() {
    let rule = MD057ExistingRelativeLinks::new();

    // Everything in this list must be classified as external, i.e. skipped
    // rather than validated against the filesystem.
    let skipped = [
        // Common web protocols
        "https://example.com",
        "http://example.com",
        "ftp://example.com",
        "www.example.com",
        "example.com",
        // Special URI schemes (issue #192)
        "file:///path/to/file",
        "smb://server/share",
        "macappstores://apps.apple.com/",
        "mailto:user@example.com",
        "tel:+1234567890",
        "data:text/plain;base64,SGVsbG8=",
        "javascript:void(0)",
        "ssh://git@github.com/repo",
        "git://github.com/repo.git",
        // Template variables: Handlebars/Mustache, block helper, partial,
        // Mustache with spaces, Jinja2/Hugo shortcode, and a bare opener
        "{{URL}}",
        "{{#URL}}",
        "{{> partial}}",
        "{{ variable }}",
        "{{% include %}}",
        "{{",
        // Absolute web URL paths (typically routes of a published docs site)
        "/api/v1/users",
        "/blog/2024/release.html",
        "/react/hooks/use-state.html",
        "/pkg/runtime",
        "/doc/go1compat",
        "/index.html",
        "/assets/logo.png",
        // Framework aliases with a tilde prefix (Vite, Nuxt, Astro project root);
        // the last one is the Nuxt form without a slash
        "~/assets/image.png",
        "~/components/Button.vue",
        "~assets/logo.svg",
        // Framework aliases with an @ prefix (Vue, webpack, Vite)
        "@/components/Header.vue",
        "@images/photo.jpg",
        "@assets/styles.css",
    ];
    for url in skipped {
        assert!(rule.is_external_url(url));
    }

    // Plain relative paths must NOT be treated as external; these are the
    // links the rule is supposed to validate.
    let validated = ["./relative/path.md", "relative/path.md", "../parent/path.md"];
    for url in validated {
        assert!(!rule.is_external_url(url));
    }
}
937
#[test]
fn test_framework_path_aliases() {
    // The temporary directory is empty, so every link that is actually
    // checked would be reported. The test expects the `~` and `@` alias
    // links to be skipped, leaving only `missing.md`.
    let temp_dir = tempdir().unwrap();
    let base_path = temp_dir.path();

    let content = r#"
# Framework Path Aliases

![Image 1](~/assets/penguin.jpg)
![Image 2](~assets/logo.svg)
![Image 3](@images/photo.jpg)
![Image 4](@/components/icon.svg)
[Link](@/pages/about.md)

This is a [real missing link](missing.md) that should be flagged.
"#;

    let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
    let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
    let result = rule.check(&ctx).unwrap();

    // One warning only, and it belongs to the ordinary relative link
    assert_eq!(
        result.len(),
        1,
        "Should only warn about missing.md, not framework aliases. Got: {result:?}"
    );
    let flagged = &result[0];
    assert!(
        flagged.message.contains("missing.md"),
        "Warning should be for missing.md"
    );
}
973
#[test]
fn test_url_decode_security_path_traversal() {
    // Percent-encoded traversal sequences must not make a missing target look
    // present: the two traversal-style links are expected to be reported and
    // the plain link to an existing file is expected to be accepted.
    let temp_dir = tempdir().unwrap();
    let base_path = temp_dir.path();

    // The only file that really exists under the base path
    let file_in_base = base_path.join("safe.md");
    File::create(&file_in_base).unwrap().write_all(b"# Safe").unwrap();

    // The traversal target uses a directory name that should not exist on any
    // platform (unlike e.g. /etc/passwd, which exists on Linux).
    // %2F = /, so ..%2F..%2Fnonexistent_dir_12345%2Fmissing.md is
    // ../../nonexistent_dir_12345/missing.md after decoding.
    // %252F = %2F (double encoded), so the second link decodes to a literal
    // "..%2F..%2F" and never becomes "..".
    let content = r#"
[Traversal attempt](..%2F..%2Fnonexistent_dir_12345%2Fmissing.md)
[Double encoded](..%252F..%252Fnonexistent%252Ffile.md)
[Safe link](safe.md)
"#;

    let rule = MD057ExistingRelativeLinks::new().with_path(base_path);

    let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
    let result = rule.check(&ctx).unwrap();

    // Both traversal-style links are still flagged as missing
    assert_eq!(
        result.len(),
        2,
        "Should have warnings for traversal attempts. Got: {result:?}"
    );

    // A bare count cannot tell which of the three links were reported, so pin
    // the identity of each warning as well.
    assert!(
        result.iter().any(|w| w.message.contains("nonexistent_dir_12345")),
        "Single-encoded traversal link should be flagged. Got: {result:?}"
    );
    assert!(
        result.iter().any(|w| w.message.contains("%252F")),
        "Double-encoded traversal link should be flagged. Got: {result:?}"
    );
    assert!(
        !result.iter().any(|w| w.message.contains("safe.md")),
        "Existing safe.md must not be flagged. Got: {result:?}"
    );
}
1008
#[test]
fn test_url_encoded_utf8_filenames() {
    // A percent-encoded multi-byte sequence (%C3%A9 is é) should match a file
    // whose name contains the real character; the link to a file that was
    // never created is the only one expected to be reported.
    let temp_dir = tempdir().unwrap();
    let base_path = temp_dir.path();

    // File with a non-ASCII name
    std::fs::write(base_path.join("café.md"), b"# Cafe").unwrap();

    let content = r#"
[Café link](caf%C3%A9.md)
[Missing unicode](r%C3%A9sum%C3%A9.md)
"#;

    let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
    let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
    let result = rule.check(&ctx).unwrap();

    // One warning, quoting the link in its encoded form
    assert_eq!(
        result.len(),
        1,
        "Should only warn about missing résumé.md. Got: {result:?}"
    );
    let reported = &result[0];
    assert!(
        reported.message.contains("r%C3%A9sum%C3%A9.md"),
        "Warning should mention the URL-encoded filename"
    );
}
1040
#[test]
fn test_url_encoded_emoji_filenames() {
    // Regression test for issue #214: a link whose directory component is a
    // percent-encoded emoji must resolve to the real directory.
    // 👤 = U+1F464 = F0 9F 91 A4 in UTF-8, and %20 is a space.
    let temp_dir = tempdir().unwrap();
    let base_path = temp_dir.path();

    // Directory "👤 Personal" containing "TV Shows.md"
    let personal_dir = base_path.join("👤 Personal");
    std::fs::create_dir(&personal_dir).unwrap();
    std::fs::write(personal_dir.join("TV Shows.md"), b"# TV Shows\n\nContent here.").unwrap();

    let content = r#"
# Test Document

[TV Shows](./%F0%9F%91%A4%20Personal/TV%20Shows.md)
[Missing](./%F0%9F%91%A4%20Personal/Missing.md)
"#;

    let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
    let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
    let result = rule.check(&ctx).unwrap();

    // The valid emoji path is accepted; only the absent file is reported
    assert_eq!(result.len(), 1, "Should only warn about missing file. Got: {result:?}");
    let reported = &result[0];
    assert!(
        reported.message.contains("Missing.md"),
        "Warning should be for Missing.md, got: {}",
        reported.message
    );
}
1081
#[test]
fn test_no_warnings_without_base_path() {
    // The rule is built without `with_path`, and the test expects it to
    // report nothing even though the link target is never created.
    let ctx = crate::lint_context::LintContext::new(
        "[Link](missing.md)",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let warnings = MD057ExistingRelativeLinks::new().check(&ctx).unwrap();

    assert!(warnings.is_empty(), "Should have no warnings without base path");
}
1091
#[test]
fn test_existing_and_missing_links() {
    // One link target exists, two do not, and one link is an external URL.
    // With the default configuration the test expects both absent targets
    // (a markdown file and a media file) to be reported.
    let temp_dir = tempdir().unwrap();
    let base_path = temp_dir.path();

    // The single target that is present on disk
    let present = base_path.join("exists.md");
    File::create(&present).unwrap().write_all(b"# Test File").unwrap();
    assert!(present.exists(), "exists.md should exist for this test");

    let content = r#"
# Test Document

[Valid Link](exists.md)
[Invalid Link](missing.md)
[External Link](https://example.com)
[Media Link](image.jpg)
        "#;

    let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
    let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
    let warnings = rule.check(&ctx).unwrap();

    // Two warnings: missing.md and image.jpg
    assert_eq!(warnings.len(), 2);
    for expected in ["missing.md", "image.jpg"] {
        assert!(warnings.iter().any(|w| w.message.as_str().contains(expected)));
    }
}
1128
#[test]
fn test_angle_bracket_links() {
    // Link destinations wrapped in `<...>` are expected to be handled like
    // bare destinations: the existing file and the external URL pass, the
    // absent file is reported.
    let temp_dir = tempdir().unwrap();
    let base_path = temp_dir.path();

    // Target for the valid link
    std::fs::write(base_path.join("exists.md"), b"# Test File").unwrap();

    let content = r#"
# Test Document

[Valid Link](<exists.md>)
[Invalid Link](<missing.md>)
[External Link](<https://example.com>)
    "#;

    // Default settings
    let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
    let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
    let warnings = rule.check(&ctx).unwrap();

    // Only missing.md is reported
    assert_eq!(warnings.len(), 1, "Should have exactly one warning");
    let reported = &warnings[0];
    assert!(
        reported.message.contains("missing.md"),
        "Warning should mention missing.md"
    );
}
1161
#[test]
fn test_angle_bracket_links_with_parens() {
    // Angle-bracket destinations may contain parentheses. The test expects
    // the path with an existing "(upload)" directory to resolve, the external
    // URL to be skipped, and the path through "(missing)" to be reported.
    let temp_dir = tempdir().unwrap();
    let base_path = temp_dir.path();

    // Build app/(upload)/page.tsx one level at a time
    let app_dir = base_path.join("app");
    std::fs::create_dir(&app_dir).unwrap();
    let upload_dir = app_dir.join("(upload)");
    std::fs::create_dir(&upload_dir).unwrap();
    std::fs::write(upload_dir.join("page.tsx"), b"export default function Page() {}").unwrap();

    let content = r#"
# Test Document with Paths Containing Parens

[Upload Page](<app/(upload)/page.tsx>)
[Unix pipe](<https://en.wikipedia.org/wiki/Pipeline_(Unix)>)
[Missing](<app/(missing)/file.md>)
"#;

    let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
    let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
    let result = rule.check(&ctx).unwrap();

    // Only the link through the absent directory is reported
    assert_eq!(
        result.len(),
        1,
        "Should have exactly one warning for missing file. Got: {result:?}"
    );
    let reported = &result[0];
    assert!(
        reported.message.contains("app/(missing)/file.md"),
        "Warning should mention app/(missing)/file.md"
    );
}
1204
#[test]
fn test_all_file_types_checked() {
    // Nothing is created in the temporary directory, so every link below is
    // broken. The test expects one warning per link, whatever the extension.
    let temp_dir = tempdir().unwrap();
    let base_path = temp_dir.path();

    let content = r#"
[Image Link](image.jpg)
[Video Link](video.mp4)
[Markdown Link](document.md)
[PDF Link](file.pdf)
"#;

    let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
    let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
    let warnings = rule.check(&ctx).unwrap();

    assert_eq!(warnings.len(), 4, "Should have warnings for all missing files");
}
1227
#[test]
fn test_code_span_detection() {
    // Empty temporary directory: any link that gets checked is reported.
    let temp_dir = tempdir().unwrap();
    let base_path = temp_dir.path();
    let rule = MD057ExistingRelativeLinks::new().with_path(base_path);

    // One real link plus link-shaped text inside a backtick code span
    let content = "This is a [link](nonexistent.md) and `[not a link](not-checked.md)` in code.";
    let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
    let warnings = rule.check(&ctx).unwrap();

    // The code-span text must be ignored; only the real link is reported
    assert_eq!(warnings.len(), 1, "Should only flag the real link");
    let reported = &warnings[0];
    assert!(reported.message.contains("nonexistent.md"));
}
1248
#[test]
fn test_inline_code_spans() {
    // Empty temporary directory: any link that gets checked is reported.
    let temp_dir = tempdir().unwrap();
    let base_path = temp_dir.path();

    // One ordinary link, one link that fills a whole code span, and one link
    // embedded in the middle of a code span
    let content = r#"
# Test Document

This is a normal link: [Link](missing.md)

This is a code span with a link: `[Link](another-missing.md)`

Some more text with `inline code [Link](yet-another-missing.md) embedded`.

    "#;

    let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
    let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
    let warnings = rule.check(&ctx).unwrap();

    // Only the ordinary link is reported
    assert_eq!(warnings.len(), 1, "Should have exactly one warning");
    let reported = &warnings[0];
    assert!(
        reported.message.contains("missing.md"),
        "Warning should be for missing.md"
    );

    // Neither code-span link may show up in any warning
    assert!(
        warnings.iter().all(|w| !w.message.contains("another-missing.md")),
        "Should not warn about link in code span"
    );
    assert!(
        warnings.iter().all(|w| !w.message.contains("yet-another-missing.md")),
        "Should not warn about link in inline code"
    );
}
1289
#[test]
fn test_extensionless_link_resolution() {
    // Only `page.md` exists on disk. The test expects both `page` and
    // `page.md` to be accepted, and the extensionless link with no matching
    // file to be reported.
    let temp_dir = tempdir().unwrap();
    let base_path = temp_dir.path();

    std::fs::write(base_path.join("page.md"), b"# Page").unwrap();

    let content = r#"
# Test Document

[Link without extension](page)
[Link with extension](page.md)
[Missing link](nonexistent)
"#;

    let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
    let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
    let warnings = rule.check(&ctx).unwrap();

    // Single warning, for the link that has no backing file
    assert_eq!(warnings.len(), 1, "Should only warn about nonexistent link");
    let reported = &warnings[0];
    assert!(
        reported.message.contains("nonexistent"),
        "Warning should be for 'nonexistent' not 'page'"
    );
}
1322
1323    // Cross-file validation tests
#[test]
fn test_cross_file_scope() {
    // A default-constructed rule must report workspace-wide cross-file scope.
    let scope = MD057ExistingRelativeLinks::new().cross_file_scope();
    assert_eq!(scope, CrossFileScope::Workspace);
}
1329
#[test]
fn test_contribute_to_index_extracts_markdown_links() {
    // Five links: two to markdown files (one with a fragment), one external
    // URL, and two to non-markdown files. The test expects only the two
    // markdown links to be recorded, in document order.
    let content = r#"
# Document

[Link to docs](./docs/guide.md)
[Link with fragment](./other.md#section)
[External link](https://example.com)
[Image link](image.png)
[Media file](video.mp4)
"#;

    let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
    let mut index = FileIndex::new();
    MD057ExistingRelativeLinks::new().contribute_to_index(&ctx, &mut index);

    let links = &index.cross_file_links;
    assert_eq!(links.len(), 2);

    // First entry: plain markdown link, empty fragment
    let first = &links[0];
    assert_eq!(first.target_path, "./docs/guide.md");
    assert_eq!(first.fragment, "");

    // Second entry: path and fragment stored separately
    let second = &links[1];
    assert_eq!(second.target_path, "./other.md");
    assert_eq!(second.fragment, "section");
}
1358
#[test]
fn test_contribute_to_index_skips_external_and_anchors() {
    // The document contains only external URLs and a same-document fragment
    // link; the test expects none of them to reach the cross-file index.
    let content = r#"
# Document

[External](https://example.com)
[Another external](http://example.org)
[Fragment only](#section)
[FTP link](ftp://files.example.com)
[Mail link](mailto:test@example.com)
[WWW link](www.example.com)
"#;

    let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
    let mut index = FileIndex::new();
    MD057ExistingRelativeLinks::new().contribute_to_index(&ctx, &mut index);

    assert_eq!(index.cross_file_links.len(), 0);
}
1380
#[test]
fn test_cross_file_check_valid_link() {
    use crate::workspace_index::WorkspaceIndex;

    // The workspace knows about docs/guide.md
    let mut workspace_index = WorkspaceIndex::new();
    workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());

    // The file being checked links to guide.md, a sibling of docs/index.md
    let link = CrossFileLinkIndex {
        target_path: String::from("guide.md"),
        fragment: String::new(),
        line: 5,
        column: 1,
    };
    let mut file_index = FileIndex::new();
    file_index.add_cross_file_link(link);

    // Check from the point of view of docs/index.md
    let source = Path::new("docs/index.md");
    let warnings = MD057ExistingRelativeLinks::new()
        .cross_file_check(source, &file_index, &workspace_index)
        .unwrap();

    // The target is in the workspace index, so nothing is reported
    assert!(warnings.is_empty());
}
1408
#[test]
fn test_cross_file_check_missing_link() {
    use crate::workspace_index::WorkspaceIndex;

    // Nothing is registered in the workspace
    let workspace_index = WorkspaceIndex::new();

    // The file being checked links to a markdown file that does not exist
    let link = CrossFileLinkIndex {
        target_path: String::from("missing.md"),
        fragment: String::new(),
        line: 5,
        column: 1,
    };
    let mut file_index = FileIndex::new();
    file_index.add_cross_file_link(link);

    let source = Path::new("docs/index.md");
    let warnings = MD057ExistingRelativeLinks::new()
        .cross_file_check(source, &file_index, &workspace_index)
        .unwrap();

    // One warning that names the target and states that it does not exist
    assert_eq!(warnings.len(), 1);
    let message = &warnings[0].message;
    assert!(message.contains("missing.md"));
    assert!(message.contains("does not exist"));
}
1437
#[test]
fn test_cross_file_check_parent_path() {
    use crate::workspace_index::WorkspaceIndex;

    // readme.md is registered at the workspace root
    let mut workspace_index = WorkspaceIndex::new();
    workspace_index.insert_file(PathBuf::from("readme.md"), FileIndex::new());

    // The file being checked reaches it through a parent-directory link
    let link = CrossFileLinkIndex {
        target_path: String::from("../readme.md"),
        fragment: String::new(),
        line: 5,
        column: 1,
    };
    let mut file_index = FileIndex::new();
    file_index.add_cross_file_link(link);

    // Check from the point of view of docs/guide.md
    let source = Path::new("docs/guide.md");
    let warnings = MD057ExistingRelativeLinks::new()
        .cross_file_check(source, &file_index, &workspace_index)
        .unwrap();

    // The link is expected to normalize to readme.md, so nothing is reported
    assert!(warnings.is_empty());
}
1465
#[test]
fn test_cross_file_check_html_link_with_md_source() {
    // A link to `guide.html` is expected to pass when `guide.md` is in the
    // workspace. This supports mdBook and similar generators that compile
    // .md sources to .html.
    use crate::workspace_index::WorkspaceIndex;

    // Only the markdown source is registered
    let mut workspace_index = WorkspaceIndex::new();
    workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());

    // An .html link entry (such entries can come from another rule, e.g. MD051)
    let link = CrossFileLinkIndex {
        target_path: String::from("guide.html"),
        fragment: String::from("section"),
        line: 10,
        column: 5,
    };
    let mut file_index = FileIndex::new();
    file_index.add_cross_file_link(link);

    // Check from the point of view of docs/index.md
    let source = Path::new("docs/index.md");
    let warnings = MD057ExistingRelativeLinks::new()
        .cross_file_check(source, &file_index, &workspace_index)
        .unwrap();

    // The .md source stands in for the .html target
    assert!(
        warnings.is_empty(),
        "Expected no warnings for .html link with .md source, got: {warnings:?}"
    );
}
1498
#[test]
fn test_cross_file_check_html_link_without_source() {
    // Counterpart of the .html/.md source test: with no matching .md source
    // in the workspace, the .html link must be reported.
    use crate::workspace_index::WorkspaceIndex;

    // Nothing is registered in the workspace
    let workspace_index = WorkspaceIndex::new();

    let link = CrossFileLinkIndex {
        target_path: String::from("missing.html"),
        fragment: String::new(),
        line: 10,
        column: 5,
    };
    let mut file_index = FileIndex::new();
    file_index.add_cross_file_link(link);

    // Check from the point of view of docs/index.md
    let source = Path::new("docs/index.md");
    let warnings = MD057ExistingRelativeLinks::new()
        .cross_file_check(source, &file_index, &workspace_index)
        .unwrap();

    assert_eq!(warnings.len(), 1, "Expected 1 warning for .html link without source");
    let reported = &warnings[0];
    assert!(reported.message.contains("missing.html"));
}
1527
#[test]
fn test_normalize_path_function() {
    // (input path, expected normalized path), checked in order
    let cases = [
        // Already normalized
        ("docs/guide.md", "docs/guide.md"),
        // Leading current-directory component is dropped
        ("./docs/guide.md", "docs/guide.md"),
        // A parent-directory component cancels the preceding directory
        ("docs/sub/../guide.md", "docs/guide.md"),
        // Several parent-directory components in a row
        ("a/b/c/../../d.md", "a/d.md"),
    ];

    for (raw, normalized) in cases {
        assert_eq!(normalize_path(Path::new(raw)), PathBuf::from(normalized));
    }
}
1551
#[test]
fn test_resolve_absolute_link() {
    // Layout used by this test:
    //   <root>/CONTRIBUTING.md
    //   <root>/docs/README.md
    let temp_dir = tempdir().expect("Failed to create temp dir");
    let root = temp_dir.path();

    let contributing = root.join("CONTRIBUTING.md");
    File::create(&contributing).expect("Failed to create CONTRIBUTING.md");

    let docs_dir = root.join("docs");
    std::fs::create_dir(&docs_dir).expect("Failed to create docs dir");
    let readme = docs_dir.join("README.md");
    File::create(&readme).expect("Failed to create README.md");

    // Resolving "CONTRIBUTING.md" on behalf of docs/README.md must land on the
    // root-level file, not on a path inside docs/
    let found = resolve_absolute_link(&readme, "CONTRIBUTING.md");
    assert!(found.exists(), "Should find CONTRIBUTING.md at workspace root");
    assert_eq!(found, contributing);

    // A name that exists nowhere must resolve to a path that does not exist
    let not_found = resolve_absolute_link(&readme, "NONEXISTENT.md");
    assert!(!not_found.exists(), "Should not find nonexistent file");
}
1578
1579    #[test]
1580    fn test_html_link_with_md_source() {
1581        // Links to .html files should pass if corresponding .md source exists
1582        let temp_dir = tempdir().unwrap();
1583        let base_path = temp_dir.path();
1584
1585        // Create guide.md (source file)
1586        let md_file = base_path.join("guide.md");
1587        File::create(&md_file).unwrap().write_all(b"# Guide").unwrap();
1588
1589        let content = r#"
1590[Read the guide](guide.html)
1591[Also here](getting-started.html)
1592"#;
1593
1594        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1595        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1596        let result = rule.check(&ctx).unwrap();
1597
1598        // guide.html passes (guide.md exists), getting-started.html fails
1599        assert_eq!(
1600            result.len(),
1601            1,
1602            "Should only warn about missing source. Got: {result:?}"
1603        );
1604        assert!(result[0].message.contains("getting-started.html"));
1605    }
1606
1607    #[test]
1608    fn test_htm_link_with_md_source() {
1609        // .htm extension should also check for markdown source
1610        let temp_dir = tempdir().unwrap();
1611        let base_path = temp_dir.path();
1612
1613        let md_file = base_path.join("page.md");
1614        File::create(&md_file).unwrap().write_all(b"# Page").unwrap();
1615
1616        let content = "[Page](page.htm)";
1617
1618        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1619        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1620        let result = rule.check(&ctx).unwrap();
1621
1622        assert!(
1623            result.is_empty(),
1624            "Should not warn when .md source exists for .htm link"
1625        );
1626    }
1627
1628    #[test]
1629    fn test_html_link_finds_various_markdown_extensions() {
1630        // Should find .mdx, .markdown, etc. as source files
1631        let temp_dir = tempdir().unwrap();
1632        let base_path = temp_dir.path();
1633
1634        File::create(base_path.join("doc.md")).unwrap();
1635        File::create(base_path.join("tutorial.mdx")).unwrap();
1636        File::create(base_path.join("guide.markdown")).unwrap();
1637
1638        let content = r#"
1639[Doc](doc.html)
1640[Tutorial](tutorial.html)
1641[Guide](guide.html)
1642"#;
1643
1644        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1645        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1646        let result = rule.check(&ctx).unwrap();
1647
1648        assert!(
1649            result.is_empty(),
1650            "Should find all markdown variants as source files. Got: {result:?}"
1651        );
1652    }
1653
1654    #[test]
1655    fn test_html_link_in_subdirectory() {
1656        // Should find markdown source in subdirectories
1657        let temp_dir = tempdir().unwrap();
1658        let base_path = temp_dir.path();
1659
1660        let docs_dir = base_path.join("docs");
1661        std::fs::create_dir(&docs_dir).unwrap();
1662        File::create(docs_dir.join("guide.md"))
1663            .unwrap()
1664            .write_all(b"# Guide")
1665            .unwrap();
1666
1667        let content = "[Guide](docs/guide.html)";
1668
1669        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1670        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1671        let result = rule.check(&ctx).unwrap();
1672
1673        assert!(result.is_empty(), "Should find markdown source in subdirectory");
1674    }
1675}