rumdl_lib/rules/
md057_existing_relative_links.rs

1//!
2//! Rule MD057: Existing relative links
3//!
4//! See [docs/md057.md](../../docs/md057.md) for full documentation, configuration, and examples.
5
6use crate::rule::{CrossFileScope, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::element_cache::ElementCache;
8use crate::workspace_index::{CrossFileLinkIndex, FileIndex};
9use regex::Regex;
10use std::collections::HashMap;
11use std::env;
12use std::path::{Path, PathBuf};
13use std::sync::LazyLock;
14use std::sync::{Arc, Mutex};
15
16mod md057_config;
17use md057_config::MD057Config;
18
// Thread-safe, process-wide cache for file existence checks to avoid redundant filesystem
// operations. Keys are the exact paths that were probed; values are the `Path::exists` results.
// The map is created lazily on first access and emptied by `reset_file_existence_cache`.
static FILE_EXISTENCE_CACHE: LazyLock<Arc<Mutex<HashMap<PathBuf, bool>>>> =
    LazyLock::new(|| Arc::new(Mutex::new(HashMap::new())));
22
23// Reset the file existence cache (typically between rule runs)
24fn reset_file_existence_cache() {
25    if let Ok(mut cache) = FILE_EXISTENCE_CACHE.lock() {
26        cache.clear();
27    }
28}
29
30// Check if a file exists with caching
31fn file_exists_with_cache(path: &Path) -> bool {
32    match FILE_EXISTENCE_CACHE.lock() {
33        Ok(mut cache) => *cache.entry(path.to_path_buf()).or_insert_with(|| path.exists()),
34        Err(_) => path.exists(), // Fallback to uncached check on mutex poison
35    }
36}
37
38/// Check if a file exists, also trying markdown extensions for extensionless links.
39/// This supports wiki-style links like `[Link](page)` that resolve to `page.md`.
40fn file_exists_or_markdown_extension(path: &Path) -> bool {
41    // First, check exact path
42    if file_exists_with_cache(path) {
43        return true;
44    }
45
46    // If the path has no extension, try adding markdown extensions
47    if path.extension().is_none() {
48        for ext in MARKDOWN_EXTENSIONS {
49            // MARKDOWN_EXTENSIONS includes the dot, e.g., ".md"
50            let path_with_ext = path.with_extension(&ext[1..]);
51            if file_exists_with_cache(&path_with_ext) {
52                return true;
53            }
54        }
55    }
56
57    false
58}
59
// Regex to match the start of a link - simplified for performance.
// It matches an optional `!` followed by `[text]` and does not look at what follows the `]`,
// so it also matches image starts and bracketed text that is not a link.
static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());

/// Regex to extract the URL from an angle-bracketed markdown link
/// Format: `](<URL>)` or `](<URL> "title")`
/// This handles URLs with parentheses like `](<path/(with)/parens.md>)`
///
/// Capture group 1 is the text between `<` and `>`; group 2 is an optional `#fragment`
/// that directly follows the closing `>`.
static URL_EXTRACT_ANGLE_BRACKET_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\]\(\s*<([^>]+)>(#[^\)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());

/// Regex to extract the URL from a normal markdown link (without angle brackets)
/// Format: `](URL)` or `](URL "title")`
///
/// Capture group 1 is the URL up to (not including) the first `#`, `)`, `>` or whitespace;
/// group 2 is an optional `#fragment`, including the `#`.
static URL_EXTRACT_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new("\\]\\(\\s*([^>\\)\\s#]+)(#[^)\\s]*)?\\s*(?:\"[^\"]*\")?\\s*\\)").unwrap());

/// Regex to detect URLs with explicit schemes (should not be checked as relative links)
/// Matches: scheme:// or scheme: (per RFC 3986), and additionally a leading `www.`
/// This covers http, https, ftp, file, smb, mailto, tel, data, macappstores, etc.
/// The pattern is anchored at the start of the string.
static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());

// Current working directory, resolved once on first use.
// Falls back to "." when the working directory cannot be determined.
static CURRENT_DIR: LazyLock<PathBuf> = LazyLock::new(|| env::current_dir().unwrap_or_else(|_| PathBuf::from(".")));
82
/// Map an ASCII hex digit (`0-9`, `a-f`, `A-F`) to the number it denotes (0-15).
/// Any other byte yields `None`.
#[inline]
fn hex_digit_to_value(byte: u8) -> Option<u8> {
    // `to_digit(16)` accepts exactly the ASCII hex digits in either case and
    // returns values below 16, so the narrowing conversion cannot fail.
    char::from(byte)
        .to_digit(16)
        .and_then(|digit| u8::try_from(digit).ok())
}
94
/// Supported markdown file extensions.
///
/// Every entry is lowercase and includes the leading dot. Code in this file relies on both
/// properties: `is_markdown_file` compares against a lowercased path, and
/// `file_exists_or_markdown_extension` strips the first character before appending.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md",
    ".markdown",
    ".mdx",
    ".mkd",
    ".mkdn",
    ".mdown",
    ".mdwn",
    ".qmd",
    ".rmd",
];
107
108/// Check if a path has a markdown extension (case-insensitive)
109#[inline]
110fn is_markdown_file(path: &str) -> bool {
111    let path_lower = path.to_lowercase();
112    MARKDOWN_EXTENSIONS.iter().any(|ext| path_lower.ends_with(ext))
113}
114
/// Rule MD057: Existing relative links should point to valid files or directories.
///
/// The only state is an optional base-directory override. It sits behind an `Arc`, so clones
/// created through the derived `Clone` share the same override.
#[derive(Debug, Default, Clone)]
pub struct MD057ExistingRelativeLinks {
    /// Base directory for resolving relative links.
    /// `None` (the default) until a directory is supplied through `with_path`.
    base_path: Arc<Mutex<Option<PathBuf>>>,
}
121
122impl MD057ExistingRelativeLinks {
123    /// Create a new instance with default settings
124    pub fn new() -> Self {
125        Self::default()
126    }
127
128    /// Set the base path for resolving relative links
129    pub fn with_path<P: AsRef<Path>>(self, path: P) -> Self {
130        let path = path.as_ref();
131        let dir_path = if path.is_file() {
132            path.parent().map(|p| p.to_path_buf())
133        } else {
134            Some(path.to_path_buf())
135        };
136
137        if let Ok(mut guard) = self.base_path.lock() {
138            *guard = dir_path;
139        }
140        self
141    }
142
143    pub fn from_config_struct(_config: MD057Config) -> Self {
144        Self::default()
145    }
146
147    /// Check if a URL is external or should be skipped for validation.
148    ///
149    /// Returns `true` (skip validation) for:
150    /// - URLs with protocols: `https://`, `http://`, `ftp://`, `mailto:`, etc.
151    /// - Bare domains: `www.example.com`, `example.com`
152    /// - Template variables: `{{URL}}`, `{{% include %}}`
153    /// - Absolute web URL paths: `/api/docs`, `/blog/post.html`
154    ///
155    /// Returns `false` (validate) for:
156    /// - Relative filesystem paths: `./file.md`, `../parent/file.md`, `file.md`
157    #[inline]
158    fn is_external_url(&self, url: &str) -> bool {
159        if url.is_empty() {
160            return false;
161        }
162
163        // Quick checks for common external URL patterns
164        if PROTOCOL_DOMAIN_REGEX.is_match(url) || url.starts_with("www.") {
165            return true;
166        }
167
168        // Skip template variables (Handlebars/Mustache/Jinja2 syntax)
169        // Examples: {{URL}}, {{#URL}}, {{> partial}}, {{% include %}}, {{ variable }}
170        if url.starts_with("{{") || url.starts_with("{%") {
171            return true;
172        }
173
174        // Bare domain check (e.g., "example.com")
175        // Note: We intentionally DON'T skip all TLDs like .org, .net, etc.
176        // Links like [text](nodejs.org/path) without a protocol are broken -
177        // they'll be treated as relative paths by markdown renderers.
178        // Flagging them helps users find missing protocols.
179        // We only skip .com as a minimal safety net for the most common case.
180        if url.ends_with(".com") {
181            return true;
182        }
183
184        // Absolute URL paths (e.g., /api/docs, /blog/post.html) are treated as web paths
185        // and skipped. These are typically routes for published documentation sites,
186        // not filesystem paths that can be validated locally.
187        if url.starts_with('/') {
188            return true;
189        }
190
191        // Framework path aliases (resolved by build tools like Vite, webpack, etc.)
192        // These are not filesystem paths but module/asset aliases
193        // Examples: ~/assets/image.png, @images/photo.jpg, @/components/Button.vue
194        if url.starts_with('~') || url.starts_with('@') {
195            return true;
196        }
197
198        // All other cases (relative paths, etc.) are not external
199        false
200    }
201
202    /// Check if the URL is a fragment-only link (internal document link)
203    #[inline]
204    fn is_fragment_only_link(&self, url: &str) -> bool {
205        url.starts_with('#')
206    }
207
208    /// Decode URL percent-encoded sequences in a path.
209    /// Converts `%20` to space, `%2F` to `/`, etc.
210    /// Returns the original string if decoding fails or produces invalid UTF-8.
211    fn url_decode(path: &str) -> String {
212        // Quick check: if no percent sign, return as-is
213        if !path.contains('%') {
214            return path.to_string();
215        }
216
217        let bytes = path.as_bytes();
218        let mut result = Vec::with_capacity(bytes.len());
219        let mut i = 0;
220
221        while i < bytes.len() {
222            if bytes[i] == b'%' && i + 2 < bytes.len() {
223                // Try to parse the two hex digits following %
224                let hex1 = bytes[i + 1];
225                let hex2 = bytes[i + 2];
226                if let (Some(d1), Some(d2)) = (hex_digit_to_value(hex1), hex_digit_to_value(hex2)) {
227                    result.push(d1 * 16 + d2);
228                    i += 3;
229                    continue;
230                }
231            }
232            result.push(bytes[i]);
233            i += 1;
234        }
235
236        // Convert to UTF-8, falling back to original if invalid
237        String::from_utf8(result).unwrap_or_else(|_| path.to_string())
238    }
239
240    /// Strip query parameters and fragments from a URL for file existence checking.
241    /// URLs like `path/to/image.png?raw=true` or `file.md#section` should check
242    /// for `path/to/image.png` or `file.md` respectively.
243    ///
244    /// Note: In standard URLs, query parameters (`?`) come before fragments (`#`),
245    /// so we check for `?` first. If a URL has both, only the query is stripped here
246    /// (fragments are handled separately by the regex in `contribute_to_index`).
247    fn strip_query_and_fragment(url: &str) -> &str {
248        // Find the first occurrence of '?' or '#', whichever comes first
249        // This handles both standard URLs (? before #) and edge cases (# before ?)
250        let query_pos = url.find('?');
251        let fragment_pos = url.find('#');
252
253        match (query_pos, fragment_pos) {
254            (Some(q), Some(f)) => {
255                // Both exist - strip at whichever comes first
256                &url[..q.min(f)]
257            }
258            (Some(q), None) => &url[..q],
259            (None, Some(f)) => &url[..f],
260            (None, None) => url,
261        }
262    }
263
264    /// Resolve a relative link against a provided base path
265    fn resolve_link_path_with_base(link: &str, base_path: &Path) -> PathBuf {
266        base_path.join(link)
267    }
268
269    /// Process a single link and check if it exists
270    fn process_link_with_base(
271        &self,
272        url: &str,
273        line_num: usize,
274        column: usize,
275        base_path: &Path,
276        warnings: &mut Vec<LintWarning>,
277    ) {
278        // Skip empty URLs
279        if url.is_empty() {
280            return;
281        }
282
283        // Skip external URLs and fragment-only links (optimized order)
284        if self.is_external_url(url) || self.is_fragment_only_link(url) {
285            return;
286        }
287
288        // Strip query parameters and fragments before checking file existence
289        // URLs like `path/to/image.png?raw=true` should check for `path/to/image.png`
290        let file_path = Self::strip_query_and_fragment(url);
291
292        // URL-decode the path to handle percent-encoded characters
293        // e.g., `penguin%20with%20space.jpg` -> `penguin with space.jpg`
294        let decoded_path = Self::url_decode(file_path);
295
296        // Resolve the relative link against the base path
297        let resolved_path = Self::resolve_link_path_with_base(&decoded_path, base_path);
298        // Check if the file exists, also trying markdown extensions for extensionless links
299        if !file_exists_or_markdown_extension(&resolved_path) {
300            warnings.push(LintWarning {
301                rule_name: Some(self.name().to_string()),
302                line: line_num,
303                column,
304                end_line: line_num,
305                end_column: column + url.len(),
306                message: format!("Relative link '{url}' does not exist"),
307                severity: Severity::Warning,
308                fix: None, // No automatic fix for missing files
309            });
310        }
311    }
312}
313
314impl Rule for MD057ExistingRelativeLinks {
    /// Rule identifier; it is also stored as `rule_name` in the warnings this rule emits.
    fn name(&self) -> &'static str {
        "MD057"
    }
318
    /// One-line, human-readable summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Relative links should point to existing files"
    }
322
    /// This rule belongs to the link category.
    fn category(&self) -> RuleCategory {
        RuleCategory::Link
    }
326
327    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
328        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
329    }
330
331    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
332        let content = ctx.content;
333
334        // Early returns for performance
335        if content.is_empty() || !content.contains('[') {
336            return Ok(Vec::new());
337        }
338
339        // Quick check for any potential links before expensive operations
340        if !content.contains("](") {
341            return Ok(Vec::new());
342        }
343
344        // Reset the file existence cache for a fresh run
345        reset_file_existence_cache();
346
347        let mut warnings = Vec::new();
348
349        // Determine base path for resolving relative links
350        // ALWAYS compute from ctx.source_file for each file - do not reuse cached base_path
351        // This ensures each file resolves links relative to its own directory
352        let base_path: Option<PathBuf> = {
353            // First check if base_path was explicitly set via with_path() (for tests)
354            let explicit_base = self.base_path.lock().ok().and_then(|g| g.clone());
355            if explicit_base.is_some() {
356                explicit_base
357            } else if let Some(ref source_file) = ctx.source_file {
358                // Resolve symlinks to get the actual file location
359                // This ensures relative links are resolved from the target's directory,
360                // not the symlink's directory
361                let resolved_file = source_file.canonicalize().unwrap_or_else(|_| source_file.clone());
362                resolved_file
363                    .parent()
364                    .map(|p| p.to_path_buf())
365                    .or_else(|| Some(CURRENT_DIR.clone()))
366            } else {
367                // No source file available - cannot validate relative links
368                None
369            }
370        };
371
372        // If we still don't have a base path, we can't validate relative links
373        let Some(base_path) = base_path else {
374            return Ok(warnings);
375        };
376
377        // Use LintContext links instead of expensive regex parsing
378        if !ctx.links.is_empty() {
379            // Use LineIndex for correct position calculation across all line ending types
380            let line_index = &ctx.line_index;
381
382            // Create element cache once for all links
383            let element_cache = ElementCache::new(content);
384
385            // Pre-collect lines to avoid repeated line iteration
386            let lines: Vec<&str> = content.lines().collect();
387
388            for link in &ctx.links {
389                let line_idx = link.line - 1;
390                if line_idx >= lines.len() {
391                    continue;
392                }
393
394                let line = lines[line_idx];
395
396                // Quick check for link pattern in this line
397                if !line.contains("](") {
398                    continue;
399                }
400
401                // Find all links in this line using optimized regex
402                for link_match in LINK_START_REGEX.find_iter(line) {
403                    let start_pos = link_match.start();
404                    let end_pos = link_match.end();
405
406                    // Calculate absolute position using LineIndex
407                    let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
408                    let absolute_start_pos = line_start_byte + start_pos;
409
410                    // Skip if this link is in a code span
411                    if element_cache.is_in_code_span(absolute_start_pos) {
412                        continue;
413                    }
414
415                    // Find the URL part after the link text
416                    // Try angle-bracket regex first (handles URLs with parens like `<path/(with)/parens.md>`)
417                    // Then fall back to normal URL regex
418                    let caps_and_url = URL_EXTRACT_ANGLE_BRACKET_REGEX
419                        .captures_at(line, end_pos - 1)
420                        .and_then(|caps| caps.get(1).map(|g| (caps, g)))
421                        .or_else(|| {
422                            URL_EXTRACT_REGEX
423                                .captures_at(line, end_pos - 1)
424                                .and_then(|caps| caps.get(1).map(|g| (caps, g)))
425                        });
426
427                    if let Some((_caps, url_group)) = caps_and_url {
428                        let url = url_group.as_str().trim();
429
430                        // Calculate column position
431                        let column = start_pos + 1;
432
433                        // Process and validate the link
434                        self.process_link_with_base(url, link.line, column, &base_path, &mut warnings);
435                    }
436                }
437            }
438        }
439
440        // Also process images - they have URLs already parsed
441        for image in &ctx.images {
442            let url = image.url.as_ref();
443            self.process_link_with_base(url, image.line, image.start_col + 1, &base_path, &mut warnings);
444        }
445
446        Ok(warnings)
447    }
448
449    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
450        Ok(ctx.content.to_string())
451    }
452
    /// Expose the rule as `&dyn Any`.
    // NOTE(review): presumably used by callers to downcast to the concrete rule type - confirm.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
456
    /// Default configuration section for this rule; always `None`.
    fn default_config_section(&self) -> Option<(String, toml::Value)> {
        // No configurable options for this rule
        None
    }
461
462    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
463    where
464        Self: Sized,
465    {
466        let rule_config = crate::rule_config_serde::load_rule_config::<MD057Config>(config);
467        Box::new(Self::from_config_struct(rule_config))
468    }
469
    /// Declares workspace scope for cross-file analysis; the matching hooks are
    /// `contribute_to_index` and `cross_file_check`.
    fn cross_file_scope(&self) -> CrossFileScope {
        CrossFileScope::Workspace
    }
473
474    fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, index: &mut FileIndex) {
475        let content = ctx.content;
476
477        // Early returns for performance
478        if content.is_empty() || !content.contains("](") {
479            return;
480        }
481
482        // Pre-collect lines to avoid repeated line iteration
483        let lines: Vec<&str> = content.lines().collect();
484        let element_cache = ElementCache::new(content);
485        let line_index = &ctx.line_index;
486
487        for link in &ctx.links {
488            let line_idx = link.line - 1;
489            if line_idx >= lines.len() {
490                continue;
491            }
492
493            let line = lines[line_idx];
494            if !line.contains("](") {
495                continue;
496            }
497
498            // Find all links in this line
499            for link_match in LINK_START_REGEX.find_iter(line) {
500                let start_pos = link_match.start();
501                let end_pos = link_match.end();
502
503                // Calculate absolute position for code span detection
504                let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
505                let absolute_start_pos = line_start_byte + start_pos;
506
507                // Skip if in code span
508                if element_cache.is_in_code_span(absolute_start_pos) {
509                    continue;
510                }
511
512                // Extract the URL (group 1) and fragment (group 2)
513                // The regex separates URL and fragment: group 1 excludes #, group 2 captures #fragment
514                // Try angle-bracket regex first (handles URLs with parens)
515                let caps_result = URL_EXTRACT_ANGLE_BRACKET_REGEX
516                    .captures_at(line, end_pos - 1)
517                    .or_else(|| URL_EXTRACT_REGEX.captures_at(line, end_pos - 1));
518
519                if let Some(caps) = caps_result
520                    && let Some(url_group) = caps.get(1)
521                {
522                    let file_path = url_group.as_str().trim();
523
524                    // Skip empty, external, template variables, absolute URL paths,
525                    // framework aliases, or fragment-only URLs
526                    if file_path.is_empty()
527                        || PROTOCOL_DOMAIN_REGEX.is_match(file_path)
528                        || file_path.starts_with("www.")
529                        || file_path.starts_with('#')
530                        || file_path.starts_with("{{")
531                        || file_path.starts_with("{%")
532                        || file_path.starts_with('/')
533                        || file_path.starts_with('~')
534                        || file_path.starts_with('@')
535                    {
536                        continue;
537                    }
538
539                    // Strip query parameters before indexing (e.g., `file.md?raw=true` -> `file.md`)
540                    let file_path = Self::strip_query_and_fragment(file_path);
541
542                    // Get fragment from capture group 2 (includes # prefix)
543                    let fragment = caps.get(2).map(|m| m.as_str().trim_start_matches('#')).unwrap_or("");
544
545                    // Only index markdown file links for cross-file validation
546                    // Non-markdown files (images, media) are validated via filesystem in check()
547                    if is_markdown_file(file_path) {
548                        index.add_cross_file_link(CrossFileLinkIndex {
549                            target_path: file_path.to_string(),
550                            fragment: fragment.to_string(),
551                            line: link.line,
552                            column: start_pos + 1,
553                        });
554                    }
555                }
556            }
557        }
558    }
559
560    fn cross_file_check(
561        &self,
562        file_path: &Path,
563        file_index: &FileIndex,
564        workspace_index: &crate::workspace_index::WorkspaceIndex,
565    ) -> LintResult {
566        let mut warnings = Vec::new();
567
568        // Get the directory containing this file for resolving relative links
569        let file_dir = file_path.parent();
570
571        for cross_link in &file_index.cross_file_links {
572            // Resolve the relative path
573            let target_path = if cross_link.target_path.starts_with('/') {
574                // Absolute path from workspace root (e.g., "/CONTRIBUTING.md")
575                // Walk up from the current file's directory to find the workspace root
576                let stripped = cross_link.target_path.trim_start_matches('/');
577                resolve_absolute_link(file_path, stripped)
578            } else if let Some(dir) = file_dir {
579                dir.join(&cross_link.target_path)
580            } else {
581                Path::new(&cross_link.target_path).to_path_buf()
582            };
583
584            // Normalize the path (handle .., ., etc.)
585            let target_path = normalize_path(&target_path);
586
587            // Check if the target markdown file exists in the workspace index
588            if !workspace_index.contains_file(&target_path) {
589                // File not in index - check filesystem directly for case-insensitive filesystems
590                if !target_path.exists() {
591                    warnings.push(LintWarning {
592                        rule_name: Some(self.name().to_string()),
593                        line: cross_link.line,
594                        column: cross_link.column,
595                        end_line: cross_link.line,
596                        end_column: cross_link.column + cross_link.target_path.len(),
597                        message: format!("Relative link '{}' does not exist", cross_link.target_path),
598                        severity: Severity::Warning,
599                        fix: None,
600                    });
601                }
602            }
603        }
604
605        Ok(warnings)
606    }
607}
608
/// Normalize a path lexically by resolving `.` and `..` components.
///
/// The filesystem is not consulted, so symlinks are not followed.
/// - `.` components are dropped.
/// - `..` removes the preceding normal component (`a/b/../c` becomes `a/c`).
/// - `..` directly below the root is dropped, because the root has no parent
///   (`/a/../../b` becomes `/b` and stays absolute).
/// - `..` with nothing left to remove is kept, so a relative path that climbs above its
///   starting point keeps its meaning (`../a` stays `../a`, `a/../../b` becomes `../b`).
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut components: Vec<Component> = Vec::new();

    for component in path.components() {
        match component {
            // Skip current directory markers
            Component::CurDir => {}
            Component::ParentDir => match components.last().copied() {
                // Go up one level
                Some(Component::Normal(_)) => {
                    components.pop();
                }
                // Cannot go above the root
                Some(Component::RootDir) => {}
                // Nothing to cancel out (empty, another `..`, or a bare prefix): keep it
                _ => components.push(component),
            },
            _ => components.push(component),
        }
    }

    components.iter().collect()
}
632
/// Resolve an absolute link (e.g., "/CONTRIBUTING.md") against the ancestors of a file.
///
/// A leading "/" in a markdown link is taken to mean the workspace/repo root rather than
/// the filesystem root. `stripped_path` is the link without that leading slash. Starting at
/// the directory of `file_path` and moving upwards, the first ancestor directory that
/// contains `stripped_path` determines the result.
///
/// When no ancestor contains the target, the path relative to the file's own directory is
/// returned (or `stripped_path` alone if `file_path` has no parent); callers are expected
/// to find that it does not exist.
fn resolve_absolute_link(file_path: &Path, stripped_path: &str) -> PathBuf {
    // `ancestors()` yields the path itself first; skipping it starts at the parent directory.
    let first_hit = file_path
        .ancestors()
        .skip(1)
        .map(|ancestor| ancestor.join(stripped_path))
        .find(|candidate| candidate.exists());

    first_hit.unwrap_or_else(|| match file_path.parent() {
        Some(dir) => dir.join(stripped_path),
        None => PathBuf::from(stripped_path),
    })
}
656
657#[cfg(test)]
658mod tests {
659    use super::*;
660    use std::fs::File;
661    use std::io::Write;
662    use tempfile::tempdir;
663
664    #[test]
665    fn test_strip_query_and_fragment() {
666        // Test query parameter stripping
667        assert_eq!(
668            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true"),
669            "file.png"
670        );
671        assert_eq!(
672            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true&version=1"),
673            "file.png"
674        );
675        assert_eq!(
676            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?"),
677            "file.png"
678        );
679
680        // Test fragment stripping
681        assert_eq!(
682            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section"),
683            "file.md"
684        );
685        assert_eq!(
686            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#"),
687            "file.md"
688        );
689
690        // Test both query and fragment (query comes first, per RFC 3986)
691        assert_eq!(
692            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md?raw=true#section"),
693            "file.md"
694        );
695
696        // Test no query or fragment
697        assert_eq!(
698            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png"),
699            "file.png"
700        );
701
702        // Test with path
703        assert_eq!(
704            MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true"),
705            "path/to/image.png"
706        );
707        assert_eq!(
708            MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true#anchor"),
709            "path/to/image.png"
710        );
711
712        // Edge case: fragment before query (non-standard but possible)
713        assert_eq!(
714            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section?query"),
715            "file.md"
716        );
717    }
718
719    #[test]
720    fn test_url_decode() {
721        // Simple space encoding
722        assert_eq!(
723            MD057ExistingRelativeLinks::url_decode("penguin%20with%20space.jpg"),
724            "penguin with space.jpg"
725        );
726
727        // Path with encoded spaces
728        assert_eq!(
729            MD057ExistingRelativeLinks::url_decode("assets/my%20file%20name.png"),
730            "assets/my file name.png"
731        );
732
733        // Multiple encoded characters
734        assert_eq!(
735            MD057ExistingRelativeLinks::url_decode("hello%20world%21.md"),
736            "hello world!.md"
737        );
738
739        // Lowercase hex
740        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2f%2e%2e"), "/..");
741
742        // Uppercase hex
743        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2F%2E%2E"), "/..");
744
745        // Mixed case hex
746        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2f%2E%2e"), "/..");
747
748        // No encoding - return as-is
749        assert_eq!(
750            MD057ExistingRelativeLinks::url_decode("normal-file.md"),
751            "normal-file.md"
752        );
753
754        // Incomplete percent encoding - leave as-is
755        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%2.txt"), "file%2.txt");
756
757        // Percent at end - leave as-is
758        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%"), "file%");
759
760        // Invalid hex digits - leave as-is
761        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%GG.txt"), "file%GG.txt");
762
763        // Plus sign (should NOT be decoded - that's form encoding, not URL encoding)
764        assert_eq!(MD057ExistingRelativeLinks::url_decode("file+name.txt"), "file+name.txt");
765
766        // Empty string
767        assert_eq!(MD057ExistingRelativeLinks::url_decode(""), "");
768
769        // UTF-8 multi-byte characters (é = C3 A9 in UTF-8)
770        assert_eq!(MD057ExistingRelativeLinks::url_decode("caf%C3%A9.md"), "café.md");
771
772        // Multiple consecutive encoded characters
773        assert_eq!(MD057ExistingRelativeLinks::url_decode("%20%20%20"), "   ");
774
775        // Encoded path separators
776        assert_eq!(
777            MD057ExistingRelativeLinks::url_decode("path%2Fto%2Ffile.md"),
778            "path/to/file.md"
779        );
780
781        // Mixed encoded and non-encoded
782        assert_eq!(
783            MD057ExistingRelativeLinks::url_decode("hello%20world/foo%20bar.md"),
784            "hello world/foo bar.md"
785        );
786
787        // Special characters that are commonly encoded
788        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%5B1%5D.md"), "file[1].md");
789
790        // Percent at position that looks like encoding but isn't valid
791        assert_eq!(MD057ExistingRelativeLinks::url_decode("100%pure.md"), "100%pure.md");
792    }
793
794    #[test]
795    fn test_url_encoded_filenames() {
796        // Create a temporary directory for test files
797        let temp_dir = tempdir().unwrap();
798        let base_path = temp_dir.path();
799
800        // Create a file with spaces in the name
801        let file_with_spaces = base_path.join("penguin with space.jpg");
802        File::create(&file_with_spaces)
803            .unwrap()
804            .write_all(b"image data")
805            .unwrap();
806
807        // Create a subdirectory with spaces
808        let subdir = base_path.join("my images");
809        std::fs::create_dir(&subdir).unwrap();
810        let nested_file = subdir.join("photo 1.png");
811        File::create(&nested_file).unwrap().write_all(b"photo data").unwrap();
812
813        // Test content with URL-encoded links
814        let content = r#"
815# Test Document with URL-Encoded Links
816
817![Penguin](penguin%20with%20space.jpg)
818![Photo](my%20images/photo%201.png)
819![Missing](missing%20file.jpg)
820"#;
821
822        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
823
824        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
825        let result = rule.check(&ctx).unwrap();
826
827        // Should only have one warning for the missing file
828        assert_eq!(
829            result.len(),
830            1,
831            "Should only warn about missing%20file.jpg. Got: {result:?}"
832        );
833        assert!(
834            result[0].message.contains("missing%20file.jpg"),
835            "Warning should mention the URL-encoded filename"
836        );
837    }
838
839    #[test]
840    fn test_external_urls() {
841        let rule = MD057ExistingRelativeLinks::new();
842
843        // Common web protocols
844        assert!(rule.is_external_url("https://example.com"));
845        assert!(rule.is_external_url("http://example.com"));
846        assert!(rule.is_external_url("ftp://example.com"));
847        assert!(rule.is_external_url("www.example.com"));
848        assert!(rule.is_external_url("example.com"));
849
850        // Special URI schemes (issue #192)
851        assert!(rule.is_external_url("file:///path/to/file"));
852        assert!(rule.is_external_url("smb://server/share"));
853        assert!(rule.is_external_url("macappstores://apps.apple.com/"));
854        assert!(rule.is_external_url("mailto:user@example.com"));
855        assert!(rule.is_external_url("tel:+1234567890"));
856        assert!(rule.is_external_url("data:text/plain;base64,SGVsbG8="));
857        assert!(rule.is_external_url("javascript:void(0)"));
858        assert!(rule.is_external_url("ssh://git@github.com/repo"));
859        assert!(rule.is_external_url("git://github.com/repo.git"));
860
861        // Template variables should be skipped (not checked as relative links)
862        assert!(rule.is_external_url("{{URL}}")); // Handlebars/Mustache
863        assert!(rule.is_external_url("{{#URL}}")); // Handlebars block helper
864        assert!(rule.is_external_url("{{> partial}}")); // Handlebars partial
865        assert!(rule.is_external_url("{{ variable }}")); // Mustache with spaces
866        assert!(rule.is_external_url("{{% include %}}")); // Jinja2/Hugo shortcode
867        assert!(rule.is_external_url("{{")); // Even partial matches (regex edge case)
868
869        // Absolute web URL paths should be skipped (not validated)
870        // These are typically routes for published documentation sites
871        assert!(rule.is_external_url("/api/v1/users"));
872        assert!(rule.is_external_url("/blog/2024/release.html"));
873        assert!(rule.is_external_url("/react/hooks/use-state.html"));
874        assert!(rule.is_external_url("/pkg/runtime"));
875        assert!(rule.is_external_url("/doc/go1compat"));
876        assert!(rule.is_external_url("/index.html"));
877        assert!(rule.is_external_url("/assets/logo.png"));
878
879        // Framework path aliases should be skipped (resolved by build tools)
880        // Tilde prefix (common in Vite, Nuxt, Astro for project root)
881        assert!(rule.is_external_url("~/assets/image.png"));
882        assert!(rule.is_external_url("~/components/Button.vue"));
883        assert!(rule.is_external_url("~assets/logo.svg")); // Nuxt style without /
884
885        // @ prefix (common in Vue, webpack, Vite aliases)
886        assert!(rule.is_external_url("@/components/Header.vue"));
887        assert!(rule.is_external_url("@images/photo.jpg"));
888        assert!(rule.is_external_url("@assets/styles.css"));
889
890        // Relative paths should NOT be external (should be validated)
891        assert!(!rule.is_external_url("./relative/path.md"));
892        assert!(!rule.is_external_url("relative/path.md"));
893        assert!(!rule.is_external_url("../parent/path.md"));
894    }
895
896    #[test]
897    fn test_framework_path_aliases() {
898        // Create a temporary directory for test files
899        let temp_dir = tempdir().unwrap();
900        let base_path = temp_dir.path();
901
902        // Test content with framework path aliases (should all be skipped)
903        let content = r#"
904# Framework Path Aliases
905
906![Image 1](~/assets/penguin.jpg)
907![Image 2](~assets/logo.svg)
908![Image 3](@images/photo.jpg)
909![Image 4](@/components/icon.svg)
910[Link](@/pages/about.md)
911
912This is a [real missing link](missing.md) that should be flagged.
913"#;
914
915        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
916
917        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
918        let result = rule.check(&ctx).unwrap();
919
920        // Should only have one warning for the real missing link
921        assert_eq!(
922            result.len(),
923            1,
924            "Should only warn about missing.md, not framework aliases. Got: {result:?}"
925        );
926        assert!(
927            result[0].message.contains("missing.md"),
928            "Warning should be for missing.md"
929        );
930    }
931
932    #[test]
933    fn test_url_decode_security_path_traversal() {
934        // Ensure URL decoding doesn't enable path traversal attacks
935        // The decoded path is still validated against the base path
936        let temp_dir = tempdir().unwrap();
937        let base_path = temp_dir.path();
938
939        // Create a file in the temp directory
940        let file_in_base = base_path.join("safe.md");
941        File::create(&file_in_base).unwrap().write_all(b"# Safe").unwrap();
942
943        // Test with encoded path traversal attempt
944        // Use a path that definitely won't exist on any platform (not /etc/passwd which exists on Linux)
945        // %2F = /, so ..%2F..%2Fnonexistent%2Ffile = ../../nonexistent/file
946        // %252F = %2F (double encoded), so ..%252F..%252F = ..%2F..%2F (literal, won't decode to ..)
947        let content = r#"
948[Traversal attempt](..%2F..%2Fnonexistent_dir_12345%2Fmissing.md)
949[Double encoded](..%252F..%252Fnonexistent%252Ffile.md)
950[Safe link](safe.md)
951"#;
952
953        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
954
955        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
956        let result = rule.check(&ctx).unwrap();
957
958        // The traversal attempts should still be flagged as missing
959        // (they don't exist relative to base_path after decoding)
960        assert_eq!(
961            result.len(),
962            2,
963            "Should have warnings for traversal attempts. Got: {result:?}"
964        );
965    }
966
967    #[test]
968    fn test_url_encoded_utf8_filenames() {
969        // Test with actual UTF-8 encoded filenames
970        let temp_dir = tempdir().unwrap();
971        let base_path = temp_dir.path();
972
973        // Create files with unicode names
974        let cafe_file = base_path.join("café.md");
975        File::create(&cafe_file).unwrap().write_all(b"# Cafe").unwrap();
976
977        let content = r#"
978[Café link](caf%C3%A9.md)
979[Missing unicode](r%C3%A9sum%C3%A9.md)
980"#;
981
982        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
983
984        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
985        let result = rule.check(&ctx).unwrap();
986
987        // Should only warn about the missing file
988        assert_eq!(
989            result.len(),
990            1,
991            "Should only warn about missing résumé.md. Got: {result:?}"
992        );
993        assert!(
994            result[0].message.contains("r%C3%A9sum%C3%A9.md"),
995            "Warning should mention the URL-encoded filename"
996        );
997    }
998
999    #[test]
1000    fn test_no_warnings_without_base_path() {
1001        let rule = MD057ExistingRelativeLinks::new();
1002        let content = "[Link](missing.md)";
1003
1004        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1005        let result = rule.check(&ctx).unwrap();
1006        assert!(result.is_empty(), "Should have no warnings without base path");
1007    }
1008
1009    #[test]
1010    fn test_existing_and_missing_links() {
1011        // Create a temporary directory for test files
1012        let temp_dir = tempdir().unwrap();
1013        let base_path = temp_dir.path();
1014
1015        // Create an existing file
1016        let exists_path = base_path.join("exists.md");
1017        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
1018
1019        // Verify the file exists
1020        assert!(exists_path.exists(), "exists.md should exist for this test");
1021
1022        // Create test content with both existing and missing links
1023        let content = r#"
1024# Test Document
1025
1026[Valid Link](exists.md)
1027[Invalid Link](missing.md)
1028[External Link](https://example.com)
1029[Media Link](image.jpg)
1030        "#;
1031
1032        // Initialize rule with the base path (default: check all files including media)
1033        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1034
1035        // Test the rule
1036        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1037        let result = rule.check(&ctx).unwrap();
1038
1039        // Should have two warnings: missing.md and image.jpg (both don't exist)
1040        assert_eq!(result.len(), 2);
1041        let messages: Vec<_> = result.iter().map(|w| w.message.as_str()).collect();
1042        assert!(messages.iter().any(|m| m.contains("missing.md")));
1043        assert!(messages.iter().any(|m| m.contains("image.jpg")));
1044    }
1045
1046    #[test]
1047    fn test_angle_bracket_links() {
1048        // Create a temporary directory for test files
1049        let temp_dir = tempdir().unwrap();
1050        let base_path = temp_dir.path();
1051
1052        // Create an existing file
1053        let exists_path = base_path.join("exists.md");
1054        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
1055
1056        // Create test content with angle bracket links
1057        let content = r#"
1058# Test Document
1059
1060[Valid Link](<exists.md>)
1061[Invalid Link](<missing.md>)
1062[External Link](<https://example.com>)
1063    "#;
1064
1065        // Test with default settings
1066        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1067
1068        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1069        let result = rule.check(&ctx).unwrap();
1070
1071        // Should have one warning for missing.md
1072        assert_eq!(result.len(), 1, "Should have exactly one warning");
1073        assert!(
1074            result[0].message.contains("missing.md"),
1075            "Warning should mention missing.md"
1076        );
1077    }
1078
1079    #[test]
1080    fn test_angle_bracket_links_with_parens() {
1081        // Create a temporary directory for test files
1082        let temp_dir = tempdir().unwrap();
1083        let base_path = temp_dir.path();
1084
1085        // Create directory structure with parentheses in path
1086        let app_dir = base_path.join("app");
1087        std::fs::create_dir(&app_dir).unwrap();
1088        let upload_dir = app_dir.join("(upload)");
1089        std::fs::create_dir(&upload_dir).unwrap();
1090        let page_file = upload_dir.join("page.tsx");
1091        File::create(&page_file)
1092            .unwrap()
1093            .write_all(b"export default function Page() {}")
1094            .unwrap();
1095
1096        // Create test content with angle bracket links containing parentheses
1097        let content = r#"
1098# Test Document with Paths Containing Parens
1099
1100[Upload Page](<app/(upload)/page.tsx>)
1101[Unix pipe](<https://en.wikipedia.org/wiki/Pipeline_(Unix)>)
1102[Missing](<app/(missing)/file.md>)
1103"#;
1104
1105        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1106
1107        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1108        let result = rule.check(&ctx).unwrap();
1109
1110        // Should only have one warning for the missing file
1111        assert_eq!(
1112            result.len(),
1113            1,
1114            "Should have exactly one warning for missing file. Got: {result:?}"
1115        );
1116        assert!(
1117            result[0].message.contains("app/(missing)/file.md"),
1118            "Warning should mention app/(missing)/file.md"
1119        );
1120    }
1121
1122    #[test]
1123    fn test_all_file_types_checked() {
1124        // Create a temporary directory for test files
1125        let temp_dir = tempdir().unwrap();
1126        let base_path = temp_dir.path();
1127
1128        // Create a test with various file types - all should be checked
1129        let content = r#"
1130[Image Link](image.jpg)
1131[Video Link](video.mp4)
1132[Markdown Link](document.md)
1133[PDF Link](file.pdf)
1134"#;
1135
1136        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1137
1138        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1139        let result = rule.check(&ctx).unwrap();
1140
1141        // Should warn about all missing files regardless of extension
1142        assert_eq!(result.len(), 4, "Should have warnings for all missing files");
1143    }
1144
1145    #[test]
1146    fn test_code_span_detection() {
1147        let rule = MD057ExistingRelativeLinks::new();
1148
1149        // Create a temporary directory for test files
1150        let temp_dir = tempdir().unwrap();
1151        let base_path = temp_dir.path();
1152
1153        let rule = rule.with_path(base_path);
1154
1155        // Test with document structure
1156        let content = "This is a [link](nonexistent.md) and `[not a link](not-checked.md)` in code.";
1157
1158        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1159        let result = rule.check(&ctx).unwrap();
1160
1161        // Should only find the real link, not the one in code
1162        assert_eq!(result.len(), 1, "Should only flag the real link");
1163        assert!(result[0].message.contains("nonexistent.md"));
1164    }
1165
1166    #[test]
1167    fn test_inline_code_spans() {
1168        // Create a temporary directory for test files
1169        let temp_dir = tempdir().unwrap();
1170        let base_path = temp_dir.path();
1171
1172        // Create test content with links in inline code spans
1173        let content = r#"
1174# Test Document
1175
1176This is a normal link: [Link](missing.md)
1177
1178This is a code span with a link: `[Link](another-missing.md)`
1179
1180Some more text with `inline code [Link](yet-another-missing.md) embedded`.
1181
1182    "#;
1183
1184        // Initialize rule with the base path
1185        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1186
1187        // Test the rule
1188        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1189        let result = rule.check(&ctx).unwrap();
1190
1191        // Should only have warning for the normal link, not for links in code spans
1192        assert_eq!(result.len(), 1, "Should have exactly one warning");
1193        assert!(
1194            result[0].message.contains("missing.md"),
1195            "Warning should be for missing.md"
1196        );
1197        assert!(
1198            !result.iter().any(|w| w.message.contains("another-missing.md")),
1199            "Should not warn about link in code span"
1200        );
1201        assert!(
1202            !result.iter().any(|w| w.message.contains("yet-another-missing.md")),
1203            "Should not warn about link in inline code"
1204        );
1205    }
1206
1207    #[test]
1208    fn test_extensionless_link_resolution() {
1209        // Create a temporary directory for test files
1210        let temp_dir = tempdir().unwrap();
1211        let base_path = temp_dir.path();
1212
1213        // Create a markdown file WITHOUT specifying .md extension in the link
1214        let page_path = base_path.join("page.md");
1215        File::create(&page_path).unwrap().write_all(b"# Page").unwrap();
1216
1217        // Test content with extensionless link that should resolve to page.md
1218        let content = r#"
1219# Test Document
1220
1221[Link without extension](page)
1222[Link with extension](page.md)
1223[Missing link](nonexistent)
1224"#;
1225
1226        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1227
1228        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1229        let result = rule.check(&ctx).unwrap();
1230
1231        // Should only have warning for nonexistent link
1232        // Both "page" and "page.md" should resolve to the same file
1233        assert_eq!(result.len(), 1, "Should only warn about nonexistent link");
1234        assert!(
1235            result[0].message.contains("nonexistent"),
1236            "Warning should be for 'nonexistent' not 'page'"
1237        );
1238    }
1239
1240    // Cross-file validation tests
1241    #[test]
1242    fn test_cross_file_scope() {
1243        let rule = MD057ExistingRelativeLinks::new();
1244        assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
1245    }
1246
1247    #[test]
1248    fn test_contribute_to_index_extracts_markdown_links() {
1249        let rule = MD057ExistingRelativeLinks::new();
1250        let content = r#"
1251# Document
1252
1253[Link to docs](./docs/guide.md)
1254[Link with fragment](./other.md#section)
1255[External link](https://example.com)
1256[Image link](image.png)
1257[Media file](video.mp4)
1258"#;
1259
1260        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1261        let mut index = FileIndex::new();
1262        rule.contribute_to_index(&ctx, &mut index);
1263
1264        // Should only index markdown file links
1265        assert_eq!(index.cross_file_links.len(), 2);
1266
1267        // Check first link
1268        assert_eq!(index.cross_file_links[0].target_path, "./docs/guide.md");
1269        assert_eq!(index.cross_file_links[0].fragment, "");
1270
1271        // Check second link (with fragment)
1272        assert_eq!(index.cross_file_links[1].target_path, "./other.md");
1273        assert_eq!(index.cross_file_links[1].fragment, "section");
1274    }
1275
1276    #[test]
1277    fn test_contribute_to_index_skips_external_and_anchors() {
1278        let rule = MD057ExistingRelativeLinks::new();
1279        let content = r#"
1280# Document
1281
1282[External](https://example.com)
1283[Another external](http://example.org)
1284[Fragment only](#section)
1285[FTP link](ftp://files.example.com)
1286[Mail link](mailto:test@example.com)
1287[WWW link](www.example.com)
1288"#;
1289
1290        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1291        let mut index = FileIndex::new();
1292        rule.contribute_to_index(&ctx, &mut index);
1293
1294        // Should not index any of these
1295        assert_eq!(index.cross_file_links.len(), 0);
1296    }
1297
1298    #[test]
1299    fn test_cross_file_check_valid_link() {
1300        use crate::workspace_index::WorkspaceIndex;
1301
1302        let rule = MD057ExistingRelativeLinks::new();
1303
1304        // Create a workspace index with the target file
1305        let mut workspace_index = WorkspaceIndex::new();
1306        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());
1307
1308        // Create file index with a link to an existing file
1309        let mut file_index = FileIndex::new();
1310        file_index.add_cross_file_link(CrossFileLinkIndex {
1311            target_path: "guide.md".to_string(),
1312            fragment: "".to_string(),
1313            line: 5,
1314            column: 1,
1315        });
1316
1317        // Run cross-file check from docs/index.md
1318        let warnings = rule
1319            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1320            .unwrap();
1321
1322        // Should have no warnings - file exists
1323        assert!(warnings.is_empty());
1324    }
1325
1326    #[test]
1327    fn test_cross_file_check_missing_link() {
1328        use crate::workspace_index::WorkspaceIndex;
1329
1330        let rule = MD057ExistingRelativeLinks::new();
1331
1332        // Create an empty workspace index
1333        let workspace_index = WorkspaceIndex::new();
1334
1335        // Create file index with a link to a missing file
1336        let mut file_index = FileIndex::new();
1337        file_index.add_cross_file_link(CrossFileLinkIndex {
1338            target_path: "missing.md".to_string(),
1339            fragment: "".to_string(),
1340            line: 5,
1341            column: 1,
1342        });
1343
1344        // Run cross-file check
1345        let warnings = rule
1346            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1347            .unwrap();
1348
1349        // Should have one warning for the missing file
1350        assert_eq!(warnings.len(), 1);
1351        assert!(warnings[0].message.contains("missing.md"));
1352        assert!(warnings[0].message.contains("does not exist"));
1353    }
1354
1355    #[test]
1356    fn test_cross_file_check_parent_path() {
1357        use crate::workspace_index::WorkspaceIndex;
1358
1359        let rule = MD057ExistingRelativeLinks::new();
1360
1361        // Create a workspace index with the target file at the root
1362        let mut workspace_index = WorkspaceIndex::new();
1363        workspace_index.insert_file(PathBuf::from("readme.md"), FileIndex::new());
1364
1365        // Create file index with a parent path link
1366        let mut file_index = FileIndex::new();
1367        file_index.add_cross_file_link(CrossFileLinkIndex {
1368            target_path: "../readme.md".to_string(),
1369            fragment: "".to_string(),
1370            line: 5,
1371            column: 1,
1372        });
1373
1374        // Run cross-file check from docs/guide.md
1375        let warnings = rule
1376            .cross_file_check(Path::new("docs/guide.md"), &file_index, &workspace_index)
1377            .unwrap();
1378
1379        // Should have no warnings - file exists at normalized path
1380        assert!(warnings.is_empty());
1381    }
1382
1383    #[test]
1384    fn test_normalize_path_function() {
1385        // Test simple cases
1386        assert_eq!(
1387            normalize_path(Path::new("docs/guide.md")),
1388            PathBuf::from("docs/guide.md")
1389        );
1390
1391        // Test current directory removal
1392        assert_eq!(
1393            normalize_path(Path::new("./docs/guide.md")),
1394            PathBuf::from("docs/guide.md")
1395        );
1396
1397        // Test parent directory resolution
1398        assert_eq!(
1399            normalize_path(Path::new("docs/sub/../guide.md")),
1400            PathBuf::from("docs/guide.md")
1401        );
1402
1403        // Test multiple parent directories
1404        assert_eq!(normalize_path(Path::new("a/b/c/../../d.md")), PathBuf::from("a/d.md"));
1405    }
1406
1407    #[test]
1408    fn test_resolve_absolute_link() {
1409        // Create a temporary directory structure for testing
1410        let temp_dir = tempdir().expect("Failed to create temp dir");
1411        let root = temp_dir.path();
1412
1413        // Create root-level file
1414        let contributing = root.join("CONTRIBUTING.md");
1415        File::create(&contributing).expect("Failed to create CONTRIBUTING.md");
1416
1417        // Create nested directory with a markdown file
1418        let docs = root.join("docs");
1419        std::fs::create_dir(&docs).expect("Failed to create docs dir");
1420        let readme = docs.join("README.md");
1421        File::create(&readme).expect("Failed to create README.md");
1422
1423        // Test: absolute link from nested file to root file
1424        // From docs/README.md, link to /CONTRIBUTING.md should resolve to root/CONTRIBUTING.md
1425        let resolved = resolve_absolute_link(&readme, "CONTRIBUTING.md");
1426        assert!(resolved.exists(), "Should find CONTRIBUTING.md at workspace root");
1427        assert_eq!(resolved, contributing);
1428
1429        // Test: file that doesn't exist should not resolve (returns path relative to file's dir)
1430        let nonexistent = resolve_absolute_link(&readme, "NONEXISTENT.md");
1431        assert!(!nonexistent.exists(), "Should not find nonexistent file");
1432    }
1433}
rumdl_lib/rules/md057_existing_relative_links.rs

rumdl_lib/rules/
md057_existing_relative_links.rs