rumdl_lib/rules/
md057_existing_relative_links.rs

1//!
2//! Rule MD057: Existing relative links
3//!
4//! See [docs/md057.md](../../docs/md057.md) for full documentation, configuration, and examples.
5
6use crate::rule::{CrossFileScope, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::element_cache::ElementCache;
8use crate::workspace_index::{FileIndex, extract_cross_file_links};
9use regex::Regex;
10use std::collections::HashMap;
11use std::env;
12use std::path::{Path, PathBuf};
13use std::sync::LazyLock;
14use std::sync::{Arc, Mutex};
15
16mod md057_config;
17use md057_config::MD057Config;
18
// Shared memo of filesystem existence answers, keyed by the exact path that was probed.
// The mutex makes it usable from several threads; entries live until the cache is reset.
static FILE_EXISTENCE_CACHE: LazyLock<Arc<Mutex<HashMap<PathBuf, bool>>>> = LazyLock::new(Default::default);
22
23// Reset the file existence cache (typically between rule runs)
24fn reset_file_existence_cache() {
25    if let Ok(mut cache) = FILE_EXISTENCE_CACHE.lock() {
26        cache.clear();
27    }
28}
29
30// Check if a file exists with caching
31fn file_exists_with_cache(path: &Path) -> bool {
32    match FILE_EXISTENCE_CACHE.lock() {
33        Ok(mut cache) => *cache.entry(path.to_path_buf()).or_insert_with(|| path.exists()),
34        Err(_) => path.exists(), // Fallback to uncached check on mutex poison
35    }
36}
37
38/// Check if a file exists, also trying markdown extensions for extensionless links.
39/// This supports wiki-style links like `[Link](page)` that resolve to `page.md`.
40fn file_exists_or_markdown_extension(path: &Path) -> bool {
41    // First, check exact path
42    if file_exists_with_cache(path) {
43        return true;
44    }
45
46    // If the path has no extension, try adding markdown extensions
47    if path.extension().is_none() {
48        for ext in MARKDOWN_EXTENSIONS {
49            // MARKDOWN_EXTENSIONS includes the dot, e.g., ".md"
50            let path_with_ext = path.with_extension(&ext[1..]);
51            if file_exists_with_cache(&path_with_ext) {
52                return true;
53            }
54        }
55    }
56
57    false
58}
59
60// Regex to match the start of a link - simplified for performance
61static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());
62
63/// Regex to extract the URL from an angle-bracketed markdown link
64/// Format: `](<URL>)` or `](<URL> "title")`
65/// This handles URLs with parentheses like `](<path/(with)/parens.md>)`
66static URL_EXTRACT_ANGLE_BRACKET_REGEX: LazyLock<Regex> =
67    LazyLock::new(|| Regex::new(r#"\]\(\s*<([^>]+)>(#[^\)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());
68
69/// Regex to extract the URL from a normal markdown link (without angle brackets)
70/// Format: `](URL)` or `](URL "title")`
71static URL_EXTRACT_REGEX: LazyLock<Regex> =
72    LazyLock::new(|| Regex::new("\\]\\(\\s*([^>\\)\\s#]+)(#[^)\\s]*)?\\s*(?:\"[^\"]*\")?\\s*\\)").unwrap());
73
74/// Regex to detect URLs with explicit schemes (should not be checked as relative links)
75/// Matches: scheme:// or scheme: (per RFC 3986)
76/// This covers http, https, ftp, file, smb, mailto, tel, data, macappstores, etc.
77static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
78    LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());
79
// Working directory, looked up once on first use. Falls back to "." when it cannot be read.
static CURRENT_DIR: LazyLock<PathBuf> = LazyLock::new(|| match env::current_dir() {
    Ok(dir) => dir,
    Err(_) => PathBuf::from("."),
});
82
/// Map a single ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) to its value in `0..=15`.
/// Every other byte yields `None`.
#[inline]
fn hex_digit_to_value(byte: u8) -> Option<u8> {
    char::from(byte)
        .to_digit(16)
        .and_then(|digit| u8::try_from(digit).ok())
}
94
/// File extensions treated as markdown sources.
///
/// Each entry includes its leading dot, so it can be appended to a file stem directly.
#[rustfmt::skip]
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md", ".markdown", ".mdx",
    ".mkd", ".mkdn", ".mdown", ".mdwn",
    ".qmd", ".rmd",
];
107
/// Rule MD057: relative links must point at files or directories that exist.
///
/// The only state is an optional, explicitly configured base directory. It sits behind
/// `Arc<Mutex<..>>`, so clones of a rule share (and observe changes to) the same slot.
#[derive(Clone, Debug, Default)]
pub struct MD057ExistingRelativeLinks {
    /// Directory that relative links are resolved against; `None` until one is set explicitly.
    base_path: Arc<Mutex<Option<PathBuf>>>,
}
114
115impl MD057ExistingRelativeLinks {
116    /// Create a new instance with default settings
117    pub fn new() -> Self {
118        Self::default()
119    }
120
121    /// Set the base path for resolving relative links
122    pub fn with_path<P: AsRef<Path>>(self, path: P) -> Self {
123        let path = path.as_ref();
124        let dir_path = if path.is_file() {
125            path.parent().map(|p| p.to_path_buf())
126        } else {
127            Some(path.to_path_buf())
128        };
129
130        if let Ok(mut guard) = self.base_path.lock() {
131            *guard = dir_path;
132        }
133        self
134    }
135
136    #[allow(unused_variables)]
137    pub fn from_config_struct(config: MD057Config) -> Self {
138        Self::default()
139    }
140
141    /// Check if a URL is external or should be skipped for validation.
142    ///
143    /// Returns `true` (skip validation) for:
144    /// - URLs with protocols: `https://`, `http://`, `ftp://`, `mailto:`, etc.
145    /// - Bare domains: `www.example.com`, `example.com`
146    /// - Email addresses: `user@example.com` (without `mailto:`)
147    /// - Template variables: `{{URL}}`, `{{% include %}}`
148    /// - Absolute web URL paths: `/api/docs`, `/blog/post.html`
149    ///
150    /// Returns `false` (validate) for:
151    /// - Relative filesystem paths: `./file.md`, `../parent/file.md`, `file.md`
152    #[inline]
153    fn is_external_url(&self, url: &str) -> bool {
154        if url.is_empty() {
155            return false;
156        }
157
158        // Quick checks for common external URL patterns
159        if PROTOCOL_DOMAIN_REGEX.is_match(url) || url.starts_with("www.") {
160            return true;
161        }
162
163        // Skip template variables (Handlebars/Mustache/Jinja2 syntax)
164        // Examples: {{URL}}, {{#URL}}, {{> partial}}, {{% include %}}, {{ variable }}
165        if url.starts_with("{{") || url.starts_with("{%") {
166            return true;
167        }
168
169        // Simple check: if URL contains @, it's almost certainly an email address
170        // File paths with @ are extremely rare, so this is a safe heuristic
171        if url.contains('@') {
172            return true; // It's an email address, skip it
173        }
174
175        // Bare domain check (e.g., "example.com")
176        // Note: We intentionally DON'T skip all TLDs like .org, .net, etc.
177        // Links like [text](nodejs.org/path) without a protocol are broken -
178        // they'll be treated as relative paths by markdown renderers.
179        // Flagging them helps users find missing protocols.
180        // We only skip .com as a minimal safety net for the most common case.
181        if url.ends_with(".com") {
182            return true;
183        }
184
185        // Absolute URL paths (e.g., /api/docs, /blog/post.html) are treated as web paths
186        // and skipped. These are typically routes for published documentation sites,
187        // not filesystem paths that can be validated locally.
188        if url.starts_with('/') {
189            return true;
190        }
191
192        // Framework path aliases (resolved by build tools like Vite, webpack, etc.)
193        // These are not filesystem paths but module/asset aliases
194        // Examples: ~/assets/image.png, @images/photo.jpg, @/components/Button.vue
195        if url.starts_with('~') || url.starts_with('@') {
196            return true;
197        }
198
199        // All other cases (relative paths, etc.) are not external
200        false
201    }
202
203    /// Check if the URL is a fragment-only link (internal document link)
204    #[inline]
205    fn is_fragment_only_link(&self, url: &str) -> bool {
206        url.starts_with('#')
207    }
208
209    /// Decode URL percent-encoded sequences in a path.
210    /// Converts `%20` to space, `%2F` to `/`, etc.
211    /// Returns the original string if decoding fails or produces invalid UTF-8.
212    fn url_decode(path: &str) -> String {
213        // Quick check: if no percent sign, return as-is
214        if !path.contains('%') {
215            return path.to_string();
216        }
217
218        let bytes = path.as_bytes();
219        let mut result = Vec::with_capacity(bytes.len());
220        let mut i = 0;
221
222        while i < bytes.len() {
223            if bytes[i] == b'%' && i + 2 < bytes.len() {
224                // Try to parse the two hex digits following %
225                let hex1 = bytes[i + 1];
226                let hex2 = bytes[i + 2];
227                if let (Some(d1), Some(d2)) = (hex_digit_to_value(hex1), hex_digit_to_value(hex2)) {
228                    result.push(d1 * 16 + d2);
229                    i += 3;
230                    continue;
231                }
232            }
233            result.push(bytes[i]);
234            i += 1;
235        }
236
237        // Convert to UTF-8, falling back to original if invalid
238        String::from_utf8(result).unwrap_or_else(|_| path.to_string())
239    }
240
241    /// Strip query parameters and fragments from a URL for file existence checking.
242    /// URLs like `path/to/image.png?raw=true` or `file.md#section` should check
243    /// for `path/to/image.png` or `file.md` respectively.
244    ///
245    /// Note: In standard URLs, query parameters (`?`) come before fragments (`#`),
246    /// so we check for `?` first. If a URL has both, only the query is stripped here
247    /// (fragments are handled separately by the regex in `contribute_to_index`).
248    fn strip_query_and_fragment(url: &str) -> &str {
249        // Find the first occurrence of '?' or '#', whichever comes first
250        // This handles both standard URLs (? before #) and edge cases (# before ?)
251        let query_pos = url.find('?');
252        let fragment_pos = url.find('#');
253
254        match (query_pos, fragment_pos) {
255            (Some(q), Some(f)) => {
256                // Both exist - strip at whichever comes first
257                &url[..q.min(f)]
258            }
259            (Some(q), None) => &url[..q],
260            (None, Some(f)) => &url[..f],
261            (None, None) => url,
262        }
263    }
264
265    /// Resolve a relative link against a provided base path
266    fn resolve_link_path_with_base(link: &str, base_path: &Path) -> PathBuf {
267        base_path.join(link)
268    }
269}
270
271impl Rule for MD057ExistingRelativeLinks {
272    fn name(&self) -> &'static str {
273        "MD057"
274    }
275
276    fn description(&self) -> &'static str {
277        "Relative links should point to existing files"
278    }
279
280    fn category(&self) -> RuleCategory {
281        RuleCategory::Link
282    }
283
284    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
285        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
286    }
287
288    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
289        let content = ctx.content;
290
291        // Early returns for performance
292        if content.is_empty() || !content.contains('[') {
293            return Ok(Vec::new());
294        }
295
296        // Quick check for any potential links before expensive operations
297        if !content.contains("](") {
298            return Ok(Vec::new());
299        }
300
301        // Reset the file existence cache for a fresh run
302        reset_file_existence_cache();
303
304        let mut warnings = Vec::new();
305
306        // Determine base path for resolving relative links
307        // ALWAYS compute from ctx.source_file for each file - do not reuse cached base_path
308        // This ensures each file resolves links relative to its own directory
309        let base_path: Option<PathBuf> = {
310            // First check if base_path was explicitly set via with_path() (for tests)
311            let explicit_base = self.base_path.lock().ok().and_then(|g| g.clone());
312            if explicit_base.is_some() {
313                explicit_base
314            } else if let Some(ref source_file) = ctx.source_file {
315                // Resolve symlinks to get the actual file location
316                // This ensures relative links are resolved from the target's directory,
317                // not the symlink's directory
318                let resolved_file = source_file.canonicalize().unwrap_or_else(|_| source_file.clone());
319                resolved_file
320                    .parent()
321                    .map(|p| p.to_path_buf())
322                    .or_else(|| Some(CURRENT_DIR.clone()))
323            } else {
324                // No source file available - cannot validate relative links
325                None
326            }
327        };
328
329        // If we still don't have a base path, we can't validate relative links
330        let Some(base_path) = base_path else {
331            return Ok(warnings);
332        };
333
334        // Use LintContext links instead of expensive regex parsing
335        if !ctx.links.is_empty() {
336            // Use LineIndex for correct position calculation across all line ending types
337            let line_index = &ctx.line_index;
338
339            // Create element cache once for all links
340            let element_cache = ElementCache::new(content);
341
342            // Pre-collect lines to avoid repeated line iteration
343            let lines: Vec<&str> = content.lines().collect();
344
345            // Track which lines we've already processed to avoid duplicates
346            // (ctx.links may have multiple entries for the same line, especially with malformed markdown)
347            let mut processed_lines = std::collections::HashSet::new();
348
349            for link in &ctx.links {
350                let line_idx = link.line - 1;
351                if line_idx >= lines.len() {
352                    continue;
353                }
354
355                // Skip if we've already processed this line
356                if !processed_lines.insert(line_idx) {
357                    continue;
358                }
359
360                let line = lines[line_idx];
361
362                // Quick check for link pattern in this line
363                if !line.contains("](") {
364                    continue;
365                }
366
367                // Find all links in this line using optimized regex
368                for link_match in LINK_START_REGEX.find_iter(line) {
369                    let start_pos = link_match.start();
370                    let end_pos = link_match.end();
371
372                    // Calculate absolute position using LineIndex
373                    let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
374                    let absolute_start_pos = line_start_byte + start_pos;
375
376                    // Skip if this link is in a code span
377                    if element_cache.is_in_code_span(absolute_start_pos) {
378                        continue;
379                    }
380
381                    // Find the URL part after the link text
382                    // Try angle-bracket regex first (handles URLs with parens like `<path/(with)/parens.md>`)
383                    // Then fall back to normal URL regex
384                    let caps_and_url = URL_EXTRACT_ANGLE_BRACKET_REGEX
385                        .captures_at(line, end_pos - 1)
386                        .and_then(|caps| caps.get(1).map(|g| (caps, g)))
387                        .or_else(|| {
388                            URL_EXTRACT_REGEX
389                                .captures_at(line, end_pos - 1)
390                                .and_then(|caps| caps.get(1).map(|g| (caps, g)))
391                        });
392
393                    if let Some((_caps, url_group)) = caps_and_url {
394                        let url = url_group.as_str().trim();
395
396                        // Skip empty URLs
397                        if url.is_empty() {
398                            continue;
399                        }
400
401                        // Skip rustdoc intra-doc links (backtick-wrapped URLs)
402                        // These are Rust API references, not file paths
403                        // Example: [`f32::is_subnormal`], [`Vec::push`]
404                        if url.starts_with('`') && url.ends_with('`') {
405                            continue;
406                        }
407
408                        // Skip external URLs, absolute paths, and fragment-only links
409                        if self.is_external_url(url) || self.is_fragment_only_link(url) {
410                            continue;
411                        }
412
413                        // Strip query parameters and fragments before checking file existence
414                        let file_path = Self::strip_query_and_fragment(url);
415
416                        // URL-decode the path to handle percent-encoded characters
417                        let decoded_path = Self::url_decode(file_path);
418
419                        // Resolve the relative link against the base path
420                        let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
421
422                        // Check if the file exists, also trying markdown extensions for extensionless links
423                        if file_exists_or_markdown_extension(&resolved_path) {
424                            continue; // File exists, no warning needed
425                        }
426
427                        // For .html/.htm links, check if a corresponding markdown source exists
428                        let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
429                            && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
430                            && let (Some(stem), Some(parent)) = (
431                                resolved_path.file_stem().and_then(|s| s.to_str()),
432                                resolved_path.parent(),
433                            ) {
434                            MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
435                                let source_path = parent.join(format!("{stem}{md_ext}"));
436                                file_exists_with_cache(&source_path)
437                            })
438                        } else {
439                            false
440                        };
441
442                        if has_md_source {
443                            continue; // Markdown source exists, link is valid
444                        }
445
446                        // File doesn't exist and no source file found
447                        // Use actual URL position from regex capture group
448                        // Note: capture group positions are absolute within the line string
449                        let url_start = url_group.start();
450                        let url_end = url_group.end();
451
452                        warnings.push(LintWarning {
453                            rule_name: Some(self.name().to_string()),
454                            line: link.line,
455                            column: url_start + 1, // 1-indexed
456                            end_line: link.line,
457                            end_column: url_end + 1, // 1-indexed
458                            message: format!("Relative link '{url}' does not exist"),
459                            severity: Severity::Error,
460                            fix: None,
461                        });
462                    }
463                }
464            }
465        }
466
467        // Also process images - they have URLs already parsed
468        for image in &ctx.images {
469            let url = image.url.as_ref();
470
471            // Skip empty URLs
472            if url.is_empty() {
473                continue;
474            }
475
476            // Skip external URLs, absolute paths, and fragment-only links
477            if self.is_external_url(url) || self.is_fragment_only_link(url) {
478                continue;
479            }
480
481            // Strip query parameters and fragments before checking file existence
482            let file_path = Self::strip_query_and_fragment(url);
483
484            // URL-decode the path to handle percent-encoded characters
485            let decoded_path = Self::url_decode(file_path);
486
487            // Resolve the relative link against the base path
488            let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
489
490            // Check if the file exists, also trying markdown extensions for extensionless links
491            if file_exists_or_markdown_extension(&resolved_path) {
492                continue; // File exists, no warning needed
493            }
494
495            // For .html/.htm links, check if a corresponding markdown source exists
496            let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
497                && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
498                && let (Some(stem), Some(parent)) = (
499                    resolved_path.file_stem().and_then(|s| s.to_str()),
500                    resolved_path.parent(),
501                ) {
502                MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
503                    let source_path = parent.join(format!("{stem}{md_ext}"));
504                    file_exists_with_cache(&source_path)
505                })
506            } else {
507                false
508            };
509
510            if has_md_source {
511                continue; // Markdown source exists, link is valid
512            }
513
514            // File doesn't exist and no source file found
515            // Images already have correct position from parser
516            warnings.push(LintWarning {
517                rule_name: Some(self.name().to_string()),
518                line: image.line,
519                column: image.start_col + 1,
520                end_line: image.line,
521                end_column: image.start_col + 1 + url.len(),
522                message: format!("Relative link '{url}' does not exist"),
523                severity: Severity::Error,
524                fix: None,
525            });
526        }
527
528        Ok(warnings)
529    }
530
531    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
532        Ok(ctx.content.to_string())
533    }
534
535    fn as_any(&self) -> &dyn std::any::Any {
536        self
537    }
538
539    fn default_config_section(&self) -> Option<(String, toml::Value)> {
540        // No configurable options for this rule
541        None
542    }
543
544    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
545    where
546        Self: Sized,
547    {
548        let rule_config = crate::rule_config_serde::load_rule_config::<MD057Config>(config);
549        Box::new(Self::from_config_struct(rule_config))
550    }
551
552    fn cross_file_scope(&self) -> CrossFileScope {
553        CrossFileScope::Workspace
554    }
555
556    fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, index: &mut FileIndex) {
557        // Use the shared utility for cross-file link extraction
558        // This ensures consistent position tracking between CLI and LSP
559        for link in extract_cross_file_links(ctx) {
560            index.add_cross_file_link(link);
561        }
562    }
563
564    fn cross_file_check(
565        &self,
566        file_path: &Path,
567        file_index: &FileIndex,
568        workspace_index: &crate::workspace_index::WorkspaceIndex,
569    ) -> LintResult {
570        let mut warnings = Vec::new();
571
572        // Get the directory containing this file for resolving relative links
573        let file_dir = file_path.parent();
574
575        for cross_link in &file_index.cross_file_links {
576            // URL-decode the path for filesystem operations
577            // The stored path is URL-encoded (e.g., "%F0%9F%91%A4" for emoji 👤)
578            let decoded_target = Self::url_decode(&cross_link.target_path);
579
580            // Skip absolute/protocol-relative paths (web paths, not filesystem paths)
581            if decoded_target.starts_with('/') {
582                continue;
583            }
584
585            // Resolve relative path
586            let target_path = if let Some(dir) = file_dir {
587                dir.join(&decoded_target)
588            } else {
589                Path::new(&decoded_target).to_path_buf()
590            };
591
592            // Normalize the path (handle .., ., etc.)
593            let target_path = normalize_path(&target_path);
594
595            // Check if the target file exists, also trying markdown extensions for extensionless links
596            let file_exists =
597                workspace_index.contains_file(&target_path) || file_exists_or_markdown_extension(&target_path);
598
599            if !file_exists {
600                // For .html/.htm links, check if a corresponding markdown source exists
601                // This handles doc sites (mdBook, etc.) where .md is compiled to .html
602                let has_md_source = if let Some(ext) = target_path.extension().and_then(|e| e.to_str())
603                    && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
604                    && let (Some(stem), Some(parent)) =
605                        (target_path.file_stem().and_then(|s| s.to_str()), target_path.parent())
606                {
607                    MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
608                        let source_path = parent.join(format!("{stem}{md_ext}"));
609                        workspace_index.contains_file(&source_path) || source_path.exists()
610                    })
611                } else {
612                    false
613                };
614
615                if !has_md_source {
616                    warnings.push(LintWarning {
617                        rule_name: Some(self.name().to_string()),
618                        line: cross_link.line,
619                        column: cross_link.column,
620                        end_line: cross_link.line,
621                        end_column: cross_link.column + cross_link.target_path.len(),
622                        message: format!("Relative link '{}' does not exist", cross_link.target_path),
623                        severity: Severity::Error,
624                        fix: None,
625                    });
626                }
627            }
628        }
629
630        Ok(warnings)
631    }
632}
633
/// Lexically normalise a path by resolving `.` and `..` components, without touching the
/// filesystem.
///
/// - `.` components are dropped.
/// - `..` removes the preceding ordinary component (`a/b/../c` becomes `a/c`).
/// - `..` directly under the root stays at the root (`/../x` becomes `/x`).
/// - `..` with nothing ordinary to remove is kept, so a relative path that climbs above its
///   starting point stays correct (`../x` stays `../x`, `a/../../b` becomes `../b`).
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut components: Vec<Component<'_>> = Vec::new();

    for component in path.components() {
        match component {
            Component::CurDir => {
                // Contributes nothing.
            }
            Component::ParentDir => match components.last() {
                // Step out of the directory that was just entered.
                Some(Component::Normal(_)) => {
                    components.pop();
                }
                // The parent of the root is the root itself.
                Some(Component::RootDir) => {}
                // Nothing to cancel against (empty, another `..`, or a bare prefix): keep it.
                _ => components.push(component),
            },
            _ => components.push(component),
        }
    }

    components.iter().collect()
}
657
658#[cfg(test)]
659mod tests {
660    use super::*;
661    use crate::workspace_index::CrossFileLinkIndex;
662    use std::fs::File;
663    use std::io::Write;
664    use tempfile::tempdir;
665
666    #[test]
667    fn test_strip_query_and_fragment() {
668        // Test query parameter stripping
669        assert_eq!(
670            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true"),
671            "file.png"
672        );
673        assert_eq!(
674            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true&version=1"),
675            "file.png"
676        );
677        assert_eq!(
678            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?"),
679            "file.png"
680        );
681
682        // Test fragment stripping
683        assert_eq!(
684            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section"),
685            "file.md"
686        );
687        assert_eq!(
688            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#"),
689            "file.md"
690        );
691
692        // Test both query and fragment (query comes first, per RFC 3986)
693        assert_eq!(
694            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md?raw=true#section"),
695            "file.md"
696        );
697
698        // Test no query or fragment
699        assert_eq!(
700            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png"),
701            "file.png"
702        );
703
704        // Test with path
705        assert_eq!(
706            MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true"),
707            "path/to/image.png"
708        );
709        assert_eq!(
710            MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true#anchor"),
711            "path/to/image.png"
712        );
713
714        // Edge case: fragment before query (non-standard but possible)
715        assert_eq!(
716            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section?query"),
717            "file.md"
718        );
719    }
720
721    #[test]
722    fn test_url_decode() {
723        // Simple space encoding
724        assert_eq!(
725            MD057ExistingRelativeLinks::url_decode("penguin%20with%20space.jpg"),
726            "penguin with space.jpg"
727        );
728
729        // Path with encoded spaces
730        assert_eq!(
731            MD057ExistingRelativeLinks::url_decode("assets/my%20file%20name.png"),
732            "assets/my file name.png"
733        );
734
735        // Multiple encoded characters
736        assert_eq!(
737            MD057ExistingRelativeLinks::url_decode("hello%20world%21.md"),
738            "hello world!.md"
739        );
740
741        // Lowercase hex
742        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2f%2e%2e"), "/..");
743
744        // Uppercase hex
745        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2F%2E%2E"), "/..");
746
747        // Mixed case hex
748        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2f%2E%2e"), "/..");
749
750        // No encoding - return as-is
751        assert_eq!(
752            MD057ExistingRelativeLinks::url_decode("normal-file.md"),
753            "normal-file.md"
754        );
755
756        // Incomplete percent encoding - leave as-is
757        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%2.txt"), "file%2.txt");
758
759        // Percent at end - leave as-is
760        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%"), "file%");
761
762        // Invalid hex digits - leave as-is
763        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%GG.txt"), "file%GG.txt");
764
765        // Plus sign (should NOT be decoded - that's form encoding, not URL encoding)
766        assert_eq!(MD057ExistingRelativeLinks::url_decode("file+name.txt"), "file+name.txt");
767
768        // Empty string
769        assert_eq!(MD057ExistingRelativeLinks::url_decode(""), "");
770
771        // UTF-8 multi-byte characters (é = C3 A9 in UTF-8)
772        assert_eq!(MD057ExistingRelativeLinks::url_decode("caf%C3%A9.md"), "café.md");
773
774        // Multiple consecutive encoded characters
775        assert_eq!(MD057ExistingRelativeLinks::url_decode("%20%20%20"), "   ");
776
777        // Encoded path separators
778        assert_eq!(
779            MD057ExistingRelativeLinks::url_decode("path%2Fto%2Ffile.md"),
780            "path/to/file.md"
781        );
782
783        // Mixed encoded and non-encoded
784        assert_eq!(
785            MD057ExistingRelativeLinks::url_decode("hello%20world/foo%20bar.md"),
786            "hello world/foo bar.md"
787        );
788
789        // Special characters that are commonly encoded
790        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%5B1%5D.md"), "file[1].md");
791
792        // Percent at position that looks like encoding but isn't valid
793        assert_eq!(MD057ExistingRelativeLinks::url_decode("100%pure.md"), "100%pure.md");
794    }
795
796    #[test]
797    fn test_url_encoded_filenames() {
798        // Create a temporary directory for test files
799        let temp_dir = tempdir().unwrap();
800        let base_path = temp_dir.path();
801
802        // Create a file with spaces in the name
803        let file_with_spaces = base_path.join("penguin with space.jpg");
804        File::create(&file_with_spaces)
805            .unwrap()
806            .write_all(b"image data")
807            .unwrap();
808
809        // Create a subdirectory with spaces
810        let subdir = base_path.join("my images");
811        std::fs::create_dir(&subdir).unwrap();
812        let nested_file = subdir.join("photo 1.png");
813        File::create(&nested_file).unwrap().write_all(b"photo data").unwrap();
814
815        // Test content with URL-encoded links
816        let content = r#"
817# Test Document with URL-Encoded Links
818
819![Penguin](penguin%20with%20space.jpg)
820![Photo](my%20images/photo%201.png)
821![Missing](missing%20file.jpg)
822"#;
823
824        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
825
826        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
827        let result = rule.check(&ctx).unwrap();
828
829        // Should only have one warning for the missing file
830        assert_eq!(
831            result.len(),
832            1,
833            "Should only warn about missing%20file.jpg. Got: {result:?}"
834        );
835        assert!(
836            result[0].message.contains("missing%20file.jpg"),
837            "Warning should mention the URL-encoded filename"
838        );
839    }
840
841    #[test]
842    fn test_external_urls() {
843        let rule = MD057ExistingRelativeLinks::new();
844
845        // Common web protocols
846        assert!(rule.is_external_url("https://example.com"));
847        assert!(rule.is_external_url("http://example.com"));
848        assert!(rule.is_external_url("ftp://example.com"));
849        assert!(rule.is_external_url("www.example.com"));
850        assert!(rule.is_external_url("example.com"));
851
852        // Special URI schemes
853        assert!(rule.is_external_url("file:///path/to/file"));
854        assert!(rule.is_external_url("smb://server/share"));
855        assert!(rule.is_external_url("macappstores://apps.apple.com/"));
856        assert!(rule.is_external_url("mailto:user@example.com"));
857        assert!(rule.is_external_url("tel:+1234567890"));
858        assert!(rule.is_external_url("data:text/plain;base64,SGVsbG8="));
859        assert!(rule.is_external_url("javascript:void(0)"));
860        assert!(rule.is_external_url("ssh://git@github.com/repo"));
861        assert!(rule.is_external_url("git://github.com/repo.git"));
862
863        // Email addresses without mailto: protocol
864        // These are clearly not file links and should be skipped
865        assert!(rule.is_external_url("user@example.com"));
866        assert!(rule.is_external_url("steering@kubernetes.io"));
867        assert!(rule.is_external_url("john.doe+filter@company.co.uk"));
868        assert!(rule.is_external_url("user_name@sub.domain.com"));
869        assert!(rule.is_external_url("firstname.lastname+tag@really.long.domain.example.org"));
870
871        // Template variables should be skipped (not checked as relative links)
872        assert!(rule.is_external_url("{{URL}}")); // Handlebars/Mustache
873        assert!(rule.is_external_url("{{#URL}}")); // Handlebars block helper
874        assert!(rule.is_external_url("{{> partial}}")); // Handlebars partial
875        assert!(rule.is_external_url("{{ variable }}")); // Mustache with spaces
876        assert!(rule.is_external_url("{{% include %}}")); // Jinja2/Hugo shortcode
877        assert!(rule.is_external_url("{{")); // Even partial matches (regex edge case)
878
879        // Absolute web URL paths should be skipped (not validated)
880        // These are typically routes for published documentation sites
881        assert!(rule.is_external_url("/api/v1/users"));
882        assert!(rule.is_external_url("/blog/2024/release.html"));
883        assert!(rule.is_external_url("/react/hooks/use-state.html"));
884        assert!(rule.is_external_url("/pkg/runtime"));
885        assert!(rule.is_external_url("/doc/go1compat"));
886        assert!(rule.is_external_url("/index.html"));
887        assert!(rule.is_external_url("/assets/logo.png"));
888
889        // Framework path aliases should be skipped (resolved by build tools)
890        // Tilde prefix (common in Vite, Nuxt, Astro for project root)
891        assert!(rule.is_external_url("~/assets/image.png"));
892        assert!(rule.is_external_url("~/components/Button.vue"));
893        assert!(rule.is_external_url("~assets/logo.svg")); // Nuxt style without /
894
895        // @ prefix (common in Vue, webpack, Vite aliases)
896        assert!(rule.is_external_url("@/components/Header.vue"));
897        assert!(rule.is_external_url("@images/photo.jpg"));
898        assert!(rule.is_external_url("@assets/styles.css"));
899
900        // Relative paths should NOT be external (should be validated)
901        assert!(!rule.is_external_url("./relative/path.md"));
902        assert!(!rule.is_external_url("relative/path.md"));
903        assert!(!rule.is_external_url("../parent/path.md"));
904    }
905
906    #[test]
907    fn test_framework_path_aliases() {
908        // Create a temporary directory for test files
909        let temp_dir = tempdir().unwrap();
910        let base_path = temp_dir.path();
911
912        // Test content with framework path aliases (should all be skipped)
913        let content = r#"
914# Framework Path Aliases
915
916![Image 1](~/assets/penguin.jpg)
917![Image 2](~assets/logo.svg)
918![Image 3](@images/photo.jpg)
919![Image 4](@/components/icon.svg)
920[Link](@/pages/about.md)
921
922This is a [real missing link](missing.md) that should be flagged.
923"#;
924
925        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
926
927        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
928        let result = rule.check(&ctx).unwrap();
929
930        // Should only have one warning for the real missing link
931        assert_eq!(
932            result.len(),
933            1,
934            "Should only warn about missing.md, not framework aliases. Got: {result:?}"
935        );
936        assert!(
937            result[0].message.contains("missing.md"),
938            "Warning should be for missing.md"
939        );
940    }
941
942    #[test]
943    fn test_url_decode_security_path_traversal() {
944        // Ensure URL decoding doesn't enable path traversal attacks
945        // The decoded path is still validated against the base path
946        let temp_dir = tempdir().unwrap();
947        let base_path = temp_dir.path();
948
949        // Create a file in the temp directory
950        let file_in_base = base_path.join("safe.md");
951        File::create(&file_in_base).unwrap().write_all(b"# Safe").unwrap();
952
953        // Test with encoded path traversal attempt
954        // Use a path that definitely won't exist on any platform (not /etc/passwd which exists on Linux)
955        // %2F = /, so ..%2F..%2Fnonexistent%2Ffile = ../../nonexistent/file
956        // %252F = %2F (double encoded), so ..%252F..%252F = ..%2F..%2F (literal, won't decode to ..)
957        let content = r#"
958[Traversal attempt](..%2F..%2Fnonexistent_dir_12345%2Fmissing.md)
959[Double encoded](..%252F..%252Fnonexistent%252Ffile.md)
960[Safe link](safe.md)
961"#;
962
963        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
964
965        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
966        let result = rule.check(&ctx).unwrap();
967
968        // The traversal attempts should still be flagged as missing
969        // (they don't exist relative to base_path after decoding)
970        assert_eq!(
971            result.len(),
972            2,
973            "Should have warnings for traversal attempts. Got: {result:?}"
974        );
975    }
976
977    #[test]
978    fn test_url_encoded_utf8_filenames() {
979        // Test with actual UTF-8 encoded filenames
980        let temp_dir = tempdir().unwrap();
981        let base_path = temp_dir.path();
982
983        // Create files with unicode names
984        let cafe_file = base_path.join("café.md");
985        File::create(&cafe_file).unwrap().write_all(b"# Cafe").unwrap();
986
987        let content = r#"
988[Café link](caf%C3%A9.md)
989[Missing unicode](r%C3%A9sum%C3%A9.md)
990"#;
991
992        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
993
994        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
995        let result = rule.check(&ctx).unwrap();
996
997        // Should only warn about the missing file
998        assert_eq!(
999            result.len(),
1000            1,
1001            "Should only warn about missing résumé.md. Got: {result:?}"
1002        );
1003        assert!(
1004            result[0].message.contains("r%C3%A9sum%C3%A9.md"),
1005            "Warning should mention the URL-encoded filename"
1006        );
1007    }
1008
1009    #[test]
1010    fn test_url_encoded_emoji_filenames() {
1011        // URL-encoded emoji paths should be correctly resolved
1012        // 👤 = U+1F464 = F0 9F 91 A4 in UTF-8
1013        let temp_dir = tempdir().unwrap();
1014        let base_path = temp_dir.path();
1015
1016        // Create directory with emoji in name: 👤 Personal
1017        let emoji_dir = base_path.join("👤 Personal");
1018        std::fs::create_dir(&emoji_dir).unwrap();
1019
1020        // Create file in that directory: TV Shows.md
1021        let file_path = emoji_dir.join("TV Shows.md");
1022        File::create(&file_path)
1023            .unwrap()
1024            .write_all(b"# TV Shows\n\nContent here.")
1025            .unwrap();
1026
1027        // Test content with URL-encoded emoji link
1028        // %F0%9F%91%A4 = 👤, %20 = space
1029        let content = r#"
1030# Test Document
1031
1032[TV Shows](./%F0%9F%91%A4%20Personal/TV%20Shows.md)
1033[Missing](./%F0%9F%91%A4%20Personal/Missing.md)
1034"#;
1035
1036        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1037
1038        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1039        let result = rule.check(&ctx).unwrap();
1040
1041        // Should only warn about the missing file, not the valid emoji path
1042        assert_eq!(result.len(), 1, "Should only warn about missing file. Got: {result:?}");
1043        assert!(
1044            result[0].message.contains("Missing.md"),
1045            "Warning should be for Missing.md, got: {}",
1046            result[0].message
1047        );
1048    }
1049
1050    #[test]
1051    fn test_no_warnings_without_base_path() {
1052        let rule = MD057ExistingRelativeLinks::new();
1053        let content = "[Link](missing.md)";
1054
1055        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1056        let result = rule.check(&ctx).unwrap();
1057        assert!(result.is_empty(), "Should have no warnings without base path");
1058    }
1059
1060    #[test]
1061    fn test_existing_and_missing_links() {
1062        // Create a temporary directory for test files
1063        let temp_dir = tempdir().unwrap();
1064        let base_path = temp_dir.path();
1065
1066        // Create an existing file
1067        let exists_path = base_path.join("exists.md");
1068        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
1069
1070        // Verify the file exists
1071        assert!(exists_path.exists(), "exists.md should exist for this test");
1072
1073        // Create test content with both existing and missing links
1074        let content = r#"
1075# Test Document
1076
1077[Valid Link](exists.md)
1078[Invalid Link](missing.md)
1079[External Link](https://example.com)
1080[Media Link](image.jpg)
1081        "#;
1082
1083        // Initialize rule with the base path (default: check all files including media)
1084        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1085
1086        // Test the rule
1087        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1088        let result = rule.check(&ctx).unwrap();
1089
1090        // Should have two warnings: missing.md and image.jpg (both don't exist)
1091        assert_eq!(result.len(), 2);
1092        let messages: Vec<_> = result.iter().map(|w| w.message.as_str()).collect();
1093        assert!(messages.iter().any(|m| m.contains("missing.md")));
1094        assert!(messages.iter().any(|m| m.contains("image.jpg")));
1095    }
1096
1097    #[test]
1098    fn test_angle_bracket_links() {
1099        // Create a temporary directory for test files
1100        let temp_dir = tempdir().unwrap();
1101        let base_path = temp_dir.path();
1102
1103        // Create an existing file
1104        let exists_path = base_path.join("exists.md");
1105        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
1106
1107        // Create test content with angle bracket links
1108        let content = r#"
1109# Test Document
1110
1111[Valid Link](<exists.md>)
1112[Invalid Link](<missing.md>)
1113[External Link](<https://example.com>)
1114    "#;
1115
1116        // Test with default settings
1117        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1118
1119        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1120        let result = rule.check(&ctx).unwrap();
1121
1122        // Should have one warning for missing.md
1123        assert_eq!(result.len(), 1, "Should have exactly one warning");
1124        assert!(
1125            result[0].message.contains("missing.md"),
1126            "Warning should mention missing.md"
1127        );
1128    }
1129
1130    #[test]
1131    fn test_angle_bracket_links_with_parens() {
1132        // Create a temporary directory for test files
1133        let temp_dir = tempdir().unwrap();
1134        let base_path = temp_dir.path();
1135
1136        // Create directory structure with parentheses in path
1137        let app_dir = base_path.join("app");
1138        std::fs::create_dir(&app_dir).unwrap();
1139        let upload_dir = app_dir.join("(upload)");
1140        std::fs::create_dir(&upload_dir).unwrap();
1141        let page_file = upload_dir.join("page.tsx");
1142        File::create(&page_file)
1143            .unwrap()
1144            .write_all(b"export default function Page() {}")
1145            .unwrap();
1146
1147        // Create test content with angle bracket links containing parentheses
1148        let content = r#"
1149# Test Document with Paths Containing Parens
1150
1151[Upload Page](<app/(upload)/page.tsx>)
1152[Unix pipe](<https://en.wikipedia.org/wiki/Pipeline_(Unix)>)
1153[Missing](<app/(missing)/file.md>)
1154"#;
1155
1156        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1157
1158        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1159        let result = rule.check(&ctx).unwrap();
1160
1161        // Should only have one warning for the missing file
1162        assert_eq!(
1163            result.len(),
1164            1,
1165            "Should have exactly one warning for missing file. Got: {result:?}"
1166        );
1167        assert!(
1168            result[0].message.contains("app/(missing)/file.md"),
1169            "Warning should mention app/(missing)/file.md"
1170        );
1171    }
1172
1173    #[test]
1174    fn test_all_file_types_checked() {
1175        // Create a temporary directory for test files
1176        let temp_dir = tempdir().unwrap();
1177        let base_path = temp_dir.path();
1178
1179        // Create a test with various file types - all should be checked
1180        let content = r#"
1181[Image Link](image.jpg)
1182[Video Link](video.mp4)
1183[Markdown Link](document.md)
1184[PDF Link](file.pdf)
1185"#;
1186
1187        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1188
1189        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1190        let result = rule.check(&ctx).unwrap();
1191
1192        // Should warn about all missing files regardless of extension
1193        assert_eq!(result.len(), 4, "Should have warnings for all missing files");
1194    }
1195
1196    #[test]
1197    fn test_code_span_detection() {
1198        let rule = MD057ExistingRelativeLinks::new();
1199
1200        // Create a temporary directory for test files
1201        let temp_dir = tempdir().unwrap();
1202        let base_path = temp_dir.path();
1203
1204        let rule = rule.with_path(base_path);
1205
1206        // Test with document structure
1207        let content = "This is a [link](nonexistent.md) and `[not a link](not-checked.md)` in code.";
1208
1209        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1210        let result = rule.check(&ctx).unwrap();
1211
1212        // Should only find the real link, not the one in code
1213        assert_eq!(result.len(), 1, "Should only flag the real link");
1214        assert!(result[0].message.contains("nonexistent.md"));
1215    }
1216
1217    #[test]
1218    fn test_inline_code_spans() {
1219        // Create a temporary directory for test files
1220        let temp_dir = tempdir().unwrap();
1221        let base_path = temp_dir.path();
1222
1223        // Create test content with links in inline code spans
1224        let content = r#"
1225# Test Document
1226
1227This is a normal link: [Link](missing.md)
1228
1229This is a code span with a link: `[Link](another-missing.md)`
1230
1231Some more text with `inline code [Link](yet-another-missing.md) embedded`.
1232
1233    "#;
1234
1235        // Initialize rule with the base path
1236        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1237
1238        // Test the rule
1239        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1240        let result = rule.check(&ctx).unwrap();
1241
1242        // Should only have warning for the normal link, not for links in code spans
1243        assert_eq!(result.len(), 1, "Should have exactly one warning");
1244        assert!(
1245            result[0].message.contains("missing.md"),
1246            "Warning should be for missing.md"
1247        );
1248        assert!(
1249            !result.iter().any(|w| w.message.contains("another-missing.md")),
1250            "Should not warn about link in code span"
1251        );
1252        assert!(
1253            !result.iter().any(|w| w.message.contains("yet-another-missing.md")),
1254            "Should not warn about link in inline code"
1255        );
1256    }
1257
1258    #[test]
1259    fn test_extensionless_link_resolution() {
1260        // Create a temporary directory for test files
1261        let temp_dir = tempdir().unwrap();
1262        let base_path = temp_dir.path();
1263
1264        // Create a markdown file WITHOUT specifying .md extension in the link
1265        let page_path = base_path.join("page.md");
1266        File::create(&page_path).unwrap().write_all(b"# Page").unwrap();
1267
1268        // Test content with extensionless link that should resolve to page.md
1269        let content = r#"
1270# Test Document
1271
1272[Link without extension](page)
1273[Link with extension](page.md)
1274[Missing link](nonexistent)
1275"#;
1276
1277        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1278
1279        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1280        let result = rule.check(&ctx).unwrap();
1281
1282        // Should only have warning for nonexistent link
1283        // Both "page" and "page.md" should resolve to the same file
1284        assert_eq!(result.len(), 1, "Should only warn about nonexistent link");
1285        assert!(
1286            result[0].message.contains("nonexistent"),
1287            "Warning should be for 'nonexistent' not 'page'"
1288        );
1289    }
1290
1291    // Cross-file validation tests
1292    #[test]
1293    fn test_cross_file_scope() {
1294        let rule = MD057ExistingRelativeLinks::new();
1295        assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
1296    }
1297
1298    #[test]
1299    fn test_contribute_to_index_extracts_markdown_links() {
1300        let rule = MD057ExistingRelativeLinks::new();
1301        let content = r#"
1302# Document
1303
1304[Link to docs](./docs/guide.md)
1305[Link with fragment](./other.md#section)
1306[External link](https://example.com)
1307[Image link](image.png)
1308[Media file](video.mp4)
1309"#;
1310
1311        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1312        let mut index = FileIndex::new();
1313        rule.contribute_to_index(&ctx, &mut index);
1314
1315        // Should only index markdown file links
1316        assert_eq!(index.cross_file_links.len(), 2);
1317
1318        // Check first link
1319        assert_eq!(index.cross_file_links[0].target_path, "./docs/guide.md");
1320        assert_eq!(index.cross_file_links[0].fragment, "");
1321
1322        // Check second link (with fragment)
1323        assert_eq!(index.cross_file_links[1].target_path, "./other.md");
1324        assert_eq!(index.cross_file_links[1].fragment, "section");
1325    }
1326
1327    #[test]
1328    fn test_contribute_to_index_skips_external_and_anchors() {
1329        let rule = MD057ExistingRelativeLinks::new();
1330        let content = r#"
1331# Document
1332
1333[External](https://example.com)
1334[Another external](http://example.org)
1335[Fragment only](#section)
1336[FTP link](ftp://files.example.com)
1337[Mail link](mailto:test@example.com)
1338[WWW link](www.example.com)
1339"#;
1340
1341        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1342        let mut index = FileIndex::new();
1343        rule.contribute_to_index(&ctx, &mut index);
1344
1345        // Should not index any of these
1346        assert_eq!(index.cross_file_links.len(), 0);
1347    }
1348
1349    #[test]
1350    fn test_cross_file_check_valid_link() {
1351        use crate::workspace_index::WorkspaceIndex;
1352
1353        let rule = MD057ExistingRelativeLinks::new();
1354
1355        // Create a workspace index with the target file
1356        let mut workspace_index = WorkspaceIndex::new();
1357        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());
1358
1359        // Create file index with a link to an existing file
1360        let mut file_index = FileIndex::new();
1361        file_index.add_cross_file_link(CrossFileLinkIndex {
1362            target_path: "guide.md".to_string(),
1363            fragment: "".to_string(),
1364            line: 5,
1365            column: 1,
1366        });
1367
1368        // Run cross-file check from docs/index.md
1369        let warnings = rule
1370            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1371            .unwrap();
1372
1373        // Should have no warnings - file exists
1374        assert!(warnings.is_empty());
1375    }
1376
1377    #[test]
1378    fn test_cross_file_check_missing_link() {
1379        use crate::workspace_index::WorkspaceIndex;
1380
1381        let rule = MD057ExistingRelativeLinks::new();
1382
1383        // Create an empty workspace index
1384        let workspace_index = WorkspaceIndex::new();
1385
1386        // Create file index with a link to a missing file
1387        let mut file_index = FileIndex::new();
1388        file_index.add_cross_file_link(CrossFileLinkIndex {
1389            target_path: "missing.md".to_string(),
1390            fragment: "".to_string(),
1391            line: 5,
1392            column: 1,
1393        });
1394
1395        // Run cross-file check
1396        let warnings = rule
1397            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1398            .unwrap();
1399
1400        // Should have one warning for the missing file
1401        assert_eq!(warnings.len(), 1);
1402        assert!(warnings[0].message.contains("missing.md"));
1403        assert!(warnings[0].message.contains("does not exist"));
1404    }
1405
1406    #[test]
1407    fn test_cross_file_check_parent_path() {
1408        use crate::workspace_index::WorkspaceIndex;
1409
1410        let rule = MD057ExistingRelativeLinks::new();
1411
1412        // Create a workspace index with the target file at the root
1413        let mut workspace_index = WorkspaceIndex::new();
1414        workspace_index.insert_file(PathBuf::from("readme.md"), FileIndex::new());
1415
1416        // Create file index with a parent path link
1417        let mut file_index = FileIndex::new();
1418        file_index.add_cross_file_link(CrossFileLinkIndex {
1419            target_path: "../readme.md".to_string(),
1420            fragment: "".to_string(),
1421            line: 5,
1422            column: 1,
1423        });
1424
1425        // Run cross-file check from docs/guide.md
1426        let warnings = rule
1427            .cross_file_check(Path::new("docs/guide.md"), &file_index, &workspace_index)
1428            .unwrap();
1429
1430        // Should have no warnings - file exists at normalized path
1431        assert!(warnings.is_empty());
1432    }
1433
1434    #[test]
1435    fn test_cross_file_check_html_link_with_md_source() {
1436        // Test that .html links are accepted when corresponding .md source exists
1437        // This supports mdBook and similar doc generators that compile .md to .html
1438        use crate::workspace_index::WorkspaceIndex;
1439
1440        let rule = MD057ExistingRelativeLinks::new();
1441
1442        // Create a workspace index with the .md source file
1443        let mut workspace_index = WorkspaceIndex::new();
1444        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());
1445
1446        // Create file index with an .html link (from another rule like MD051)
1447        let mut file_index = FileIndex::new();
1448        file_index.add_cross_file_link(CrossFileLinkIndex {
1449            target_path: "guide.html".to_string(),
1450            fragment: "section".to_string(),
1451            line: 10,
1452            column: 5,
1453        });
1454
1455        // Run cross-file check from docs/index.md
1456        let warnings = rule
1457            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1458            .unwrap();
1459
1460        // Should have no warnings - .md source exists for the .html link
1461        assert!(
1462            warnings.is_empty(),
1463            "Expected no warnings for .html link with .md source, got: {warnings:?}"
1464        );
1465    }
1466
1467    #[test]
1468    fn test_cross_file_check_html_link_without_source() {
1469        // Test that .html links without corresponding .md source ARE flagged
1470        use crate::workspace_index::WorkspaceIndex;
1471
1472        let rule = MD057ExistingRelativeLinks::new();
1473
1474        // Create an empty workspace index
1475        let workspace_index = WorkspaceIndex::new();
1476
1477        // Create file index with an .html link to a non-existent file
1478        let mut file_index = FileIndex::new();
1479        file_index.add_cross_file_link(CrossFileLinkIndex {
1480            target_path: "missing.html".to_string(),
1481            fragment: "".to_string(),
1482            line: 10,
1483            column: 5,
1484        });
1485
1486        // Run cross-file check from docs/index.md
1487        let warnings = rule
1488            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1489            .unwrap();
1490
1491        // Should have one warning - no .md source exists
1492        assert_eq!(warnings.len(), 1, "Expected 1 warning for .html link without source");
1493        assert!(warnings[0].message.contains("missing.html"));
1494    }
1495
1496    #[test]
1497    fn test_normalize_path_function() {
1498        // Test simple cases
1499        assert_eq!(
1500            normalize_path(Path::new("docs/guide.md")),
1501            PathBuf::from("docs/guide.md")
1502        );
1503
1504        // Test current directory removal
1505        assert_eq!(
1506            normalize_path(Path::new("./docs/guide.md")),
1507            PathBuf::from("docs/guide.md")
1508        );
1509
1510        // Test parent directory resolution
1511        assert_eq!(
1512            normalize_path(Path::new("docs/sub/../guide.md")),
1513            PathBuf::from("docs/guide.md")
1514        );
1515
1516        // Test multiple parent directories
1517        assert_eq!(normalize_path(Path::new("a/b/c/../../d.md")), PathBuf::from("a/d.md"));
1518    }
1519
1520    #[test]
1521    fn test_html_link_with_md_source() {
1522        // Links to .html files should pass if corresponding .md source exists
1523        let temp_dir = tempdir().unwrap();
1524        let base_path = temp_dir.path();
1525
1526        // Create guide.md (source file)
1527        let md_file = base_path.join("guide.md");
1528        File::create(&md_file).unwrap().write_all(b"# Guide").unwrap();
1529
1530        let content = r#"
1531[Read the guide](guide.html)
1532[Also here](getting-started.html)
1533"#;
1534
1535        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1536        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1537        let result = rule.check(&ctx).unwrap();
1538
1539        // guide.html passes (guide.md exists), getting-started.html fails
1540        assert_eq!(
1541            result.len(),
1542            1,
1543            "Should only warn about missing source. Got: {result:?}"
1544        );
1545        assert!(result[0].message.contains("getting-started.html"));
1546    }
1547
1548    #[test]
1549    fn test_htm_link_with_md_source() {
1550        // .htm extension should also check for markdown source
1551        let temp_dir = tempdir().unwrap();
1552        let base_path = temp_dir.path();
1553
1554        let md_file = base_path.join("page.md");
1555        File::create(&md_file).unwrap().write_all(b"# Page").unwrap();
1556
1557        let content = "[Page](page.htm)";
1558
1559        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1560        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1561        let result = rule.check(&ctx).unwrap();
1562
1563        assert!(
1564            result.is_empty(),
1565            "Should not warn when .md source exists for .htm link"
1566        );
1567    }
1568
1569    #[test]
1570    fn test_html_link_finds_various_markdown_extensions() {
1571        // Should find .mdx, .markdown, etc. as source files
1572        let temp_dir = tempdir().unwrap();
1573        let base_path = temp_dir.path();
1574
1575        File::create(base_path.join("doc.md")).unwrap();
1576        File::create(base_path.join("tutorial.mdx")).unwrap();
1577        File::create(base_path.join("guide.markdown")).unwrap();
1578
1579        let content = r#"
1580[Doc](doc.html)
1581[Tutorial](tutorial.html)
1582[Guide](guide.html)
1583"#;
1584
1585        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1586        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1587        let result = rule.check(&ctx).unwrap();
1588
1589        assert!(
1590            result.is_empty(),
1591            "Should find all markdown variants as source files. Got: {result:?}"
1592        );
1593    }
1594
1595    #[test]
1596    fn test_html_link_in_subdirectory() {
1597        // Should find markdown source in subdirectories
1598        let temp_dir = tempdir().unwrap();
1599        let base_path = temp_dir.path();
1600
1601        let docs_dir = base_path.join("docs");
1602        std::fs::create_dir(&docs_dir).unwrap();
1603        File::create(docs_dir.join("guide.md"))
1604            .unwrap()
1605            .write_all(b"# Guide")
1606            .unwrap();
1607
1608        let content = "[Guide](docs/guide.html)";
1609
1610        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1611        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1612        let result = rule.check(&ctx).unwrap();
1613
1614        assert!(result.is_empty(), "Should find markdown source in subdirectory");
1615    }
1616
1617    #[test]
1618    fn test_absolute_path_skipped_in_check() {
1619        // Test that absolute paths are skipped during link validation
1620        // This fixes the bug where /pkg/runtime was being flagged
1621        let temp_dir = tempdir().unwrap();
1622        let base_path = temp_dir.path();
1623
1624        let content = r#"
1625# Test Document
1626
1627[Go Runtime](/pkg/runtime)
1628[Go Runtime with Fragment](/pkg/runtime#section)
1629[API Docs](/api/v1/users)
1630[Blog Post](/blog/2024/release.html)
1631[React Hook](/react/hooks/use-state.html)
1632"#;
1633
1634        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1635        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1636        let result = rule.check(&ctx).unwrap();
1637
1638        // Should have NO warnings - all absolute paths should be skipped
1639        assert!(
1640            result.is_empty(),
1641            "Absolute paths should be skipped. Got warnings: {result:?}"
1642        );
1643    }
1644
1645    #[test]
1646    fn test_absolute_path_skipped_in_cross_file_check() {
1647        // Test that absolute paths are skipped in cross_file_check()
1648        use crate::workspace_index::WorkspaceIndex;
1649
1650        let rule = MD057ExistingRelativeLinks::new();
1651
1652        // Create an empty workspace index (no files exist)
1653        let workspace_index = WorkspaceIndex::new();
1654
1655        // Create file index with absolute path links (should be skipped)
1656        let mut file_index = FileIndex::new();
1657        file_index.add_cross_file_link(CrossFileLinkIndex {
1658            target_path: "/pkg/runtime.md".to_string(),
1659            fragment: "".to_string(),
1660            line: 5,
1661            column: 1,
1662        });
1663        file_index.add_cross_file_link(CrossFileLinkIndex {
1664            target_path: "/api/v1/users.md".to_string(),
1665            fragment: "section".to_string(),
1666            line: 10,
1667            column: 1,
1668        });
1669
1670        // Run cross-file check
1671        let warnings = rule
1672            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1673            .unwrap();
1674
1675        // Should have NO warnings - absolute paths should be skipped
1676        assert!(
1677            warnings.is_empty(),
1678            "Absolute paths should be skipped in cross_file_check. Got warnings: {warnings:?}"
1679        );
1680    }
1681
1682    #[test]
1683    fn test_protocol_relative_url_not_skipped() {
1684        // Test that protocol-relative URLs (//example.com) are NOT skipped as absolute paths
1685        // They should still be caught by is_external_url() though
1686        let temp_dir = tempdir().unwrap();
1687        let base_path = temp_dir.path();
1688
1689        let content = r#"
1690# Test Document
1691
1692[External](//example.com/page)
1693[Another](//cdn.example.com/asset.js)
1694"#;
1695
1696        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1697        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1698        let result = rule.check(&ctx).unwrap();
1699
1700        // Should have NO warnings - protocol-relative URLs are external and should be skipped
1701        assert!(
1702            result.is_empty(),
1703            "Protocol-relative URLs should be skipped. Got warnings: {result:?}"
1704        );
1705    }
1706
1707    #[test]
1708    fn test_email_addresses_skipped() {
1709        // Test that email addresses without mailto: are skipped
1710        // These are clearly not file links (the @ symbol is definitive)
1711        let temp_dir = tempdir().unwrap();
1712        let base_path = temp_dir.path();
1713
1714        let content = r#"
1715# Test Document
1716
1717[Contact](user@example.com)
1718[Steering](steering@kubernetes.io)
1719[Support](john.doe+filter@company.co.uk)
1720[User](user_name@sub.domain.com)
1721"#;
1722
1723        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1724        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1725        let result = rule.check(&ctx).unwrap();
1726
1727        // Should have NO warnings - email addresses are clearly not file links and should be skipped
1728        assert!(
1729            result.is_empty(),
1730            "Email addresses should be skipped. Got warnings: {result:?}"
1731        );
1732    }
1733
1734    #[test]
1735    fn test_email_addresses_vs_file_paths() {
1736        // Test that email addresses (anything with @) are skipped
1737        // Note: File paths with @ are extremely rare, so we treat anything with @ as an email
1738        let temp_dir = tempdir().unwrap();
1739        let base_path = temp_dir.path();
1740
1741        let content = r#"
1742# Test Document
1743
1744[Email](user@example.com)  <!-- Should be skipped (email) -->
1745[Email2](steering@kubernetes.io)  <!-- Should be skipped (email) -->
1746[Email3](user@file.md)  <!-- Should be skipped (has @, treated as email) -->
1747"#;
1748
1749        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1750        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1751        let result = rule.check(&ctx).unwrap();
1752
1753        // All should be skipped - anything with @ is treated as an email
1754        assert!(
1755            result.is_empty(),
1756            "All email addresses should be skipped. Got: {result:?}"
1757        );
1758    }
1759
1760    #[test]
1761    fn test_diagnostic_position_accuracy() {
1762        // Test that diagnostics point to the URL, not the link text
1763        let temp_dir = tempdir().unwrap();
1764        let base_path = temp_dir.path();
1765
1766        // Position markers:     0         1         2         3
1767        //                       0123456789012345678901234567890123456789
1768        let content = "prefix [text](missing.md) suffix";
1769        //             The URL "missing.md" starts at 0-indexed position 14
1770        //             which is 1-indexed column 15, and ends at 0-indexed 24 (1-indexed column 25)
1771
1772        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1773        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1774        let result = rule.check(&ctx).unwrap();
1775
1776        assert_eq!(result.len(), 1, "Should have exactly one warning");
1777        assert_eq!(result[0].line, 1, "Should be on line 1");
1778        assert_eq!(result[0].column, 15, "Should point to start of URL 'missing.md'");
1779        assert_eq!(result[0].end_column, 25, "Should point past end of URL 'missing.md'");
1780    }
1781
1782    #[test]
1783    fn test_diagnostic_position_angle_brackets() {
1784        // Test position accuracy with angle bracket links
1785        let temp_dir = tempdir().unwrap();
1786        let base_path = temp_dir.path();
1787
1788        // Position markers:     0         1         2
1789        //                       012345678901234567890
1790        let content = "[link](<missing.md>)";
1791        //             The URL "missing.md" starts at 0-indexed position 8 (1-indexed column 9)
1792
1793        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1794        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1795        let result = rule.check(&ctx).unwrap();
1796
1797        assert_eq!(result.len(), 1, "Should have exactly one warning");
1798        assert_eq!(result[0].line, 1, "Should be on line 1");
1799        assert_eq!(result[0].column, 9, "Should point to start of URL in angle brackets");
1800    }
1801
1802    #[test]
1803    fn test_diagnostic_position_multiline() {
1804        // Test that line numbers are correct for links on different lines
1805        let temp_dir = tempdir().unwrap();
1806        let base_path = temp_dir.path();
1807
1808        let content = r#"# Title
1809Some text on line 2
1810[link on line 3](missing1.md)
1811More text
1812[link on line 5](missing2.md)"#;
1813
1814        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1815        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1816        let result = rule.check(&ctx).unwrap();
1817
1818        assert_eq!(result.len(), 2, "Should have two warnings");
1819
1820        // First warning should be on line 3
1821        assert_eq!(result[0].line, 3, "First warning should be on line 3");
1822        assert!(result[0].message.contains("missing1.md"));
1823
1824        // Second warning should be on line 5
1825        assert_eq!(result[1].line, 5, "Second warning should be on line 5");
1826        assert!(result[1].message.contains("missing2.md"));
1827    }
1828
1829    #[test]
1830    fn test_diagnostic_position_with_spaces() {
1831        // Test position with URLs that have spaces in parentheses
1832        let temp_dir = tempdir().unwrap();
1833        let base_path = temp_dir.path();
1834
1835        let content = "[link]( missing.md )";
1836        //             0123456789012345678901
1837        //             0-indexed position 8 is 'm' in 'missing.md' (after space and paren)
1838        //             which is 1-indexed column 9
1839
1840        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1841        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1842        let result = rule.check(&ctx).unwrap();
1843
1844        assert_eq!(result.len(), 1, "Should have exactly one warning");
1845        // The regex captures the URL without leading/trailing spaces
1846        assert_eq!(result[0].column, 9, "Should point to URL after stripping spaces");
1847    }
1848
1849    #[test]
1850    fn test_diagnostic_position_image() {
1851        // Test that image diagnostics also have correct positions
1852        let temp_dir = tempdir().unwrap();
1853        let base_path = temp_dir.path();
1854
1855        let content = "![alt text](missing.jpg)";
1856
1857        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1858        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1859        let result = rule.check(&ctx).unwrap();
1860
1861        assert_eq!(result.len(), 1, "Should have exactly one warning for image");
1862        assert_eq!(result[0].line, 1);
1863        // Images use start_col from the parser, which should point to the URL
1864        assert!(result[0].column > 0, "Should have valid column position");
1865        assert!(result[0].message.contains("missing.jpg"));
1866    }
1867
1868    #[test]
1869    fn test_wikilinks_skipped() {
1870        // Wikilinks should not trigger MD057 warnings
1871        // They use a different linking system (e.g., Obsidian, wiki software)
1872        let temp_dir = tempdir().unwrap();
1873        let base_path = temp_dir.path();
1874
1875        let content = r#"# Test Document
1876
1877[[Microsoft#Windows OS]]
1878[[SomePage]]
1879[[Page With Spaces]]
1880[[path/to/page#section]]
1881[[page|Display Text]]
1882
1883This is a [real missing link](missing.md) that should be flagged.
1884"#;
1885
1886        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1887        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1888        let result = rule.check(&ctx).unwrap();
1889
1890        // Should only warn about the regular markdown link, not wikilinks
1891        assert_eq!(
1892            result.len(),
1893            1,
1894            "Should only warn about missing.md, not wikilinks. Got: {result:?}"
1895        );
1896        assert!(
1897            result[0].message.contains("missing.md"),
1898            "Warning should be for missing.md, not wikilinks"
1899        );
1900    }
1901
1902    #[test]
1903    fn test_wikilinks_not_added_to_index() {
1904        // Wikilinks should not be added to the cross-file link index
1905        let temp_dir = tempdir().unwrap();
1906        let base_path = temp_dir.path();
1907
1908        let content = r#"# Test Document
1909
1910[[Microsoft#Windows OS]]
1911[[SomePage#section]]
1912[Regular Link](other.md)
1913"#;
1914
1915        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1916        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1917
1918        let mut file_index = FileIndex::new();
1919        rule.contribute_to_index(&ctx, &mut file_index);
1920
1921        // Should only have the regular markdown link (if it's a markdown file)
1922        // Wikilinks should not be added
1923        let cross_file_links = &file_index.cross_file_links;
1924        assert_eq!(
1925            cross_file_links.len(),
1926            1,
1927            "Only regular markdown links should be indexed, not wikilinks. Got: {cross_file_links:?}"
1928        );
1929        assert_eq!(file_index.cross_file_links[0].target_path, "other.md");
1930    }
1931}
rumdl_lib/rules/md057_existing_relative_links.rs

rumdl_lib/rules/
md057_existing_relative_links.rs