Skip to main content

rumdl_lib/rules/
md057_existing_relative_links.rs

1//!
2//! Rule MD057: Existing relative links
3//!
4//! See [docs/md057.md](../../docs/md057.md) for full documentation, configuration, and examples.
5
6use crate::rule::{
7    CrossFileScope, Fix, FixCapability, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity,
8};
9use crate::utils::element_cache::ElementCache;
10use crate::workspace_index::{FileIndex, extract_cross_file_links};
11use regex::Regex;
12use std::collections::HashMap;
13use std::env;
14use std::path::{Path, PathBuf};
15use std::sync::LazyLock;
16use std::sync::{Arc, Mutex};
17
18mod md057_config;
19use crate::rule_config_serde::RuleConfig;
20use crate::utils::mkdocs_config::resolve_docs_dir;
21pub use md057_config::{AbsoluteLinksOption, MD057Config};
22
// Process-wide cache of file existence checks, keyed by the exact path that was
// queried, so repeated links to the same target hit the filesystem only once.
static FILE_EXISTENCE_CACHE: LazyLock<Arc<Mutex<HashMap<PathBuf, bool>>>> =
    LazyLock::new(|| Arc::new(Mutex::new(HashMap::new())));

// Lock the cache, recovering the guard if the mutex is poisoned.
//
// The map only holds independent `path -> bool` entries, so a panic in another
// thread cannot leave it in a half-updated state. Recovering keeps both the cache
// and its reset working instead of silently disabling them for the rest of the
// process.
fn lock_file_existence_cache() -> std::sync::MutexGuard<'static, HashMap<PathBuf, bool>> {
    FILE_EXISTENCE_CACHE
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
}

// Reset the file existence cache (typically between rule runs)
fn reset_file_existence_cache() {
    lock_file_existence_cache().clear();
}

// Check if a file exists with caching.
//
// Returns the cached answer for `path` when there is one; otherwise asks the
// filesystem once and remembers the result.
fn file_exists_with_cache(path: &Path) -> bool {
    let mut cache = lock_file_existence_cache();

    // Look up by `&Path` first so a cache hit does not allocate a `PathBuf`.
    if let Some(&exists) = cache.get(path) {
        return exists;
    }

    let exists = path.exists();
    cache.insert(path.to_path_buf(), exists);
    exists
}
41
42/// Check if a file exists, also trying markdown extensions for extensionless links.
43/// This supports wiki-style links like `[Link](page)` that resolve to `page.md`.
44fn file_exists_or_markdown_extension(path: &Path) -> bool {
45    // First, check exact path
46    if file_exists_with_cache(path) {
47        return true;
48    }
49
50    // If the path has no extension, try adding markdown extensions
51    if path.extension().is_none() {
52        for ext in MARKDOWN_EXTENSIONS {
53            // MARKDOWN_EXTENSIONS includes the dot, e.g., ".md"
54            let path_with_ext = path.with_extension(&ext[1..]);
55            if file_exists_with_cache(&path_with_ext) {
56                return true;
57            }
58        }
59    }
60
61    false
62}
63
64// Regex to match the start of a link - simplified for performance
65static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());
66
67/// Regex to extract the URL from an angle-bracketed markdown link
68/// Format: `](<URL>)` or `](<URL> "title")`
69/// This handles URLs with parentheses like `](<path/(with)/parens.md>)`
70static URL_EXTRACT_ANGLE_BRACKET_REGEX: LazyLock<Regex> =
71    LazyLock::new(|| Regex::new(r#"\]\(\s*<([^>]+)>(#[^\)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());
72
73/// Regex to extract the URL from a normal markdown link (without angle brackets)
74/// Format: `](URL)` or `](URL "title")`
75static URL_EXTRACT_REGEX: LazyLock<Regex> =
76    LazyLock::new(|| Regex::new("\\]\\(\\s*([^>\\)\\s#]+)(#[^)\\s]*)?\\s*(?:\"[^\"]*\")?\\s*\\)").unwrap());
77
78/// Regex to detect URLs with explicit schemes (should not be checked as relative links)
79/// Matches: scheme:// or scheme: (per RFC 3986)
80/// This covers http, https, ftp, file, smb, mailto, tel, data, macappstores, etc.
81static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
82    LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());
83
84// Current working directory
85static CURRENT_DIR: LazyLock<PathBuf> = LazyLock::new(|| env::current_dir().unwrap_or_else(|_| PathBuf::from(".")));
86
/// Map an ASCII hex digit (`0-9`, `a-f`, `A-F`) to its numeric value `0..=15`.
/// Any other byte yields `None`.
#[inline]
fn hex_digit_to_value(byte: u8) -> Option<u8> {
    // `to_digit(16)` accepts exactly the ASCII hex digits and returns at most 15,
    // so the narrowing conversion never fails.
    char::from(byte)
        .to_digit(16)
        .and_then(|digit| u8::try_from(digit).ok())
}
98
/// File extensions recognised as markdown sources.
///
/// Every entry includes its leading dot, so it can be appended directly to a
/// file stem; callers that need the bare extension must strip the dot.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md",
    ".markdown",
    ".mdx",
    ".mkd",
    ".mkdn",
    ".mdown",
    ".mdwn",
    ".qmd",
    ".rmd",
];
111
112/// Rule MD057: Existing relative links should point to valid files or directories.
113#[derive(Debug, Clone)]
114pub struct MD057ExistingRelativeLinks {
115    /// Base directory for resolving relative links
116    base_path: Arc<Mutex<Option<PathBuf>>>,
117    /// Configuration for the rule
118    config: MD057Config,
119}
120
121impl Default for MD057ExistingRelativeLinks {
122    fn default() -> Self {
123        Self {
124            base_path: Arc::new(Mutex::new(None)),
125            config: MD057Config::default(),
126        }
127    }
128}
129
130impl MD057ExistingRelativeLinks {
131    /// Create a new instance with default settings
132    pub fn new() -> Self {
133        Self::default()
134    }
135
136    /// Set the base path for resolving relative links
137    pub fn with_path<P: AsRef<Path>>(self, path: P) -> Self {
138        let path = path.as_ref();
139        let dir_path = if path.is_file() {
140            path.parent().map(|p| p.to_path_buf())
141        } else {
142            Some(path.to_path_buf())
143        };
144
145        if let Ok(mut guard) = self.base_path.lock() {
146            *guard = dir_path;
147        }
148        self
149    }
150
151    pub fn from_config_struct(config: MD057Config) -> Self {
152        Self {
153            base_path: Arc::new(Mutex::new(None)),
154            config,
155        }
156    }
157
158    /// Check if a URL is external or should be skipped for validation.
159    ///
160    /// Returns `true` (skip validation) for:
161    /// - URLs with protocols: `https://`, `http://`, `ftp://`, `mailto:`, etc.
162    /// - Bare domains: `www.example.com`, `example.com`
163    /// - Email addresses: `user@example.com` (without `mailto:`)
164    /// - Template variables: `{{URL}}`, `{{% include %}}`
165    /// - Absolute web URL paths: `/api/docs`, `/blog/post.html`
166    ///
167    /// Returns `false` (validate) for:
168    /// - Relative filesystem paths: `./file.md`, `../parent/file.md`, `file.md`
169    #[inline]
170    fn is_external_url(&self, url: &str) -> bool {
171        if url.is_empty() {
172            return false;
173        }
174
175        // Quick checks for common external URL patterns
176        if PROTOCOL_DOMAIN_REGEX.is_match(url) || url.starts_with("www.") {
177            return true;
178        }
179
180        // Skip template variables (Handlebars/Mustache/Jinja2 syntax)
181        // Examples: {{URL}}, {{#URL}}, {{> partial}}, {{% include %}}, {{ variable }}
182        if url.starts_with("{{") || url.starts_with("{%") {
183            return true;
184        }
185
186        // Simple check: if URL contains @, it's almost certainly an email address
187        // File paths with @ are extremely rare, so this is a safe heuristic
188        if url.contains('@') {
189            return true; // It's an email address, skip it
190        }
191
192        // Bare domain check (e.g., "example.com")
193        // Note: We intentionally DON'T skip all TLDs like .org, .net, etc.
194        // Links like [text](nodejs.org/path) without a protocol are broken -
195        // they'll be treated as relative paths by markdown renderers.
196        // Flagging them helps users find missing protocols.
197        // We only skip .com as a minimal safety net for the most common case.
198        if url.ends_with(".com") {
199            return true;
200        }
201
202        // Framework path aliases (resolved by build tools like Vite, webpack, etc.)
203        // These are not filesystem paths but module/asset aliases
204        // Examples: ~/assets/image.png, @images/photo.jpg, @/components/Button.vue
205        if url.starts_with('~') || url.starts_with('@') {
206            return true;
207        }
208
209        // All other cases (relative paths, etc.) are not external
210        false
211    }
212
213    /// Check if the URL is a fragment-only link (internal document link)
214    #[inline]
215    fn is_fragment_only_link(&self, url: &str) -> bool {
216        url.starts_with('#')
217    }
218
219    /// Check if the URL is an absolute path (starts with /)
220    /// These are typically routes for published documentation sites.
221    #[inline]
222    fn is_absolute_path(url: &str) -> bool {
223        url.starts_with('/')
224    }
225
226    /// Decode URL percent-encoded sequences in a path.
227    /// Converts `%20` to space, `%2F` to `/`, etc.
228    /// Returns the original string if decoding fails or produces invalid UTF-8.
229    fn url_decode(path: &str) -> String {
230        // Quick check: if no percent sign, return as-is
231        if !path.contains('%') {
232            return path.to_string();
233        }
234
235        let bytes = path.as_bytes();
236        let mut result = Vec::with_capacity(bytes.len());
237        let mut i = 0;
238
239        while i < bytes.len() {
240            if bytes[i] == b'%' && i + 2 < bytes.len() {
241                // Try to parse the two hex digits following %
242                let hex1 = bytes[i + 1];
243                let hex2 = bytes[i + 2];
244                if let (Some(d1), Some(d2)) = (hex_digit_to_value(hex1), hex_digit_to_value(hex2)) {
245                    result.push(d1 * 16 + d2);
246                    i += 3;
247                    continue;
248                }
249            }
250            result.push(bytes[i]);
251            i += 1;
252        }
253
254        // Convert to UTF-8, falling back to original if invalid
255        String::from_utf8(result).unwrap_or_else(|_| path.to_string())
256    }
257
258    /// Strip query parameters and fragments from a URL for file existence checking.
259    /// URLs like `path/to/image.png?raw=true` or `file.md#section` should check
260    /// for `path/to/image.png` or `file.md` respectively.
261    ///
262    /// Note: In standard URLs, query parameters (`?`) come before fragments (`#`),
263    /// so we check for `?` first. If a URL has both, only the query is stripped here
264    /// (fragments are handled separately by the regex in `contribute_to_index`).
265    fn strip_query_and_fragment(url: &str) -> &str {
266        // Find the first occurrence of '?' or '#', whichever comes first
267        // This handles both standard URLs (? before #) and edge cases (# before ?)
268        let query_pos = url.find('?');
269        let fragment_pos = url.find('#');
270
271        match (query_pos, fragment_pos) {
272            (Some(q), Some(f)) => {
273                // Both exist - strip at whichever comes first
274                &url[..q.min(f)]
275            }
276            (Some(q), None) => &url[..q],
277            (None, Some(f)) => &url[..f],
278            (None, None) => url,
279        }
280    }
281
282    /// Resolve a relative link against a provided base path
283    fn resolve_link_path_with_base(link: &str, base_path: &Path) -> PathBuf {
284        base_path.join(link)
285    }
286
287    /// Check if a relative link can be compacted and return the simplified form.
288    ///
289    /// Returns `None` if compact-paths is disabled, the link has no traversal,
290    /// or the link is already the shortest form.
291    /// Returns `Some(suggestion)` with the full compacted URL (including fragment/query suffix).
292    fn compact_path_suggestion(&self, url: &str, base_path: &Path) -> Option<String> {
293        if !self.config.compact_paths {
294            return None;
295        }
296
297        // Split URL into path and suffix (fragment/query)
298        let path_end = url
299            .find('?')
300            .unwrap_or(url.len())
301            .min(url.find('#').unwrap_or(url.len()));
302        let path_part = &url[..path_end];
303        let suffix = &url[path_end..];
304
305        // URL-decode the path portion for filesystem resolution
306        let decoded_path = Self::url_decode(path_part);
307
308        compute_compact_path(base_path, &decoded_path).map(|compact| format!("{compact}{suffix}"))
309    }
310
311    /// Validate an absolute link by resolving it relative to MkDocs docs_dir.
312    ///
313    /// Returns `Some(warning_message)` if the link is broken, `None` if valid.
314    /// Falls back to a generic warning if no mkdocs.yml is found.
315    fn validate_absolute_link_via_docs_dir(url: &str, source_path: &Path) -> Option<String> {
316        let Some(docs_dir) = resolve_docs_dir(source_path) else {
317            // No mkdocs.yml found — fall back to warn behavior
318            return Some(format!(
319                "Absolute link '{url}' cannot be validated locally (no mkdocs.yml found)"
320            ));
321        };
322
323        // Strip leading / and resolve relative to docs_dir
324        let relative_url = url.trim_start_matches('/');
325
326        // Strip query/fragment before checking existence
327        let file_path = Self::strip_query_and_fragment(relative_url);
328        let decoded = Self::url_decode(file_path);
329        let resolved_path = docs_dir.join(&decoded);
330
331        // For directory-style links (ending with /, bare path to a directory, or empty
332        // decoded path like "/"), check for index.md inside the directory.
333        // This must be checked BEFORE file_exists_or_markdown_extension because
334        // path.exists() returns true for directories — we need to verify index.md exists.
335        let is_directory_link = url.ends_with('/') || decoded.is_empty();
336        if is_directory_link || resolved_path.is_dir() {
337            let index_path = resolved_path.join("index.md");
338            if file_exists_with_cache(&index_path) {
339                return None; // Valid directory link with index.md
340            }
341            // Directory exists but no index.md — fall through to error
342            if resolved_path.is_dir() {
343                return Some(format!(
344                    "Absolute link '{url}' resolves to directory '{}' which has no index.md",
345                    resolved_path.display()
346                ));
347            }
348        }
349
350        // Check existence (with markdown extension fallback for extensionless links)
351        if file_exists_or_markdown_extension(&resolved_path) {
352            return None; // Valid link
353        }
354
355        // For .html/.htm links, check for corresponding markdown source
356        if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
357            && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
358            && let (Some(stem), Some(parent)) = (
359                resolved_path.file_stem().and_then(|s| s.to_str()),
360                resolved_path.parent(),
361            )
362        {
363            let has_md_source = MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
364                let source_path = parent.join(format!("{stem}{md_ext}"));
365                file_exists_with_cache(&source_path)
366            });
367            if has_md_source {
368                return None; // Markdown source exists
369            }
370        }
371
372        Some(format!(
373            "Absolute link '{url}' resolves to '{}' which does not exist",
374            resolved_path.display()
375        ))
376    }
377}
378
379impl Rule for MD057ExistingRelativeLinks {
380    fn name(&self) -> &'static str {
381        "MD057"
382    }
383
384    fn description(&self) -> &'static str {
385        "Relative links should point to existing files"
386    }
387
388    fn category(&self) -> RuleCategory {
389        RuleCategory::Link
390    }
391
392    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
393        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
394    }
395
396    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
397        let content = ctx.content;
398
399        // Early returns for performance
400        if content.is_empty() || !content.contains('[') {
401            return Ok(Vec::new());
402        }
403
404        // Quick check for any potential links before expensive operations
405        // Check for inline links "](", reference definitions "]:", or images "!["
406        if !content.contains("](") && !content.contains("]:") {
407            return Ok(Vec::new());
408        }
409
410        // Reset the file existence cache for a fresh run
411        reset_file_existence_cache();
412
413        let mut warnings = Vec::new();
414
415        // Determine base path for resolving relative links
416        // ALWAYS compute from ctx.source_file for each file - do not reuse cached base_path
417        // This ensures each file resolves links relative to its own directory
418        let base_path: Option<PathBuf> = {
419            // First check if base_path was explicitly set via with_path() (for tests)
420            let explicit_base = self.base_path.lock().ok().and_then(|g| g.clone());
421            if explicit_base.is_some() {
422                explicit_base
423            } else if let Some(ref source_file) = ctx.source_file {
424                // Resolve symlinks to get the actual file location
425                // This ensures relative links are resolved from the target's directory,
426                // not the symlink's directory
427                let resolved_file = source_file.canonicalize().unwrap_or_else(|_| source_file.clone());
428                resolved_file
429                    .parent()
430                    .map(|p| p.to_path_buf())
431                    .or_else(|| Some(CURRENT_DIR.clone()))
432            } else {
433                // No source file available - cannot validate relative links
434                None
435            }
436        };
437
438        // If we still don't have a base path, we can't validate relative links
439        let Some(base_path) = base_path else {
440            return Ok(warnings);
441        };
442
443        // Use LintContext links instead of expensive regex parsing
444        if !ctx.links.is_empty() {
445            // Use LineIndex for correct position calculation across all line ending types
446            let line_index = &ctx.line_index;
447
448            // Create element cache once for all links
449            let element_cache = ElementCache::new(content);
450
451            // Pre-collected lines from context
452            let lines = ctx.raw_lines();
453
454            // Track which lines we've already processed to avoid duplicates
455            // (ctx.links may have multiple entries for the same line, especially with malformed markdown)
456            let mut processed_lines = std::collections::HashSet::new();
457
458            for link in &ctx.links {
459                let line_idx = link.line - 1;
460                if line_idx >= lines.len() {
461                    continue;
462                }
463
464                // Skip lines inside PyMdown blocks
465                if ctx.line_info(link.line).is_some_and(|info| info.in_pymdown_block) {
466                    continue;
467                }
468
469                // Skip if we've already processed this line
470                if !processed_lines.insert(line_idx) {
471                    continue;
472                }
473
474                let line = lines[line_idx];
475
476                // Quick check for link pattern in this line
477                if !line.contains("](") {
478                    continue;
479                }
480
481                // Find all links in this line using optimized regex
482                for link_match in LINK_START_REGEX.find_iter(line) {
483                    let start_pos = link_match.start();
484                    let end_pos = link_match.end();
485
486                    // Calculate absolute position using LineIndex
487                    let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
488                    let absolute_start_pos = line_start_byte + start_pos;
489
490                    // Skip if this link is in a code span
491                    if element_cache.is_in_code_span(absolute_start_pos) {
492                        continue;
493                    }
494
495                    // Skip if this link is in a math span (LaTeX $...$ or $$...$$)
496                    if ctx.is_in_math_span(absolute_start_pos) {
497                        continue;
498                    }
499
500                    // Find the URL part after the link text
501                    // Try angle-bracket regex first (handles URLs with parens like `<path/(with)/parens.md>`)
502                    // Then fall back to normal URL regex
503                    let caps_and_url = URL_EXTRACT_ANGLE_BRACKET_REGEX
504                        .captures_at(line, end_pos - 1)
505                        .and_then(|caps| caps.get(1).map(|g| (caps, g)))
506                        .or_else(|| {
507                            URL_EXTRACT_REGEX
508                                .captures_at(line, end_pos - 1)
509                                .and_then(|caps| caps.get(1).map(|g| (caps, g)))
510                        });
511
512                    if let Some((caps, url_group)) = caps_and_url {
513                        let url = url_group.as_str().trim();
514
515                        // Skip empty URLs
516                        if url.is_empty() {
517                            continue;
518                        }
519
520                        // Skip rustdoc intra-doc links (backtick-wrapped URLs)
521                        // These are Rust API references, not file paths
522                        // Example: [`f32::is_subnormal`], [`Vec::push`]
523                        if url.starts_with('`') && url.ends_with('`') {
524                            continue;
525                        }
526
527                        // Skip external URLs and fragment-only links
528                        if self.is_external_url(url) || self.is_fragment_only_link(url) {
529                            continue;
530                        }
531
532                        // Handle absolute paths based on config
533                        if Self::is_absolute_path(url) {
534                            match self.config.absolute_links {
535                                AbsoluteLinksOption::Warn => {
536                                    let url_start = url_group.start();
537                                    let url_end = url_group.end();
538                                    warnings.push(LintWarning {
539                                        rule_name: Some(self.name().to_string()),
540                                        line: link.line,
541                                        column: url_start + 1,
542                                        end_line: link.line,
543                                        end_column: url_end + 1,
544                                        message: format!("Absolute link '{url}' cannot be validated locally"),
545                                        severity: Severity::Warning,
546                                        fix: None,
547                                    });
548                                }
549                                AbsoluteLinksOption::RelativeToDocs => {
550                                    if let Some(msg) = Self::validate_absolute_link_via_docs_dir(url, &base_path) {
551                                        let url_start = url_group.start();
552                                        let url_end = url_group.end();
553                                        warnings.push(LintWarning {
554                                            rule_name: Some(self.name().to_string()),
555                                            line: link.line,
556                                            column: url_start + 1,
557                                            end_line: link.line,
558                                            end_column: url_end + 1,
559                                            message: msg,
560                                            severity: Severity::Warning,
561                                            fix: None,
562                                        });
563                                    }
564                                }
565                                AbsoluteLinksOption::Ignore => {}
566                            }
567                            continue;
568                        }
569
570                        // Check for unnecessary path traversal (compact-paths)
571                        // Reconstruct full URL including fragment (regex group 2)
572                        // since url_group (group 1) contains only the path part
573                        let full_url_for_compact = if let Some(frag) = caps.get(2) {
574                            format!("{url}{}", frag.as_str())
575                        } else {
576                            url.to_string()
577                        };
578                        if let Some(suggestion) = self.compact_path_suggestion(&full_url_for_compact, &base_path) {
579                            let url_start = url_group.start();
580                            let url_end = caps.get(2).map_or(url_group.end(), |frag| frag.end());
581                            let fix_byte_start = line_start_byte + url_start;
582                            let fix_byte_end = line_start_byte + url_end;
583                            warnings.push(LintWarning {
584                                rule_name: Some(self.name().to_string()),
585                                line: link.line,
586                                column: url_start + 1,
587                                end_line: link.line,
588                                end_column: url_end + 1,
589                                message: format!(
590                                    "Relative link '{full_url_for_compact}' can be simplified to '{suggestion}'"
591                                ),
592                                severity: Severity::Warning,
593                                fix: Some(Fix {
594                                    range: fix_byte_start..fix_byte_end,
595                                    replacement: suggestion,
596                                }),
597                            });
598                        }
599
600                        // Strip query parameters and fragments before checking file existence
601                        let file_path = Self::strip_query_and_fragment(url);
602
603                        // URL-decode the path to handle percent-encoded characters
604                        let decoded_path = Self::url_decode(file_path);
605
606                        // Resolve the relative link against the base path
607                        let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
608
609                        // Check if the file exists, also trying markdown extensions for extensionless links
610                        if file_exists_or_markdown_extension(&resolved_path) {
611                            continue; // File exists, no warning needed
612                        }
613
614                        // For .html/.htm links, check if a corresponding markdown source exists
615                        let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
616                            && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
617                            && let (Some(stem), Some(parent)) = (
618                                resolved_path.file_stem().and_then(|s| s.to_str()),
619                                resolved_path.parent(),
620                            ) {
621                            MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
622                                let source_path = parent.join(format!("{stem}{md_ext}"));
623                                file_exists_with_cache(&source_path)
624                            })
625                        } else {
626                            false
627                        };
628
629                        if has_md_source {
630                            continue; // Markdown source exists, link is valid
631                        }
632
633                        // File doesn't exist and no source file found
634                        // Use actual URL position from regex capture group
635                        // Note: capture group positions are absolute within the line string
636                        let url_start = url_group.start();
637                        let url_end = url_group.end();
638
639                        warnings.push(LintWarning {
640                            rule_name: Some(self.name().to_string()),
641                            line: link.line,
642                            column: url_start + 1, // 1-indexed
643                            end_line: link.line,
644                            end_column: url_end + 1, // 1-indexed
645                            message: format!("Relative link '{url}' does not exist"),
646                            severity: Severity::Error,
647                            fix: None,
648                        });
649                    }
650                }
651            }
652        }
653
654        // Also process images - they have URLs already parsed
655        for image in &ctx.images {
656            // Skip images inside PyMdown blocks (MkDocs flavor)
657            if ctx.line_info(image.line).is_some_and(|info| info.in_pymdown_block) {
658                continue;
659            }
660
661            let url = image.url.as_ref();
662
663            // Skip empty URLs
664            if url.is_empty() {
665                continue;
666            }
667
668            // Skip external URLs and fragment-only links
669            if self.is_external_url(url) || self.is_fragment_only_link(url) {
670                continue;
671            }
672
673            // Handle absolute paths based on config
674            if Self::is_absolute_path(url) {
675                match self.config.absolute_links {
676                    AbsoluteLinksOption::Warn => {
677                        warnings.push(LintWarning {
678                            rule_name: Some(self.name().to_string()),
679                            line: image.line,
680                            column: image.start_col + 1,
681                            end_line: image.line,
682                            end_column: image.start_col + 1 + url.len(),
683                            message: format!("Absolute link '{url}' cannot be validated locally"),
684                            severity: Severity::Warning,
685                            fix: None,
686                        });
687                    }
688                    AbsoluteLinksOption::RelativeToDocs => {
689                        if let Some(msg) = Self::validate_absolute_link_via_docs_dir(url, &base_path) {
690                            warnings.push(LintWarning {
691                                rule_name: Some(self.name().to_string()),
692                                line: image.line,
693                                column: image.start_col + 1,
694                                end_line: image.line,
695                                end_column: image.start_col + 1 + url.len(),
696                                message: msg,
697                                severity: Severity::Warning,
698                                fix: None,
699                            });
700                        }
701                    }
702                    AbsoluteLinksOption::Ignore => {}
703                }
704                continue;
705            }
706
707            // Check for unnecessary path traversal (compact-paths)
708            if let Some(suggestion) = self.compact_path_suggestion(url, &base_path) {
709                // Find the URL position within the image syntax using document byte offsets.
710                // Search from image.byte_offset (the `!` character) to locate the URL string.
711                let fix = content[image.byte_offset..image.byte_end].find(url).map(|url_offset| {
712                    let fix_byte_start = image.byte_offset + url_offset;
713                    let fix_byte_end = fix_byte_start + url.len();
714                    Fix {
715                        range: fix_byte_start..fix_byte_end,
716                        replacement: suggestion.clone(),
717                    }
718                });
719
720                let img_line_start_byte = ctx.line_index.get_line_start_byte(image.line).unwrap_or(0);
721                let url_col = fix
722                    .as_ref()
723                    .map_or(image.start_col + 1, |f| f.range.start - img_line_start_byte + 1);
724                warnings.push(LintWarning {
725                    rule_name: Some(self.name().to_string()),
726                    line: image.line,
727                    column: url_col,
728                    end_line: image.line,
729                    end_column: url_col + url.len(),
730                    message: format!("Relative link '{url}' can be simplified to '{suggestion}'"),
731                    severity: Severity::Warning,
732                    fix,
733                });
734            }
735
736            // Strip query parameters and fragments before checking file existence
737            let file_path = Self::strip_query_and_fragment(url);
738
739            // URL-decode the path to handle percent-encoded characters
740            let decoded_path = Self::url_decode(file_path);
741
742            // Resolve the relative link against the base path
743            let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
744
745            // Check if the file exists, also trying markdown extensions for extensionless links
746            if file_exists_or_markdown_extension(&resolved_path) {
747                continue; // File exists, no warning needed
748            }
749
750            // For .html/.htm links, check if a corresponding markdown source exists
751            let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
752                && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
753                && let (Some(stem), Some(parent)) = (
754                    resolved_path.file_stem().and_then(|s| s.to_str()),
755                    resolved_path.parent(),
756                ) {
757                MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
758                    let source_path = parent.join(format!("{stem}{md_ext}"));
759                    file_exists_with_cache(&source_path)
760                })
761            } else {
762                false
763            };
764
765            if has_md_source {
766                continue; // Markdown source exists, link is valid
767            }
768
769            // File doesn't exist and no source file found
770            // Images already have correct position from parser
771            warnings.push(LintWarning {
772                rule_name: Some(self.name().to_string()),
773                line: image.line,
774                column: image.start_col + 1,
775                end_line: image.line,
776                end_column: image.start_col + 1 + url.len(),
777                message: format!("Relative link '{url}' does not exist"),
778                severity: Severity::Error,
779                fix: None,
780            });
781        }
782
783        // Also process reference definitions: [ref]: ./path.md
784        for ref_def in &ctx.reference_defs {
785            let url = &ref_def.url;
786
787            // Skip empty URLs
788            if url.is_empty() {
789                continue;
790            }
791
792            // Skip external URLs and fragment-only links
793            if self.is_external_url(url) || self.is_fragment_only_link(url) {
794                continue;
795            }
796
797            // Handle absolute paths based on config
798            if Self::is_absolute_path(url) {
799                match self.config.absolute_links {
800                    AbsoluteLinksOption::Warn => {
801                        let line_idx = ref_def.line - 1;
802                        let column = content.lines().nth(line_idx).map_or(1, |line_content| {
803                            line_content.find(url.as_str()).map_or(1, |url_pos| url_pos + 1)
804                        });
805                        warnings.push(LintWarning {
806                            rule_name: Some(self.name().to_string()),
807                            line: ref_def.line,
808                            column,
809                            end_line: ref_def.line,
810                            end_column: column + url.len(),
811                            message: format!("Absolute link '{url}' cannot be validated locally"),
812                            severity: Severity::Warning,
813                            fix: None,
814                        });
815                    }
816                    AbsoluteLinksOption::RelativeToDocs => {
817                        if let Some(msg) = Self::validate_absolute_link_via_docs_dir(url, &base_path) {
818                            let line_idx = ref_def.line - 1;
819                            let column = content.lines().nth(line_idx).map_or(1, |line_content| {
820                                line_content.find(url.as_str()).map_or(1, |url_pos| url_pos + 1)
821                            });
822                            warnings.push(LintWarning {
823                                rule_name: Some(self.name().to_string()),
824                                line: ref_def.line,
825                                column,
826                                end_line: ref_def.line,
827                                end_column: column + url.len(),
828                                message: msg,
829                                severity: Severity::Warning,
830                                fix: None,
831                            });
832                        }
833                    }
834                    AbsoluteLinksOption::Ignore => {}
835                }
836                continue;
837            }
838
839            // Check for unnecessary path traversal (compact-paths)
840            if let Some(suggestion) = self.compact_path_suggestion(url, &base_path) {
841                let ref_line_idx = ref_def.line - 1;
842                let col = content.lines().nth(ref_line_idx).map_or(1, |line_content| {
843                    line_content.find(url.as_str()).map_or(1, |url_pos| url_pos + 1)
844                });
845                let ref_line_start_byte = ctx.line_index.get_line_start_byte(ref_def.line).unwrap_or(0);
846                let fix_byte_start = ref_line_start_byte + col - 1;
847                let fix_byte_end = fix_byte_start + url.len();
848                warnings.push(LintWarning {
849                    rule_name: Some(self.name().to_string()),
850                    line: ref_def.line,
851                    column: col,
852                    end_line: ref_def.line,
853                    end_column: col + url.len(),
854                    message: format!("Relative link '{url}' can be simplified to '{suggestion}'"),
855                    severity: Severity::Warning,
856                    fix: Some(Fix {
857                        range: fix_byte_start..fix_byte_end,
858                        replacement: suggestion,
859                    }),
860                });
861            }
862
863            // Strip query parameters and fragments before checking file existence
864            let file_path = Self::strip_query_and_fragment(url);
865
866            // URL-decode the path to handle percent-encoded characters
867            let decoded_path = Self::url_decode(file_path);
868
869            // Resolve the relative link against the base path
870            let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
871
872            // Check if the file exists, also trying markdown extensions for extensionless links
873            if file_exists_or_markdown_extension(&resolved_path) {
874                continue; // File exists, no warning needed
875            }
876
877            // For .html/.htm links, check if a corresponding markdown source exists
878            let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
879                && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
880                && let (Some(stem), Some(parent)) = (
881                    resolved_path.file_stem().and_then(|s| s.to_str()),
882                    resolved_path.parent(),
883                ) {
884                MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
885                    let source_path = parent.join(format!("{stem}{md_ext}"));
886                    file_exists_with_cache(&source_path)
887                })
888            } else {
889                false
890            };
891
892            if has_md_source {
893                continue; // Markdown source exists, link is valid
894            }
895
896            // File doesn't exist and no source file found
897            // Calculate column position: find URL within the line
898            let line_idx = ref_def.line - 1;
899            let column = content.lines().nth(line_idx).map_or(1, |line_content| {
900                // Find URL position in line (after ]: )
901                line_content.find(url.as_str()).map_or(1, |url_pos| url_pos + 1)
902            });
903
904            warnings.push(LintWarning {
905                rule_name: Some(self.name().to_string()),
906                line: ref_def.line,
907                column,
908                end_line: ref_def.line,
909                end_column: column + url.len(),
910                message: format!("Relative link '{url}' does not exist"),
911                severity: Severity::Error,
912                fix: None,
913            });
914        }
915
916        Ok(warnings)
917    }
918
919    fn fix_capability(&self) -> FixCapability {
920        if self.config.compact_paths {
921            FixCapability::ConditionallyFixable
922        } else {
923            FixCapability::Unfixable
924        }
925    }
926
927    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
928        if !self.config.compact_paths {
929            return Ok(ctx.content.to_string());
930        }
931
932        let warnings = self.check(ctx)?;
933        let warnings =
934            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
935        let mut content = ctx.content.to_string();
936
937        // Collect fixable warnings (compact-paths) sorted by byte offset descending
938        let mut fixes: Vec<_> = warnings.iter().filter_map(|w| w.fix.as_ref()).collect();
939        fixes.sort_by(|a, b| b.range.start.cmp(&a.range.start));
940
941        for fix in fixes {
942            if fix.range.end <= content.len() {
943                content.replace_range(fix.range.clone(), &fix.replacement);
944            }
945        }
946
947        Ok(content)
948    }
949
    /// Return this rule as a `&dyn Any`, which allows downcasting to the concrete rule type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
953
954    fn default_config_section(&self) -> Option<(String, toml::Value)> {
955        let default_config = MD057Config::default();
956        let json_value = serde_json::to_value(&default_config).ok()?;
957        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
958
959        if let toml::Value::Table(table) = toml_value {
960            if !table.is_empty() {
961                Some((MD057Config::RULE_NAME.to_string(), toml::Value::Table(table)))
962            } else {
963                None
964            }
965        } else {
966            None
967        }
968    }
969
970    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
971    where
972        Self: Sized,
973    {
974        let rule_config = crate::rule_config_serde::load_rule_config::<MD057Config>(config);
975        Box::new(Self::from_config_struct(rule_config))
976    }
977
    /// Declare the cross-file scope of this rule: it reports `CrossFileScope::Workspace`.
    fn cross_file_scope(&self) -> CrossFileScope {
        CrossFileScope::Workspace
    }
981
982    fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, index: &mut FileIndex) {
983        // Use the shared utility for cross-file link extraction
984        // This ensures consistent position tracking between CLI and LSP
985        for link in extract_cross_file_links(ctx) {
986            index.add_cross_file_link(link);
987        }
988    }
989
990    fn cross_file_check(
991        &self,
992        file_path: &Path,
993        file_index: &FileIndex,
994        workspace_index: &crate::workspace_index::WorkspaceIndex,
995    ) -> LintResult {
996        let mut warnings = Vec::new();
997
998        // Get the directory containing this file for resolving relative links
999        let file_dir = file_path.parent();
1000
1001        for cross_link in &file_index.cross_file_links {
1002            // URL-decode the path for filesystem operations
1003            // The stored path is URL-encoded (e.g., "%F0%9F%91%A4" for emoji 👤)
1004            let decoded_target = Self::url_decode(&cross_link.target_path);
1005
1006            // Skip absolute paths — they are already handled by check()
1007            // which validates them according to the absolute_links config.
1008            // Handling them here too would produce duplicate warnings.
1009            if decoded_target.starts_with('/') {
1010                continue;
1011            }
1012
1013            // Resolve relative path
1014            let target_path = if let Some(dir) = file_dir {
1015                dir.join(&decoded_target)
1016            } else {
1017                Path::new(&decoded_target).to_path_buf()
1018            };
1019
1020            // Normalize the path (handle .., ., etc.)
1021            let target_path = normalize_path(&target_path);
1022
1023            // Check if the target file exists, also trying markdown extensions for extensionless links
1024            let file_exists =
1025                workspace_index.contains_file(&target_path) || file_exists_or_markdown_extension(&target_path);
1026
1027            if !file_exists {
1028                // For .html/.htm links, check if a corresponding markdown source exists
1029                // This handles doc sites (mdBook, etc.) where .md is compiled to .html
1030                let has_md_source = if let Some(ext) = target_path.extension().and_then(|e| e.to_str())
1031                    && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
1032                    && let (Some(stem), Some(parent)) =
1033                        (target_path.file_stem().and_then(|s| s.to_str()), target_path.parent())
1034                {
1035                    MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
1036                        let source_path = parent.join(format!("{stem}{md_ext}"));
1037                        workspace_index.contains_file(&source_path) || source_path.exists()
1038                    })
1039                } else {
1040                    false
1041                };
1042
1043                if !has_md_source {
1044                    warnings.push(LintWarning {
1045                        rule_name: Some(self.name().to_string()),
1046                        line: cross_link.line,
1047                        column: cross_link.column,
1048                        end_line: cross_link.line,
1049                        end_column: cross_link.column + cross_link.target_path.len(),
1050                        message: format!("Relative link '{}' does not exist", cross_link.target_path),
1051                        severity: Severity::Error,
1052                        fix: None,
1053                    });
1054                }
1055            }
1056        }
1057
1058        Ok(warnings)
1059    }
1060}
1061
/// Build the relative path that leads from the directory `from_dir` to `to_path`.
///
/// Both inputs are expected to be normalized already (no `.` or `..` components).
/// The result climbs one `..` for every component of `from_dir` that is not part
/// of the shared leading prefix, then descends through the components of
/// `to_path` that follow that prefix.
fn shortest_relative_path(from_dir: &Path, to_path: &Path) -> PathBuf {
    // Number of leading components the two paths have in common
    let shared = from_dir
        .components()
        .zip(to_path.components())
        .take_while(|(ours, theirs)| ours == theirs)
        .count();

    // One `..` per component of `from_dir` below the shared prefix
    let climb = from_dir
        .components()
        .skip(shared)
        .map(|_| std::path::Component::ParentDir);

    // Followed by whatever is left of `to_path`
    let descend = to_path.components().skip(shared);

    climb.chain(descend).collect()
}
1091
1092/// Check if a relative link path can be shortened.
1093///
1094/// Given the source directory and the raw link path, computes whether there's
1095/// a shorter equivalent path. Returns `Some(compact_path)` if the link can
1096/// be simplified, `None` if it's already optimal.
1097fn compute_compact_path(source_dir: &Path, raw_link_path: &str) -> Option<String> {
1098    let link_path = Path::new(raw_link_path);
1099
1100    // Only check paths that contain traversal (../ or ./)
1101    let has_traversal = link_path
1102        .components()
1103        .any(|c| matches!(c, std::path::Component::ParentDir | std::path::Component::CurDir));
1104
1105    if !has_traversal {
1106        return None;
1107    }
1108
1109    // Resolve: source_dir + raw_link_path, then normalize
1110    let combined = source_dir.join(link_path);
1111    let normalized_target = normalize_path(&combined);
1112
1113    // Compute shortest path from source_dir back to the normalized target
1114    let normalized_source = normalize_path(source_dir);
1115    let shortest = shortest_relative_path(&normalized_source, &normalized_target);
1116
1117    // Compare against the raw link path — if it differs, the path can be compacted
1118    if shortest != link_path {
1119        let compact = shortest.to_string_lossy().to_string();
1120        // Avoid suggesting empty path
1121        if compact.is_empty() {
1122            return None;
1123        }
1124        // Markdown links always use forward slashes regardless of platform
1125        Some(compact.replace('\\', "/"))
1126    } else {
1127        None
1128    }
1129}
1130
/// Lexically normalize a path by resolving `.` and `..` components.
///
/// No filesystem access is performed. The rules are:
///
/// - `.` components are dropped.
/// - `..` removes the preceding normal component when there is one.
/// - `..` directly below the root is dropped, because the root has no parent
///   (`/a/../../b` becomes `/b`, not the relative path `b`).
/// - `..` with nothing left to remove is kept, so a relative path that escapes
///   its starting directory still points at the same location
///   (`docs/../../x.md` becomes `../x.md`, not `x.md`).
///
/// Returns the normalized path; it may be empty (e.g. for `a/..`).
fn normalize_path(path: &Path) -> PathBuf {
    let mut components: Vec<std::path::Component<'_>> = Vec::new();

    for component in path.components() {
        match component {
            std::path::Component::ParentDir => {
                if matches!(components.last(), Some(std::path::Component::Normal(_))) {
                    // Go up one level by cancelling the previous directory name
                    components.pop();
                } else if !matches!(components.last(), Some(std::path::Component::RootDir)) {
                    // Nothing to cancel (empty, or only leading `..` so far):
                    // the traversal must be preserved to keep the path's meaning.
                    components.push(component);
                }
                // Otherwise we are at the root, which is its own parent: drop the `..`.
            }
            std::path::Component::CurDir => {
                // Skip current directory markers
            }
            _ => {
                components.push(component);
            }
        }
    }

    components.iter().collect()
}
1154
1155#[cfg(test)]
1156mod tests {
1157    use super::*;
1158    use crate::workspace_index::CrossFileLinkIndex;
1159    use std::fs::File;
1160    use std::io::Write;
1161    use tempfile::tempdir;
1162
1163    #[test]
1164    fn test_strip_query_and_fragment() {
1165        // Test query parameter stripping
1166        assert_eq!(
1167            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true"),
1168            "file.png"
1169        );
1170        assert_eq!(
1171            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true&version=1"),
1172            "file.png"
1173        );
1174        assert_eq!(
1175            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?"),
1176            "file.png"
1177        );
1178
1179        // Test fragment stripping
1180        assert_eq!(
1181            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section"),
1182            "file.md"
1183        );
1184        assert_eq!(
1185            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#"),
1186            "file.md"
1187        );
1188
1189        // Test both query and fragment (query comes first, per RFC 3986)
1190        assert_eq!(
1191            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md?raw=true#section"),
1192            "file.md"
1193        );
1194
1195        // Test no query or fragment
1196        assert_eq!(
1197            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png"),
1198            "file.png"
1199        );
1200
1201        // Test with path
1202        assert_eq!(
1203            MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true"),
1204            "path/to/image.png"
1205        );
1206        assert_eq!(
1207            MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true#anchor"),
1208            "path/to/image.png"
1209        );
1210
1211        // Edge case: fragment before query (non-standard but possible)
1212        assert_eq!(
1213            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section?query"),
1214            "file.md"
1215        );
1216    }
1217
1218    #[test]
1219    fn test_url_decode() {
1220        // Simple space encoding
1221        assert_eq!(
1222            MD057ExistingRelativeLinks::url_decode("penguin%20with%20space.jpg"),
1223            "penguin with space.jpg"
1224        );
1225
1226        // Path with encoded spaces
1227        assert_eq!(
1228            MD057ExistingRelativeLinks::url_decode("assets/my%20file%20name.png"),
1229            "assets/my file name.png"
1230        );
1231
1232        // Multiple encoded characters
1233        assert_eq!(
1234            MD057ExistingRelativeLinks::url_decode("hello%20world%21.md"),
1235            "hello world!.md"
1236        );
1237
1238        // Lowercase hex
1239        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2f%2e%2e"), "/..");
1240
1241        // Uppercase hex
1242        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2F%2E%2E"), "/..");
1243
1244        // Mixed case hex
1245        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2f%2E%2e"), "/..");
1246
1247        // No encoding - return as-is
1248        assert_eq!(
1249            MD057ExistingRelativeLinks::url_decode("normal-file.md"),
1250            "normal-file.md"
1251        );
1252
1253        // Incomplete percent encoding - leave as-is
1254        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%2.txt"), "file%2.txt");
1255
1256        // Percent at end - leave as-is
1257        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%"), "file%");
1258
1259        // Invalid hex digits - leave as-is
1260        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%GG.txt"), "file%GG.txt");
1261
1262        // Plus sign (should NOT be decoded - that's form encoding, not URL encoding)
1263        assert_eq!(MD057ExistingRelativeLinks::url_decode("file+name.txt"), "file+name.txt");
1264
1265        // Empty string
1266        assert_eq!(MD057ExistingRelativeLinks::url_decode(""), "");
1267
1268        // UTF-8 multi-byte characters (é = C3 A9 in UTF-8)
1269        assert_eq!(MD057ExistingRelativeLinks::url_decode("caf%C3%A9.md"), "café.md");
1270
1271        // Multiple consecutive encoded characters
1272        assert_eq!(MD057ExistingRelativeLinks::url_decode("%20%20%20"), "   ");
1273
1274        // Encoded path separators
1275        assert_eq!(
1276            MD057ExistingRelativeLinks::url_decode("path%2Fto%2Ffile.md"),
1277            "path/to/file.md"
1278        );
1279
1280        // Mixed encoded and non-encoded
1281        assert_eq!(
1282            MD057ExistingRelativeLinks::url_decode("hello%20world/foo%20bar.md"),
1283            "hello world/foo bar.md"
1284        );
1285
1286        // Special characters that are commonly encoded
1287        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%5B1%5D.md"), "file[1].md");
1288
1289        // Percent at position that looks like encoding but isn't valid
1290        assert_eq!(MD057ExistingRelativeLinks::url_decode("100%pure.md"), "100%pure.md");
1291    }
1292
1293    #[test]
1294    fn test_url_encoded_filenames() {
1295        // Create a temporary directory for test files
1296        let temp_dir = tempdir().unwrap();
1297        let base_path = temp_dir.path();
1298
1299        // Create a file with spaces in the name
1300        let file_with_spaces = base_path.join("penguin with space.jpg");
1301        File::create(&file_with_spaces)
1302            .unwrap()
1303            .write_all(b"image data")
1304            .unwrap();
1305
1306        // Create a subdirectory with spaces
1307        let subdir = base_path.join("my images");
1308        std::fs::create_dir(&subdir).unwrap();
1309        let nested_file = subdir.join("photo 1.png");
1310        File::create(&nested_file).unwrap().write_all(b"photo data").unwrap();
1311
1312        // Test content with URL-encoded links
1313        let content = r#"
1314# Test Document with URL-Encoded Links
1315
1316![Penguin](penguin%20with%20space.jpg)
1317![Photo](my%20images/photo%201.png)
1318![Missing](missing%20file.jpg)
1319"#;
1320
1321        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1322
1323        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1324        let result = rule.check(&ctx).unwrap();
1325
1326        // Should only have one warning for the missing file
1327        assert_eq!(
1328            result.len(),
1329            1,
1330            "Should only warn about missing%20file.jpg. Got: {result:?}"
1331        );
1332        assert!(
1333            result[0].message.contains("missing%20file.jpg"),
1334            "Warning should mention the URL-encoded filename"
1335        );
1336    }
1337
1338    #[test]
1339    fn test_external_urls() {
1340        let rule = MD057ExistingRelativeLinks::new();
1341
1342        // Common web protocols
1343        assert!(rule.is_external_url("https://example.com"));
1344        assert!(rule.is_external_url("http://example.com"));
1345        assert!(rule.is_external_url("ftp://example.com"));
1346        assert!(rule.is_external_url("www.example.com"));
1347        assert!(rule.is_external_url("example.com"));
1348
1349        // Special URI schemes
1350        assert!(rule.is_external_url("file:///path/to/file"));
1351        assert!(rule.is_external_url("smb://server/share"));
1352        assert!(rule.is_external_url("macappstores://apps.apple.com/"));
1353        assert!(rule.is_external_url("mailto:user@example.com"));
1354        assert!(rule.is_external_url("tel:+1234567890"));
1355        assert!(rule.is_external_url("data:text/plain;base64,SGVsbG8="));
1356        assert!(rule.is_external_url("javascript:void(0)"));
1357        assert!(rule.is_external_url("ssh://git@github.com/repo"));
1358        assert!(rule.is_external_url("git://github.com/repo.git"));
1359
1360        // Email addresses without mailto: protocol
1361        // These are clearly not file links and should be skipped
1362        assert!(rule.is_external_url("user@example.com"));
1363        assert!(rule.is_external_url("steering@kubernetes.io"));
1364        assert!(rule.is_external_url("john.doe+filter@company.co.uk"));
1365        assert!(rule.is_external_url("user_name@sub.domain.com"));
1366        assert!(rule.is_external_url("firstname.lastname+tag@really.long.domain.example.org"));
1367
1368        // Template variables should be skipped (not checked as relative links)
1369        assert!(rule.is_external_url("{{URL}}")); // Handlebars/Mustache
1370        assert!(rule.is_external_url("{{#URL}}")); // Handlebars block helper
1371        assert!(rule.is_external_url("{{> partial}}")); // Handlebars partial
1372        assert!(rule.is_external_url("{{ variable }}")); // Mustache with spaces
1373        assert!(rule.is_external_url("{{% include %}}")); // Jinja2/Hugo shortcode
1374        assert!(rule.is_external_url("{{")); // Even partial matches (regex edge case)
1375
1376        // Absolute paths are NOT external (handled separately via is_absolute_path)
1377        // By default they are ignored, but can be configured to warn
1378        assert!(!rule.is_external_url("/api/v1/users"));
1379        assert!(!rule.is_external_url("/blog/2024/release.html"));
1380        assert!(!rule.is_external_url("/react/hooks/use-state.html"));
1381        assert!(!rule.is_external_url("/pkg/runtime"));
1382        assert!(!rule.is_external_url("/doc/go1compat"));
1383        assert!(!rule.is_external_url("/index.html"));
1384        assert!(!rule.is_external_url("/assets/logo.png"));
1385
1386        // But is_absolute_path should detect them
1387        assert!(MD057ExistingRelativeLinks::is_absolute_path("/api/v1/users"));
1388        assert!(MD057ExistingRelativeLinks::is_absolute_path("/blog/2024/release.html"));
1389        assert!(MD057ExistingRelativeLinks::is_absolute_path("/index.html"));
1390        assert!(!MD057ExistingRelativeLinks::is_absolute_path("./relative.md"));
1391        assert!(!MD057ExistingRelativeLinks::is_absolute_path("relative.md"));
1392
1393        // Framework path aliases should be skipped (resolved by build tools)
1394        // Tilde prefix (common in Vite, Nuxt, Astro for project root)
1395        assert!(rule.is_external_url("~/assets/image.png"));
1396        assert!(rule.is_external_url("~/components/Button.vue"));
1397        assert!(rule.is_external_url("~assets/logo.svg")); // Nuxt style without /
1398
1399        // @ prefix (common in Vue, webpack, Vite aliases)
1400        assert!(rule.is_external_url("@/components/Header.vue"));
1401        assert!(rule.is_external_url("@images/photo.jpg"));
1402        assert!(rule.is_external_url("@assets/styles.css"));
1403
1404        // Relative paths should NOT be external (should be validated)
1405        assert!(!rule.is_external_url("./relative/path.md"));
1406        assert!(!rule.is_external_url("relative/path.md"));
1407        assert!(!rule.is_external_url("../parent/path.md"));
1408    }
1409
1410    #[test]
1411    fn test_framework_path_aliases() {
1412        // Create a temporary directory for test files
1413        let temp_dir = tempdir().unwrap();
1414        let base_path = temp_dir.path();
1415
1416        // Test content with framework path aliases (should all be skipped)
1417        let content = r#"
1418# Framework Path Aliases
1419
1420![Image 1](~/assets/penguin.jpg)
1421![Image 2](~assets/logo.svg)
1422![Image 3](@images/photo.jpg)
1423![Image 4](@/components/icon.svg)
1424[Link](@/pages/about.md)
1425
1426This is a [real missing link](missing.md) that should be flagged.
1427"#;
1428
1429        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1430
1431        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1432        let result = rule.check(&ctx).unwrap();
1433
1434        // Should only have one warning for the real missing link
1435        assert_eq!(
1436            result.len(),
1437            1,
1438            "Should only warn about missing.md, not framework aliases. Got: {result:?}"
1439        );
1440        assert!(
1441            result[0].message.contains("missing.md"),
1442            "Warning should be for missing.md"
1443        );
1444    }
1445
1446    #[test]
1447    fn test_url_decode_security_path_traversal() {
1448        // Ensure URL decoding doesn't enable path traversal attacks
1449        // The decoded path is still validated against the base path
1450        let temp_dir = tempdir().unwrap();
1451        let base_path = temp_dir.path();
1452
1453        // Create a file in the temp directory
1454        let file_in_base = base_path.join("safe.md");
1455        File::create(&file_in_base).unwrap().write_all(b"# Safe").unwrap();
1456
1457        // Test with encoded path traversal attempt
1458        // Use a path that definitely won't exist on any platform (not /etc/passwd which exists on Linux)
1459        // %2F = /, so ..%2F..%2Fnonexistent%2Ffile = ../../nonexistent/file
1460        // %252F = %2F (double encoded), so ..%252F..%252F = ..%2F..%2F (literal, won't decode to ..)
1461        let content = r#"
1462[Traversal attempt](..%2F..%2Fnonexistent_dir_12345%2Fmissing.md)
1463[Double encoded](..%252F..%252Fnonexistent%252Ffile.md)
1464[Safe link](safe.md)
1465"#;
1466
1467        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1468
1469        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1470        let result = rule.check(&ctx).unwrap();
1471
1472        // The traversal attempts should still be flagged as missing
1473        // (they don't exist relative to base_path after decoding)
1474        assert_eq!(
1475            result.len(),
1476            2,
1477            "Should have warnings for traversal attempts. Got: {result:?}"
1478        );
1479    }
1480
1481    #[test]
1482    fn test_url_encoded_utf8_filenames() {
1483        // Test with actual UTF-8 encoded filenames
1484        let temp_dir = tempdir().unwrap();
1485        let base_path = temp_dir.path();
1486
1487        // Create files with unicode names
1488        let cafe_file = base_path.join("café.md");
1489        File::create(&cafe_file).unwrap().write_all(b"# Cafe").unwrap();
1490
1491        let content = r#"
1492[Café link](caf%C3%A9.md)
1493[Missing unicode](r%C3%A9sum%C3%A9.md)
1494"#;
1495
1496        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1497
1498        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1499        let result = rule.check(&ctx).unwrap();
1500
1501        // Should only warn about the missing file
1502        assert_eq!(
1503            result.len(),
1504            1,
1505            "Should only warn about missing résumé.md. Got: {result:?}"
1506        );
1507        assert!(
1508            result[0].message.contains("r%C3%A9sum%C3%A9.md"),
1509            "Warning should mention the URL-encoded filename"
1510        );
1511    }
1512
1513    #[test]
1514    fn test_url_encoded_emoji_filenames() {
1515        // URL-encoded emoji paths should be correctly resolved
1516        // 👤 = U+1F464 = F0 9F 91 A4 in UTF-8
1517        let temp_dir = tempdir().unwrap();
1518        let base_path = temp_dir.path();
1519
1520        // Create directory with emoji in name: 👤 Personal
1521        let emoji_dir = base_path.join("👤 Personal");
1522        std::fs::create_dir(&emoji_dir).unwrap();
1523
1524        // Create file in that directory: TV Shows.md
1525        let file_path = emoji_dir.join("TV Shows.md");
1526        File::create(&file_path)
1527            .unwrap()
1528            .write_all(b"# TV Shows\n\nContent here.")
1529            .unwrap();
1530
1531        // Test content with URL-encoded emoji link
1532        // %F0%9F%91%A4 = 👤, %20 = space
1533        let content = r#"
1534# Test Document
1535
1536[TV Shows](./%F0%9F%91%A4%20Personal/TV%20Shows.md)
1537[Missing](./%F0%9F%91%A4%20Personal/Missing.md)
1538"#;
1539
1540        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1541
1542        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1543        let result = rule.check(&ctx).unwrap();
1544
1545        // Should only warn about the missing file, not the valid emoji path
1546        assert_eq!(result.len(), 1, "Should only warn about missing file. Got: {result:?}");
1547        assert!(
1548            result[0].message.contains("Missing.md"),
1549            "Warning should be for Missing.md, got: {}",
1550            result[0].message
1551        );
1552    }
1553
1554    #[test]
1555    fn test_no_warnings_without_base_path() {
1556        let rule = MD057ExistingRelativeLinks::new();
1557        let content = "[Link](missing.md)";
1558
1559        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1560        let result = rule.check(&ctx).unwrap();
1561        assert!(result.is_empty(), "Should have no warnings without base path");
1562    }
1563
1564    #[test]
1565    fn test_existing_and_missing_links() {
1566        // Create a temporary directory for test files
1567        let temp_dir = tempdir().unwrap();
1568        let base_path = temp_dir.path();
1569
1570        // Create an existing file
1571        let exists_path = base_path.join("exists.md");
1572        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
1573
1574        // Verify the file exists
1575        assert!(exists_path.exists(), "exists.md should exist for this test");
1576
1577        // Create test content with both existing and missing links
1578        let content = r#"
1579# Test Document
1580
1581[Valid Link](exists.md)
1582[Invalid Link](missing.md)
1583[External Link](https://example.com)
1584[Media Link](image.jpg)
1585        "#;
1586
1587        // Initialize rule with the base path (default: check all files including media)
1588        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1589
1590        // Test the rule
1591        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1592        let result = rule.check(&ctx).unwrap();
1593
1594        // Should have two warnings: missing.md and image.jpg (both don't exist)
1595        assert_eq!(result.len(), 2);
1596        let messages: Vec<_> = result.iter().map(|w| w.message.as_str()).collect();
1597        assert!(messages.iter().any(|m| m.contains("missing.md")));
1598        assert!(messages.iter().any(|m| m.contains("image.jpg")));
1599    }
1600
1601    #[test]
1602    fn test_angle_bracket_links() {
1603        // Create a temporary directory for test files
1604        let temp_dir = tempdir().unwrap();
1605        let base_path = temp_dir.path();
1606
1607        // Create an existing file
1608        let exists_path = base_path.join("exists.md");
1609        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
1610
1611        // Create test content with angle bracket links
1612        let content = r#"
1613# Test Document
1614
1615[Valid Link](<exists.md>)
1616[Invalid Link](<missing.md>)
1617[External Link](<https://example.com>)
1618    "#;
1619
1620        // Test with default settings
1621        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1622
1623        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1624        let result = rule.check(&ctx).unwrap();
1625
1626        // Should have one warning for missing.md
1627        assert_eq!(result.len(), 1, "Should have exactly one warning");
1628        assert!(
1629            result[0].message.contains("missing.md"),
1630            "Warning should mention missing.md"
1631        );
1632    }
1633
1634    #[test]
1635    fn test_angle_bracket_links_with_parens() {
1636        // Create a temporary directory for test files
1637        let temp_dir = tempdir().unwrap();
1638        let base_path = temp_dir.path();
1639
1640        // Create directory structure with parentheses in path
1641        let app_dir = base_path.join("app");
1642        std::fs::create_dir(&app_dir).unwrap();
1643        let upload_dir = app_dir.join("(upload)");
1644        std::fs::create_dir(&upload_dir).unwrap();
1645        let page_file = upload_dir.join("page.tsx");
1646        File::create(&page_file)
1647            .unwrap()
1648            .write_all(b"export default function Page() {}")
1649            .unwrap();
1650
1651        // Create test content with angle bracket links containing parentheses
1652        let content = r#"
1653# Test Document with Paths Containing Parens
1654
1655[Upload Page](<app/(upload)/page.tsx>)
1656[Unix pipe](<https://en.wikipedia.org/wiki/Pipeline_(Unix)>)
1657[Missing](<app/(missing)/file.md>)
1658"#;
1659
1660        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1661
1662        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1663        let result = rule.check(&ctx).unwrap();
1664
1665        // Should only have one warning for the missing file
1666        assert_eq!(
1667            result.len(),
1668            1,
1669            "Should have exactly one warning for missing file. Got: {result:?}"
1670        );
1671        assert!(
1672            result[0].message.contains("app/(missing)/file.md"),
1673            "Warning should mention app/(missing)/file.md"
1674        );
1675    }
1676
1677    #[test]
1678    fn test_all_file_types_checked() {
1679        // Create a temporary directory for test files
1680        let temp_dir = tempdir().unwrap();
1681        let base_path = temp_dir.path();
1682
1683        // Create a test with various file types - all should be checked
1684        let content = r#"
1685[Image Link](image.jpg)
1686[Video Link](video.mp4)
1687[Markdown Link](document.md)
1688[PDF Link](file.pdf)
1689"#;
1690
1691        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1692
1693        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1694        let result = rule.check(&ctx).unwrap();
1695
1696        // Should warn about all missing files regardless of extension
1697        assert_eq!(result.len(), 4, "Should have warnings for all missing files");
1698    }
1699
1700    #[test]
1701    fn test_code_span_detection() {
1702        let rule = MD057ExistingRelativeLinks::new();
1703
1704        // Create a temporary directory for test files
1705        let temp_dir = tempdir().unwrap();
1706        let base_path = temp_dir.path();
1707
1708        let rule = rule.with_path(base_path);
1709
1710        // Test with document structure
1711        let content = "This is a [link](nonexistent.md) and `[not a link](not-checked.md)` in code.";
1712
1713        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1714        let result = rule.check(&ctx).unwrap();
1715
1716        // Should only find the real link, not the one in code
1717        assert_eq!(result.len(), 1, "Should only flag the real link");
1718        assert!(result[0].message.contains("nonexistent.md"));
1719    }
1720
1721    #[test]
1722    fn test_inline_code_spans() {
1723        // Create a temporary directory for test files
1724        let temp_dir = tempdir().unwrap();
1725        let base_path = temp_dir.path();
1726
1727        // Create test content with links in inline code spans
1728        let content = r#"
1729# Test Document
1730
1731This is a normal link: [Link](missing.md)
1732
1733This is a code span with a link: `[Link](another-missing.md)`
1734
1735Some more text with `inline code [Link](yet-another-missing.md) embedded`.
1736
1737    "#;
1738
1739        // Initialize rule with the base path
1740        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1741
1742        // Test the rule
1743        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1744        let result = rule.check(&ctx).unwrap();
1745
1746        // Should only have warning for the normal link, not for links in code spans
1747        assert_eq!(result.len(), 1, "Should have exactly one warning");
1748        assert!(
1749            result[0].message.contains("missing.md"),
1750            "Warning should be for missing.md"
1751        );
1752        assert!(
1753            !result.iter().any(|w| w.message.contains("another-missing.md")),
1754            "Should not warn about link in code span"
1755        );
1756        assert!(
1757            !result.iter().any(|w| w.message.contains("yet-another-missing.md")),
1758            "Should not warn about link in inline code"
1759        );
1760    }
1761
1762    #[test]
1763    fn test_extensionless_link_resolution() {
1764        // Create a temporary directory for test files
1765        let temp_dir = tempdir().unwrap();
1766        let base_path = temp_dir.path();
1767
1768        // Create a markdown file WITHOUT specifying .md extension in the link
1769        let page_path = base_path.join("page.md");
1770        File::create(&page_path).unwrap().write_all(b"# Page").unwrap();
1771
1772        // Test content with extensionless link that should resolve to page.md
1773        let content = r#"
1774# Test Document
1775
1776[Link without extension](page)
1777[Link with extension](page.md)
1778[Missing link](nonexistent)
1779"#;
1780
1781        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1782
1783        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1784        let result = rule.check(&ctx).unwrap();
1785
1786        // Should only have warning for nonexistent link
1787        // Both "page" and "page.md" should resolve to the same file
1788        assert_eq!(result.len(), 1, "Should only warn about nonexistent link");
1789        assert!(
1790            result[0].message.contains("nonexistent"),
1791            "Warning should be for 'nonexistent' not 'page'"
1792        );
1793    }
1794
1795    // Cross-file validation tests
1796    #[test]
1797    fn test_cross_file_scope() {
1798        let rule = MD057ExistingRelativeLinks::new();
1799        assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
1800    }
1801
1802    #[test]
1803    fn test_contribute_to_index_extracts_markdown_links() {
1804        let rule = MD057ExistingRelativeLinks::new();
1805        let content = r#"
1806# Document
1807
1808[Link to docs](./docs/guide.md)
1809[Link with fragment](./other.md#section)
1810[External link](https://example.com)
1811[Image link](image.png)
1812[Media file](video.mp4)
1813"#;
1814
1815        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1816        let mut index = FileIndex::new();
1817        rule.contribute_to_index(&ctx, &mut index);
1818
1819        // Should only index markdown file links
1820        assert_eq!(index.cross_file_links.len(), 2);
1821
1822        // Check first link
1823        assert_eq!(index.cross_file_links[0].target_path, "./docs/guide.md");
1824        assert_eq!(index.cross_file_links[0].fragment, "");
1825
1826        // Check second link (with fragment)
1827        assert_eq!(index.cross_file_links[1].target_path, "./other.md");
1828        assert_eq!(index.cross_file_links[1].fragment, "section");
1829    }
1830
1831    #[test]
1832    fn test_contribute_to_index_skips_external_and_anchors() {
1833        let rule = MD057ExistingRelativeLinks::new();
1834        let content = r#"
1835# Document
1836
1837[External](https://example.com)
1838[Another external](http://example.org)
1839[Fragment only](#section)
1840[FTP link](ftp://files.example.com)
1841[Mail link](mailto:test@example.com)
1842[WWW link](www.example.com)
1843"#;
1844
1845        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1846        let mut index = FileIndex::new();
1847        rule.contribute_to_index(&ctx, &mut index);
1848
1849        // Should not index any of these
1850        assert_eq!(index.cross_file_links.len(), 0);
1851    }
1852
1853    #[test]
1854    fn test_cross_file_check_valid_link() {
1855        use crate::workspace_index::WorkspaceIndex;
1856
1857        let rule = MD057ExistingRelativeLinks::new();
1858
1859        // Create a workspace index with the target file
1860        let mut workspace_index = WorkspaceIndex::new();
1861        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());
1862
1863        // Create file index with a link to an existing file
1864        let mut file_index = FileIndex::new();
1865        file_index.add_cross_file_link(CrossFileLinkIndex {
1866            target_path: "guide.md".to_string(),
1867            fragment: "".to_string(),
1868            line: 5,
1869            column: 1,
1870        });
1871
1872        // Run cross-file check from docs/index.md
1873        let warnings = rule
1874            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1875            .unwrap();
1876
1877        // Should have no warnings - file exists
1878        assert!(warnings.is_empty());
1879    }
1880
1881    #[test]
1882    fn test_cross_file_check_missing_link() {
1883        use crate::workspace_index::WorkspaceIndex;
1884
1885        let rule = MD057ExistingRelativeLinks::new();
1886
1887        // Create an empty workspace index
1888        let workspace_index = WorkspaceIndex::new();
1889
1890        // Create file index with a link to a missing file
1891        let mut file_index = FileIndex::new();
1892        file_index.add_cross_file_link(CrossFileLinkIndex {
1893            target_path: "missing.md".to_string(),
1894            fragment: "".to_string(),
1895            line: 5,
1896            column: 1,
1897        });
1898
1899        // Run cross-file check
1900        let warnings = rule
1901            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1902            .unwrap();
1903
1904        // Should have one warning for the missing file
1905        assert_eq!(warnings.len(), 1);
1906        assert!(warnings[0].message.contains("missing.md"));
1907        assert!(warnings[0].message.contains("does not exist"));
1908    }
1909
1910    #[test]
1911    fn test_cross_file_check_parent_path() {
1912        use crate::workspace_index::WorkspaceIndex;
1913
1914        let rule = MD057ExistingRelativeLinks::new();
1915
1916        // Create a workspace index with the target file at the root
1917        let mut workspace_index = WorkspaceIndex::new();
1918        workspace_index.insert_file(PathBuf::from("readme.md"), FileIndex::new());
1919
1920        // Create file index with a parent path link
1921        let mut file_index = FileIndex::new();
1922        file_index.add_cross_file_link(CrossFileLinkIndex {
1923            target_path: "../readme.md".to_string(),
1924            fragment: "".to_string(),
1925            line: 5,
1926            column: 1,
1927        });
1928
1929        // Run cross-file check from docs/guide.md
1930        let warnings = rule
1931            .cross_file_check(Path::new("docs/guide.md"), &file_index, &workspace_index)
1932            .unwrap();
1933
1934        // Should have no warnings - file exists at normalized path
1935        assert!(warnings.is_empty());
1936    }
1937
1938    #[test]
1939    fn test_cross_file_check_html_link_with_md_source() {
1940        // Test that .html links are accepted when corresponding .md source exists
1941        // This supports mdBook and similar doc generators that compile .md to .html
1942        use crate::workspace_index::WorkspaceIndex;
1943
1944        let rule = MD057ExistingRelativeLinks::new();
1945
1946        // Create a workspace index with the .md source file
1947        let mut workspace_index = WorkspaceIndex::new();
1948        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());
1949
1950        // Create file index with an .html link (from another rule like MD051)
1951        let mut file_index = FileIndex::new();
1952        file_index.add_cross_file_link(CrossFileLinkIndex {
1953            target_path: "guide.html".to_string(),
1954            fragment: "section".to_string(),
1955            line: 10,
1956            column: 5,
1957        });
1958
1959        // Run cross-file check from docs/index.md
1960        let warnings = rule
1961            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1962            .unwrap();
1963
1964        // Should have no warnings - .md source exists for the .html link
1965        assert!(
1966            warnings.is_empty(),
1967            "Expected no warnings for .html link with .md source, got: {warnings:?}"
1968        );
1969    }
1970
1971    #[test]
1972    fn test_cross_file_check_html_link_without_source() {
1973        // Test that .html links without corresponding .md source ARE flagged
1974        use crate::workspace_index::WorkspaceIndex;
1975
1976        let rule = MD057ExistingRelativeLinks::new();
1977
1978        // Create an empty workspace index
1979        let workspace_index = WorkspaceIndex::new();
1980
1981        // Create file index with an .html link to a non-existent file
1982        let mut file_index = FileIndex::new();
1983        file_index.add_cross_file_link(CrossFileLinkIndex {
1984            target_path: "missing.html".to_string(),
1985            fragment: "".to_string(),
1986            line: 10,
1987            column: 5,
1988        });
1989
1990        // Run cross-file check from docs/index.md
1991        let warnings = rule
1992            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1993            .unwrap();
1994
1995        // Should have one warning - no .md source exists
1996        assert_eq!(warnings.len(), 1, "Expected 1 warning for .html link without source");
1997        assert!(warnings[0].message.contains("missing.html"));
1998    }
1999
2000    #[test]
2001    fn test_normalize_path_function() {
2002        // Test simple cases
2003        assert_eq!(
2004            normalize_path(Path::new("docs/guide.md")),
2005            PathBuf::from("docs/guide.md")
2006        );
2007
2008        // Test current directory removal
2009        assert_eq!(
2010            normalize_path(Path::new("./docs/guide.md")),
2011            PathBuf::from("docs/guide.md")
2012        );
2013
2014        // Test parent directory resolution
2015        assert_eq!(
2016            normalize_path(Path::new("docs/sub/../guide.md")),
2017            PathBuf::from("docs/guide.md")
2018        );
2019
2020        // Test multiple parent directories
2021        assert_eq!(normalize_path(Path::new("a/b/c/../../d.md")), PathBuf::from("a/d.md"));
2022    }
2023
2024    #[test]
2025    fn test_html_link_with_md_source() {
2026        // Links to .html files should pass if corresponding .md source exists
2027        let temp_dir = tempdir().unwrap();
2028        let base_path = temp_dir.path();
2029
2030        // Create guide.md (source file)
2031        let md_file = base_path.join("guide.md");
2032        File::create(&md_file).unwrap().write_all(b"# Guide").unwrap();
2033
2034        let content = r#"
2035[Read the guide](guide.html)
2036[Also here](getting-started.html)
2037"#;
2038
2039        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2040        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2041        let result = rule.check(&ctx).unwrap();
2042
2043        // guide.html passes (guide.md exists), getting-started.html fails
2044        assert_eq!(
2045            result.len(),
2046            1,
2047            "Should only warn about missing source. Got: {result:?}"
2048        );
2049        assert!(result[0].message.contains("getting-started.html"));
2050    }
2051
2052    #[test]
2053    fn test_htm_link_with_md_source() {
2054        // .htm extension should also check for markdown source
2055        let temp_dir = tempdir().unwrap();
2056        let base_path = temp_dir.path();
2057
2058        let md_file = base_path.join("page.md");
2059        File::create(&md_file).unwrap().write_all(b"# Page").unwrap();
2060
2061        let content = "[Page](page.htm)";
2062
2063        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2064        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2065        let result = rule.check(&ctx).unwrap();
2066
2067        assert!(
2068            result.is_empty(),
2069            "Should not warn when .md source exists for .htm link"
2070        );
2071    }
2072
2073    #[test]
2074    fn test_html_link_finds_various_markdown_extensions() {
2075        // Should find .mdx, .markdown, etc. as source files
2076        let temp_dir = tempdir().unwrap();
2077        let base_path = temp_dir.path();
2078
2079        File::create(base_path.join("doc.md")).unwrap();
2080        File::create(base_path.join("tutorial.mdx")).unwrap();
2081        File::create(base_path.join("guide.markdown")).unwrap();
2082
2083        let content = r#"
2084[Doc](doc.html)
2085[Tutorial](tutorial.html)
2086[Guide](guide.html)
2087"#;
2088
2089        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2090        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2091        let result = rule.check(&ctx).unwrap();
2092
2093        assert!(
2094            result.is_empty(),
2095            "Should find all markdown variants as source files. Got: {result:?}"
2096        );
2097    }
2098
2099    #[test]
2100    fn test_html_link_in_subdirectory() {
2101        // Should find markdown source in subdirectories
2102        let temp_dir = tempdir().unwrap();
2103        let base_path = temp_dir.path();
2104
2105        let docs_dir = base_path.join("docs");
2106        std::fs::create_dir(&docs_dir).unwrap();
2107        File::create(docs_dir.join("guide.md"))
2108            .unwrap()
2109            .write_all(b"# Guide")
2110            .unwrap();
2111
2112        let content = "[Guide](docs/guide.html)";
2113
2114        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2115        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2116        let result = rule.check(&ctx).unwrap();
2117
2118        assert!(result.is_empty(), "Should find markdown source in subdirectory");
2119    }
2120
2121    #[test]
2122    fn test_absolute_path_skipped_in_check() {
2123        // Test that absolute paths are skipped during link validation
2124        // This fixes the bug where /pkg/runtime was being flagged
2125        let temp_dir = tempdir().unwrap();
2126        let base_path = temp_dir.path();
2127
2128        let content = r#"
2129# Test Document
2130
2131[Go Runtime](/pkg/runtime)
2132[Go Runtime with Fragment](/pkg/runtime#section)
2133[API Docs](/api/v1/users)
2134[Blog Post](/blog/2024/release.html)
2135[React Hook](/react/hooks/use-state.html)
2136"#;
2137
2138        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2139        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2140        let result = rule.check(&ctx).unwrap();
2141
2142        // Should have NO warnings - all absolute paths should be skipped
2143        assert!(
2144            result.is_empty(),
2145            "Absolute paths should be skipped. Got warnings: {result:?}"
2146        );
2147    }
2148
2149    #[test]
2150    fn test_absolute_path_skipped_in_cross_file_check() {
2151        // Test that absolute paths are skipped in cross_file_check()
2152        use crate::workspace_index::WorkspaceIndex;
2153
2154        let rule = MD057ExistingRelativeLinks::new();
2155
2156        // Create an empty workspace index (no files exist)
2157        let workspace_index = WorkspaceIndex::new();
2158
2159        // Create file index with absolute path links (should be skipped)
2160        let mut file_index = FileIndex::new();
2161        file_index.add_cross_file_link(CrossFileLinkIndex {
2162            target_path: "/pkg/runtime.md".to_string(),
2163            fragment: "".to_string(),
2164            line: 5,
2165            column: 1,
2166        });
2167        file_index.add_cross_file_link(CrossFileLinkIndex {
2168            target_path: "/api/v1/users.md".to_string(),
2169            fragment: "section".to_string(),
2170            line: 10,
2171            column: 1,
2172        });
2173
2174        // Run cross-file check
2175        let warnings = rule
2176            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
2177            .unwrap();
2178
2179        // Should have NO warnings - absolute paths should be skipped
2180        assert!(
2181            warnings.is_empty(),
2182            "Absolute paths should be skipped in cross_file_check. Got warnings: {warnings:?}"
2183        );
2184    }
2185
2186    #[test]
2187    fn test_protocol_relative_url_not_skipped() {
2188        // Test that protocol-relative URLs (//example.com) are NOT skipped as absolute paths
2189        // They should still be caught by is_external_url() though
2190        let temp_dir = tempdir().unwrap();
2191        let base_path = temp_dir.path();
2192
2193        let content = r#"
2194# Test Document
2195
2196[External](//example.com/page)
2197[Another](//cdn.example.com/asset.js)
2198"#;
2199
2200        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2201        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2202        let result = rule.check(&ctx).unwrap();
2203
2204        // Should have NO warnings - protocol-relative URLs are external and should be skipped
2205        assert!(
2206            result.is_empty(),
2207            "Protocol-relative URLs should be skipped. Got warnings: {result:?}"
2208        );
2209    }
2210
2211    #[test]
2212    fn test_email_addresses_skipped() {
2213        // Test that email addresses without mailto: are skipped
2214        // These are clearly not file links (the @ symbol is definitive)
2215        let temp_dir = tempdir().unwrap();
2216        let base_path = temp_dir.path();
2217
2218        let content = r#"
2219# Test Document
2220
2221[Contact](user@example.com)
2222[Steering](steering@kubernetes.io)
2223[Support](john.doe+filter@company.co.uk)
2224[User](user_name@sub.domain.com)
2225"#;
2226
2227        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2228        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2229        let result = rule.check(&ctx).unwrap();
2230
2231        // Should have NO warnings - email addresses are clearly not file links and should be skipped
2232        assert!(
2233            result.is_empty(),
2234            "Email addresses should be skipped. Got warnings: {result:?}"
2235        );
2236    }
2237
/// Any target containing `@` is expected to be treated as an e-mail address and
/// skipped, even when it also looks like a file name (`user@file.md`).
/// File paths containing `@` are considered rare enough to accept this trade-off.
#[test]
fn test_email_addresses_vs_file_paths() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let workspace = tempdir().unwrap();

    let content = r#"
# Test Document

[Email](user@example.com)  <!-- Should be skipped (email) -->
[Email2](steering@kubernetes.io)  <!-- Should be skipped (email) -->
[Email3](user@file.md)  <!-- Should be skipped (has @, treated as email) -->
"#;

    let md057 = MD057ExistingRelativeLinks::new().with_path(workspace.path());
    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let result = md057.check(&lint_ctx).unwrap();

    // Every link above contains `@`, so no warning is expected at all.
    assert!(
        result.is_empty(),
        "All email addresses should be skipped. Got: {result:?}"
    );
}
2263
/// The diagnostic range must cover the URL of the link, not its text.
#[test]
fn test_diagnostic_position_accuracy() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let workspace = tempdir().unwrap();

    // 0-indexed offsets:  0         1         2         3
    //                     01234567890123456789012345678901
    let content = "prefix [text](missing.md) suffix";
    // "missing.md" spans 0-indexed offsets 14..24, i.e. 1-indexed column 15
    // with an exclusive end column of 25.

    let md057 = MD057ExistingRelativeLinks::new().with_path(workspace.path());
    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let result = md057.check(&lint_ctx).unwrap();

    assert_eq!(result.len(), 1, "Should have exactly one warning");

    let only = &result[0];
    assert_eq!(only.line, 1, "Should be on line 1");
    assert_eq!(only.column, 15, "Should point to start of URL 'missing.md'");
    assert_eq!(only.end_column, 25, "Should point past end of URL 'missing.md'");
}
2285
/// For `[text](<url>)` links the reported column must point at the URL itself,
/// i.e. after the opening `<`.
#[test]
fn test_diagnostic_position_angle_brackets() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let workspace = tempdir().unwrap();

    // 0-indexed offsets:  0         1
    //                     01234567890123456789
    let content = "[link](<missing.md>)";
    // 'm' of "missing.md" sits at 0-indexed offset 8, so 1-indexed column 9.

    let md057 = MD057ExistingRelativeLinks::new().with_path(workspace.path());
    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let result = md057.check(&lint_ctx).unwrap();

    assert_eq!(result.len(), 1, "Should have exactly one warning");

    let only = &result[0];
    assert_eq!(only.line, 1, "Should be on line 1");
    assert_eq!(only.column, 9, "Should point to start of URL in angle brackets");
}
2305
/// Warnings for links on different lines must carry the right 1-indexed line
/// number and name the matching target.
#[test]
fn test_diagnostic_position_multiline() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let workspace = tempdir().unwrap();

    let content = r#"# Title
Some text on line 2
[link on line 3](missing1.md)
More text
[link on line 5](missing2.md)"#;

    let md057 = MD057ExistingRelativeLinks::new().with_path(workspace.path());
    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let result = md057.check(&lint_ctx).unwrap();

    assert_eq!(result.len(), 2, "Should have two warnings");

    let (first, second) = (&result[0], &result[1]);

    // The link on line 3 is reported first.
    assert_eq!(first.line, 3, "First warning should be on line 3");
    assert!(first.message.contains("missing1.md"));

    // The link on line 5 is reported second.
    assert_eq!(second.line, 5, "Second warning should be on line 5");
    assert!(second.message.contains("missing2.md"));
}
2332
/// When the URL is padded with spaces inside the parentheses, the reported
/// column must point at the first character of the URL, not at the padding.
#[test]
fn test_diagnostic_position_with_spaces() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let workspace = tempdir().unwrap();

    // 0-indexed offsets:  01234567890123456789
    let content = "[link]( missing.md )";
    // 'm' of "missing.md" is at 0-indexed offset 8 (after "( "), i.e. column 9.

    let md057 = MD057ExistingRelativeLinks::new().with_path(workspace.path());
    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let result = md057.check(&lint_ctx).unwrap();

    assert_eq!(result.len(), 1, "Should have exactly one warning");

    // The surrounding spaces are not part of the reported URL range.
    let only = &result[0];
    assert_eq!(only.column, 9, "Should point to URL after stripping spaces");
}
2352
/// A missing image target produces one warning on the right line that names the
/// target. The column is only sanity-checked (non-zero) here, not pinned.
#[test]
fn test_diagnostic_position_image() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let workspace = tempdir().unwrap();

    let content = "![alt text](missing.jpg)";

    let md057 = MD057ExistingRelativeLinks::new().with_path(workspace.path());
    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let result = md057.check(&lint_ctx).unwrap();

    assert_eq!(result.len(), 1, "Should have exactly one warning for image");

    let only = &result[0];
    assert_eq!(only.line, 1);
    // Columns are 1-indexed, so any valid position is strictly positive.
    assert!(only.column > 0, "Should have valid column position");
    assert!(only.message.contains("missing.jpg"));
}
2371
/// `[[wikilinks]]` belong to a different linking system (e.g. Obsidian or wiki
/// software) and must not produce MD057 warnings; an ordinary missing markdown
/// link in the same document must still be reported.
#[test]
fn test_wikilinks_skipped() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let workspace = tempdir().unwrap();

    let content = r#"# Test Document

[[Microsoft#Windows OS]]
[[SomePage]]
[[Page With Spaces]]
[[path/to/page#section]]
[[page|Display Text]]

This is a [real missing link](missing.md) that should be flagged.
"#;

    let md057 = MD057ExistingRelativeLinks::new().with_path(workspace.path());
    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let result = md057.check(&lint_ctx).unwrap();

    // Exactly one warning: the regular link. The five wikilinks stay silent.
    assert_eq!(
        result.len(),
        1,
        "Should only warn about missing.md, not wikilinks. Got: {result:?}"
    );

    let only = &result[0];
    assert!(
        only.message.contains("missing.md"),
        "Warning should be for missing.md, not wikilinks"
    );
}
2405
/// `contribute_to_index` must record only regular markdown links in the
/// cross-file link index; `[[wikilinks]]` are expected to be left out.
#[test]
fn test_wikilinks_not_added_to_index() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let workspace = tempdir().unwrap();

    let content = r#"# Test Document

[[Microsoft#Windows OS]]
[[SomePage#section]]
[Regular Link](other.md)
"#;

    let md057 = MD057ExistingRelativeLinks::new().with_path(workspace.path());
    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    let mut index = FileIndex::new();
    md057.contribute_to_index(&lint_ctx, &mut index);

    // Only `[Regular Link](other.md)` should have been indexed.
    let cross_file_links = &index.cross_file_links;
    assert_eq!(
        cross_file_links.len(),
        1,
        "Only regular markdown links should be indexed, not wikilinks. Got: {cross_file_links:?}"
    );
    assert_eq!(cross_file_links[0].target_path, "other.md");
}
2435
/// Targets of reference definitions (`[ref]: ./path`) are checked like inline
/// links: each definition pointing at a missing file yields a warning.
#[test]
fn test_reference_definition_missing_file() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let workspace = tempdir().unwrap();

    let content = r#"# Test Document

[test]: ./missing.md
[example]: ./nonexistent.html

Use [test] and [example] here.
"#;

    let md057 = MD057ExistingRelativeLinks::new().with_path(workspace.path());
    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let result = md057.check(&lint_ctx).unwrap();

    // One warning per reference definition.
    assert_eq!(
        result.len(),
        2,
        "Should have warnings for missing reference definition targets. Got: {result:?}"
    );

    let mentions = |needle: &str| result.iter().any(|warning| warning.message.contains(needle));
    assert!(mentions("missing.md"), "Should warn about missing.md");
    assert!(mentions("nonexistent.html"), "Should warn about nonexistent.html");
}
2469
/// A reference definition whose target exists on disk must not be reported.
#[test]
fn test_reference_definition_existing_file() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let workspace = tempdir().unwrap();

    // Put the referenced file in place before linting.
    File::create(workspace.path().join("exists.md"))
        .unwrap()
        .write_all(b"# Existing file")
        .unwrap();

    let content = r#"# Test Document

[test]: ./exists.md

Use [test] here.
"#;

    let md057 = MD057ExistingRelativeLinks::new().with_path(workspace.path());
    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let result = md057.check(&lint_ctx).unwrap();

    // The target is present, so the result must be empty.
    assert!(
        result.is_empty(),
        "Should not warn about existing file. Got: {result:?}"
    );
}
2500
/// Reference definitions pointing at external URLs (`https`, `http`, `mailto`,
/// `ftp`) are skipped; a local missing file next to them is still reported.
#[test]
fn test_reference_definition_external_url_skipped() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let workspace = tempdir().unwrap();

    let content = r#"# Test Document

[google]: https://google.com
[example]: http://example.org
[mail]: mailto:test@example.com
[ftp]: ftp://files.example.com
[local]: ./missing.md

Use [google], [example], [mail], [ftp], [local] here.
"#;

    let md057 = MD057ExistingRelativeLinks::new().with_path(workspace.path());
    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let result = md057.check(&lint_ctx).unwrap();

    // Of the five definitions only `[local]` refers to the file system.
    assert_eq!(
        result.len(),
        1,
        "Should only warn about local missing file. Got: {result:?}"
    );

    let only = &result[0];
    assert!(
        only.message.contains("missing.md"),
        "Warning should be for missing.md"
    );
}
2533
/// A reference definition whose target is only a fragment (`#anchor`) names no
/// file and must therefore be skipped.
#[test]
fn test_reference_definition_fragment_only_skipped() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let workspace = tempdir().unwrap();

    let content = r#"# Test Document

[section]: #my-section

Use [section] here.
"#;

    let md057 = MD057ExistingRelativeLinks::new().with_path(workspace.path());
    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let result = md057.check(&lint_ctx).unwrap();

    // Fragment-only targets never produce a missing-file warning.
    assert!(
        result.is_empty(),
        "Should not warn about fragment-only reference. Got: {result:?}"
    );
}
2557
/// For a reference definition the reported column must point at the URL, not
/// at the `[label]:` part.
#[test]
fn test_reference_definition_column_position() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let workspace = tempdir().unwrap();

    // 0-indexed offsets:  0         1
    //                     0123456789012345678
    let content = "[ref]: ./missing.md";
    // "./missing.md" begins at 0-indexed offset 7, i.e. 1-indexed column 8.

    let md057 = MD057ExistingRelativeLinks::new().with_path(workspace.path());
    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let result = md057.check(&lint_ctx).unwrap();

    assert_eq!(result.len(), 1, "Should have exactly one warning");

    let only = &result[0];
    assert_eq!(only.line, 1, "Should be on line 1");
    assert_eq!(only.column, 8, "Should point to start of URL './missing.md'");
}
2578
/// A reference definition pointing at `name.html` is accepted when a matching
/// `name.md` source exists; without such a source it is reported.
#[test]
fn test_reference_definition_html_with_md_source() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let workspace = tempdir().unwrap();

    // Only the markdown source for `guide.html` is created.
    File::create(workspace.path().join("guide.md"))
        .unwrap()
        .write_all(b"# Guide")
        .unwrap();

    let content = r#"# Test Document

[guide]: ./guide.html
[missing]: ./missing.html

Use [guide] and [missing] here.
"#;

    let md057 = MD057ExistingRelativeLinks::new().with_path(workspace.path());
    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let result = md057.check(&lint_ctx).unwrap();

    // `guide.html` is backed by `guide.md`; `missing.html` has no source.
    assert_eq!(
        result.len(),
        1,
        "Should only warn about missing source. Got: {result:?}"
    );

    let only = &result[0];
    assert!(only.message.contains("missing.html"));
}
2609
/// Percent-encoded targets in reference definitions are decoded before the
/// existence check: `file%20with%20spaces.md` matches `file with spaces.md`.
/// The warning for a missing target still quotes the encoded form.
#[test]
fn test_reference_definition_url_encoded() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let workspace = tempdir().unwrap();

    // The on-disk name contains literal spaces.
    File::create(workspace.path().join("file with spaces.md"))
        .unwrap()
        .write_all(b"# Spaces")
        .unwrap();

    let content = r#"# Test Document

[spaces]: ./file%20with%20spaces.md
[missing]: ./missing%20file.md

Use [spaces] and [missing] here.
"#;

    let md057 = MD057ExistingRelativeLinks::new().with_path(workspace.path());
    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let result = md057.check(&lint_ctx).unwrap();

    // Only the target with no file behind it is reported.
    assert_eq!(
        result.len(),
        1,
        "Should only warn about missing URL-encoded file. Got: {result:?}"
    );

    let only = &result[0];
    assert!(only.message.contains("missing%20file.md"));
}
2640
/// Inline links and reference definitions in the same document are both
/// validated; each missing target gets its own warning.
#[test]
fn test_inline_and_reference_both_checked() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let workspace = tempdir().unwrap();

    let content = r#"# Test Document

[inline link](./inline-missing.md)
[ref]: ./ref-missing.md

Use [ref] here.
"#;

    let md057 = MD057ExistingRelativeLinks::new().with_path(workspace.path());
    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let result = md057.check(&lint_ctx).unwrap();

    // One warning for the inline link, one for the reference definition.
    assert_eq!(
        result.len(),
        2,
        "Should warn about both inline and reference links. Got: {result:?}"
    );

    let mentions = |needle: &str| result.iter().any(|warning| warning.message.contains(needle));
    assert!(mentions("inline-missing.md"), "Should warn about inline-missing.md");
    assert!(mentions("ref-missing.md"), "Should warn about ref-missing.md");
}
2674
/// Regression test for issue #286: a footnote definition (`[^1]: ...`) must not
/// be mistaken for a reference definition and reported as a broken link.
#[test]
fn test_footnote_definitions_not_flagged() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let md057 = MD057ExistingRelativeLinks::default();

    let content = r#"# Title

A footnote[^1].

[^1]: [link](https://www.google.com).
"#;

    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let result = md057.check(&lint_ctx).unwrap();

    // The only link in the document is external, so nothing may be reported.
    assert!(
        result.is_empty(),
        "Footnote definitions should not trigger MD057 warnings. Got: {result:?}"
    );
}
2696
/// Footnote definitions whose text contains relative links must not be parsed
/// as reference definitions with the footnote text as their "URL".
///
/// This test asserts only that no warning message quotes the footnote's link
/// markup (`[this file]`, `[missing]`). It does not pin whether the inline links
/// inside the footnotes are themselves reported.
#[test]
fn test_footnote_with_relative_link_inside() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let md057 = MD057ExistingRelativeLinks::default();

    let content = r#"# Title

See the footnote[^1].

[^1]: Check out [this file](./existing.md) for more info.
[^2]: Also see [missing](./does-not-exist.md).
"#;

    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let result = md057.check(&lint_ctx).unwrap();

    // If `[^1]:` / `[^2]:` were treated as reference definitions, the reported
    // target would contain the bracketed link text of the footnote body.
    for warning in &result {
        for link_text in ["[this file]", "[missing]"] {
            assert!(
                !warning.message.contains(link_text),
                "Footnote content should not be treated as URL: {warning:?}"
            );
        }
    }
}
2729
/// With footnotes and ordinary reference definitions in one document, the
/// footnote is skipped while the reference definition is still validated.
#[test]
fn test_mixed_footnotes_and_reference_definitions() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let workspace = tempdir().unwrap();
    let md057 = MD057ExistingRelativeLinks::new().with_path(workspace.path());

    let content = r#"# Title

A footnote[^1] and a [ref link][myref].

[^1]: This is a footnote with [link](https://example.com).

[myref]: ./missing-file.md "This should be checked"
"#;

    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let result = md057.check(&lint_ctx).unwrap();

    // Only `[myref]` points at the file system; the footnote link is external.
    assert_eq!(
        result.len(),
        1,
        "Should only warn about the regular reference definition. Got: {result:?}"
    );

    let only = &result[0];
    assert!(
        only.message.contains("missing-file.md"),
        "Should warn about missing-file.md in reference definition"
    );
}
2761
/// With the default configuration, absolute paths (leading `/`) are not
/// validated, whether they appear in links, images or reference definitions.
#[test]
fn test_absolute_links_ignore_by_default() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let workspace = tempdir().unwrap();
    let md057 = MD057ExistingRelativeLinks::new().with_path(workspace.path());

    let content = r#"# Links

[API docs](/api/v1/users)
[Blog post](/blog/2024/release.html)
![Logo](/assets/logo.png)

[ref]: /docs/reference.md
"#;

    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let result = md057.check(&lint_ctx).unwrap();

    // All four targets are absolute, so the default setting reports nothing.
    assert!(
        result.is_empty(),
        "Absolute links should be ignored by default. Got: {result:?}"
    );
}
2788
/// With `absolute_links` set to `Warn`, every absolute link yields a warning
/// that explains the link cannot be validated locally and names the path.
#[test]
fn test_absolute_links_warn_config() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let workspace = tempdir().unwrap();

    let warn_on_absolute = MD057Config {
        absolute_links: AbsoluteLinksOption::Warn,
        ..Default::default()
    };
    let md057 = MD057ExistingRelativeLinks::from_config_struct(warn_on_absolute).with_path(workspace.path());

    let content = r#"# Links

[API docs](/api/v1/users)
[Blog post](/blog/2024/release.html)
"#;

    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let result = md057.check(&lint_ctx).unwrap();

    // One warning per absolute link.
    assert_eq!(
        result.len(),
        2,
        "Should warn about both absolute links. Got: {result:?}"
    );

    // Only the first warning's wording is inspected.
    let first = &result[0];
    assert!(
        first.message.contains("cannot be validated locally"),
        "Warning should explain why: {}",
        first.message
    );
    assert!(
        first.message.contains("/api/v1/users"),
        "Warning should include the link path"
    );
}
2826
/// With `absolute_links` set to `Warn`, an image whose source is an absolute
/// path is reported as well, and the warning names that path.
#[test]
fn test_absolute_links_warn_images() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let workspace = tempdir().unwrap();

    let warn_on_absolute = MD057Config {
        absolute_links: AbsoluteLinksOption::Warn,
        ..Default::default()
    };
    let md057 = MD057ExistingRelativeLinks::from_config_struct(warn_on_absolute).with_path(workspace.path());

    let content = r#"# Images

![Logo](/assets/logo.png)
"#;

    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let result = md057.check(&lint_ctx).unwrap();

    assert_eq!(
        result.len(),
        1,
        "Should warn about absolute image path. Got: {result:?}"
    );

    let only = &result[0];
    assert!(
        only.message.contains("/assets/logo.png"),
        "Warning should include the image path"
    );
}
2857
/// With `absolute_links` set to `Warn`, a reference definition whose target is
/// an absolute path is reported as well, and the warning names that path.
#[test]
fn test_absolute_links_warn_reference_definitions() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    let workspace = tempdir().unwrap();

    let warn_on_absolute = MD057Config {
        absolute_links: AbsoluteLinksOption::Warn,
        ..Default::default()
    };
    let md057 = MD057ExistingRelativeLinks::from_config_struct(warn_on_absolute).with_path(workspace.path());

    let content = r#"# Reference

See the [docs][ref].

[ref]: /docs/reference.md
"#;

    let lint_ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let result = md057.check(&lint_ctx).unwrap();

    assert_eq!(
        result.len(),
        1,
        "Should warn about absolute reference definition. Got: {result:?}"
    );

    let only = &result[0];
    assert!(
        only.message.contains("/docs/reference.md"),
        "Warning should include the reference path"
    );
}
2890}