Skip to main content

rumdl_lib/rules/
md057_existing_relative_links.rs

//!
//! Rule MD057: Existing relative links
//!
//! See [docs/md057.md](../../docs/md057.md) for full documentation, configuration, and examples.
5
6use crate::rule::{
7    CrossFileScope, Fix, FixCapability, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity,
8};
9use crate::workspace_index::{FileIndex, extract_cross_file_links};
10use regex::Regex;
11use std::collections::HashMap;
12use std::env;
13use std::path::{Path, PathBuf};
14use std::sync::LazyLock;
15use std::sync::{Arc, Mutex};
16
17mod md057_config;
18use crate::rule_config_serde::RuleConfig;
19use crate::utils::mkdocs_config::resolve_docs_dir;
20pub use md057_config::{AbsoluteLinksOption, MD057Config};
21
/// Process-wide memo of filesystem existence checks, keyed by the exact path queried.
///
/// Guarded by a mutex so it can be shared between threads; starts out empty and is
/// built lazily on first access.
static FILE_EXISTENCE_CACHE: LazyLock<Arc<Mutex<HashMap<PathBuf, bool>>>> = LazyLock::new(Default::default);
25
26// Reset the file existence cache (typically between rule runs)
27fn reset_file_existence_cache() {
28    if let Ok(mut cache) = FILE_EXISTENCE_CACHE.lock() {
29        cache.clear();
30    }
31}
32
33// Check if a file exists with caching
34fn file_exists_with_cache(path: &Path) -> bool {
35    match FILE_EXISTENCE_CACHE.lock() {
36        Ok(mut cache) => *cache.entry(path.to_path_buf()).or_insert_with(|| path.exists()),
37        Err(_) => path.exists(), // Fallback to uncached check on mutex poison
38    }
39}
40
41/// Check if a file exists, also trying markdown extensions for extensionless links.
42/// This supports wiki-style links like `[Link](page)` that resolve to `page.md`.
43fn file_exists_or_markdown_extension(path: &Path) -> bool {
44    // First, check exact path
45    if file_exists_with_cache(path) {
46        return true;
47    }
48
49    // If the path has no extension, try adding markdown extensions
50    if path.extension().is_none() {
51        for ext in MARKDOWN_EXTENSIONS {
52            // MARKDOWN_EXTENSIONS includes the dot, e.g., ".md"
53            let path_with_ext = path.with_extension(&ext[1..]);
54            if file_exists_with_cache(&path_with_ext) {
55                return true;
56            }
57        }
58    }
59
60    false
61}
62
63// Regex to match the start of a link - simplified for performance
64static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());
65
66/// Regex to extract the URL from an angle-bracketed markdown link
67/// Format: `](<URL>)` or `](<URL> "title")`
68/// This handles URLs with parentheses like `](<path/(with)/parens.md>)`
69static URL_EXTRACT_ANGLE_BRACKET_REGEX: LazyLock<Regex> =
70    LazyLock::new(|| Regex::new(r#"\]\(\s*<([^>]+)>(#[^\)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());
71
72/// Regex to extract the URL from a normal markdown link (without angle brackets)
73/// Format: `](URL)` or `](URL "title")`
74static URL_EXTRACT_REGEX: LazyLock<Regex> =
75    LazyLock::new(|| Regex::new("\\]\\(\\s*([^>\\)\\s#]+)(#[^)\\s]*)?\\s*(?:\"[^\"]*\")?\\s*\\)").unwrap());
76
77/// Regex to detect URLs with explicit schemes (should not be checked as relative links)
78/// Matches: scheme:// or scheme: (per RFC 3986)
79/// This covers http, https, ftp, file, smb, mailto, tel, data, macappstores, etc.
80static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
81    LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());
82
/// Current working directory, resolved once on first use.
/// Falls back to `"."` when the working directory cannot be determined.
static CURRENT_DIR: LazyLock<PathBuf> = LazyLock::new(|| match env::current_dir() {
    Ok(cwd) => cwd,
    Err(_) => PathBuf::from("."),
});
85
/// Map an ASCII hex digit (`0-9`, `a-f`, `A-F`) to its numeric value (0..=15).
///
/// Any other byte, including non-ASCII bytes, yields `None`.
#[inline]
fn hex_digit_to_value(byte: u8) -> Option<u8> {
    // `to_digit(16)` only ever returns values below 16, so the narrowing cast is lossless.
    (byte as char).to_digit(16).map(|digit| digit as u8)
}
97
/// Supported markdown file extensions.
///
/// Every entry includes its leading dot; code that feeds these to
/// `Path::with_extension` strips the dot first.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md", ".markdown", ".mdx", ".mkd", ".mkdn", ".mdown", ".mdwn", ".qmd", ".rmd",
];
110
/// Rule MD057: Existing relative links should point to valid files or directories.
///
/// Because `base_path` lives behind an `Arc`, clones of a rule instance share the
/// same base-path slot.
#[derive(Debug, Clone)]
pub struct MD057ExistingRelativeLinks {
    /// Explicit base directory for resolving relative links.
    /// `None` until set through `with_path`; when set, `check` uses it in
    /// preference to the directory of the file being linted.
    base_path: Arc<Mutex<Option<PathBuf>>>,
    /// Configuration for the rule
    config: MD057Config,
}
119
120impl Default for MD057ExistingRelativeLinks {
121    fn default() -> Self {
122        Self {
123            base_path: Arc::new(Mutex::new(None)),
124            config: MD057Config::default(),
125        }
126    }
127}
128
129impl MD057ExistingRelativeLinks {
130    /// Create a new instance with default settings
131    pub fn new() -> Self {
132        Self::default()
133    }
134
135    /// Set the base path for resolving relative links
136    pub fn with_path<P: AsRef<Path>>(self, path: P) -> Self {
137        let path = path.as_ref();
138        let dir_path = if path.is_file() {
139            path.parent().map(|p| p.to_path_buf())
140        } else {
141            Some(path.to_path_buf())
142        };
143
144        if let Ok(mut guard) = self.base_path.lock() {
145            *guard = dir_path;
146        }
147        self
148    }
149
150    pub fn from_config_struct(config: MD057Config) -> Self {
151        Self {
152            base_path: Arc::new(Mutex::new(None)),
153            config,
154        }
155    }
156
157    /// Check if a URL is external or should be skipped for validation.
158    ///
159    /// Returns `true` (skip validation) for:
160    /// - URLs with protocols: `https://`, `http://`, `ftp://`, `mailto:`, etc.
161    /// - Bare domains: `www.example.com`, `example.com`
162    /// - Email addresses: `user@example.com` (without `mailto:`)
163    /// - Template variables: `{{URL}}`, `{{% include %}}`
164    /// - Absolute web URL paths: `/api/docs`, `/blog/post.html`
165    ///
166    /// Returns `false` (validate) for:
167    /// - Relative filesystem paths: `./file.md`, `../parent/file.md`, `file.md`
168    #[inline]
169    fn is_external_url(&self, url: &str) -> bool {
170        if url.is_empty() {
171            return false;
172        }
173
174        // Quick checks for common external URL patterns
175        if PROTOCOL_DOMAIN_REGEX.is_match(url) || url.starts_with("www.") {
176            return true;
177        }
178
179        // Skip template variables (Handlebars/Mustache/Jinja2 syntax)
180        // Examples: {{URL}}, {{#URL}}, {{> partial}}, {{% include %}}, {{ variable }}
181        if url.starts_with("{{") || url.starts_with("{%") {
182            return true;
183        }
184
185        // Simple check: if URL contains @, it's almost certainly an email address
186        // File paths with @ are extremely rare, so this is a safe heuristic
187        if url.contains('@') {
188            return true; // It's an email address, skip it
189        }
190
191        // Bare domain check (e.g., "example.com")
192        // Note: We intentionally DON'T skip all TLDs like .org, .net, etc.
193        // Links like [text](nodejs.org/path) without a protocol are broken -
194        // they'll be treated as relative paths by markdown renderers.
195        // Flagging them helps users find missing protocols.
196        // We only skip .com as a minimal safety net for the most common case.
197        if url.ends_with(".com") {
198            return true;
199        }
200
201        // Framework path aliases (resolved by build tools like Vite, webpack, etc.)
202        // These are not filesystem paths but module/asset aliases
203        // Examples: ~/assets/image.png, @images/photo.jpg, @/components/Button.vue
204        if url.starts_with('~') || url.starts_with('@') {
205            return true;
206        }
207
208        // All other cases (relative paths, etc.) are not external
209        false
210    }
211
212    /// Check if the URL is a fragment-only link (internal document link)
213    #[inline]
214    fn is_fragment_only_link(&self, url: &str) -> bool {
215        url.starts_with('#')
216    }
217
218    /// Check if the URL is an absolute path (starts with /)
219    /// These are typically routes for published documentation sites.
220    #[inline]
221    fn is_absolute_path(url: &str) -> bool {
222        url.starts_with('/')
223    }
224
225    /// Decode URL percent-encoded sequences in a path.
226    /// Converts `%20` to space, `%2F` to `/`, etc.
227    /// Returns the original string if decoding fails or produces invalid UTF-8.
228    fn url_decode(path: &str) -> String {
229        // Quick check: if no percent sign, return as-is
230        if !path.contains('%') {
231            return path.to_string();
232        }
233
234        let bytes = path.as_bytes();
235        let mut result = Vec::with_capacity(bytes.len());
236        let mut i = 0;
237
238        while i < bytes.len() {
239            if bytes[i] == b'%' && i + 2 < bytes.len() {
240                // Try to parse the two hex digits following %
241                let hex1 = bytes[i + 1];
242                let hex2 = bytes[i + 2];
243                if let (Some(d1), Some(d2)) = (hex_digit_to_value(hex1), hex_digit_to_value(hex2)) {
244                    result.push(d1 * 16 + d2);
245                    i += 3;
246                    continue;
247                }
248            }
249            result.push(bytes[i]);
250            i += 1;
251        }
252
253        // Convert to UTF-8, falling back to original if invalid
254        String::from_utf8(result).unwrap_or_else(|_| path.to_string())
255    }
256
257    /// Strip query parameters and fragments from a URL for file existence checking.
258    /// URLs like `path/to/image.png?raw=true` or `file.md#section` should check
259    /// for `path/to/image.png` or `file.md` respectively.
260    ///
261    /// Note: In standard URLs, query parameters (`?`) come before fragments (`#`),
262    /// so we check for `?` first. If a URL has both, only the query is stripped here
263    /// (fragments are handled separately by the regex in `contribute_to_index`).
264    fn strip_query_and_fragment(url: &str) -> &str {
265        // Find the first occurrence of '?' or '#', whichever comes first
266        // This handles both standard URLs (? before #) and edge cases (# before ?)
267        let query_pos = url.find('?');
268        let fragment_pos = url.find('#');
269
270        match (query_pos, fragment_pos) {
271            (Some(q), Some(f)) => {
272                // Both exist - strip at whichever comes first
273                &url[..q.min(f)]
274            }
275            (Some(q), None) => &url[..q],
276            (None, Some(f)) => &url[..f],
277            (None, None) => url,
278        }
279    }
280
281    /// Resolve a relative link against a provided base path
282    fn resolve_link_path_with_base(link: &str, base_path: &Path) -> PathBuf {
283        base_path.join(link)
284    }
285
286    /// Check if a relative link can be compacted and return the simplified form.
287    ///
288    /// Returns `None` if compact-paths is disabled, the link has no traversal,
289    /// or the link is already the shortest form.
290    /// Returns `Some(suggestion)` with the full compacted URL (including fragment/query suffix).
291    fn compact_path_suggestion(&self, url: &str, base_path: &Path) -> Option<String> {
292        if !self.config.compact_paths {
293            return None;
294        }
295
296        // Split URL into path and suffix (fragment/query)
297        let path_end = url
298            .find('?')
299            .unwrap_or(url.len())
300            .min(url.find('#').unwrap_or(url.len()));
301        let path_part = &url[..path_end];
302        let suffix = &url[path_end..];
303
304        // URL-decode the path portion for filesystem resolution
305        let decoded_path = Self::url_decode(path_part);
306
307        compute_compact_path(base_path, &decoded_path).map(|compact| format!("{compact}{suffix}"))
308    }
309
310    /// Validate an absolute link by resolving it relative to MkDocs docs_dir.
311    ///
312    /// Returns `Some(warning_message)` if the link is broken, `None` if valid.
313    /// Falls back to a generic warning if no mkdocs.yml is found.
314    fn validate_absolute_link_via_docs_dir(url: &str, source_path: &Path) -> Option<String> {
315        let Some(docs_dir) = resolve_docs_dir(source_path) else {
316            // No mkdocs.yml found — fall back to warn behavior
317            return Some(format!(
318                "Absolute link '{url}' cannot be validated locally (no mkdocs.yml found)"
319            ));
320        };
321
322        // Strip leading / and resolve relative to docs_dir
323        let relative_url = url.trim_start_matches('/');
324
325        // Strip query/fragment before checking existence
326        let file_path = Self::strip_query_and_fragment(relative_url);
327        let decoded = Self::url_decode(file_path);
328        let resolved_path = docs_dir.join(&decoded);
329
330        // For directory-style links (ending with /, bare path to a directory, or empty
331        // decoded path like "/"), check for index.md inside the directory.
332        // This must be checked BEFORE file_exists_or_markdown_extension because
333        // path.exists() returns true for directories — we need to verify index.md exists.
334        let is_directory_link = url.ends_with('/') || decoded.is_empty();
335        if is_directory_link || resolved_path.is_dir() {
336            let index_path = resolved_path.join("index.md");
337            if file_exists_with_cache(&index_path) {
338                return None; // Valid directory link with index.md
339            }
340            // Directory exists but no index.md — fall through to error
341            if resolved_path.is_dir() {
342                return Some(format!(
343                    "Absolute link '{url}' resolves to directory '{}' which has no index.md",
344                    resolved_path.display()
345                ));
346            }
347        }
348
349        // Check existence (with markdown extension fallback for extensionless links)
350        if file_exists_or_markdown_extension(&resolved_path) {
351            return None; // Valid link
352        }
353
354        // For .html/.htm links, check for corresponding markdown source
355        if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
356            && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
357            && let (Some(stem), Some(parent)) = (
358                resolved_path.file_stem().and_then(|s| s.to_str()),
359                resolved_path.parent(),
360            )
361        {
362            let has_md_source = MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
363                let source_path = parent.join(format!("{stem}{md_ext}"));
364                file_exists_with_cache(&source_path)
365            });
366            if has_md_source {
367                return None; // Markdown source exists
368            }
369        }
370
371        Some(format!(
372            "Absolute link '{url}' resolves to '{}' which does not exist",
373            resolved_path.display()
374        ))
375    }
376}
377
378impl Rule for MD057ExistingRelativeLinks {
    /// Rule identifier; also used as the `rule_name` on warnings emitted by `check`.
    fn name(&self) -> &'static str {
        "MD057"
    }
382
    /// One-line human-readable summary of what this rule enforces.
    fn description(&self) -> &'static str {
        "Relative links should point to existing files"
    }
386
    /// This rule belongs to the link category.
    fn category(&self) -> RuleCategory {
        RuleCategory::Link
    }
390
    /// Returns `true` when the document is empty or the context reports that it is
    /// unlikely to contain any links or images, so the rule has nothing to check.
    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
    }
394
395    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
396        let content = ctx.content;
397
398        // Early returns for performance
399        if content.is_empty() || !content.contains('[') {
400            return Ok(Vec::new());
401        }
402
403        // Quick check for any potential links before expensive operations
404        // Check for inline links "](", reference definitions "]:", or images "!["
405        if !content.contains("](") && !content.contains("]:") {
406            return Ok(Vec::new());
407        }
408
409        // Reset the file existence cache for a fresh run
410        reset_file_existence_cache();
411
412        let mut warnings = Vec::new();
413
414        // Determine base path for resolving relative links
415        // ALWAYS compute from ctx.source_file for each file - do not reuse cached base_path
416        // This ensures each file resolves links relative to its own directory
417        let base_path: Option<PathBuf> = {
418            // First check if base_path was explicitly set via with_path() (for tests)
419            let explicit_base = self.base_path.lock().ok().and_then(|g| g.clone());
420            if explicit_base.is_some() {
421                explicit_base
422            } else if let Some(ref source_file) = ctx.source_file {
423                // Resolve symlinks to get the actual file location
424                // This ensures relative links are resolved from the target's directory,
425                // not the symlink's directory
426                let resolved_file = source_file.canonicalize().unwrap_or_else(|_| source_file.clone());
427                resolved_file
428                    .parent()
429                    .map(|p| p.to_path_buf())
430                    .or_else(|| Some(CURRENT_DIR.clone()))
431            } else {
432                // No source file available - cannot validate relative links
433                None
434            }
435        };
436
437        // If we still don't have a base path, we can't validate relative links
438        let Some(base_path) = base_path else {
439            return Ok(warnings);
440        };
441
442        // Use LintContext links instead of expensive regex parsing
443        if !ctx.links.is_empty() {
444            // Use LineIndex for correct position calculation across all line ending types
445            let line_index = &ctx.line_index;
446
447            // Pre-collected lines from context
448            let lines = ctx.raw_lines();
449
450            // Track which lines we've already processed to avoid duplicates
451            // (ctx.links may have multiple entries for the same line, especially with malformed markdown)
452            let mut processed_lines = std::collections::HashSet::new();
453
454            for link in &ctx.links {
455                let line_idx = link.line - 1;
456                if line_idx >= lines.len() {
457                    continue;
458                }
459
460                // Skip lines inside PyMdown blocks
461                if ctx.line_info(link.line).is_some_and(|info| info.in_pymdown_block) {
462                    continue;
463                }
464
465                // Skip if we've already processed this line
466                if !processed_lines.insert(line_idx) {
467                    continue;
468                }
469
470                let line = lines[line_idx];
471
472                // Quick check for link pattern in this line
473                if !line.contains("](") {
474                    continue;
475                }
476
477                // Find all links in this line using optimized regex
478                for link_match in LINK_START_REGEX.find_iter(line) {
479                    let start_pos = link_match.start();
480                    let end_pos = link_match.end();
481
482                    // Calculate absolute position using LineIndex
483                    let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
484                    let absolute_start_pos = line_start_byte + start_pos;
485
486                    // Skip if this link is in a code span
487                    if ctx.is_in_code_span_byte(absolute_start_pos) {
488                        continue;
489                    }
490
491                    // Skip if this link is in a math span (LaTeX $...$ or $$...$$)
492                    if ctx.is_in_math_span(absolute_start_pos) {
493                        continue;
494                    }
495
496                    // Find the URL part after the link text
497                    // Try angle-bracket regex first (handles URLs with parens like `<path/(with)/parens.md>`)
498                    // Then fall back to normal URL regex
499                    let caps_and_url = URL_EXTRACT_ANGLE_BRACKET_REGEX
500                        .captures_at(line, end_pos - 1)
501                        .and_then(|caps| caps.get(1).map(|g| (caps, g)))
502                        .or_else(|| {
503                            URL_EXTRACT_REGEX
504                                .captures_at(line, end_pos - 1)
505                                .and_then(|caps| caps.get(1).map(|g| (caps, g)))
506                        });
507
508                    if let Some((caps, url_group)) = caps_and_url {
509                        let url = url_group.as_str().trim();
510
511                        // Skip empty URLs
512                        if url.is_empty() {
513                            continue;
514                        }
515
516                        // Skip rustdoc intra-doc links (backtick-wrapped URLs)
517                        // These are Rust API references, not file paths
518                        // Example: [`f32::is_subnormal`], [`Vec::push`]
519                        if url.starts_with('`') && url.ends_with('`') {
520                            continue;
521                        }
522
523                        // Skip external URLs and fragment-only links
524                        if self.is_external_url(url) || self.is_fragment_only_link(url) {
525                            continue;
526                        }
527
528                        // Handle absolute paths based on config
529                        if Self::is_absolute_path(url) {
530                            match self.config.absolute_links {
531                                AbsoluteLinksOption::Warn => {
532                                    let url_start = url_group.start();
533                                    let url_end = url_group.end();
534                                    warnings.push(LintWarning {
535                                        rule_name: Some(self.name().to_string()),
536                                        line: link.line,
537                                        column: url_start + 1,
538                                        end_line: link.line,
539                                        end_column: url_end + 1,
540                                        message: format!("Absolute link '{url}' cannot be validated locally"),
541                                        severity: Severity::Warning,
542                                        fix: None,
543                                    });
544                                }
545                                AbsoluteLinksOption::RelativeToDocs => {
546                                    if let Some(msg) = Self::validate_absolute_link_via_docs_dir(url, &base_path) {
547                                        let url_start = url_group.start();
548                                        let url_end = url_group.end();
549                                        warnings.push(LintWarning {
550                                            rule_name: Some(self.name().to_string()),
551                                            line: link.line,
552                                            column: url_start + 1,
553                                            end_line: link.line,
554                                            end_column: url_end + 1,
555                                            message: msg,
556                                            severity: Severity::Warning,
557                                            fix: None,
558                                        });
559                                    }
560                                }
561                                AbsoluteLinksOption::Ignore => {}
562                            }
563                            continue;
564                        }
565
566                        // Check for unnecessary path traversal (compact-paths)
567                        // Reconstruct full URL including fragment (regex group 2)
568                        // since url_group (group 1) contains only the path part
569                        let full_url_for_compact = if let Some(frag) = caps.get(2) {
570                            format!("{url}{}", frag.as_str())
571                        } else {
572                            url.to_string()
573                        };
574                        if let Some(suggestion) = self.compact_path_suggestion(&full_url_for_compact, &base_path) {
575                            let url_start = url_group.start();
576                            let url_end = caps.get(2).map_or(url_group.end(), |frag| frag.end());
577                            let fix_byte_start = line_start_byte + url_start;
578                            let fix_byte_end = line_start_byte + url_end;
579                            warnings.push(LintWarning {
580                                rule_name: Some(self.name().to_string()),
581                                line: link.line,
582                                column: url_start + 1,
583                                end_line: link.line,
584                                end_column: url_end + 1,
585                                message: format!(
586                                    "Relative link '{full_url_for_compact}' can be simplified to '{suggestion}'"
587                                ),
588                                severity: Severity::Warning,
589                                fix: Some(Fix {
590                                    range: fix_byte_start..fix_byte_end,
591                                    replacement: suggestion,
592                                }),
593                            });
594                        }
595
596                        // Strip query parameters and fragments before checking file existence
597                        let file_path = Self::strip_query_and_fragment(url);
598
599                        // URL-decode the path to handle percent-encoded characters
600                        let decoded_path = Self::url_decode(file_path);
601
602                        // Resolve the relative link against the base path
603                        let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
604
605                        // Check if the file exists, also trying markdown extensions for extensionless links
606                        if file_exists_or_markdown_extension(&resolved_path) {
607                            continue; // File exists, no warning needed
608                        }
609
610                        // For .html/.htm links, check if a corresponding markdown source exists
611                        let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
612                            && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
613                            && let (Some(stem), Some(parent)) = (
614                                resolved_path.file_stem().and_then(|s| s.to_str()),
615                                resolved_path.parent(),
616                            ) {
617                            MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
618                                let source_path = parent.join(format!("{stem}{md_ext}"));
619                                file_exists_with_cache(&source_path)
620                            })
621                        } else {
622                            false
623                        };
624
625                        if has_md_source {
626                            continue; // Markdown source exists, link is valid
627                        }
628
629                        // File doesn't exist and no source file found
630                        // Use actual URL position from regex capture group
631                        // Note: capture group positions are absolute within the line string
632                        let url_start = url_group.start();
633                        let url_end = url_group.end();
634
635                        warnings.push(LintWarning {
636                            rule_name: Some(self.name().to_string()),
637                            line: link.line,
638                            column: url_start + 1, // 1-indexed
639                            end_line: link.line,
640                            end_column: url_end + 1, // 1-indexed
641                            message: format!("Relative link '{url}' does not exist"),
642                            severity: Severity::Error,
643                            fix: None,
644                        });
645                    }
646                }
647            }
648        }
649
650        // Also process images - they have URLs already parsed
651        for image in &ctx.images {
652            // Skip images inside PyMdown blocks (MkDocs flavor)
653            if ctx.line_info(image.line).is_some_and(|info| info.in_pymdown_block) {
654                continue;
655            }
656
657            let url = image.url.as_ref();
658
659            // Skip empty URLs
660            if url.is_empty() {
661                continue;
662            }
663
664            // Skip external URLs and fragment-only links
665            if self.is_external_url(url) || self.is_fragment_only_link(url) {
666                continue;
667            }
668
669            // Handle absolute paths based on config
670            if Self::is_absolute_path(url) {
671                match self.config.absolute_links {
672                    AbsoluteLinksOption::Warn => {
673                        warnings.push(LintWarning {
674                            rule_name: Some(self.name().to_string()),
675                            line: image.line,
676                            column: image.start_col + 1,
677                            end_line: image.line,
678                            end_column: image.start_col + 1 + url.len(),
679                            message: format!("Absolute link '{url}' cannot be validated locally"),
680                            severity: Severity::Warning,
681                            fix: None,
682                        });
683                    }
684                    AbsoluteLinksOption::RelativeToDocs => {
685                        if let Some(msg) = Self::validate_absolute_link_via_docs_dir(url, &base_path) {
686                            warnings.push(LintWarning {
687                                rule_name: Some(self.name().to_string()),
688                                line: image.line,
689                                column: image.start_col + 1,
690                                end_line: image.line,
691                                end_column: image.start_col + 1 + url.len(),
692                                message: msg,
693                                severity: Severity::Warning,
694                                fix: None,
695                            });
696                        }
697                    }
698                    AbsoluteLinksOption::Ignore => {}
699                }
700                continue;
701            }
702
703            // Check for unnecessary path traversal (compact-paths)
704            if let Some(suggestion) = self.compact_path_suggestion(url, &base_path) {
705                // Find the URL position within the image syntax using document byte offsets.
706                // Search from image.byte_offset (the `!` character) to locate the URL string.
707                let fix = content[image.byte_offset..image.byte_end].find(url).map(|url_offset| {
708                    let fix_byte_start = image.byte_offset + url_offset;
709                    let fix_byte_end = fix_byte_start + url.len();
710                    Fix {
711                        range: fix_byte_start..fix_byte_end,
712                        replacement: suggestion.clone(),
713                    }
714                });
715
716                let img_line_start_byte = ctx.line_index.get_line_start_byte(image.line).unwrap_or(0);
717                let url_col = fix
718                    .as_ref()
719                    .map_or(image.start_col + 1, |f| f.range.start - img_line_start_byte + 1);
720                warnings.push(LintWarning {
721                    rule_name: Some(self.name().to_string()),
722                    line: image.line,
723                    column: url_col,
724                    end_line: image.line,
725                    end_column: url_col + url.len(),
726                    message: format!("Relative link '{url}' can be simplified to '{suggestion}'"),
727                    severity: Severity::Warning,
728                    fix,
729                });
730            }
731
732            // Strip query parameters and fragments before checking file existence
733            let file_path = Self::strip_query_and_fragment(url);
734
735            // URL-decode the path to handle percent-encoded characters
736            let decoded_path = Self::url_decode(file_path);
737
738            // Resolve the relative link against the base path
739            let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
740
741            // Check if the file exists, also trying markdown extensions for extensionless links
742            if file_exists_or_markdown_extension(&resolved_path) {
743                continue; // File exists, no warning needed
744            }
745
746            // For .html/.htm links, check if a corresponding markdown source exists
747            let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
748                && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
749                && let (Some(stem), Some(parent)) = (
750                    resolved_path.file_stem().and_then(|s| s.to_str()),
751                    resolved_path.parent(),
752                ) {
753                MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
754                    let source_path = parent.join(format!("{stem}{md_ext}"));
755                    file_exists_with_cache(&source_path)
756                })
757            } else {
758                false
759            };
760
761            if has_md_source {
762                continue; // Markdown source exists, link is valid
763            }
764
765            // File doesn't exist and no source file found
766            // Images already have correct position from parser
767            warnings.push(LintWarning {
768                rule_name: Some(self.name().to_string()),
769                line: image.line,
770                column: image.start_col + 1,
771                end_line: image.line,
772                end_column: image.start_col + 1 + url.len(),
773                message: format!("Relative link '{url}' does not exist"),
774                severity: Severity::Error,
775                fix: None,
776            });
777        }
778
779        // Also process reference definitions: [ref]: ./path.md
780        for ref_def in &ctx.reference_defs {
781            let url = &ref_def.url;
782
783            // Skip empty URLs
784            if url.is_empty() {
785                continue;
786            }
787
788            // Skip external URLs and fragment-only links
789            if self.is_external_url(url) || self.is_fragment_only_link(url) {
790                continue;
791            }
792
793            // Handle absolute paths based on config
794            if Self::is_absolute_path(url) {
795                match self.config.absolute_links {
796                    AbsoluteLinksOption::Warn => {
797                        let line_idx = ref_def.line - 1;
798                        let column = content.lines().nth(line_idx).map_or(1, |line_content| {
799                            line_content.find(url.as_str()).map_or(1, |url_pos| url_pos + 1)
800                        });
801                        warnings.push(LintWarning {
802                            rule_name: Some(self.name().to_string()),
803                            line: ref_def.line,
804                            column,
805                            end_line: ref_def.line,
806                            end_column: column + url.len(),
807                            message: format!("Absolute link '{url}' cannot be validated locally"),
808                            severity: Severity::Warning,
809                            fix: None,
810                        });
811                    }
812                    AbsoluteLinksOption::RelativeToDocs => {
813                        if let Some(msg) = Self::validate_absolute_link_via_docs_dir(url, &base_path) {
814                            let line_idx = ref_def.line - 1;
815                            let column = content.lines().nth(line_idx).map_or(1, |line_content| {
816                                line_content.find(url.as_str()).map_or(1, |url_pos| url_pos + 1)
817                            });
818                            warnings.push(LintWarning {
819                                rule_name: Some(self.name().to_string()),
820                                line: ref_def.line,
821                                column,
822                                end_line: ref_def.line,
823                                end_column: column + url.len(),
824                                message: msg,
825                                severity: Severity::Warning,
826                                fix: None,
827                            });
828                        }
829                    }
830                    AbsoluteLinksOption::Ignore => {}
831                }
832                continue;
833            }
834
835            // Check for unnecessary path traversal (compact-paths)
836            if let Some(suggestion) = self.compact_path_suggestion(url, &base_path) {
837                let ref_line_idx = ref_def.line - 1;
838                let col = content.lines().nth(ref_line_idx).map_or(1, |line_content| {
839                    line_content.find(url.as_str()).map_or(1, |url_pos| url_pos + 1)
840                });
841                let ref_line_start_byte = ctx.line_index.get_line_start_byte(ref_def.line).unwrap_or(0);
842                let fix_byte_start = ref_line_start_byte + col - 1;
843                let fix_byte_end = fix_byte_start + url.len();
844                warnings.push(LintWarning {
845                    rule_name: Some(self.name().to_string()),
846                    line: ref_def.line,
847                    column: col,
848                    end_line: ref_def.line,
849                    end_column: col + url.len(),
850                    message: format!("Relative link '{url}' can be simplified to '{suggestion}'"),
851                    severity: Severity::Warning,
852                    fix: Some(Fix {
853                        range: fix_byte_start..fix_byte_end,
854                        replacement: suggestion,
855                    }),
856                });
857            }
858
859            // Strip query parameters and fragments before checking file existence
860            let file_path = Self::strip_query_and_fragment(url);
861
862            // URL-decode the path to handle percent-encoded characters
863            let decoded_path = Self::url_decode(file_path);
864
865            // Resolve the relative link against the base path
866            let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
867
868            // Check if the file exists, also trying markdown extensions for extensionless links
869            if file_exists_or_markdown_extension(&resolved_path) {
870                continue; // File exists, no warning needed
871            }
872
873            // For .html/.htm links, check if a corresponding markdown source exists
874            let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
875                && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
876                && let (Some(stem), Some(parent)) = (
877                    resolved_path.file_stem().and_then(|s| s.to_str()),
878                    resolved_path.parent(),
879                ) {
880                MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
881                    let source_path = parent.join(format!("{stem}{md_ext}"));
882                    file_exists_with_cache(&source_path)
883                })
884            } else {
885                false
886            };
887
888            if has_md_source {
889                continue; // Markdown source exists, link is valid
890            }
891
892            // File doesn't exist and no source file found
893            // Calculate column position: find URL within the line
894            let line_idx = ref_def.line - 1;
895            let column = content.lines().nth(line_idx).map_or(1, |line_content| {
896                // Find URL position in line (after ]: )
897                line_content.find(url.as_str()).map_or(1, |url_pos| url_pos + 1)
898            });
899
900            warnings.push(LintWarning {
901                rule_name: Some(self.name().to_string()),
902                line: ref_def.line,
903                column,
904                end_line: ref_def.line,
905                end_column: column + url.len(),
906                message: format!("Relative link '{url}' does not exist"),
907                severity: Severity::Error,
908                fix: None,
909            });
910        }
911
912        Ok(warnings)
913    }
914
915    fn fix_capability(&self) -> FixCapability {
916        if self.config.compact_paths {
917            FixCapability::ConditionallyFixable
918        } else {
919            FixCapability::Unfixable
920        }
921    }
922
923    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
924        if !self.config.compact_paths {
925            return Ok(ctx.content.to_string());
926        }
927
928        let warnings = self.check(ctx)?;
929        let warnings =
930            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
931        let mut content = ctx.content.to_string();
932
933        // Collect fixable warnings (compact-paths) sorted by byte offset descending
934        let mut fixes: Vec<_> = warnings.iter().filter_map(|w| w.fix.as_ref()).collect();
935        fixes.sort_by(|a, b| b.range.start.cmp(&a.range.start));
936
937        for fix in fixes {
938            if fix.range.end <= content.len() {
939                content.replace_range(fix.range.clone(), &fix.replacement);
940            }
941        }
942
943        Ok(content)
944    }
945
946    fn as_any(&self) -> &dyn std::any::Any {
947        self
948    }
949
950    fn default_config_section(&self) -> Option<(String, toml::Value)> {
951        let default_config = MD057Config::default();
952        let json_value = serde_json::to_value(&default_config).ok()?;
953        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
954
955        if let toml::Value::Table(table) = toml_value {
956            if !table.is_empty() {
957                Some((MD057Config::RULE_NAME.to_string(), toml::Value::Table(table)))
958            } else {
959                None
960            }
961        } else {
962            None
963        }
964    }
965
966    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
967    where
968        Self: Sized,
969    {
970        let rule_config = crate::rule_config_serde::load_rule_config::<MD057Config>(config);
971        Box::new(Self::from_config_struct(rule_config))
972    }
973
974    fn cross_file_scope(&self) -> CrossFileScope {
975        CrossFileScope::Workspace
976    }
977
978    fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, index: &mut FileIndex) {
979        // Use the shared utility for cross-file link extraction
980        // This ensures consistent position tracking between CLI and LSP
981        for link in extract_cross_file_links(ctx) {
982            index.add_cross_file_link(link);
983        }
984    }
985
986    fn cross_file_check(
987        &self,
988        file_path: &Path,
989        file_index: &FileIndex,
990        workspace_index: &crate::workspace_index::WorkspaceIndex,
991    ) -> LintResult {
992        let mut warnings = Vec::new();
993
994        // Get the directory containing this file for resolving relative links
995        let file_dir = file_path.parent();
996
997        for cross_link in &file_index.cross_file_links {
998            // URL-decode the path for filesystem operations
999            // The stored path is URL-encoded (e.g., "%F0%9F%91%A4" for emoji 👤)
1000            let decoded_target = Self::url_decode(&cross_link.target_path);
1001
1002            // Skip absolute paths — they are already handled by check()
1003            // which validates them according to the absolute_links config.
1004            // Handling them here too would produce duplicate warnings.
1005            if decoded_target.starts_with('/') {
1006                continue;
1007            }
1008
1009            // Resolve relative path
1010            let target_path = if let Some(dir) = file_dir {
1011                dir.join(&decoded_target)
1012            } else {
1013                Path::new(&decoded_target).to_path_buf()
1014            };
1015
1016            // Normalize the path (handle .., ., etc.)
1017            let target_path = normalize_path(&target_path);
1018
1019            // Check if the target file exists, also trying markdown extensions for extensionless links
1020            let file_exists =
1021                workspace_index.contains_file(&target_path) || file_exists_or_markdown_extension(&target_path);
1022
1023            if !file_exists {
1024                // For .html/.htm links, check if a corresponding markdown source exists
1025                // This handles doc sites (mdBook, etc.) where .md is compiled to .html
1026                let has_md_source = if let Some(ext) = target_path.extension().and_then(|e| e.to_str())
1027                    && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
1028                    && let (Some(stem), Some(parent)) =
1029                        (target_path.file_stem().and_then(|s| s.to_str()), target_path.parent())
1030                {
1031                    MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
1032                        let source_path = parent.join(format!("{stem}{md_ext}"));
1033                        workspace_index.contains_file(&source_path) || source_path.exists()
1034                    })
1035                } else {
1036                    false
1037                };
1038
1039                if !has_md_source {
1040                    warnings.push(LintWarning {
1041                        rule_name: Some(self.name().to_string()),
1042                        line: cross_link.line,
1043                        column: cross_link.column,
1044                        end_line: cross_link.line,
1045                        end_column: cross_link.column + cross_link.target_path.len(),
1046                        message: format!("Relative link '{}' does not exist", cross_link.target_path),
1047                        severity: Severity::Error,
1048                        fix: None,
1049                    });
1050                }
1051            }
1052        }
1053
1054        Ok(warnings)
1055    }
1056}
1057
/// Build the shortest relative path leading from `from_dir` to `to_path`.
///
/// Both inputs are expected to be normalized already (no `.` or `..`
/// components). The result climbs out of `from_dir` with one `..` per
/// component that is not shared with `to_path`, then descends along the
/// remaining components of `to_path`.
fn shortest_relative_path(from_dir: &Path, to_path: &Path) -> PathBuf {
    let origin: Vec<_> = from_dir.components().collect();
    let target: Vec<_> = to_path.components().collect();

    // Number of leading components the two paths have in common
    let shared = origin
        .iter()
        .zip(&target)
        .take_while(|(left, right)| left == right)
        .count();

    // One `..` for every unshared component of the origin, followed by the
    // unshared tail of the target.
    let climbs = std::iter::repeat(std::path::Component::ParentDir).take(origin.len() - shared);
    let descent = target[shared..].iter().copied();

    climbs.chain(descent).collect()
}
1087
1088/// Check if a relative link path can be shortened.
1089///
1090/// Given the source directory and the raw link path, computes whether there's
1091/// a shorter equivalent path. Returns `Some(compact_path)` if the link can
1092/// be simplified, `None` if it's already optimal.
1093fn compute_compact_path(source_dir: &Path, raw_link_path: &str) -> Option<String> {
1094    let link_path = Path::new(raw_link_path);
1095
1096    // Only check paths that contain traversal (../ or ./)
1097    let has_traversal = link_path
1098        .components()
1099        .any(|c| matches!(c, std::path::Component::ParentDir | std::path::Component::CurDir));
1100
1101    if !has_traversal {
1102        return None;
1103    }
1104
1105    // Resolve: source_dir + raw_link_path, then normalize
1106    let combined = source_dir.join(link_path);
1107    let normalized_target = normalize_path(&combined);
1108
1109    // Compute shortest path from source_dir back to the normalized target
1110    let normalized_source = normalize_path(source_dir);
1111    let shortest = shortest_relative_path(&normalized_source, &normalized_target);
1112
1113    // Compare against the raw link path — if it differs, the path can be compacted
1114    if shortest != link_path {
1115        let compact = shortest.to_string_lossy().to_string();
1116        // Avoid suggesting empty path
1117        if compact.is_empty() {
1118            return None;
1119        }
1120        // Markdown links always use forward slashes regardless of platform
1121        Some(compact.replace('\\', "/"))
1122    } else {
1123        None
1124    }
1125}
1126
/// Normalize a path lexically by resolving `.` and `..` components.
///
/// No filesystem access is performed, so symlinks are not followed.
///
/// * `.` components are dropped.
/// * `..` removes the preceding normal component when there is one.
/// * `..` directly below the root is ignored, since the parent of the root
///   is the root itself; an absolute path therefore stays absolute.
/// * `..` that cannot be resolved (at the start of a relative path, or after
///   other unresolved `..`) is kept, because discarding it would make the
///   result point at a different location than the input.
///
/// A path that cancels out completely (e.g. `a/..`) yields an empty `PathBuf`.
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut components: Vec<Component> = Vec::new();

    for component in path.components() {
        match component {
            // Skip current directory markers
            Component::CurDir => {}
            Component::ParentDir => match components.last() {
                // Go up one level by cancelling the preceding directory
                Some(Component::Normal(_)) => {
                    components.pop();
                }
                // Already at the root: `/..` is still `/`
                Some(Component::RootDir) => {}
                // Nothing left to cancel: keep the `..` so the meaning of a
                // relative path is preserved
                _ => components.push(component),
            },
            _ => components.push(component),
        }
    }

    components.iter().collect()
}
1150
1151#[cfg(test)]
1152mod tests {
1153    use super::*;
1154    use crate::workspace_index::CrossFileLinkIndex;
1155    use std::fs::File;
1156    use std::io::Write;
1157    use tempfile::tempdir;
1158
1159    #[test]
1160    fn test_strip_query_and_fragment() {
1161        // Test query parameter stripping
1162        assert_eq!(
1163            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true"),
1164            "file.png"
1165        );
1166        assert_eq!(
1167            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true&version=1"),
1168            "file.png"
1169        );
1170        assert_eq!(
1171            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?"),
1172            "file.png"
1173        );
1174
1175        // Test fragment stripping
1176        assert_eq!(
1177            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section"),
1178            "file.md"
1179        );
1180        assert_eq!(
1181            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#"),
1182            "file.md"
1183        );
1184
1185        // Test both query and fragment (query comes first, per RFC 3986)
1186        assert_eq!(
1187            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md?raw=true#section"),
1188            "file.md"
1189        );
1190
1191        // Test no query or fragment
1192        assert_eq!(
1193            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png"),
1194            "file.png"
1195        );
1196
1197        // Test with path
1198        assert_eq!(
1199            MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true"),
1200            "path/to/image.png"
1201        );
1202        assert_eq!(
1203            MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true#anchor"),
1204            "path/to/image.png"
1205        );
1206
1207        // Edge case: fragment before query (non-standard but possible)
1208        assert_eq!(
1209            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section?query"),
1210            "file.md"
1211        );
1212    }
1213
1214    #[test]
1215    fn test_url_decode() {
1216        // Simple space encoding
1217        assert_eq!(
1218            MD057ExistingRelativeLinks::url_decode("penguin%20with%20space.jpg"),
1219            "penguin with space.jpg"
1220        );
1221
1222        // Path with encoded spaces
1223        assert_eq!(
1224            MD057ExistingRelativeLinks::url_decode("assets/my%20file%20name.png"),
1225            "assets/my file name.png"
1226        );
1227
1228        // Multiple encoded characters
1229        assert_eq!(
1230            MD057ExistingRelativeLinks::url_decode("hello%20world%21.md"),
1231            "hello world!.md"
1232        );
1233
1234        // Lowercase hex
1235        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2f%2e%2e"), "/..");
1236
1237        // Uppercase hex
1238        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2F%2E%2E"), "/..");
1239
1240        // Mixed case hex
1241        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2f%2E%2e"), "/..");
1242
1243        // No encoding - return as-is
1244        assert_eq!(
1245            MD057ExistingRelativeLinks::url_decode("normal-file.md"),
1246            "normal-file.md"
1247        );
1248
1249        // Incomplete percent encoding - leave as-is
1250        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%2.txt"), "file%2.txt");
1251
1252        // Percent at end - leave as-is
1253        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%"), "file%");
1254
1255        // Invalid hex digits - leave as-is
1256        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%GG.txt"), "file%GG.txt");
1257
1258        // Plus sign (should NOT be decoded - that's form encoding, not URL encoding)
1259        assert_eq!(MD057ExistingRelativeLinks::url_decode("file+name.txt"), "file+name.txt");
1260
1261        // Empty string
1262        assert_eq!(MD057ExistingRelativeLinks::url_decode(""), "");
1263
1264        // UTF-8 multi-byte characters (é = C3 A9 in UTF-8)
1265        assert_eq!(MD057ExistingRelativeLinks::url_decode("caf%C3%A9.md"), "café.md");
1266
1267        // Multiple consecutive encoded characters
1268        assert_eq!(MD057ExistingRelativeLinks::url_decode("%20%20%20"), "   ");
1269
1270        // Encoded path separators
1271        assert_eq!(
1272            MD057ExistingRelativeLinks::url_decode("path%2Fto%2Ffile.md"),
1273            "path/to/file.md"
1274        );
1275
1276        // Mixed encoded and non-encoded
1277        assert_eq!(
1278            MD057ExistingRelativeLinks::url_decode("hello%20world/foo%20bar.md"),
1279            "hello world/foo bar.md"
1280        );
1281
1282        // Special characters that are commonly encoded
1283        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%5B1%5D.md"), "file[1].md");
1284
1285        // Percent at position that looks like encoding but isn't valid
1286        assert_eq!(MD057ExistingRelativeLinks::url_decode("100%pure.md"), "100%pure.md");
1287    }
1288
1289    #[test]
1290    fn test_url_encoded_filenames() {
1291        // Create a temporary directory for test files
1292        let temp_dir = tempdir().unwrap();
1293        let base_path = temp_dir.path();
1294
1295        // Create a file with spaces in the name
1296        let file_with_spaces = base_path.join("penguin with space.jpg");
1297        File::create(&file_with_spaces)
1298            .unwrap()
1299            .write_all(b"image data")
1300            .unwrap();
1301
1302        // Create a subdirectory with spaces
1303        let subdir = base_path.join("my images");
1304        std::fs::create_dir(&subdir).unwrap();
1305        let nested_file = subdir.join("photo 1.png");
1306        File::create(&nested_file).unwrap().write_all(b"photo data").unwrap();
1307
1308        // Test content with URL-encoded links
1309        let content = r#"
1310# Test Document with URL-Encoded Links
1311
1312![Penguin](penguin%20with%20space.jpg)
1313![Photo](my%20images/photo%201.png)
1314![Missing](missing%20file.jpg)
1315"#;
1316
1317        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1318
1319        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1320        let result = rule.check(&ctx).unwrap();
1321
1322        // Should only have one warning for the missing file
1323        assert_eq!(
1324            result.len(),
1325            1,
1326            "Should only warn about missing%20file.jpg. Got: {result:?}"
1327        );
1328        assert!(
1329            result[0].message.contains("missing%20file.jpg"),
1330            "Warning should mention the URL-encoded filename"
1331        );
1332    }
1333
1334    #[test]
1335    fn test_external_urls() {
1336        let rule = MD057ExistingRelativeLinks::new();
1337
1338        // Common web protocols
1339        assert!(rule.is_external_url("https://example.com"));
1340        assert!(rule.is_external_url("http://example.com"));
1341        assert!(rule.is_external_url("ftp://example.com"));
1342        assert!(rule.is_external_url("www.example.com"));
1343        assert!(rule.is_external_url("example.com"));
1344
1345        // Special URI schemes
1346        assert!(rule.is_external_url("file:///path/to/file"));
1347        assert!(rule.is_external_url("smb://server/share"));
1348        assert!(rule.is_external_url("macappstores://apps.apple.com/"));
1349        assert!(rule.is_external_url("mailto:user@example.com"));
1350        assert!(rule.is_external_url("tel:+1234567890"));
1351        assert!(rule.is_external_url("data:text/plain;base64,SGVsbG8="));
1352        assert!(rule.is_external_url("javascript:void(0)"));
1353        assert!(rule.is_external_url("ssh://git@github.com/repo"));
1354        assert!(rule.is_external_url("git://github.com/repo.git"));
1355
1356        // Email addresses without mailto: protocol
1357        // These are clearly not file links and should be skipped
1358        assert!(rule.is_external_url("user@example.com"));
1359        assert!(rule.is_external_url("steering@kubernetes.io"));
1360        assert!(rule.is_external_url("john.doe+filter@company.co.uk"));
1361        assert!(rule.is_external_url("user_name@sub.domain.com"));
1362        assert!(rule.is_external_url("firstname.lastname+tag@really.long.domain.example.org"));
1363
1364        // Template variables should be skipped (not checked as relative links)
1365        assert!(rule.is_external_url("{{URL}}")); // Handlebars/Mustache
1366        assert!(rule.is_external_url("{{#URL}}")); // Handlebars block helper
1367        assert!(rule.is_external_url("{{> partial}}")); // Handlebars partial
1368        assert!(rule.is_external_url("{{ variable }}")); // Mustache with spaces
1369        assert!(rule.is_external_url("{{% include %}}")); // Jinja2/Hugo shortcode
1370        assert!(rule.is_external_url("{{")); // Even partial matches (regex edge case)
1371
1372        // Absolute paths are NOT external (handled separately via is_absolute_path)
1373        // By default they are ignored, but can be configured to warn
1374        assert!(!rule.is_external_url("/api/v1/users"));
1375        assert!(!rule.is_external_url("/blog/2024/release.html"));
1376        assert!(!rule.is_external_url("/react/hooks/use-state.html"));
1377        assert!(!rule.is_external_url("/pkg/runtime"));
1378        assert!(!rule.is_external_url("/doc/go1compat"));
1379        assert!(!rule.is_external_url("/index.html"));
1380        assert!(!rule.is_external_url("/assets/logo.png"));
1381
1382        // But is_absolute_path should detect them
1383        assert!(MD057ExistingRelativeLinks::is_absolute_path("/api/v1/users"));
1384        assert!(MD057ExistingRelativeLinks::is_absolute_path("/blog/2024/release.html"));
1385        assert!(MD057ExistingRelativeLinks::is_absolute_path("/index.html"));
1386        assert!(!MD057ExistingRelativeLinks::is_absolute_path("./relative.md"));
1387        assert!(!MD057ExistingRelativeLinks::is_absolute_path("relative.md"));
1388
1389        // Framework path aliases should be skipped (resolved by build tools)
1390        // Tilde prefix (common in Vite, Nuxt, Astro for project root)
1391        assert!(rule.is_external_url("~/assets/image.png"));
1392        assert!(rule.is_external_url("~/components/Button.vue"));
1393        assert!(rule.is_external_url("~assets/logo.svg")); // Nuxt style without /
1394
1395        // @ prefix (common in Vue, webpack, Vite aliases)
1396        assert!(rule.is_external_url("@/components/Header.vue"));
1397        assert!(rule.is_external_url("@images/photo.jpg"));
1398        assert!(rule.is_external_url("@assets/styles.css"));
1399
1400        // Relative paths should NOT be external (should be validated)
1401        assert!(!rule.is_external_url("./relative/path.md"));
1402        assert!(!rule.is_external_url("relative/path.md"));
1403        assert!(!rule.is_external_url("../parent/path.md"));
1404    }
1405
1406    #[test]
1407    fn test_framework_path_aliases() {
1408        // Create a temporary directory for test files
1409        let temp_dir = tempdir().unwrap();
1410        let base_path = temp_dir.path();
1411
1412        // Test content with framework path aliases (should all be skipped)
1413        let content = r#"
1414# Framework Path Aliases
1415
1416![Image 1](~/assets/penguin.jpg)
1417![Image 2](~assets/logo.svg)
1418![Image 3](@images/photo.jpg)
1419![Image 4](@/components/icon.svg)
1420[Link](@/pages/about.md)
1421
1422This is a [real missing link](missing.md) that should be flagged.
1423"#;
1424
1425        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1426
1427        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1428        let result = rule.check(&ctx).unwrap();
1429
1430        // Should only have one warning for the real missing link
1431        assert_eq!(
1432            result.len(),
1433            1,
1434            "Should only warn about missing.md, not framework aliases. Got: {result:?}"
1435        );
1436        assert!(
1437            result[0].message.contains("missing.md"),
1438            "Warning should be for missing.md"
1439        );
1440    }
1441
1442    #[test]
1443    fn test_url_decode_security_path_traversal() {
1444        // Ensure URL decoding doesn't enable path traversal attacks
1445        // The decoded path is still validated against the base path
1446        let temp_dir = tempdir().unwrap();
1447        let base_path = temp_dir.path();
1448
1449        // Create a file in the temp directory
1450        let file_in_base = base_path.join("safe.md");
1451        File::create(&file_in_base).unwrap().write_all(b"# Safe").unwrap();
1452
1453        // Test with encoded path traversal attempt
1454        // Use a path that definitely won't exist on any platform (not /etc/passwd which exists on Linux)
1455        // %2F = /, so ..%2F..%2Fnonexistent%2Ffile = ../../nonexistent/file
1456        // %252F = %2F (double encoded), so ..%252F..%252F = ..%2F..%2F (literal, won't decode to ..)
1457        let content = r#"
1458[Traversal attempt](..%2F..%2Fnonexistent_dir_12345%2Fmissing.md)
1459[Double encoded](..%252F..%252Fnonexistent%252Ffile.md)
1460[Safe link](safe.md)
1461"#;
1462
1463        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1464
1465        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1466        let result = rule.check(&ctx).unwrap();
1467
1468        // The traversal attempts should still be flagged as missing
1469        // (they don't exist relative to base_path after decoding)
1470        assert_eq!(
1471            result.len(),
1472            2,
1473            "Should have warnings for traversal attempts. Got: {result:?}"
1474        );
1475    }
1476
1477    #[test]
1478    fn test_url_encoded_utf8_filenames() {
1479        // Test with actual UTF-8 encoded filenames
1480        let temp_dir = tempdir().unwrap();
1481        let base_path = temp_dir.path();
1482
1483        // Create files with unicode names
1484        let cafe_file = base_path.join("café.md");
1485        File::create(&cafe_file).unwrap().write_all(b"# Cafe").unwrap();
1486
1487        let content = r#"
1488[Café link](caf%C3%A9.md)
1489[Missing unicode](r%C3%A9sum%C3%A9.md)
1490"#;
1491
1492        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1493
1494        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1495        let result = rule.check(&ctx).unwrap();
1496
1497        // Should only warn about the missing file
1498        assert_eq!(
1499            result.len(),
1500            1,
1501            "Should only warn about missing résumé.md. Got: {result:?}"
1502        );
1503        assert!(
1504            result[0].message.contains("r%C3%A9sum%C3%A9.md"),
1505            "Warning should mention the URL-encoded filename"
1506        );
1507    }
1508
1509    #[test]
1510    fn test_url_encoded_emoji_filenames() {
1511        // URL-encoded emoji paths should be correctly resolved
1512        // 👤 = U+1F464 = F0 9F 91 A4 in UTF-8
1513        let temp_dir = tempdir().unwrap();
1514        let base_path = temp_dir.path();
1515
1516        // Create directory with emoji in name: 👤 Personal
1517        let emoji_dir = base_path.join("👤 Personal");
1518        std::fs::create_dir(&emoji_dir).unwrap();
1519
1520        // Create file in that directory: TV Shows.md
1521        let file_path = emoji_dir.join("TV Shows.md");
1522        File::create(&file_path)
1523            .unwrap()
1524            .write_all(b"# TV Shows\n\nContent here.")
1525            .unwrap();
1526
1527        // Test content with URL-encoded emoji link
1528        // %F0%9F%91%A4 = 👤, %20 = space
1529        let content = r#"
1530# Test Document
1531
1532[TV Shows](./%F0%9F%91%A4%20Personal/TV%20Shows.md)
1533[Missing](./%F0%9F%91%A4%20Personal/Missing.md)
1534"#;
1535
1536        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1537
1538        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1539        let result = rule.check(&ctx).unwrap();
1540
1541        // Should only warn about the missing file, not the valid emoji path
1542        assert_eq!(result.len(), 1, "Should only warn about missing file. Got: {result:?}");
1543        assert!(
1544            result[0].message.contains("Missing.md"),
1545            "Warning should be for Missing.md, got: {}",
1546            result[0].message
1547        );
1548    }
1549
1550    #[test]
1551    fn test_no_warnings_without_base_path() {
1552        let rule = MD057ExistingRelativeLinks::new();
1553        let content = "[Link](missing.md)";
1554
1555        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1556        let result = rule.check(&ctx).unwrap();
1557        assert!(result.is_empty(), "Should have no warnings without base path");
1558    }
1559
1560    #[test]
1561    fn test_existing_and_missing_links() {
1562        // Create a temporary directory for test files
1563        let temp_dir = tempdir().unwrap();
1564        let base_path = temp_dir.path();
1565
1566        // Create an existing file
1567        let exists_path = base_path.join("exists.md");
1568        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
1569
1570        // Verify the file exists
1571        assert!(exists_path.exists(), "exists.md should exist for this test");
1572
1573        // Create test content with both existing and missing links
1574        let content = r#"
1575# Test Document
1576
1577[Valid Link](exists.md)
1578[Invalid Link](missing.md)
1579[External Link](https://example.com)
1580[Media Link](image.jpg)
1581        "#;
1582
1583        // Initialize rule with the base path (default: check all files including media)
1584        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1585
1586        // Test the rule
1587        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1588        let result = rule.check(&ctx).unwrap();
1589
1590        // Should have two warnings: missing.md and image.jpg (both don't exist)
1591        assert_eq!(result.len(), 2);
1592        let messages: Vec<_> = result.iter().map(|w| w.message.as_str()).collect();
1593        assert!(messages.iter().any(|m| m.contains("missing.md")));
1594        assert!(messages.iter().any(|m| m.contains("image.jpg")));
1595    }
1596
1597    #[test]
1598    fn test_angle_bracket_links() {
1599        // Create a temporary directory for test files
1600        let temp_dir = tempdir().unwrap();
1601        let base_path = temp_dir.path();
1602
1603        // Create an existing file
1604        let exists_path = base_path.join("exists.md");
1605        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
1606
1607        // Create test content with angle bracket links
1608        let content = r#"
1609# Test Document
1610
1611[Valid Link](<exists.md>)
1612[Invalid Link](<missing.md>)
1613[External Link](<https://example.com>)
1614    "#;
1615
1616        // Test with default settings
1617        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1618
1619        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1620        let result = rule.check(&ctx).unwrap();
1621
1622        // Should have one warning for missing.md
1623        assert_eq!(result.len(), 1, "Should have exactly one warning");
1624        assert!(
1625            result[0].message.contains("missing.md"),
1626            "Warning should mention missing.md"
1627        );
1628    }
1629
1630    #[test]
1631    fn test_angle_bracket_links_with_parens() {
1632        // Create a temporary directory for test files
1633        let temp_dir = tempdir().unwrap();
1634        let base_path = temp_dir.path();
1635
1636        // Create directory structure with parentheses in path
1637        let app_dir = base_path.join("app");
1638        std::fs::create_dir(&app_dir).unwrap();
1639        let upload_dir = app_dir.join("(upload)");
1640        std::fs::create_dir(&upload_dir).unwrap();
1641        let page_file = upload_dir.join("page.tsx");
1642        File::create(&page_file)
1643            .unwrap()
1644            .write_all(b"export default function Page() {}")
1645            .unwrap();
1646
1647        // Create test content with angle bracket links containing parentheses
1648        let content = r#"
1649# Test Document with Paths Containing Parens
1650
1651[Upload Page](<app/(upload)/page.tsx>)
1652[Unix pipe](<https://en.wikipedia.org/wiki/Pipeline_(Unix)>)
1653[Missing](<app/(missing)/file.md>)
1654"#;
1655
1656        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1657
1658        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1659        let result = rule.check(&ctx).unwrap();
1660
1661        // Should only have one warning for the missing file
1662        assert_eq!(
1663            result.len(),
1664            1,
1665            "Should have exactly one warning for missing file. Got: {result:?}"
1666        );
1667        assert!(
1668            result[0].message.contains("app/(missing)/file.md"),
1669            "Warning should mention app/(missing)/file.md"
1670        );
1671    }
1672
1673    #[test]
1674    fn test_all_file_types_checked() {
1675        // Create a temporary directory for test files
1676        let temp_dir = tempdir().unwrap();
1677        let base_path = temp_dir.path();
1678
1679        // Create a test with various file types - all should be checked
1680        let content = r#"
1681[Image Link](image.jpg)
1682[Video Link](video.mp4)
1683[Markdown Link](document.md)
1684[PDF Link](file.pdf)
1685"#;
1686
1687        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1688
1689        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1690        let result = rule.check(&ctx).unwrap();
1691
1692        // Should warn about all missing files regardless of extension
1693        assert_eq!(result.len(), 4, "Should have warnings for all missing files");
1694    }
1695
1696    #[test]
1697    fn test_code_span_detection() {
1698        let rule = MD057ExistingRelativeLinks::new();
1699
1700        // Create a temporary directory for test files
1701        let temp_dir = tempdir().unwrap();
1702        let base_path = temp_dir.path();
1703
1704        let rule = rule.with_path(base_path);
1705
1706        // Test with document structure
1707        let content = "This is a [link](nonexistent.md) and `[not a link](not-checked.md)` in code.";
1708
1709        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1710        let result = rule.check(&ctx).unwrap();
1711
1712        // Should only find the real link, not the one in code
1713        assert_eq!(result.len(), 1, "Should only flag the real link");
1714        assert!(result[0].message.contains("nonexistent.md"));
1715    }
1716
1717    #[test]
1718    fn test_inline_code_spans() {
1719        // Create a temporary directory for test files
1720        let temp_dir = tempdir().unwrap();
1721        let base_path = temp_dir.path();
1722
1723        // Create test content with links in inline code spans
1724        let content = r#"
1725# Test Document
1726
1727This is a normal link: [Link](missing.md)
1728
1729This is a code span with a link: `[Link](another-missing.md)`
1730
1731Some more text with `inline code [Link](yet-another-missing.md) embedded`.
1732
1733    "#;
1734
1735        // Initialize rule with the base path
1736        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1737
1738        // Test the rule
1739        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1740        let result = rule.check(&ctx).unwrap();
1741
1742        // Should only have warning for the normal link, not for links in code spans
1743        assert_eq!(result.len(), 1, "Should have exactly one warning");
1744        assert!(
1745            result[0].message.contains("missing.md"),
1746            "Warning should be for missing.md"
1747        );
1748        assert!(
1749            !result.iter().any(|w| w.message.contains("another-missing.md")),
1750            "Should not warn about link in code span"
1751        );
1752        assert!(
1753            !result.iter().any(|w| w.message.contains("yet-another-missing.md")),
1754            "Should not warn about link in inline code"
1755        );
1756    }
1757
1758    #[test]
1759    fn test_extensionless_link_resolution() {
1760        // Create a temporary directory for test files
1761        let temp_dir = tempdir().unwrap();
1762        let base_path = temp_dir.path();
1763
1764        // Create a markdown file WITHOUT specifying .md extension in the link
1765        let page_path = base_path.join("page.md");
1766        File::create(&page_path).unwrap().write_all(b"# Page").unwrap();
1767
1768        // Test content with extensionless link that should resolve to page.md
1769        let content = r#"
1770# Test Document
1771
1772[Link without extension](page)
1773[Link with extension](page.md)
1774[Missing link](nonexistent)
1775"#;
1776
1777        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1778
1779        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1780        let result = rule.check(&ctx).unwrap();
1781
1782        // Should only have warning for nonexistent link
1783        // Both "page" and "page.md" should resolve to the same file
1784        assert_eq!(result.len(), 1, "Should only warn about nonexistent link");
1785        assert!(
1786            result[0].message.contains("nonexistent"),
1787            "Warning should be for 'nonexistent' not 'page'"
1788        );
1789    }
1790
1791    // Cross-file validation tests
1792    #[test]
1793    fn test_cross_file_scope() {
1794        let rule = MD057ExistingRelativeLinks::new();
1795        assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
1796    }
1797
1798    #[test]
1799    fn test_contribute_to_index_extracts_markdown_links() {
1800        let rule = MD057ExistingRelativeLinks::new();
1801        let content = r#"
1802# Document
1803
1804[Link to docs](./docs/guide.md)
1805[Link with fragment](./other.md#section)
1806[External link](https://example.com)
1807[Image link](image.png)
1808[Media file](video.mp4)
1809"#;
1810
1811        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1812        let mut index = FileIndex::new();
1813        rule.contribute_to_index(&ctx, &mut index);
1814
1815        // Should only index markdown file links
1816        assert_eq!(index.cross_file_links.len(), 2);
1817
1818        // Check first link
1819        assert_eq!(index.cross_file_links[0].target_path, "./docs/guide.md");
1820        assert_eq!(index.cross_file_links[0].fragment, "");
1821
1822        // Check second link (with fragment)
1823        assert_eq!(index.cross_file_links[1].target_path, "./other.md");
1824        assert_eq!(index.cross_file_links[1].fragment, "section");
1825    }
1826
1827    #[test]
1828    fn test_contribute_to_index_skips_external_and_anchors() {
1829        let rule = MD057ExistingRelativeLinks::new();
1830        let content = r#"
1831# Document
1832
1833[External](https://example.com)
1834[Another external](http://example.org)
1835[Fragment only](#section)
1836[FTP link](ftp://files.example.com)
1837[Mail link](mailto:test@example.com)
1838[WWW link](www.example.com)
1839"#;
1840
1841        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1842        let mut index = FileIndex::new();
1843        rule.contribute_to_index(&ctx, &mut index);
1844
1845        // Should not index any of these
1846        assert_eq!(index.cross_file_links.len(), 0);
1847    }
1848
1849    #[test]
1850    fn test_cross_file_check_valid_link() {
1851        use crate::workspace_index::WorkspaceIndex;
1852
1853        let rule = MD057ExistingRelativeLinks::new();
1854
1855        // Create a workspace index with the target file
1856        let mut workspace_index = WorkspaceIndex::new();
1857        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());
1858
1859        // Create file index with a link to an existing file
1860        let mut file_index = FileIndex::new();
1861        file_index.add_cross_file_link(CrossFileLinkIndex {
1862            target_path: "guide.md".to_string(),
1863            fragment: "".to_string(),
1864            line: 5,
1865            column: 1,
1866        });
1867
1868        // Run cross-file check from docs/index.md
1869        let warnings = rule
1870            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1871            .unwrap();
1872
1873        // Should have no warnings - file exists
1874        assert!(warnings.is_empty());
1875    }
1876
1877    #[test]
1878    fn test_cross_file_check_missing_link() {
1879        use crate::workspace_index::WorkspaceIndex;
1880
1881        let rule = MD057ExistingRelativeLinks::new();
1882
1883        // Create an empty workspace index
1884        let workspace_index = WorkspaceIndex::new();
1885
1886        // Create file index with a link to a missing file
1887        let mut file_index = FileIndex::new();
1888        file_index.add_cross_file_link(CrossFileLinkIndex {
1889            target_path: "missing.md".to_string(),
1890            fragment: "".to_string(),
1891            line: 5,
1892            column: 1,
1893        });
1894
1895        // Run cross-file check
1896        let warnings = rule
1897            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1898            .unwrap();
1899
1900        // Should have one warning for the missing file
1901        assert_eq!(warnings.len(), 1);
1902        assert!(warnings[0].message.contains("missing.md"));
1903        assert!(warnings[0].message.contains("does not exist"));
1904    }
1905
1906    #[test]
1907    fn test_cross_file_check_parent_path() {
1908        use crate::workspace_index::WorkspaceIndex;
1909
1910        let rule = MD057ExistingRelativeLinks::new();
1911
1912        // Create a workspace index with the target file at the root
1913        let mut workspace_index = WorkspaceIndex::new();
1914        workspace_index.insert_file(PathBuf::from("readme.md"), FileIndex::new());
1915
1916        // Create file index with a parent path link
1917        let mut file_index = FileIndex::new();
1918        file_index.add_cross_file_link(CrossFileLinkIndex {
1919            target_path: "../readme.md".to_string(),
1920            fragment: "".to_string(),
1921            line: 5,
1922            column: 1,
1923        });
1924
1925        // Run cross-file check from docs/guide.md
1926        let warnings = rule
1927            .cross_file_check(Path::new("docs/guide.md"), &file_index, &workspace_index)
1928            .unwrap();
1929
1930        // Should have no warnings - file exists at normalized path
1931        assert!(warnings.is_empty());
1932    }
1933
1934    #[test]
1935    fn test_cross_file_check_html_link_with_md_source() {
1936        // Test that .html links are accepted when corresponding .md source exists
1937        // This supports mdBook and similar doc generators that compile .md to .html
1938        use crate::workspace_index::WorkspaceIndex;
1939
1940        let rule = MD057ExistingRelativeLinks::new();
1941
1942        // Create a workspace index with the .md source file
1943        let mut workspace_index = WorkspaceIndex::new();
1944        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());
1945
1946        // Create file index with an .html link (from another rule like MD051)
1947        let mut file_index = FileIndex::new();
1948        file_index.add_cross_file_link(CrossFileLinkIndex {
1949            target_path: "guide.html".to_string(),
1950            fragment: "section".to_string(),
1951            line: 10,
1952            column: 5,
1953        });
1954
1955        // Run cross-file check from docs/index.md
1956        let warnings = rule
1957            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1958            .unwrap();
1959
1960        // Should have no warnings - .md source exists for the .html link
1961        assert!(
1962            warnings.is_empty(),
1963            "Expected no warnings for .html link with .md source, got: {warnings:?}"
1964        );
1965    }
1966
1967    #[test]
1968    fn test_cross_file_check_html_link_without_source() {
1969        // Test that .html links without corresponding .md source ARE flagged
1970        use crate::workspace_index::WorkspaceIndex;
1971
1972        let rule = MD057ExistingRelativeLinks::new();
1973
1974        // Create an empty workspace index
1975        let workspace_index = WorkspaceIndex::new();
1976
1977        // Create file index with an .html link to a non-existent file
1978        let mut file_index = FileIndex::new();
1979        file_index.add_cross_file_link(CrossFileLinkIndex {
1980            target_path: "missing.html".to_string(),
1981            fragment: "".to_string(),
1982            line: 10,
1983            column: 5,
1984        });
1985
1986        // Run cross-file check from docs/index.md
1987        let warnings = rule
1988            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1989            .unwrap();
1990
1991        // Should have one warning - no .md source exists
1992        assert_eq!(warnings.len(), 1, "Expected 1 warning for .html link without source");
1993        assert!(warnings[0].message.contains("missing.html"));
1994    }
1995
1996    #[test]
1997    fn test_normalize_path_function() {
1998        // Test simple cases
1999        assert_eq!(
2000            normalize_path(Path::new("docs/guide.md")),
2001            PathBuf::from("docs/guide.md")
2002        );
2003
2004        // Test current directory removal
2005        assert_eq!(
2006            normalize_path(Path::new("./docs/guide.md")),
2007            PathBuf::from("docs/guide.md")
2008        );
2009
2010        // Test parent directory resolution
2011        assert_eq!(
2012            normalize_path(Path::new("docs/sub/../guide.md")),
2013            PathBuf::from("docs/guide.md")
2014        );
2015
2016        // Test multiple parent directories
2017        assert_eq!(normalize_path(Path::new("a/b/c/../../d.md")), PathBuf::from("a/d.md"));
2018    }
2019
2020    #[test]
2021    fn test_html_link_with_md_source() {
2022        // Links to .html files should pass if corresponding .md source exists
2023        let temp_dir = tempdir().unwrap();
2024        let base_path = temp_dir.path();
2025
2026        // Create guide.md (source file)
2027        let md_file = base_path.join("guide.md");
2028        File::create(&md_file).unwrap().write_all(b"# Guide").unwrap();
2029
2030        let content = r#"
2031[Read the guide](guide.html)
2032[Also here](getting-started.html)
2033"#;
2034
2035        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2036        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2037        let result = rule.check(&ctx).unwrap();
2038
2039        // guide.html passes (guide.md exists), getting-started.html fails
2040        assert_eq!(
2041            result.len(),
2042            1,
2043            "Should only warn about missing source. Got: {result:?}"
2044        );
2045        assert!(result[0].message.contains("getting-started.html"));
2046    }
2047
2048    #[test]
2049    fn test_htm_link_with_md_source() {
2050        // .htm extension should also check for markdown source
2051        let temp_dir = tempdir().unwrap();
2052        let base_path = temp_dir.path();
2053
2054        let md_file = base_path.join("page.md");
2055        File::create(&md_file).unwrap().write_all(b"# Page").unwrap();
2056
2057        let content = "[Page](page.htm)";
2058
2059        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2060        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2061        let result = rule.check(&ctx).unwrap();
2062
2063        assert!(
2064            result.is_empty(),
2065            "Should not warn when .md source exists for .htm link"
2066        );
2067    }
2068
2069    #[test]
2070    fn test_html_link_finds_various_markdown_extensions() {
2071        // Should find .mdx, .markdown, etc. as source files
2072        let temp_dir = tempdir().unwrap();
2073        let base_path = temp_dir.path();
2074
2075        File::create(base_path.join("doc.md")).unwrap();
2076        File::create(base_path.join("tutorial.mdx")).unwrap();
2077        File::create(base_path.join("guide.markdown")).unwrap();
2078
2079        let content = r#"
2080[Doc](doc.html)
2081[Tutorial](tutorial.html)
2082[Guide](guide.html)
2083"#;
2084
2085        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2086        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2087        let result = rule.check(&ctx).unwrap();
2088
2089        assert!(
2090            result.is_empty(),
2091            "Should find all markdown variants as source files. Got: {result:?}"
2092        );
2093    }
2094
2095    #[test]
2096    fn test_html_link_in_subdirectory() {
2097        // Should find markdown source in subdirectories
2098        let temp_dir = tempdir().unwrap();
2099        let base_path = temp_dir.path();
2100
2101        let docs_dir = base_path.join("docs");
2102        std::fs::create_dir(&docs_dir).unwrap();
2103        File::create(docs_dir.join("guide.md"))
2104            .unwrap()
2105            .write_all(b"# Guide")
2106            .unwrap();
2107
2108        let content = "[Guide](docs/guide.html)";
2109
2110        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2111        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2112        let result = rule.check(&ctx).unwrap();
2113
2114        assert!(result.is_empty(), "Should find markdown source in subdirectory");
2115    }
2116
2117    #[test]
2118    fn test_absolute_path_skipped_in_check() {
2119        // Test that absolute paths are skipped during link validation
2120        // This fixes the bug where /pkg/runtime was being flagged
2121        let temp_dir = tempdir().unwrap();
2122        let base_path = temp_dir.path();
2123
2124        let content = r#"
2125# Test Document
2126
2127[Go Runtime](/pkg/runtime)
2128[Go Runtime with Fragment](/pkg/runtime#section)
2129[API Docs](/api/v1/users)
2130[Blog Post](/blog/2024/release.html)
2131[React Hook](/react/hooks/use-state.html)
2132"#;
2133
2134        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2135        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2136        let result = rule.check(&ctx).unwrap();
2137
2138        // Should have NO warnings - all absolute paths should be skipped
2139        assert!(
2140            result.is_empty(),
2141            "Absolute paths should be skipped. Got warnings: {result:?}"
2142        );
2143    }
2144
2145    #[test]
2146    fn test_absolute_path_skipped_in_cross_file_check() {
2147        // Test that absolute paths are skipped in cross_file_check()
2148        use crate::workspace_index::WorkspaceIndex;
2149
2150        let rule = MD057ExistingRelativeLinks::new();
2151
2152        // Create an empty workspace index (no files exist)
2153        let workspace_index = WorkspaceIndex::new();
2154
2155        // Create file index with absolute path links (should be skipped)
2156        let mut file_index = FileIndex::new();
2157        file_index.add_cross_file_link(CrossFileLinkIndex {
2158            target_path: "/pkg/runtime.md".to_string(),
2159            fragment: "".to_string(),
2160            line: 5,
2161            column: 1,
2162        });
2163        file_index.add_cross_file_link(CrossFileLinkIndex {
2164            target_path: "/api/v1/users.md".to_string(),
2165            fragment: "section".to_string(),
2166            line: 10,
2167            column: 1,
2168        });
2169
2170        // Run cross-file check
2171        let warnings = rule
2172            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
2173            .unwrap();
2174
2175        // Should have NO warnings - absolute paths should be skipped
2176        assert!(
2177            warnings.is_empty(),
2178            "Absolute paths should be skipped in cross_file_check. Got warnings: {warnings:?}"
2179        );
2180    }
2181
2182    #[test]
2183    fn test_protocol_relative_url_not_skipped() {
2184        // Test that protocol-relative URLs (//example.com) are NOT skipped as absolute paths
2185        // They should still be caught by is_external_url() though
2186        let temp_dir = tempdir().unwrap();
2187        let base_path = temp_dir.path();
2188
2189        let content = r#"
2190# Test Document
2191
2192[External](//example.com/page)
2193[Another](//cdn.example.com/asset.js)
2194"#;
2195
2196        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2197        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2198        let result = rule.check(&ctx).unwrap();
2199
2200        // Should have NO warnings - protocol-relative URLs are external and should be skipped
2201        assert!(
2202            result.is_empty(),
2203            "Protocol-relative URLs should be skipped. Got warnings: {result:?}"
2204        );
2205    }
2206
2207    #[test]
2208    fn test_email_addresses_skipped() {
2209        // Test that email addresses without mailto: are skipped
2210        // These are clearly not file links (the @ symbol is definitive)
2211        let temp_dir = tempdir().unwrap();
2212        let base_path = temp_dir.path();
2213
2214        let content = r#"
2215# Test Document
2216
2217[Contact](user@example.com)
2218[Steering](steering@kubernetes.io)
2219[Support](john.doe+filter@company.co.uk)
2220[User](user_name@sub.domain.com)
2221"#;
2222
2223        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2224        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2225        let result = rule.check(&ctx).unwrap();
2226
2227        // Should have NO warnings - email addresses are clearly not file links and should be skipped
2228        assert!(
2229            result.is_empty(),
2230            "Email addresses should be skipped. Got warnings: {result:?}"
2231        );
2232    }
2233
2234    #[test]
2235    fn test_email_addresses_vs_file_paths() {
2236        // Test that email addresses (anything with @) are skipped
2237        // Note: File paths with @ are extremely rare, so we treat anything with @ as an email
2238        let temp_dir = tempdir().unwrap();
2239        let base_path = temp_dir.path();
2240
2241        let content = r#"
2242# Test Document
2243
2244[Email](user@example.com)  <!-- Should be skipped (email) -->
2245[Email2](steering@kubernetes.io)  <!-- Should be skipped (email) -->
2246[Email3](user@file.md)  <!-- Should be skipped (has @, treated as email) -->
2247"#;
2248
2249        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2250        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2251        let result = rule.check(&ctx).unwrap();
2252
2253        // All should be skipped - anything with @ is treated as an email
2254        assert!(
2255            result.is_empty(),
2256            "All email addresses should be skipped. Got: {result:?}"
2257        );
2258    }
2259
2260    #[test]
2261    fn test_diagnostic_position_accuracy() {
2262        // Test that diagnostics point to the URL, not the link text
2263        let temp_dir = tempdir().unwrap();
2264        let base_path = temp_dir.path();
2265
2266        // Position markers:     0         1         2         3
2267        //                       0123456789012345678901234567890123456789
2268        let content = "prefix [text](missing.md) suffix";
2269        //             The URL "missing.md" starts at 0-indexed position 14
2270        //             which is 1-indexed column 15, and ends at 0-indexed 24 (1-indexed column 25)
2271
2272        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2273        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2274        let result = rule.check(&ctx).unwrap();
2275
2276        assert_eq!(result.len(), 1, "Should have exactly one warning");
2277        assert_eq!(result[0].line, 1, "Should be on line 1");
2278        assert_eq!(result[0].column, 15, "Should point to start of URL 'missing.md'");
2279        assert_eq!(result[0].end_column, 25, "Should point past end of URL 'missing.md'");
2280    }
2281
2282    #[test]
2283    fn test_diagnostic_position_angle_brackets() {
2284        // Test position accuracy with angle bracket links
2285        let temp_dir = tempdir().unwrap();
2286        let base_path = temp_dir.path();
2287
2288        // Position markers:     0         1         2
2289        //                       012345678901234567890
2290        let content = "[link](<missing.md>)";
2291        //             The URL "missing.md" starts at 0-indexed position 8 (1-indexed column 9)
2292
2293        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2294        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2295        let result = rule.check(&ctx).unwrap();
2296
2297        assert_eq!(result.len(), 1, "Should have exactly one warning");
2298        assert_eq!(result[0].line, 1, "Should be on line 1");
2299        assert_eq!(result[0].column, 9, "Should point to start of URL in angle brackets");
2300    }
2301
2302    #[test]
2303    fn test_diagnostic_position_multiline() {
2304        // Test that line numbers are correct for links on different lines
2305        let temp_dir = tempdir().unwrap();
2306        let base_path = temp_dir.path();
2307
2308        let content = r#"# Title
2309Some text on line 2
2310[link on line 3](missing1.md)
2311More text
2312[link on line 5](missing2.md)"#;
2313
2314        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2315        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2316        let result = rule.check(&ctx).unwrap();
2317
2318        assert_eq!(result.len(), 2, "Should have two warnings");
2319
2320        // First warning should be on line 3
2321        assert_eq!(result[0].line, 3, "First warning should be on line 3");
2322        assert!(result[0].message.contains("missing1.md"));
2323
2324        // Second warning should be on line 5
2325        assert_eq!(result[1].line, 5, "Second warning should be on line 5");
2326        assert!(result[1].message.contains("missing2.md"));
2327    }
2328
2329    #[test]
2330    fn test_diagnostic_position_with_spaces() {
2331        // Test position with URLs that have spaces in parentheses
2332        let temp_dir = tempdir().unwrap();
2333        let base_path = temp_dir.path();
2334
2335        let content = "[link]( missing.md )";
2336        //             0123456789012345678901
2337        //             0-indexed position 8 is 'm' in 'missing.md' (after space and paren)
2338        //             which is 1-indexed column 9
2339
2340        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2341        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2342        let result = rule.check(&ctx).unwrap();
2343
2344        assert_eq!(result.len(), 1, "Should have exactly one warning");
2345        // The regex captures the URL without leading/trailing spaces
2346        assert_eq!(result[0].column, 9, "Should point to URL after stripping spaces");
2347    }
2348
2349    #[test]
2350    fn test_diagnostic_position_image() {
2351        // Test that image diagnostics also have correct positions
2352        let temp_dir = tempdir().unwrap();
2353        let base_path = temp_dir.path();
2354
2355        let content = "![alt text](missing.jpg)";
2356
2357        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2358        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2359        let result = rule.check(&ctx).unwrap();
2360
2361        assert_eq!(result.len(), 1, "Should have exactly one warning for image");
2362        assert_eq!(result[0].line, 1);
2363        // Images use start_col from the parser, which should point to the URL
2364        assert!(result[0].column > 0, "Should have valid column position");
2365        assert!(result[0].message.contains("missing.jpg"));
2366    }
2367
2368    #[test]
2369    fn test_wikilinks_skipped() {
2370        // Wikilinks should not trigger MD057 warnings
2371        // They use a different linking system (e.g., Obsidian, wiki software)
2372        let temp_dir = tempdir().unwrap();
2373        let base_path = temp_dir.path();
2374
2375        let content = r#"# Test Document
2376
2377[[Microsoft#Windows OS]]
2378[[SomePage]]
2379[[Page With Spaces]]
2380[[path/to/page#section]]
2381[[page|Display Text]]
2382
2383This is a [real missing link](missing.md) that should be flagged.
2384"#;
2385
2386        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2387        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2388        let result = rule.check(&ctx).unwrap();
2389
2390        // Should only warn about the regular markdown link, not wikilinks
2391        assert_eq!(
2392            result.len(),
2393            1,
2394            "Should only warn about missing.md, not wikilinks. Got: {result:?}"
2395        );
2396        assert!(
2397            result[0].message.contains("missing.md"),
2398            "Warning should be for missing.md, not wikilinks"
2399        );
2400    }
2401
2402    #[test]
2403    fn test_wikilinks_not_added_to_index() {
2404        // Wikilinks should not be added to the cross-file link index
2405        let temp_dir = tempdir().unwrap();
2406        let base_path = temp_dir.path();
2407
2408        let content = r#"# Test Document
2409
2410[[Microsoft#Windows OS]]
2411[[SomePage#section]]
2412[Regular Link](other.md)
2413"#;
2414
2415        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2416        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2417
2418        let mut file_index = FileIndex::new();
2419        rule.contribute_to_index(&ctx, &mut file_index);
2420
2421        // Should only have the regular markdown link (if it's a markdown file)
2422        // Wikilinks should not be added
2423        let cross_file_links = &file_index.cross_file_links;
2424        assert_eq!(
2425            cross_file_links.len(),
2426            1,
2427            "Only regular markdown links should be indexed, not wikilinks. Got: {cross_file_links:?}"
2428        );
2429        assert_eq!(file_index.cross_file_links[0].target_path, "other.md");
2430    }
2431
2432    #[test]
2433    fn test_reference_definition_missing_file() {
2434        // Reference definitions [ref]: ./path.md should be checked
2435        let temp_dir = tempdir().unwrap();
2436        let base_path = temp_dir.path();
2437
2438        let content = r#"# Test Document
2439
2440[test]: ./missing.md
2441[example]: ./nonexistent.html
2442
2443Use [test] and [example] here.
2444"#;
2445
2446        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2447        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2448        let result = rule.check(&ctx).unwrap();
2449
2450        // Should have warnings for both reference definitions
2451        assert_eq!(
2452            result.len(),
2453            2,
2454            "Should have warnings for missing reference definition targets. Got: {result:?}"
2455        );
2456        assert!(
2457            result.iter().any(|w| w.message.contains("missing.md")),
2458            "Should warn about missing.md"
2459        );
2460        assert!(
2461            result.iter().any(|w| w.message.contains("nonexistent.html")),
2462            "Should warn about nonexistent.html"
2463        );
2464    }
2465
2466    #[test]
2467    fn test_reference_definition_existing_file() {
2468        // Reference definitions to existing files should NOT trigger warnings
2469        let temp_dir = tempdir().unwrap();
2470        let base_path = temp_dir.path();
2471
2472        // Create an existing file
2473        let exists_path = base_path.join("exists.md");
2474        File::create(&exists_path)
2475            .unwrap()
2476            .write_all(b"# Existing file")
2477            .unwrap();
2478
2479        let content = r#"# Test Document
2480
2481[test]: ./exists.md
2482
2483Use [test] here.
2484"#;
2485
2486        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2487        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2488        let result = rule.check(&ctx).unwrap();
2489
2490        // Should have NO warnings since the file exists
2491        assert!(
2492            result.is_empty(),
2493            "Should not warn about existing file. Got: {result:?}"
2494        );
2495    }
2496
2497    #[test]
2498    fn test_reference_definition_external_url_skipped() {
2499        // Reference definitions with external URLs should be skipped
2500        let temp_dir = tempdir().unwrap();
2501        let base_path = temp_dir.path();
2502
2503        let content = r#"# Test Document
2504
2505[google]: https://google.com
2506[example]: http://example.org
2507[mail]: mailto:test@example.com
2508[ftp]: ftp://files.example.com
2509[local]: ./missing.md
2510
2511Use [google], [example], [mail], [ftp], [local] here.
2512"#;
2513
2514        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2515        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2516        let result = rule.check(&ctx).unwrap();
2517
2518        // Should only warn about the local missing file, not external URLs
2519        assert_eq!(
2520            result.len(),
2521            1,
2522            "Should only warn about local missing file. Got: {result:?}"
2523        );
2524        assert!(
2525            result[0].message.contains("missing.md"),
2526            "Warning should be for missing.md"
2527        );
2528    }
2529
2530    #[test]
2531    fn test_reference_definition_fragment_only_skipped() {
2532        // Reference definitions with fragment-only URLs should be skipped
2533        let temp_dir = tempdir().unwrap();
2534        let base_path = temp_dir.path();
2535
2536        let content = r#"# Test Document
2537
2538[section]: #my-section
2539
2540Use [section] here.
2541"#;
2542
2543        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2544        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2545        let result = rule.check(&ctx).unwrap();
2546
2547        // Should have NO warnings for fragment-only links
2548        assert!(
2549            result.is_empty(),
2550            "Should not warn about fragment-only reference. Got: {result:?}"
2551        );
2552    }
2553
2554    #[test]
2555    fn test_reference_definition_column_position() {
2556        // Test that column position points to the URL in the reference definition
2557        let temp_dir = tempdir().unwrap();
2558        let base_path = temp_dir.path();
2559
2560        // Position markers:     0         1         2
2561        //                       0123456789012345678901
2562        let content = "[ref]: ./missing.md";
2563        //             The URL "./missing.md" starts at 0-indexed position 7
2564        //             which is 1-indexed column 8
2565
2566        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2567        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2568        let result = rule.check(&ctx).unwrap();
2569
2570        assert_eq!(result.len(), 1, "Should have exactly one warning");
2571        assert_eq!(result[0].line, 1, "Should be on line 1");
2572        assert_eq!(result[0].column, 8, "Should point to start of URL './missing.md'");
2573    }
2574
2575    #[test]
2576    fn test_reference_definition_html_with_md_source() {
2577        // Reference definitions to .html files should pass if corresponding .md source exists
2578        let temp_dir = tempdir().unwrap();
2579        let base_path = temp_dir.path();
2580
2581        // Create guide.md (source file)
2582        let md_file = base_path.join("guide.md");
2583        File::create(&md_file).unwrap().write_all(b"# Guide").unwrap();
2584
2585        let content = r#"# Test Document
2586
2587[guide]: ./guide.html
2588[missing]: ./missing.html
2589
2590Use [guide] and [missing] here.
2591"#;
2592
2593        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2594        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2595        let result = rule.check(&ctx).unwrap();
2596
2597        // guide.html passes (guide.md exists), missing.html fails
2598        assert_eq!(
2599            result.len(),
2600            1,
2601            "Should only warn about missing source. Got: {result:?}"
2602        );
2603        assert!(result[0].message.contains("missing.html"));
2604    }
2605
2606    #[test]
2607    fn test_reference_definition_url_encoded() {
2608        // Reference definitions with URL-encoded paths should be decoded before checking
2609        let temp_dir = tempdir().unwrap();
2610        let base_path = temp_dir.path();
2611
2612        // Create a file with spaces in the name
2613        let file_with_spaces = base_path.join("file with spaces.md");
2614        File::create(&file_with_spaces).unwrap().write_all(b"# Spaces").unwrap();
2615
2616        let content = r#"# Test Document
2617
2618[spaces]: ./file%20with%20spaces.md
2619[missing]: ./missing%20file.md
2620
2621Use [spaces] and [missing] here.
2622"#;
2623
2624        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2625        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2626        let result = rule.check(&ctx).unwrap();
2627
2628        // Should only warn about the missing file
2629        assert_eq!(
2630            result.len(),
2631            1,
2632            "Should only warn about missing URL-encoded file. Got: {result:?}"
2633        );
2634        assert!(result[0].message.contains("missing%20file.md"));
2635    }
2636
2637    #[test]
2638    fn test_inline_and_reference_both_checked() {
2639        // Both inline links and reference definitions should be checked
2640        let temp_dir = tempdir().unwrap();
2641        let base_path = temp_dir.path();
2642
2643        let content = r#"# Test Document
2644
2645[inline link](./inline-missing.md)
2646[ref]: ./ref-missing.md
2647
2648Use [ref] here.
2649"#;
2650
2651        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2652        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2653        let result = rule.check(&ctx).unwrap();
2654
2655        // Should warn about both the inline link and the reference definition
2656        assert_eq!(
2657            result.len(),
2658            2,
2659            "Should warn about both inline and reference links. Got: {result:?}"
2660        );
2661        assert!(
2662            result.iter().any(|w| w.message.contains("inline-missing.md")),
2663            "Should warn about inline-missing.md"
2664        );
2665        assert!(
2666            result.iter().any(|w| w.message.contains("ref-missing.md")),
2667            "Should warn about ref-missing.md"
2668        );
2669    }
2670
2671    #[test]
2672    fn test_footnote_definitions_not_flagged() {
2673        // Regression test for issue #286: footnote definitions should not be
2674        // treated as reference definitions and flagged as broken links
2675        let rule = MD057ExistingRelativeLinks::default();
2676
2677        let content = r#"# Title
2678
2679A footnote[^1].
2680
2681[^1]: [link](https://www.google.com).
2682"#;
2683
2684        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2685        let result = rule.check(&ctx).unwrap();
2686
2687        assert!(
2688            result.is_empty(),
2689            "Footnote definitions should not trigger MD057 warnings. Got: {result:?}"
2690        );
2691    }
2692
2693    #[test]
2694    fn test_footnote_with_relative_link_inside() {
2695        // Footnotes containing relative links should not be checked
2696        // (the footnote content is not a URL, it's content that may contain links)
2697        let rule = MD057ExistingRelativeLinks::default();
2698
2699        let content = r#"# Title
2700
2701See the footnote[^1].
2702
2703[^1]: Check out [this file](./existing.md) for more info.
2704[^2]: Also see [missing](./does-not-exist.md).
2705"#;
2706
2707        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2708        let result = rule.check(&ctx).unwrap();
2709
2710        // The inline links INSIDE footnotes should be checked (./existing.md, ./does-not-exist.md)
2711        // but the footnote definition itself should not be treated as a reference definition
2712        // Note: This test verifies that [^1]: and [^2]: are not parsed as ref defs with
2713        // URLs like "[this file](./existing.md)" or "[missing](./does-not-exist.md)"
2714        for warning in &result {
2715            assert!(
2716                !warning.message.contains("[this file]"),
2717                "Footnote content should not be treated as URL: {warning:?}"
2718            );
2719            assert!(
2720                !warning.message.contains("[missing]"),
2721                "Footnote content should not be treated as URL: {warning:?}"
2722            );
2723        }
2724    }
2725
2726    #[test]
2727    fn test_mixed_footnotes_and_reference_definitions() {
2728        // Ensure regular reference definitions are still checked while footnotes are skipped
2729        let temp_dir = tempdir().unwrap();
2730        let base_path = temp_dir.path();
2731
2732        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2733
2734        let content = r#"# Title
2735
2736A footnote[^1] and a [ref link][myref].
2737
2738[^1]: This is a footnote with [link](https://example.com).
2739
2740[myref]: ./missing-file.md "This should be checked"
2741"#;
2742
2743        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2744        let result = rule.check(&ctx).unwrap();
2745
2746        // Should only warn about the regular reference definition, not the footnote
2747        assert_eq!(
2748            result.len(),
2749            1,
2750            "Should only warn about the regular reference definition. Got: {result:?}"
2751        );
2752        assert!(
2753            result[0].message.contains("missing-file.md"),
2754            "Should warn about missing-file.md in reference definition"
2755        );
2756    }
2757
2758    #[test]
2759    fn test_absolute_links_ignore_by_default() {
2760        // By default, absolute links are ignored (not validated)
2761        let temp_dir = tempdir().unwrap();
2762        let base_path = temp_dir.path();
2763
2764        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2765
2766        let content = r#"# Links
2767
2768[API docs](/api/v1/users)
2769[Blog post](/blog/2024/release.html)
2770![Logo](/assets/logo.png)
2771
2772[ref]: /docs/reference.md
2773"#;
2774
2775        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2776        let result = rule.check(&ctx).unwrap();
2777
2778        // No warnings - absolute links are ignored by default
2779        assert!(
2780            result.is_empty(),
2781            "Absolute links should be ignored by default. Got: {result:?}"
2782        );
2783    }
2784
2785    #[test]
2786    fn test_absolute_links_warn_config() {
2787        // When configured to warn, absolute links should generate warnings
2788        let temp_dir = tempdir().unwrap();
2789        let base_path = temp_dir.path();
2790
2791        let config = MD057Config {
2792            absolute_links: AbsoluteLinksOption::Warn,
2793            ..Default::default()
2794        };
2795        let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(base_path);
2796
2797        let content = r#"# Links
2798
2799[API docs](/api/v1/users)
2800[Blog post](/blog/2024/release.html)
2801"#;
2802
2803        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2804        let result = rule.check(&ctx).unwrap();
2805
2806        // Should have 2 warnings for the 2 absolute links
2807        assert_eq!(
2808            result.len(),
2809            2,
2810            "Should warn about both absolute links. Got: {result:?}"
2811        );
2812        assert!(
2813            result[0].message.contains("cannot be validated locally"),
2814            "Warning should explain why: {}",
2815            result[0].message
2816        );
2817        assert!(
2818            result[0].message.contains("/api/v1/users"),
2819            "Warning should include the link path"
2820        );
2821    }
2822
2823    #[test]
2824    fn test_absolute_links_warn_images() {
2825        // Images with absolute paths should also warn when configured
2826        let temp_dir = tempdir().unwrap();
2827        let base_path = temp_dir.path();
2828
2829        let config = MD057Config {
2830            absolute_links: AbsoluteLinksOption::Warn,
2831            ..Default::default()
2832        };
2833        let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(base_path);
2834
2835        let content = r#"# Images
2836
2837![Logo](/assets/logo.png)
2838"#;
2839
2840        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2841        let result = rule.check(&ctx).unwrap();
2842
2843        assert_eq!(
2844            result.len(),
2845            1,
2846            "Should warn about absolute image path. Got: {result:?}"
2847        );
2848        assert!(
2849            result[0].message.contains("/assets/logo.png"),
2850            "Warning should include the image path"
2851        );
2852    }
2853
2854    #[test]
2855    fn test_absolute_links_warn_reference_definitions() {
2856        // Reference definitions with absolute paths should also warn when configured
2857        let temp_dir = tempdir().unwrap();
2858        let base_path = temp_dir.path();
2859
2860        let config = MD057Config {
2861            absolute_links: AbsoluteLinksOption::Warn,
2862            ..Default::default()
2863        };
2864        let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(base_path);
2865
2866        let content = r#"# Reference
2867
2868See the [docs][ref].
2869
2870[ref]: /docs/reference.md
2871"#;
2872
2873        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2874        let result = rule.check(&ctx).unwrap();
2875
2876        assert_eq!(
2877            result.len(),
2878            1,
2879            "Should warn about absolute reference definition. Got: {result:?}"
2880        );
2881        assert!(
2882            result[0].message.contains("/docs/reference.md"),
2883            "Warning should include the reference path"
2884        );
2885    }
2886}