Skip to main content

rumdl_lib/rules/
md057_existing_relative_links.rs

1//!
2//! Rule MD057: Existing relative links
3//!
4//! See [docs/md057.md](../../docs/md057.md) for full documentation, configuration, and examples.
5
6use crate::rule::{
7    CrossFileScope, Fix, FixCapability, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity,
8};
9use crate::utils::element_cache::ElementCache;
10use crate::workspace_index::{FileIndex, extract_cross_file_links};
11use regex::Regex;
12use std::collections::HashMap;
13use std::env;
14use std::path::{Path, PathBuf};
15use std::sync::LazyLock;
16use std::sync::{Arc, Mutex};
17
18mod md057_config;
19use crate::rule_config_serde::RuleConfig;
20use crate::utils::mkdocs_config::resolve_docs_dir;
21pub use md057_config::{AbsoluteLinksOption, MD057Config};
22
// Process-wide memo of `Path::exists` results, keyed by the exact path probed.
// Guarded by a mutex so concurrent callers can share it; starts out empty.
static FILE_EXISTENCE_CACHE: LazyLock<Arc<Mutex<HashMap<PathBuf, bool>>>> = LazyLock::new(Default::default);
26
27// Reset the file existence cache (typically between rule runs)
28fn reset_file_existence_cache() {
29    if let Ok(mut cache) = FILE_EXISTENCE_CACHE.lock() {
30        cache.clear();
31    }
32}
33
34// Check if a file exists with caching
35fn file_exists_with_cache(path: &Path) -> bool {
36    match FILE_EXISTENCE_CACHE.lock() {
37        Ok(mut cache) => *cache.entry(path.to_path_buf()).or_insert_with(|| path.exists()),
38        Err(_) => path.exists(), // Fallback to uncached check on mutex poison
39    }
40}
41
42/// Check if a file exists, also trying markdown extensions for extensionless links.
43/// This supports wiki-style links like `[Link](page)` that resolve to `page.md`.
44fn file_exists_or_markdown_extension(path: &Path) -> bool {
45    // First, check exact path
46    if file_exists_with_cache(path) {
47        return true;
48    }
49
50    // If the path has no extension, try adding markdown extensions
51    if path.extension().is_none() {
52        for ext in MARKDOWN_EXTENSIONS {
53            // MARKDOWN_EXTENSIONS includes the dot, e.g., ".md"
54            let path_with_ext = path.with_extension(&ext[1..]);
55            if file_exists_with_cache(&path_with_ext) {
56                return true;
57            }
58        }
59    }
60
61    false
62}
63
// Regex to match the start of a link - simplified for performance.
// Matches an optional `!` (image marker), then `[`, any run of characters other
// than `]`, then the closing `]`. Link text containing a nested `]` is therefore
// only matched up to that first `]`.
// `check` starts the URL-extraction regex search at the closing `]` of each match.
static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());
66
/// Regex to extract the URL from an angle-bracketed markdown link
/// Format: `](<URL>)` or `](<URL> "title")`
/// This handles URLs with parentheses like `](<path/(with)/parens.md>)`
///
/// Capture groups:
/// - 1: the URL between `<` and `>` (any characters except `>`)
/// - 2: an optional `#fragment` directly following the closing `>`
///
/// An optional double-quoted title may follow before the closing `)`.
static URL_EXTRACT_ANGLE_BRACKET_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\]\(\s*<([^>]+)>(#[^\)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());
72
73/// Regex to extract the URL from a normal markdown link (without angle brackets)
74/// Format: `](URL)` or `](URL "title")`
75static URL_EXTRACT_REGEX: LazyLock<Regex> =
76    LazyLock::new(|| Regex::new("\\]\\(\\s*([^>\\)\\s#]+)(#[^)\\s]*)?\\s*(?:\"[^\"]*\")?\\s*\\)").unwrap());
77
/// Regex to detect URLs with explicit schemes (should not be checked as relative links)
/// Matches: scheme:// or scheme: (per RFC 3986)
/// This covers http, https, ftp, file, smb, mailto, tel, data, macappstores, etc.
///
/// The pattern is anchored at the start of the input. A scheme is a letter followed
/// by any number of letters, digits, `+`, `.` or `-`. Besides schemes, the pattern
/// also matches inputs that begin with `www.`.
static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());
83
// Working directory of the process, resolved once on first use.
// Falls back to "." when the working directory cannot be determined.
static CURRENT_DIR: LazyLock<PathBuf> = LazyLock::new(|| match env::current_dir() {
    Ok(cwd) => cwd,
    Err(_) => PathBuf::from("."),
});
86
/// Map an ASCII hex digit (`0-9`, `a-f`, `A-F`) to its numeric value `0..=15`.
/// Any other byte yields `None`.
#[inline]
fn hex_digit_to_value(byte: u8) -> Option<u8> {
    // `char::to_digit(16)` accepts exactly 0-9, a-f and A-F; the result is
    // always below 16, so narrowing it to `u8` cannot truncate.
    char::from(byte).to_digit(16).map(|digit| digit as u8)
}
98
/// Supported markdown file extensions
///
/// Every entry includes its leading dot. The table is probed in order when an
/// extensionless link is resolved, and when looking for the markdown source
/// behind a `.html`/`.htm` link.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md",
    ".markdown",
    ".mdx",
    ".mkd",
    ".mkdn",
    ".mdown",
    ".mdwn",
    ".qmd",
    ".rmd",
];
111
/// Rule MD057: Existing relative links should point to valid files or directories.
///
/// `Clone` is derived, so a clone shares the same `base_path` cell through the `Arc`.
#[derive(Debug, Clone)]
pub struct MD057ExistingRelativeLinks {
    /// Base directory for resolving relative links.
    /// `None` until set through `with_path`; when set, `check` prefers it over the
    /// directory of the file being linted.
    base_path: Arc<Mutex<Option<PathBuf>>>,
    /// Configuration for the rule
    config: MD057Config,
}
120
121impl Default for MD057ExistingRelativeLinks {
122    fn default() -> Self {
123        Self {
124            base_path: Arc::new(Mutex::new(None)),
125            config: MD057Config::default(),
126        }
127    }
128}
129
130impl MD057ExistingRelativeLinks {
    /// Create a new instance with default settings.
    ///
    /// Equivalent to `Self::default()`: default configuration, no base path set.
    pub fn new() -> Self {
        Self::default()
    }
135
136    /// Set the base path for resolving relative links
137    pub fn with_path<P: AsRef<Path>>(self, path: P) -> Self {
138        let path = path.as_ref();
139        let dir_path = if path.is_file() {
140            path.parent().map(|p| p.to_path_buf())
141        } else {
142            Some(path.to_path_buf())
143        };
144
145        if let Ok(mut guard) = self.base_path.lock() {
146            *guard = dir_path;
147        }
148        self
149    }
150
151    pub fn from_config_struct(config: MD057Config) -> Self {
152        Self {
153            base_path: Arc::new(Mutex::new(None)),
154            config,
155        }
156    }
157
158    /// Check if a URL is external or should be skipped for validation.
159    ///
160    /// Returns `true` (skip validation) for:
161    /// - URLs with protocols: `https://`, `http://`, `ftp://`, `mailto:`, etc.
162    /// - Bare domains: `www.example.com`, `example.com`
163    /// - Email addresses: `user@example.com` (without `mailto:`)
164    /// - Template variables: `{{URL}}`, `{{% include %}}`
165    /// - Absolute web URL paths: `/api/docs`, `/blog/post.html`
166    ///
167    /// Returns `false` (validate) for:
168    /// - Relative filesystem paths: `./file.md`, `../parent/file.md`, `file.md`
169    #[inline]
170    fn is_external_url(&self, url: &str) -> bool {
171        if url.is_empty() {
172            return false;
173        }
174
175        // Quick checks for common external URL patterns
176        if PROTOCOL_DOMAIN_REGEX.is_match(url) || url.starts_with("www.") {
177            return true;
178        }
179
180        // Skip template variables (Handlebars/Mustache/Jinja2 syntax)
181        // Examples: {{URL}}, {{#URL}}, {{> partial}}, {{% include %}}, {{ variable }}
182        if url.starts_with("{{") || url.starts_with("{%") {
183            return true;
184        }
185
186        // Simple check: if URL contains @, it's almost certainly an email address
187        // File paths with @ are extremely rare, so this is a safe heuristic
188        if url.contains('@') {
189            return true; // It's an email address, skip it
190        }
191
192        // Bare domain check (e.g., "example.com")
193        // Note: We intentionally DON'T skip all TLDs like .org, .net, etc.
194        // Links like [text](nodejs.org/path) without a protocol are broken -
195        // they'll be treated as relative paths by markdown renderers.
196        // Flagging them helps users find missing protocols.
197        // We only skip .com as a minimal safety net for the most common case.
198        if url.ends_with(".com") {
199            return true;
200        }
201
202        // Framework path aliases (resolved by build tools like Vite, webpack, etc.)
203        // These are not filesystem paths but module/asset aliases
204        // Examples: ~/assets/image.png, @images/photo.jpg, @/components/Button.vue
205        if url.starts_with('~') || url.starts_with('@') {
206            return true;
207        }
208
209        // All other cases (relative paths, etc.) are not external
210        false
211    }
212
    /// Check if the URL is a fragment-only link (internal document link).
    ///
    /// True exactly when `url` begins with `#`; such links point inside the
    /// current document and are not checked against the filesystem.
    #[inline]
    fn is_fragment_only_link(&self, url: &str) -> bool {
        url.starts_with('#')
    }
218
    /// Check if the URL is an absolute path (starts with /)
    /// These are typically routes for published documentation sites.
    ///
    /// `check` treats such URLs according to the `absolute_links` configuration
    /// option instead of resolving them against the base directory.
    #[inline]
    fn is_absolute_path(url: &str) -> bool {
        url.starts_with('/')
    }
225
226    /// Decode URL percent-encoded sequences in a path.
227    /// Converts `%20` to space, `%2F` to `/`, etc.
228    /// Returns the original string if decoding fails or produces invalid UTF-8.
229    fn url_decode(path: &str) -> String {
230        // Quick check: if no percent sign, return as-is
231        if !path.contains('%') {
232            return path.to_string();
233        }
234
235        let bytes = path.as_bytes();
236        let mut result = Vec::with_capacity(bytes.len());
237        let mut i = 0;
238
239        while i < bytes.len() {
240            if bytes[i] == b'%' && i + 2 < bytes.len() {
241                // Try to parse the two hex digits following %
242                let hex1 = bytes[i + 1];
243                let hex2 = bytes[i + 2];
244                if let (Some(d1), Some(d2)) = (hex_digit_to_value(hex1), hex_digit_to_value(hex2)) {
245                    result.push(d1 * 16 + d2);
246                    i += 3;
247                    continue;
248                }
249            }
250            result.push(bytes[i]);
251            i += 1;
252        }
253
254        // Convert to UTF-8, falling back to original if invalid
255        String::from_utf8(result).unwrap_or_else(|_| path.to_string())
256    }
257
258    /// Strip query parameters and fragments from a URL for file existence checking.
259    /// URLs like `path/to/image.png?raw=true` or `file.md#section` should check
260    /// for `path/to/image.png` or `file.md` respectively.
261    ///
262    /// Note: In standard URLs, query parameters (`?`) come before fragments (`#`),
263    /// so we check for `?` first. If a URL has both, only the query is stripped here
264    /// (fragments are handled separately by the regex in `contribute_to_index`).
265    fn strip_query_and_fragment(url: &str) -> &str {
266        // Find the first occurrence of '?' or '#', whichever comes first
267        // This handles both standard URLs (? before #) and edge cases (# before ?)
268        let query_pos = url.find('?');
269        let fragment_pos = url.find('#');
270
271        match (query_pos, fragment_pos) {
272            (Some(q), Some(f)) => {
273                // Both exist - strip at whichever comes first
274                &url[..q.min(f)]
275            }
276            (Some(q), None) => &url[..q],
277            (None, Some(f)) => &url[..f],
278            (None, None) => url,
279        }
280    }
281
    /// Resolve a relative link against a provided base path
    ///
    /// This is a plain `Path::join`: the result is not canonicalized, so `..`
    /// components are kept as written.
    /// NOTE(review): per `Path::join`, an absolute `link` would replace `base_path`
    /// entirely — callers appear to filter `/`-prefixed URLs beforehand; confirm.
    fn resolve_link_path_with_base(link: &str, base_path: &Path) -> PathBuf {
        base_path.join(link)
    }
286
287    /// Check if a relative link can be compacted and return the simplified form.
288    ///
289    /// Returns `None` if compact-paths is disabled, the link has no traversal,
290    /// or the link is already the shortest form.
291    /// Returns `Some(suggestion)` with the full compacted URL (including fragment/query suffix).
292    fn compact_path_suggestion(&self, url: &str, base_path: &Path) -> Option<String> {
293        if !self.config.compact_paths {
294            return None;
295        }
296
297        // Split URL into path and suffix (fragment/query)
298        let path_end = url
299            .find('?')
300            .unwrap_or(url.len())
301            .min(url.find('#').unwrap_or(url.len()));
302        let path_part = &url[..path_end];
303        let suffix = &url[path_end..];
304
305        // URL-decode the path portion for filesystem resolution
306        let decoded_path = Self::url_decode(path_part);
307
308        compute_compact_path(base_path, &decoded_path).map(|compact| format!("{compact}{suffix}"))
309    }
310
311    /// Validate an absolute link by resolving it relative to MkDocs docs_dir.
312    ///
313    /// Returns `Some(warning_message)` if the link is broken, `None` if valid.
314    /// Falls back to a generic warning if no mkdocs.yml is found.
315    fn validate_absolute_link_via_docs_dir(url: &str, source_path: &Path) -> Option<String> {
316        let Some(docs_dir) = resolve_docs_dir(source_path) else {
317            // No mkdocs.yml found — fall back to warn behavior
318            return Some(format!(
319                "Absolute link '{url}' cannot be validated locally (no mkdocs.yml found)"
320            ));
321        };
322
323        // Strip leading / and resolve relative to docs_dir
324        let relative_url = url.trim_start_matches('/');
325
326        // Strip query/fragment before checking existence
327        let file_path = Self::strip_query_and_fragment(relative_url);
328        let decoded = Self::url_decode(file_path);
329        let resolved_path = docs_dir.join(&decoded);
330
331        // For directory-style links (ending with /, bare path to a directory, or empty
332        // decoded path like "/"), check for index.md inside the directory.
333        // This must be checked BEFORE file_exists_or_markdown_extension because
334        // path.exists() returns true for directories — we need to verify index.md exists.
335        let is_directory_link = url.ends_with('/') || decoded.is_empty();
336        if is_directory_link || resolved_path.is_dir() {
337            let index_path = resolved_path.join("index.md");
338            if file_exists_with_cache(&index_path) {
339                return None; // Valid directory link with index.md
340            }
341            // Directory exists but no index.md — fall through to error
342            if resolved_path.is_dir() {
343                return Some(format!(
344                    "Absolute link '{url}' resolves to directory '{}' which has no index.md",
345                    resolved_path.display()
346                ));
347            }
348        }
349
350        // Check existence (with markdown extension fallback for extensionless links)
351        if file_exists_or_markdown_extension(&resolved_path) {
352            return None; // Valid link
353        }
354
355        // For .html/.htm links, check for corresponding markdown source
356        if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
357            && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
358            && let (Some(stem), Some(parent)) = (
359                resolved_path.file_stem().and_then(|s| s.to_str()),
360                resolved_path.parent(),
361            )
362        {
363            let has_md_source = MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
364                let source_path = parent.join(format!("{stem}{md_ext}"));
365                file_exists_with_cache(&source_path)
366            });
367            if has_md_source {
368                return None; // Markdown source exists
369            }
370        }
371
372        Some(format!(
373            "Absolute link '{url}' resolves to '{}' which does not exist",
374            resolved_path.display()
375        ))
376    }
377}
378
379impl Rule for MD057ExistingRelativeLinks {
    /// Rule identifier; also used as the `rule_name` of the warnings emitted by `check`.
    fn name(&self) -> &'static str {
        "MD057"
    }
383
    /// One-line, human-readable summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Relative links should point to existing files"
    }
387
    /// This rule belongs to the link category.
    fn category(&self) -> RuleCategory {
        RuleCategory::Link
    }
391
    /// Skip the rule when the document is empty or the context reports that it is
    /// unlikely to contain any links or images.
    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
    }
395
396    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
397        let content = ctx.content;
398
399        // Early returns for performance
400        if content.is_empty() || !content.contains('[') {
401            return Ok(Vec::new());
402        }
403
404        // Quick check for any potential links before expensive operations
405        // Check for inline links "](", reference definitions "]:", or images "!["
406        if !content.contains("](") && !content.contains("]:") {
407            return Ok(Vec::new());
408        }
409
410        // Reset the file existence cache for a fresh run
411        reset_file_existence_cache();
412
413        let mut warnings = Vec::new();
414
415        // Determine base path for resolving relative links
416        // ALWAYS compute from ctx.source_file for each file - do not reuse cached base_path
417        // This ensures each file resolves links relative to its own directory
418        let base_path: Option<PathBuf> = {
419            // First check if base_path was explicitly set via with_path() (for tests)
420            let explicit_base = self.base_path.lock().ok().and_then(|g| g.clone());
421            if explicit_base.is_some() {
422                explicit_base
423            } else if let Some(ref source_file) = ctx.source_file {
424                // Resolve symlinks to get the actual file location
425                // This ensures relative links are resolved from the target's directory,
426                // not the symlink's directory
427                let resolved_file = source_file.canonicalize().unwrap_or_else(|_| source_file.clone());
428                resolved_file
429                    .parent()
430                    .map(|p| p.to_path_buf())
431                    .or_else(|| Some(CURRENT_DIR.clone()))
432            } else {
433                // No source file available - cannot validate relative links
434                None
435            }
436        };
437
438        // If we still don't have a base path, we can't validate relative links
439        let Some(base_path) = base_path else {
440            return Ok(warnings);
441        };
442
443        // Use LintContext links instead of expensive regex parsing
444        if !ctx.links.is_empty() {
445            // Use LineIndex for correct position calculation across all line ending types
446            let line_index = &ctx.line_index;
447
448            // Create element cache once for all links
449            let element_cache = ElementCache::new(content);
450
451            // Pre-collected lines from context
452            let lines = ctx.raw_lines();
453
454            // Track which lines we've already processed to avoid duplicates
455            // (ctx.links may have multiple entries for the same line, especially with malformed markdown)
456            let mut processed_lines = std::collections::HashSet::new();
457
458            for link in &ctx.links {
459                let line_idx = link.line - 1;
460                if line_idx >= lines.len() {
461                    continue;
462                }
463
464                // Skip lines inside PyMdown blocks (MkDocs flavor)
465                // This must be checked BEFORE processed_lines to skip the entire line
466                if ctx.line_info(link.line).is_some_and(|info| info.in_pymdown_block) {
467                    continue;
468                }
469
470                // Skip if we've already processed this line
471                if !processed_lines.insert(line_idx) {
472                    continue;
473                }
474
475                let line = lines[line_idx];
476
477                // Quick check for link pattern in this line
478                if !line.contains("](") {
479                    continue;
480                }
481
482                // Find all links in this line using optimized regex
483                for link_match in LINK_START_REGEX.find_iter(line) {
484                    let start_pos = link_match.start();
485                    let end_pos = link_match.end();
486
487                    // Calculate absolute position using LineIndex
488                    let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
489                    let absolute_start_pos = line_start_byte + start_pos;
490
491                    // Skip if this link is in a code span
492                    if element_cache.is_in_code_span(absolute_start_pos) {
493                        continue;
494                    }
495
496                    // Skip if this link is in a math span (LaTeX $...$ or $$...$$)
497                    if ctx.is_in_math_span(absolute_start_pos) {
498                        continue;
499                    }
500
501                    // Find the URL part after the link text
502                    // Try angle-bracket regex first (handles URLs with parens like `<path/(with)/parens.md>`)
503                    // Then fall back to normal URL regex
504                    let caps_and_url = URL_EXTRACT_ANGLE_BRACKET_REGEX
505                        .captures_at(line, end_pos - 1)
506                        .and_then(|caps| caps.get(1).map(|g| (caps, g)))
507                        .or_else(|| {
508                            URL_EXTRACT_REGEX
509                                .captures_at(line, end_pos - 1)
510                                .and_then(|caps| caps.get(1).map(|g| (caps, g)))
511                        });
512
513                    if let Some((caps, url_group)) = caps_and_url {
514                        let url = url_group.as_str().trim();
515
516                        // Skip empty URLs
517                        if url.is_empty() {
518                            continue;
519                        }
520
521                        // Skip rustdoc intra-doc links (backtick-wrapped URLs)
522                        // These are Rust API references, not file paths
523                        // Example: [`f32::is_subnormal`], [`Vec::push`]
524                        if url.starts_with('`') && url.ends_with('`') {
525                            continue;
526                        }
527
528                        // Skip external URLs and fragment-only links
529                        if self.is_external_url(url) || self.is_fragment_only_link(url) {
530                            continue;
531                        }
532
533                        // Handle absolute paths based on config
534                        if Self::is_absolute_path(url) {
535                            match self.config.absolute_links {
536                                AbsoluteLinksOption::Warn => {
537                                    let url_start = url_group.start();
538                                    let url_end = url_group.end();
539                                    warnings.push(LintWarning {
540                                        rule_name: Some(self.name().to_string()),
541                                        line: link.line,
542                                        column: url_start + 1,
543                                        end_line: link.line,
544                                        end_column: url_end + 1,
545                                        message: format!("Absolute link '{url}' cannot be validated locally"),
546                                        severity: Severity::Warning,
547                                        fix: None,
548                                    });
549                                }
550                                AbsoluteLinksOption::RelativeToDocs => {
551                                    if let Some(msg) = Self::validate_absolute_link_via_docs_dir(url, &base_path) {
552                                        let url_start = url_group.start();
553                                        let url_end = url_group.end();
554                                        warnings.push(LintWarning {
555                                            rule_name: Some(self.name().to_string()),
556                                            line: link.line,
557                                            column: url_start + 1,
558                                            end_line: link.line,
559                                            end_column: url_end + 1,
560                                            message: msg,
561                                            severity: Severity::Warning,
562                                            fix: None,
563                                        });
564                                    }
565                                }
566                                AbsoluteLinksOption::Ignore => {}
567                            }
568                            continue;
569                        }
570
571                        // Check for unnecessary path traversal (compact-paths)
572                        // Reconstruct full URL including fragment (regex group 2)
573                        // since url_group (group 1) contains only the path part
574                        let full_url_for_compact = if let Some(frag) = caps.get(2) {
575                            format!("{url}{}", frag.as_str())
576                        } else {
577                            url.to_string()
578                        };
579                        if let Some(suggestion) = self.compact_path_suggestion(&full_url_for_compact, &base_path) {
580                            let url_start = url_group.start();
581                            let url_end = caps.get(2).map_or(url_group.end(), |frag| frag.end());
582                            let fix_byte_start = line_start_byte + url_start;
583                            let fix_byte_end = line_start_byte + url_end;
584                            warnings.push(LintWarning {
585                                rule_name: Some(self.name().to_string()),
586                                line: link.line,
587                                column: url_start + 1,
588                                end_line: link.line,
589                                end_column: url_end + 1,
590                                message: format!(
591                                    "Relative link '{full_url_for_compact}' can be simplified to '{suggestion}'"
592                                ),
593                                severity: Severity::Warning,
594                                fix: Some(Fix {
595                                    range: fix_byte_start..fix_byte_end,
596                                    replacement: suggestion,
597                                }),
598                            });
599                        }
600
601                        // Strip query parameters and fragments before checking file existence
602                        let file_path = Self::strip_query_and_fragment(url);
603
604                        // URL-decode the path to handle percent-encoded characters
605                        let decoded_path = Self::url_decode(file_path);
606
607                        // Resolve the relative link against the base path
608                        let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
609
610                        // Check if the file exists, also trying markdown extensions for extensionless links
611                        if file_exists_or_markdown_extension(&resolved_path) {
612                            continue; // File exists, no warning needed
613                        }
614
615                        // For .html/.htm links, check if a corresponding markdown source exists
616                        let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
617                            && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
618                            && let (Some(stem), Some(parent)) = (
619                                resolved_path.file_stem().and_then(|s| s.to_str()),
620                                resolved_path.parent(),
621                            ) {
622                            MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
623                                let source_path = parent.join(format!("{stem}{md_ext}"));
624                                file_exists_with_cache(&source_path)
625                            })
626                        } else {
627                            false
628                        };
629
630                        if has_md_source {
631                            continue; // Markdown source exists, link is valid
632                        }
633
634                        // File doesn't exist and no source file found
635                        // Use actual URL position from regex capture group
636                        // Note: capture group positions are absolute within the line string
637                        let url_start = url_group.start();
638                        let url_end = url_group.end();
639
640                        warnings.push(LintWarning {
641                            rule_name: Some(self.name().to_string()),
642                            line: link.line,
643                            column: url_start + 1, // 1-indexed
644                            end_line: link.line,
645                            end_column: url_end + 1, // 1-indexed
646                            message: format!("Relative link '{url}' does not exist"),
647                            severity: Severity::Error,
648                            fix: None,
649                        });
650                    }
651                }
652            }
653        }
654
655        // Also process images - they have URLs already parsed
656        for image in &ctx.images {
657            // Skip images inside PyMdown blocks (MkDocs flavor)
658            if ctx.line_info(image.line).is_some_and(|info| info.in_pymdown_block) {
659                continue;
660            }
661
662            let url = image.url.as_ref();
663
664            // Skip empty URLs
665            if url.is_empty() {
666                continue;
667            }
668
669            // Skip external URLs and fragment-only links
670            if self.is_external_url(url) || self.is_fragment_only_link(url) {
671                continue;
672            }
673
674            // Handle absolute paths based on config
675            if Self::is_absolute_path(url) {
676                match self.config.absolute_links {
677                    AbsoluteLinksOption::Warn => {
678                        warnings.push(LintWarning {
679                            rule_name: Some(self.name().to_string()),
680                            line: image.line,
681                            column: image.start_col + 1,
682                            end_line: image.line,
683                            end_column: image.start_col + 1 + url.len(),
684                            message: format!("Absolute link '{url}' cannot be validated locally"),
685                            severity: Severity::Warning,
686                            fix: None,
687                        });
688                    }
689                    AbsoluteLinksOption::RelativeToDocs => {
690                        if let Some(msg) = Self::validate_absolute_link_via_docs_dir(url, &base_path) {
691                            warnings.push(LintWarning {
692                                rule_name: Some(self.name().to_string()),
693                                line: image.line,
694                                column: image.start_col + 1,
695                                end_line: image.line,
696                                end_column: image.start_col + 1 + url.len(),
697                                message: msg,
698                                severity: Severity::Warning,
699                                fix: None,
700                            });
701                        }
702                    }
703                    AbsoluteLinksOption::Ignore => {}
704                }
705                continue;
706            }
707
708            // Check for unnecessary path traversal (compact-paths)
709            if let Some(suggestion) = self.compact_path_suggestion(url, &base_path) {
710                let img_line_start_byte = ctx.line_index.get_line_start_byte(image.line).unwrap_or(0);
711                let fix_byte_start = img_line_start_byte + image.start_col;
712                let fix_byte_end = fix_byte_start + url.len();
713                warnings.push(LintWarning {
714                    rule_name: Some(self.name().to_string()),
715                    line: image.line,
716                    column: image.start_col + 1,
717                    end_line: image.line,
718                    end_column: image.start_col + 1 + url.len(),
719                    message: format!("Relative link '{url}' can be simplified to '{suggestion}'"),
720                    severity: Severity::Warning,
721                    fix: Some(Fix {
722                        range: fix_byte_start..fix_byte_end,
723                        replacement: suggestion,
724                    }),
725                });
726            }
727
728            // Strip query parameters and fragments before checking file existence
729            let file_path = Self::strip_query_and_fragment(url);
730
731            // URL-decode the path to handle percent-encoded characters
732            let decoded_path = Self::url_decode(file_path);
733
734            // Resolve the relative link against the base path
735            let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
736
737            // Check if the file exists, also trying markdown extensions for extensionless links
738            if file_exists_or_markdown_extension(&resolved_path) {
739                continue; // File exists, no warning needed
740            }
741
742            // For .html/.htm links, check if a corresponding markdown source exists
743            let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
744                && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
745                && let (Some(stem), Some(parent)) = (
746                    resolved_path.file_stem().and_then(|s| s.to_str()),
747                    resolved_path.parent(),
748                ) {
749                MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
750                    let source_path = parent.join(format!("{stem}{md_ext}"));
751                    file_exists_with_cache(&source_path)
752                })
753            } else {
754                false
755            };
756
757            if has_md_source {
758                continue; // Markdown source exists, link is valid
759            }
760
761            // File doesn't exist and no source file found
762            // Images already have correct position from parser
763            warnings.push(LintWarning {
764                rule_name: Some(self.name().to_string()),
765                line: image.line,
766                column: image.start_col + 1,
767                end_line: image.line,
768                end_column: image.start_col + 1 + url.len(),
769                message: format!("Relative link '{url}' does not exist"),
770                severity: Severity::Error,
771                fix: None,
772            });
773        }
774
775        // Also process reference definitions: [ref]: ./path.md
776        for ref_def in &ctx.reference_defs {
777            let url = &ref_def.url;
778
779            // Skip empty URLs
780            if url.is_empty() {
781                continue;
782            }
783
784            // Skip external URLs and fragment-only links
785            if self.is_external_url(url) || self.is_fragment_only_link(url) {
786                continue;
787            }
788
789            // Handle absolute paths based on config
790            if Self::is_absolute_path(url) {
791                match self.config.absolute_links {
792                    AbsoluteLinksOption::Warn => {
793                        let line_idx = ref_def.line - 1;
794                        let column = content.lines().nth(line_idx).map_or(1, |line_content| {
795                            line_content.find(url.as_str()).map_or(1, |url_pos| url_pos + 1)
796                        });
797                        warnings.push(LintWarning {
798                            rule_name: Some(self.name().to_string()),
799                            line: ref_def.line,
800                            column,
801                            end_line: ref_def.line,
802                            end_column: column + url.len(),
803                            message: format!("Absolute link '{url}' cannot be validated locally"),
804                            severity: Severity::Warning,
805                            fix: None,
806                        });
807                    }
808                    AbsoluteLinksOption::RelativeToDocs => {
809                        if let Some(msg) = Self::validate_absolute_link_via_docs_dir(url, &base_path) {
810                            let line_idx = ref_def.line - 1;
811                            let column = content.lines().nth(line_idx).map_or(1, |line_content| {
812                                line_content.find(url.as_str()).map_or(1, |url_pos| url_pos + 1)
813                            });
814                            warnings.push(LintWarning {
815                                rule_name: Some(self.name().to_string()),
816                                line: ref_def.line,
817                                column,
818                                end_line: ref_def.line,
819                                end_column: column + url.len(),
820                                message: msg,
821                                severity: Severity::Warning,
822                                fix: None,
823                            });
824                        }
825                    }
826                    AbsoluteLinksOption::Ignore => {}
827                }
828                continue;
829            }
830
831            // Check for unnecessary path traversal (compact-paths)
832            if let Some(suggestion) = self.compact_path_suggestion(url, &base_path) {
833                let ref_line_idx = ref_def.line - 1;
834                let col = content.lines().nth(ref_line_idx).map_or(1, |line_content| {
835                    line_content.find(url.as_str()).map_or(1, |url_pos| url_pos + 1)
836                });
837                let ref_line_start_byte = ctx.line_index.get_line_start_byte(ref_def.line).unwrap_or(0);
838                let fix_byte_start = ref_line_start_byte + col - 1;
839                let fix_byte_end = fix_byte_start + url.len();
840                warnings.push(LintWarning {
841                    rule_name: Some(self.name().to_string()),
842                    line: ref_def.line,
843                    column: col,
844                    end_line: ref_def.line,
845                    end_column: col + url.len(),
846                    message: format!("Relative link '{url}' can be simplified to '{suggestion}'"),
847                    severity: Severity::Warning,
848                    fix: Some(Fix {
849                        range: fix_byte_start..fix_byte_end,
850                        replacement: suggestion,
851                    }),
852                });
853            }
854
855            // Strip query parameters and fragments before checking file existence
856            let file_path = Self::strip_query_and_fragment(url);
857
858            // URL-decode the path to handle percent-encoded characters
859            let decoded_path = Self::url_decode(file_path);
860
861            // Resolve the relative link against the base path
862            let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
863
864            // Check if the file exists, also trying markdown extensions for extensionless links
865            if file_exists_or_markdown_extension(&resolved_path) {
866                continue; // File exists, no warning needed
867            }
868
869            // For .html/.htm links, check if a corresponding markdown source exists
870            let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
871                && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
872                && let (Some(stem), Some(parent)) = (
873                    resolved_path.file_stem().and_then(|s| s.to_str()),
874                    resolved_path.parent(),
875                ) {
876                MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
877                    let source_path = parent.join(format!("{stem}{md_ext}"));
878                    file_exists_with_cache(&source_path)
879                })
880            } else {
881                false
882            };
883
884            if has_md_source {
885                continue; // Markdown source exists, link is valid
886            }
887
888            // File doesn't exist and no source file found
889            // Calculate column position: find URL within the line
890            let line_idx = ref_def.line - 1;
891            let column = content.lines().nth(line_idx).map_or(1, |line_content| {
892                // Find URL position in line (after ]: )
893                line_content.find(url.as_str()).map_or(1, |url_pos| url_pos + 1)
894            });
895
896            warnings.push(LintWarning {
897                rule_name: Some(self.name().to_string()),
898                line: ref_def.line,
899                column,
900                end_line: ref_def.line,
901                end_column: column + url.len(),
902                message: format!("Relative link '{url}' does not exist"),
903                severity: Severity::Error,
904                fix: None,
905            });
906        }
907
908        Ok(warnings)
909    }
910
911    fn fix_capability(&self) -> FixCapability {
912        if self.config.compact_paths {
913            FixCapability::ConditionallyFixable
914        } else {
915            FixCapability::Unfixable
916        }
917    }
918
919    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
920        if !self.config.compact_paths {
921            return Ok(ctx.content.to_string());
922        }
923
924        let warnings = self.check(ctx)?;
925        let mut content = ctx.content.to_string();
926
927        // Collect fixable warnings (compact-paths) sorted by byte offset descending
928        let mut fixes: Vec<_> = warnings.iter().filter_map(|w| w.fix.as_ref()).collect();
929        fixes.sort_by(|a, b| b.range.start.cmp(&a.range.start));
930
931        for fix in fixes {
932            if fix.range.end <= content.len() {
933                content.replace_range(fix.range.clone(), &fix.replacement);
934            }
935        }
936
937        Ok(content)
938    }
939
940    fn as_any(&self) -> &dyn std::any::Any {
941        self
942    }
943
944    fn default_config_section(&self) -> Option<(String, toml::Value)> {
945        let default_config = MD057Config::default();
946        let json_value = serde_json::to_value(&default_config).ok()?;
947        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
948
949        if let toml::Value::Table(table) = toml_value {
950            if !table.is_empty() {
951                Some((MD057Config::RULE_NAME.to_string(), toml::Value::Table(table)))
952            } else {
953                None
954            }
955        } else {
956            None
957        }
958    }
959
960    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
961    where
962        Self: Sized,
963    {
964        let rule_config = crate::rule_config_serde::load_rule_config::<MD057Config>(config);
965        Box::new(Self::from_config_struct(rule_config))
966    }
967
968    fn cross_file_scope(&self) -> CrossFileScope {
969        CrossFileScope::Workspace
970    }
971
972    fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, index: &mut FileIndex) {
973        // Use the shared utility for cross-file link extraction
974        // This ensures consistent position tracking between CLI and LSP
975        for link in extract_cross_file_links(ctx) {
976            index.add_cross_file_link(link);
977        }
978    }
979
980    fn cross_file_check(
981        &self,
982        file_path: &Path,
983        file_index: &FileIndex,
984        workspace_index: &crate::workspace_index::WorkspaceIndex,
985    ) -> LintResult {
986        let mut warnings = Vec::new();
987
988        // Get the directory containing this file for resolving relative links
989        let file_dir = file_path.parent();
990
991        for cross_link in &file_index.cross_file_links {
992            // URL-decode the path for filesystem operations
993            // The stored path is URL-encoded (e.g., "%F0%9F%91%A4" for emoji 👤)
994            let decoded_target = Self::url_decode(&cross_link.target_path);
995
996            // Skip absolute paths — they are already handled by check()
997            // which validates them according to the absolute_links config.
998            // Handling them here too would produce duplicate warnings.
999            if decoded_target.starts_with('/') {
1000                continue;
1001            }
1002
1003            // Resolve relative path
1004            let target_path = if let Some(dir) = file_dir {
1005                dir.join(&decoded_target)
1006            } else {
1007                Path::new(&decoded_target).to_path_buf()
1008            };
1009
1010            // Normalize the path (handle .., ., etc.)
1011            let target_path = normalize_path(&target_path);
1012
1013            // Check if the target file exists, also trying markdown extensions for extensionless links
1014            let file_exists =
1015                workspace_index.contains_file(&target_path) || file_exists_or_markdown_extension(&target_path);
1016
1017            if !file_exists {
1018                // For .html/.htm links, check if a corresponding markdown source exists
1019                // This handles doc sites (mdBook, etc.) where .md is compiled to .html
1020                let has_md_source = if let Some(ext) = target_path.extension().and_then(|e| e.to_str())
1021                    && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
1022                    && let (Some(stem), Some(parent)) =
1023                        (target_path.file_stem().and_then(|s| s.to_str()), target_path.parent())
1024                {
1025                    MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
1026                        let source_path = parent.join(format!("{stem}{md_ext}"));
1027                        workspace_index.contains_file(&source_path) || source_path.exists()
1028                    })
1029                } else {
1030                    false
1031                };
1032
1033                if !has_md_source {
1034                    warnings.push(LintWarning {
1035                        rule_name: Some(self.name().to_string()),
1036                        line: cross_link.line,
1037                        column: cross_link.column,
1038                        end_line: cross_link.line,
1039                        end_column: cross_link.column + cross_link.target_path.len(),
1040                        message: format!("Relative link '{}' does not exist", cross_link.target_path),
1041                        severity: Severity::Error,
1042                        fix: None,
1043                    });
1044                }
1045            }
1046        }
1047
1048        Ok(warnings)
1049    }
1050}
1051
/// Build the relative path that leads from the directory `from_dir` to `to_path`.
///
/// The two paths are compared component by component. After their common
/// prefix, one `..` is emitted for every remaining component of `from_dir`,
/// followed by the remaining components of `to_path`.
///
/// Callers are expected to pass normalized paths; the result is empty when
/// both paths are identical.
fn shortest_relative_path(from_dir: &Path, to_path: &Path) -> PathBuf {
    let source: Vec<_> = from_dir.components().collect();
    let target: Vec<_> = to_path.components().collect();

    // Number of leading components the two paths have in common
    let shared = source
        .iter()
        .zip(target.iter())
        .take_while(|(lhs, rhs)| lhs == rhs)
        .count();

    // Climb out of whatever is left of the source directory...
    let mut relative: PathBuf = std::iter::repeat("..").take(source.len() - shared).collect();

    // ...then descend into whatever is left of the target
    relative.extend(&target[shared..]);

    relative
}
1081
1082/// Check if a relative link path can be shortened.
1083///
1084/// Given the source directory and the raw link path, computes whether there's
1085/// a shorter equivalent path. Returns `Some(compact_path)` if the link can
1086/// be simplified, `None` if it's already optimal.
1087fn compute_compact_path(source_dir: &Path, raw_link_path: &str) -> Option<String> {
1088    let link_path = Path::new(raw_link_path);
1089
1090    // Only check paths that contain traversal (../ or ./)
1091    let has_traversal = link_path
1092        .components()
1093        .any(|c| matches!(c, std::path::Component::ParentDir | std::path::Component::CurDir));
1094
1095    if !has_traversal {
1096        return None;
1097    }
1098
1099    // Resolve: source_dir + raw_link_path, then normalize
1100    let combined = source_dir.join(link_path);
1101    let normalized_target = normalize_path(&combined);
1102
1103    // Compute shortest path from source_dir back to the normalized target
1104    let normalized_source = normalize_path(source_dir);
1105    let shortest = shortest_relative_path(&normalized_source, &normalized_target);
1106
1107    // Compare against the raw link path — if it differs, the path can be compacted
1108    if shortest != link_path {
1109        let compact = shortest.to_string_lossy().to_string();
1110        // Avoid suggesting empty path
1111        if compact.is_empty() {
1112            return None;
1113        }
1114        // Markdown links always use forward slashes regardless of platform
1115        Some(compact.replace('\\', "/"))
1116    } else {
1117        None
1118    }
1119}
1120
/// Normalize a path lexically by resolving `.` and `..` components.
///
/// No filesystem access is performed and symlinks are not followed.
///
/// - `.` components are dropped.
/// - `..` removes the preceding normal component when there is one.
/// - `..` directly after the root stays at the root (`/..` is `/`), so the
///   root is never popped and an absolute path stays absolute.
/// - `..` that cannot be resolved (at the start of a relative path, or after
///   another unresolved `..`) is kept, because dropping it would make the
///   result point at a different location (`../a` must not become `a`).
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut stack: Vec<Component<'_>> = Vec::new();

    for component in path.components() {
        match component {
            // Skip current directory markers
            Component::CurDir => {}
            Component::ParentDir => match stack.last().copied() {
                // Go up one level
                Some(Component::Normal(_)) => {
                    stack.pop();
                }
                // Already at the root: there is nothing above it
                Some(Component::RootDir) => {}
                // Nothing to cancel against: keep the `..` so the path still
                // refers to the same location
                Some(Component::ParentDir | Component::CurDir | Component::Prefix(_)) | None => {
                    stack.push(component);
                }
            },
            Component::Normal(_) | Component::RootDir | Component::Prefix(_) => {
                stack.push(component);
            }
        }
    }

    stack.iter().collect()
}
1144
1145#[cfg(test)]
1146mod tests {
1147    use super::*;
1148    use crate::workspace_index::CrossFileLinkIndex;
1149    use std::fs::File;
1150    use std::io::Write;
1151    use tempfile::tempdir;
1152
1153    #[test]
1154    fn test_strip_query_and_fragment() {
1155        // Test query parameter stripping
1156        assert_eq!(
1157            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true"),
1158            "file.png"
1159        );
1160        assert_eq!(
1161            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true&version=1"),
1162            "file.png"
1163        );
1164        assert_eq!(
1165            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?"),
1166            "file.png"
1167        );
1168
1169        // Test fragment stripping
1170        assert_eq!(
1171            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section"),
1172            "file.md"
1173        );
1174        assert_eq!(
1175            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#"),
1176            "file.md"
1177        );
1178
1179        // Test both query and fragment (query comes first, per RFC 3986)
1180        assert_eq!(
1181            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md?raw=true#section"),
1182            "file.md"
1183        );
1184
1185        // Test no query or fragment
1186        assert_eq!(
1187            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png"),
1188            "file.png"
1189        );
1190
1191        // Test with path
1192        assert_eq!(
1193            MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true"),
1194            "path/to/image.png"
1195        );
1196        assert_eq!(
1197            MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true#anchor"),
1198            "path/to/image.png"
1199        );
1200
1201        // Edge case: fragment before query (non-standard but possible)
1202        assert_eq!(
1203            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section?query"),
1204            "file.md"
1205        );
1206    }
1207
1208    #[test]
1209    fn test_url_decode() {
1210        // Simple space encoding
1211        assert_eq!(
1212            MD057ExistingRelativeLinks::url_decode("penguin%20with%20space.jpg"),
1213            "penguin with space.jpg"
1214        );
1215
1216        // Path with encoded spaces
1217        assert_eq!(
1218            MD057ExistingRelativeLinks::url_decode("assets/my%20file%20name.png"),
1219            "assets/my file name.png"
1220        );
1221
1222        // Multiple encoded characters
1223        assert_eq!(
1224            MD057ExistingRelativeLinks::url_decode("hello%20world%21.md"),
1225            "hello world!.md"
1226        );
1227
1228        // Lowercase hex
1229        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2f%2e%2e"), "/..");
1230
1231        // Uppercase hex
1232        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2F%2E%2E"), "/..");
1233
1234        // Mixed case hex
1235        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2f%2E%2e"), "/..");
1236
1237        // No encoding - return as-is
1238        assert_eq!(
1239            MD057ExistingRelativeLinks::url_decode("normal-file.md"),
1240            "normal-file.md"
1241        );
1242
1243        // Incomplete percent encoding - leave as-is
1244        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%2.txt"), "file%2.txt");
1245
1246        // Percent at end - leave as-is
1247        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%"), "file%");
1248
1249        // Invalid hex digits - leave as-is
1250        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%GG.txt"), "file%GG.txt");
1251
1252        // Plus sign (should NOT be decoded - that's form encoding, not URL encoding)
1253        assert_eq!(MD057ExistingRelativeLinks::url_decode("file+name.txt"), "file+name.txt");
1254
1255        // Empty string
1256        assert_eq!(MD057ExistingRelativeLinks::url_decode(""), "");
1257
1258        // UTF-8 multi-byte characters (é = C3 A9 in UTF-8)
1259        assert_eq!(MD057ExistingRelativeLinks::url_decode("caf%C3%A9.md"), "café.md");
1260
1261        // Multiple consecutive encoded characters
1262        assert_eq!(MD057ExistingRelativeLinks::url_decode("%20%20%20"), "   ");
1263
1264        // Encoded path separators
1265        assert_eq!(
1266            MD057ExistingRelativeLinks::url_decode("path%2Fto%2Ffile.md"),
1267            "path/to/file.md"
1268        );
1269
1270        // Mixed encoded and non-encoded
1271        assert_eq!(
1272            MD057ExistingRelativeLinks::url_decode("hello%20world/foo%20bar.md"),
1273            "hello world/foo bar.md"
1274        );
1275
1276        // Special characters that are commonly encoded
1277        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%5B1%5D.md"), "file[1].md");
1278
1279        // Percent at position that looks like encoding but isn't valid
1280        assert_eq!(MD057ExistingRelativeLinks::url_decode("100%pure.md"), "100%pure.md");
1281    }
1282
1283    #[test]
1284    fn test_url_encoded_filenames() {
1285        // Create a temporary directory for test files
1286        let temp_dir = tempdir().unwrap();
1287        let base_path = temp_dir.path();
1288
1289        // Create a file with spaces in the name
1290        let file_with_spaces = base_path.join("penguin with space.jpg");
1291        File::create(&file_with_spaces)
1292            .unwrap()
1293            .write_all(b"image data")
1294            .unwrap();
1295
1296        // Create a subdirectory with spaces
1297        let subdir = base_path.join("my images");
1298        std::fs::create_dir(&subdir).unwrap();
1299        let nested_file = subdir.join("photo 1.png");
1300        File::create(&nested_file).unwrap().write_all(b"photo data").unwrap();
1301
1302        // Test content with URL-encoded links
1303        let content = r#"
1304# Test Document with URL-Encoded Links
1305
1306![Penguin](penguin%20with%20space.jpg)
1307![Photo](my%20images/photo%201.png)
1308![Missing](missing%20file.jpg)
1309"#;
1310
1311        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1312
1313        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1314        let result = rule.check(&ctx).unwrap();
1315
1316        // Should only have one warning for the missing file
1317        assert_eq!(
1318            result.len(),
1319            1,
1320            "Should only warn about missing%20file.jpg. Got: {result:?}"
1321        );
1322        assert!(
1323            result[0].message.contains("missing%20file.jpg"),
1324            "Warning should mention the URL-encoded filename"
1325        );
1326    }
1327
1328    #[test]
1329    fn test_external_urls() {
1330        let rule = MD057ExistingRelativeLinks::new();
1331
1332        // Common web protocols
1333        assert!(rule.is_external_url("https://example.com"));
1334        assert!(rule.is_external_url("http://example.com"));
1335        assert!(rule.is_external_url("ftp://example.com"));
1336        assert!(rule.is_external_url("www.example.com"));
1337        assert!(rule.is_external_url("example.com"));
1338
1339        // Special URI schemes
1340        assert!(rule.is_external_url("file:///path/to/file"));
1341        assert!(rule.is_external_url("smb://server/share"));
1342        assert!(rule.is_external_url("macappstores://apps.apple.com/"));
1343        assert!(rule.is_external_url("mailto:user@example.com"));
1344        assert!(rule.is_external_url("tel:+1234567890"));
1345        assert!(rule.is_external_url("data:text/plain;base64,SGVsbG8="));
1346        assert!(rule.is_external_url("javascript:void(0)"));
1347        assert!(rule.is_external_url("ssh://git@github.com/repo"));
1348        assert!(rule.is_external_url("git://github.com/repo.git"));
1349
1350        // Email addresses without mailto: protocol
1351        // These are clearly not file links and should be skipped
1352        assert!(rule.is_external_url("user@example.com"));
1353        assert!(rule.is_external_url("steering@kubernetes.io"));
1354        assert!(rule.is_external_url("john.doe+filter@company.co.uk"));
1355        assert!(rule.is_external_url("user_name@sub.domain.com"));
1356        assert!(rule.is_external_url("firstname.lastname+tag@really.long.domain.example.org"));
1357
1358        // Template variables should be skipped (not checked as relative links)
1359        assert!(rule.is_external_url("{{URL}}")); // Handlebars/Mustache
1360        assert!(rule.is_external_url("{{#URL}}")); // Handlebars block helper
1361        assert!(rule.is_external_url("{{> partial}}")); // Handlebars partial
1362        assert!(rule.is_external_url("{{ variable }}")); // Mustache with spaces
1363        assert!(rule.is_external_url("{{% include %}}")); // Jinja2/Hugo shortcode
1364        assert!(rule.is_external_url("{{")); // Even partial matches (regex edge case)
1365
1366        // Absolute paths are NOT external (handled separately via is_absolute_path)
1367        // By default they are ignored, but can be configured to warn
1368        assert!(!rule.is_external_url("/api/v1/users"));
1369        assert!(!rule.is_external_url("/blog/2024/release.html"));
1370        assert!(!rule.is_external_url("/react/hooks/use-state.html"));
1371        assert!(!rule.is_external_url("/pkg/runtime"));
1372        assert!(!rule.is_external_url("/doc/go1compat"));
1373        assert!(!rule.is_external_url("/index.html"));
1374        assert!(!rule.is_external_url("/assets/logo.png"));
1375
1376        // But is_absolute_path should detect them
1377        assert!(MD057ExistingRelativeLinks::is_absolute_path("/api/v1/users"));
1378        assert!(MD057ExistingRelativeLinks::is_absolute_path("/blog/2024/release.html"));
1379        assert!(MD057ExistingRelativeLinks::is_absolute_path("/index.html"));
1380        assert!(!MD057ExistingRelativeLinks::is_absolute_path("./relative.md"));
1381        assert!(!MD057ExistingRelativeLinks::is_absolute_path("relative.md"));
1382
1383        // Framework path aliases should be skipped (resolved by build tools)
1384        // Tilde prefix (common in Vite, Nuxt, Astro for project root)
1385        assert!(rule.is_external_url("~/assets/image.png"));
1386        assert!(rule.is_external_url("~/components/Button.vue"));
1387        assert!(rule.is_external_url("~assets/logo.svg")); // Nuxt style without /
1388
1389        // @ prefix (common in Vue, webpack, Vite aliases)
1390        assert!(rule.is_external_url("@/components/Header.vue"));
1391        assert!(rule.is_external_url("@images/photo.jpg"));
1392        assert!(rule.is_external_url("@assets/styles.css"));
1393
1394        // Relative paths should NOT be external (should be validated)
1395        assert!(!rule.is_external_url("./relative/path.md"));
1396        assert!(!rule.is_external_url("relative/path.md"));
1397        assert!(!rule.is_external_url("../parent/path.md"));
1398    }
1399
1400    #[test]
1401    fn test_framework_path_aliases() {
1402        // Create a temporary directory for test files
1403        let temp_dir = tempdir().unwrap();
1404        let base_path = temp_dir.path();
1405
1406        // Test content with framework path aliases (should all be skipped)
1407        let content = r#"
1408# Framework Path Aliases
1409
1410![Image 1](~/assets/penguin.jpg)
1411![Image 2](~assets/logo.svg)
1412![Image 3](@images/photo.jpg)
1413![Image 4](@/components/icon.svg)
1414[Link](@/pages/about.md)
1415
1416This is a [real missing link](missing.md) that should be flagged.
1417"#;
1418
1419        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1420
1421        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1422        let result = rule.check(&ctx).unwrap();
1423
1424        // Should only have one warning for the real missing link
1425        assert_eq!(
1426            result.len(),
1427            1,
1428            "Should only warn about missing.md, not framework aliases. Got: {result:?}"
1429        );
1430        assert!(
1431            result[0].message.contains("missing.md"),
1432            "Warning should be for missing.md"
1433        );
1434    }
1435
1436    #[test]
1437    fn test_url_decode_security_path_traversal() {
1438        // Ensure URL decoding doesn't enable path traversal attacks
1439        // The decoded path is still validated against the base path
1440        let temp_dir = tempdir().unwrap();
1441        let base_path = temp_dir.path();
1442
1443        // Create a file in the temp directory
1444        let file_in_base = base_path.join("safe.md");
1445        File::create(&file_in_base).unwrap().write_all(b"# Safe").unwrap();
1446
1447        // Test with encoded path traversal attempt
1448        // Use a path that definitely won't exist on any platform (not /etc/passwd which exists on Linux)
1449        // %2F = /, so ..%2F..%2Fnonexistent%2Ffile = ../../nonexistent/file
1450        // %252F = %2F (double encoded), so ..%252F..%252F = ..%2F..%2F (literal, won't decode to ..)
1451        let content = r#"
1452[Traversal attempt](..%2F..%2Fnonexistent_dir_12345%2Fmissing.md)
1453[Double encoded](..%252F..%252Fnonexistent%252Ffile.md)
1454[Safe link](safe.md)
1455"#;
1456
1457        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1458
1459        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1460        let result = rule.check(&ctx).unwrap();
1461
1462        // The traversal attempts should still be flagged as missing
1463        // (they don't exist relative to base_path after decoding)
1464        assert_eq!(
1465            result.len(),
1466            2,
1467            "Should have warnings for traversal attempts. Got: {result:?}"
1468        );
1469    }
1470
1471    #[test]
1472    fn test_url_encoded_utf8_filenames() {
1473        // Test with actual UTF-8 encoded filenames
1474        let temp_dir = tempdir().unwrap();
1475        let base_path = temp_dir.path();
1476
1477        // Create files with unicode names
1478        let cafe_file = base_path.join("café.md");
1479        File::create(&cafe_file).unwrap().write_all(b"# Cafe").unwrap();
1480
1481        let content = r#"
1482[Café link](caf%C3%A9.md)
1483[Missing unicode](r%C3%A9sum%C3%A9.md)
1484"#;
1485
1486        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1487
1488        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1489        let result = rule.check(&ctx).unwrap();
1490
1491        // Should only warn about the missing file
1492        assert_eq!(
1493            result.len(),
1494            1,
1495            "Should only warn about missing résumé.md. Got: {result:?}"
1496        );
1497        assert!(
1498            result[0].message.contains("r%C3%A9sum%C3%A9.md"),
1499            "Warning should mention the URL-encoded filename"
1500        );
1501    }
1502
1503    #[test]
1504    fn test_url_encoded_emoji_filenames() {
1505        // URL-encoded emoji paths should be correctly resolved
1506        // 👤 = U+1F464 = F0 9F 91 A4 in UTF-8
1507        let temp_dir = tempdir().unwrap();
1508        let base_path = temp_dir.path();
1509
1510        // Create directory with emoji in name: 👤 Personal
1511        let emoji_dir = base_path.join("👤 Personal");
1512        std::fs::create_dir(&emoji_dir).unwrap();
1513
1514        // Create file in that directory: TV Shows.md
1515        let file_path = emoji_dir.join("TV Shows.md");
1516        File::create(&file_path)
1517            .unwrap()
1518            .write_all(b"# TV Shows\n\nContent here.")
1519            .unwrap();
1520
1521        // Test content with URL-encoded emoji link
1522        // %F0%9F%91%A4 = 👤, %20 = space
1523        let content = r#"
1524# Test Document
1525
1526[TV Shows](./%F0%9F%91%A4%20Personal/TV%20Shows.md)
1527[Missing](./%F0%9F%91%A4%20Personal/Missing.md)
1528"#;
1529
1530        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1531
1532        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1533        let result = rule.check(&ctx).unwrap();
1534
1535        // Should only warn about the missing file, not the valid emoji path
1536        assert_eq!(result.len(), 1, "Should only warn about missing file. Got: {result:?}");
1537        assert!(
1538            result[0].message.contains("Missing.md"),
1539            "Warning should be for Missing.md, got: {}",
1540            result[0].message
1541        );
1542    }
1543
1544    #[test]
1545    fn test_no_warnings_without_base_path() {
1546        let rule = MD057ExistingRelativeLinks::new();
1547        let content = "[Link](missing.md)";
1548
1549        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1550        let result = rule.check(&ctx).unwrap();
1551        assert!(result.is_empty(), "Should have no warnings without base path");
1552    }
1553
1554    #[test]
1555    fn test_existing_and_missing_links() {
1556        // Create a temporary directory for test files
1557        let temp_dir = tempdir().unwrap();
1558        let base_path = temp_dir.path();
1559
1560        // Create an existing file
1561        let exists_path = base_path.join("exists.md");
1562        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
1563
1564        // Verify the file exists
1565        assert!(exists_path.exists(), "exists.md should exist for this test");
1566
1567        // Create test content with both existing and missing links
1568        let content = r#"
1569# Test Document
1570
1571[Valid Link](exists.md)
1572[Invalid Link](missing.md)
1573[External Link](https://example.com)
1574[Media Link](image.jpg)
1575        "#;
1576
1577        // Initialize rule with the base path (default: check all files including media)
1578        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1579
1580        // Test the rule
1581        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1582        let result = rule.check(&ctx).unwrap();
1583
1584        // Should have two warnings: missing.md and image.jpg (both don't exist)
1585        assert_eq!(result.len(), 2);
1586        let messages: Vec<_> = result.iter().map(|w| w.message.as_str()).collect();
1587        assert!(messages.iter().any(|m| m.contains("missing.md")));
1588        assert!(messages.iter().any(|m| m.contains("image.jpg")));
1589    }
1590
1591    #[test]
1592    fn test_angle_bracket_links() {
1593        // Create a temporary directory for test files
1594        let temp_dir = tempdir().unwrap();
1595        let base_path = temp_dir.path();
1596
1597        // Create an existing file
1598        let exists_path = base_path.join("exists.md");
1599        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
1600
1601        // Create test content with angle bracket links
1602        let content = r#"
1603# Test Document
1604
1605[Valid Link](<exists.md>)
1606[Invalid Link](<missing.md>)
1607[External Link](<https://example.com>)
1608    "#;
1609
1610        // Test with default settings
1611        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1612
1613        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1614        let result = rule.check(&ctx).unwrap();
1615
1616        // Should have one warning for missing.md
1617        assert_eq!(result.len(), 1, "Should have exactly one warning");
1618        assert!(
1619            result[0].message.contains("missing.md"),
1620            "Warning should mention missing.md"
1621        );
1622    }
1623
1624    #[test]
1625    fn test_angle_bracket_links_with_parens() {
1626        // Create a temporary directory for test files
1627        let temp_dir = tempdir().unwrap();
1628        let base_path = temp_dir.path();
1629
1630        // Create directory structure with parentheses in path
1631        let app_dir = base_path.join("app");
1632        std::fs::create_dir(&app_dir).unwrap();
1633        let upload_dir = app_dir.join("(upload)");
1634        std::fs::create_dir(&upload_dir).unwrap();
1635        let page_file = upload_dir.join("page.tsx");
1636        File::create(&page_file)
1637            .unwrap()
1638            .write_all(b"export default function Page() {}")
1639            .unwrap();
1640
1641        // Create test content with angle bracket links containing parentheses
1642        let content = r#"
1643# Test Document with Paths Containing Parens
1644
1645[Upload Page](<app/(upload)/page.tsx>)
1646[Unix pipe](<https://en.wikipedia.org/wiki/Pipeline_(Unix)>)
1647[Missing](<app/(missing)/file.md>)
1648"#;
1649
1650        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1651
1652        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1653        let result = rule.check(&ctx).unwrap();
1654
1655        // Should only have one warning for the missing file
1656        assert_eq!(
1657            result.len(),
1658            1,
1659            "Should have exactly one warning for missing file. Got: {result:?}"
1660        );
1661        assert!(
1662            result[0].message.contains("app/(missing)/file.md"),
1663            "Warning should mention app/(missing)/file.md"
1664        );
1665    }
1666
1667    #[test]
1668    fn test_all_file_types_checked() {
1669        // Create a temporary directory for test files
1670        let temp_dir = tempdir().unwrap();
1671        let base_path = temp_dir.path();
1672
1673        // Create a test with various file types - all should be checked
1674        let content = r#"
1675[Image Link](image.jpg)
1676[Video Link](video.mp4)
1677[Markdown Link](document.md)
1678[PDF Link](file.pdf)
1679"#;
1680
1681        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1682
1683        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1684        let result = rule.check(&ctx).unwrap();
1685
1686        // Should warn about all missing files regardless of extension
1687        assert_eq!(result.len(), 4, "Should have warnings for all missing files");
1688    }
1689
1690    #[test]
1691    fn test_code_span_detection() {
1692        let rule = MD057ExistingRelativeLinks::new();
1693
1694        // Create a temporary directory for test files
1695        let temp_dir = tempdir().unwrap();
1696        let base_path = temp_dir.path();
1697
1698        let rule = rule.with_path(base_path);
1699
1700        // Test with document structure
1701        let content = "This is a [link](nonexistent.md) and `[not a link](not-checked.md)` in code.";
1702
1703        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1704        let result = rule.check(&ctx).unwrap();
1705
1706        // Should only find the real link, not the one in code
1707        assert_eq!(result.len(), 1, "Should only flag the real link");
1708        assert!(result[0].message.contains("nonexistent.md"));
1709    }
1710
1711    #[test]
1712    fn test_inline_code_spans() {
1713        // Create a temporary directory for test files
1714        let temp_dir = tempdir().unwrap();
1715        let base_path = temp_dir.path();
1716
1717        // Create test content with links in inline code spans
1718        let content = r#"
1719# Test Document
1720
1721This is a normal link: [Link](missing.md)
1722
1723This is a code span with a link: `[Link](another-missing.md)`
1724
1725Some more text with `inline code [Link](yet-another-missing.md) embedded`.
1726
1727    "#;
1728
1729        // Initialize rule with the base path
1730        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1731
1732        // Test the rule
1733        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1734        let result = rule.check(&ctx).unwrap();
1735
1736        // Should only have warning for the normal link, not for links in code spans
1737        assert_eq!(result.len(), 1, "Should have exactly one warning");
1738        assert!(
1739            result[0].message.contains("missing.md"),
1740            "Warning should be for missing.md"
1741        );
1742        assert!(
1743            !result.iter().any(|w| w.message.contains("another-missing.md")),
1744            "Should not warn about link in code span"
1745        );
1746        assert!(
1747            !result.iter().any(|w| w.message.contains("yet-another-missing.md")),
1748            "Should not warn about link in inline code"
1749        );
1750    }
1751
1752    #[test]
1753    fn test_extensionless_link_resolution() {
1754        // Create a temporary directory for test files
1755        let temp_dir = tempdir().unwrap();
1756        let base_path = temp_dir.path();
1757
1758        // Create a markdown file WITHOUT specifying .md extension in the link
1759        let page_path = base_path.join("page.md");
1760        File::create(&page_path).unwrap().write_all(b"# Page").unwrap();
1761
1762        // Test content with extensionless link that should resolve to page.md
1763        let content = r#"
1764# Test Document
1765
1766[Link without extension](page)
1767[Link with extension](page.md)
1768[Missing link](nonexistent)
1769"#;
1770
1771        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1772
1773        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1774        let result = rule.check(&ctx).unwrap();
1775
1776        // Should only have warning for nonexistent link
1777        // Both "page" and "page.md" should resolve to the same file
1778        assert_eq!(result.len(), 1, "Should only warn about nonexistent link");
1779        assert!(
1780            result[0].message.contains("nonexistent"),
1781            "Warning should be for 'nonexistent' not 'page'"
1782        );
1783    }
1784
1785    // Cross-file validation tests
1786    #[test]
1787    fn test_cross_file_scope() {
1788        let rule = MD057ExistingRelativeLinks::new();
1789        assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
1790    }
1791
1792    #[test]
1793    fn test_contribute_to_index_extracts_markdown_links() {
1794        let rule = MD057ExistingRelativeLinks::new();
1795        let content = r#"
1796# Document
1797
1798[Link to docs](./docs/guide.md)
1799[Link with fragment](./other.md#section)
1800[External link](https://example.com)
1801[Image link](image.png)
1802[Media file](video.mp4)
1803"#;
1804
1805        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1806        let mut index = FileIndex::new();
1807        rule.contribute_to_index(&ctx, &mut index);
1808
1809        // Should only index markdown file links
1810        assert_eq!(index.cross_file_links.len(), 2);
1811
1812        // Check first link
1813        assert_eq!(index.cross_file_links[0].target_path, "./docs/guide.md");
1814        assert_eq!(index.cross_file_links[0].fragment, "");
1815
1816        // Check second link (with fragment)
1817        assert_eq!(index.cross_file_links[1].target_path, "./other.md");
1818        assert_eq!(index.cross_file_links[1].fragment, "section");
1819    }
1820
1821    #[test]
1822    fn test_contribute_to_index_skips_external_and_anchors() {
1823        let rule = MD057ExistingRelativeLinks::new();
1824        let content = r#"
1825# Document
1826
1827[External](https://example.com)
1828[Another external](http://example.org)
1829[Fragment only](#section)
1830[FTP link](ftp://files.example.com)
1831[Mail link](mailto:test@example.com)
1832[WWW link](www.example.com)
1833"#;
1834
1835        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1836        let mut index = FileIndex::new();
1837        rule.contribute_to_index(&ctx, &mut index);
1838
1839        // Should not index any of these
1840        assert_eq!(index.cross_file_links.len(), 0);
1841    }
1842
1843    #[test]
1844    fn test_cross_file_check_valid_link() {
1845        use crate::workspace_index::WorkspaceIndex;
1846
1847        let rule = MD057ExistingRelativeLinks::new();
1848
1849        // Create a workspace index with the target file
1850        let mut workspace_index = WorkspaceIndex::new();
1851        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());
1852
1853        // Create file index with a link to an existing file
1854        let mut file_index = FileIndex::new();
1855        file_index.add_cross_file_link(CrossFileLinkIndex {
1856            target_path: "guide.md".to_string(),
1857            fragment: "".to_string(),
1858            line: 5,
1859            column: 1,
1860        });
1861
1862        // Run cross-file check from docs/index.md
1863        let warnings = rule
1864            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1865            .unwrap();
1866
1867        // Should have no warnings - file exists
1868        assert!(warnings.is_empty());
1869    }
1870
1871    #[test]
1872    fn test_cross_file_check_missing_link() {
1873        use crate::workspace_index::WorkspaceIndex;
1874
1875        let rule = MD057ExistingRelativeLinks::new();
1876
1877        // Create an empty workspace index
1878        let workspace_index = WorkspaceIndex::new();
1879
1880        // Create file index with a link to a missing file
1881        let mut file_index = FileIndex::new();
1882        file_index.add_cross_file_link(CrossFileLinkIndex {
1883            target_path: "missing.md".to_string(),
1884            fragment: "".to_string(),
1885            line: 5,
1886            column: 1,
1887        });
1888
1889        // Run cross-file check
1890        let warnings = rule
1891            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1892            .unwrap();
1893
1894        // Should have one warning for the missing file
1895        assert_eq!(warnings.len(), 1);
1896        assert!(warnings[0].message.contains("missing.md"));
1897        assert!(warnings[0].message.contains("does not exist"));
1898    }
1899
1900    #[test]
1901    fn test_cross_file_check_parent_path() {
1902        use crate::workspace_index::WorkspaceIndex;
1903
1904        let rule = MD057ExistingRelativeLinks::new();
1905
1906        // Create a workspace index with the target file at the root
1907        let mut workspace_index = WorkspaceIndex::new();
1908        workspace_index.insert_file(PathBuf::from("readme.md"), FileIndex::new());
1909
1910        // Create file index with a parent path link
1911        let mut file_index = FileIndex::new();
1912        file_index.add_cross_file_link(CrossFileLinkIndex {
1913            target_path: "../readme.md".to_string(),
1914            fragment: "".to_string(),
1915            line: 5,
1916            column: 1,
1917        });
1918
1919        // Run cross-file check from docs/guide.md
1920        let warnings = rule
1921            .cross_file_check(Path::new("docs/guide.md"), &file_index, &workspace_index)
1922            .unwrap();
1923
1924        // Should have no warnings - file exists at normalized path
1925        assert!(warnings.is_empty());
1926    }
1927
1928    #[test]
1929    fn test_cross_file_check_html_link_with_md_source() {
1930        // Test that .html links are accepted when corresponding .md source exists
1931        // This supports mdBook and similar doc generators that compile .md to .html
1932        use crate::workspace_index::WorkspaceIndex;
1933
1934        let rule = MD057ExistingRelativeLinks::new();
1935
1936        // Create a workspace index with the .md source file
1937        let mut workspace_index = WorkspaceIndex::new();
1938        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());
1939
1940        // Create file index with an .html link (from another rule like MD051)
1941        let mut file_index = FileIndex::new();
1942        file_index.add_cross_file_link(CrossFileLinkIndex {
1943            target_path: "guide.html".to_string(),
1944            fragment: "section".to_string(),
1945            line: 10,
1946            column: 5,
1947        });
1948
1949        // Run cross-file check from docs/index.md
1950        let warnings = rule
1951            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1952            .unwrap();
1953
1954        // Should have no warnings - .md source exists for the .html link
1955        assert!(
1956            warnings.is_empty(),
1957            "Expected no warnings for .html link with .md source, got: {warnings:?}"
1958        );
1959    }
1960
1961    #[test]
1962    fn test_cross_file_check_html_link_without_source() {
1963        // Test that .html links without corresponding .md source ARE flagged
1964        use crate::workspace_index::WorkspaceIndex;
1965
1966        let rule = MD057ExistingRelativeLinks::new();
1967
1968        // Create an empty workspace index
1969        let workspace_index = WorkspaceIndex::new();
1970
1971        // Create file index with an .html link to a non-existent file
1972        let mut file_index = FileIndex::new();
1973        file_index.add_cross_file_link(CrossFileLinkIndex {
1974            target_path: "missing.html".to_string(),
1975            fragment: "".to_string(),
1976            line: 10,
1977            column: 5,
1978        });
1979
1980        // Run cross-file check from docs/index.md
1981        let warnings = rule
1982            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1983            .unwrap();
1984
1985        // Should have one warning - no .md source exists
1986        assert_eq!(warnings.len(), 1, "Expected 1 warning for .html link without source");
1987        assert!(warnings[0].message.contains("missing.html"));
1988    }
1989
1990    #[test]
1991    fn test_normalize_path_function() {
1992        // Test simple cases
1993        assert_eq!(
1994            normalize_path(Path::new("docs/guide.md")),
1995            PathBuf::from("docs/guide.md")
1996        );
1997
1998        // Test current directory removal
1999        assert_eq!(
2000            normalize_path(Path::new("./docs/guide.md")),
2001            PathBuf::from("docs/guide.md")
2002        );
2003
2004        // Test parent directory resolution
2005        assert_eq!(
2006            normalize_path(Path::new("docs/sub/../guide.md")),
2007            PathBuf::from("docs/guide.md")
2008        );
2009
2010        // Test multiple parent directories
2011        assert_eq!(normalize_path(Path::new("a/b/c/../../d.md")), PathBuf::from("a/d.md"));
2012    }
2013
2014    #[test]
2015    fn test_html_link_with_md_source() {
2016        // Links to .html files should pass if corresponding .md source exists
2017        let temp_dir = tempdir().unwrap();
2018        let base_path = temp_dir.path();
2019
2020        // Create guide.md (source file)
2021        let md_file = base_path.join("guide.md");
2022        File::create(&md_file).unwrap().write_all(b"# Guide").unwrap();
2023
2024        let content = r#"
2025[Read the guide](guide.html)
2026[Also here](getting-started.html)
2027"#;
2028
2029        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2030        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2031        let result = rule.check(&ctx).unwrap();
2032
2033        // guide.html passes (guide.md exists), getting-started.html fails
2034        assert_eq!(
2035            result.len(),
2036            1,
2037            "Should only warn about missing source. Got: {result:?}"
2038        );
2039        assert!(result[0].message.contains("getting-started.html"));
2040    }
2041
2042    #[test]
2043    fn test_htm_link_with_md_source() {
2044        // .htm extension should also check for markdown source
2045        let temp_dir = tempdir().unwrap();
2046        let base_path = temp_dir.path();
2047
2048        let md_file = base_path.join("page.md");
2049        File::create(&md_file).unwrap().write_all(b"# Page").unwrap();
2050
2051        let content = "[Page](page.htm)";
2052
2053        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2054        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2055        let result = rule.check(&ctx).unwrap();
2056
2057        assert!(
2058            result.is_empty(),
2059            "Should not warn when .md source exists for .htm link"
2060        );
2061    }
2062
2063    #[test]
2064    fn test_html_link_finds_various_markdown_extensions() {
2065        // Should find .mdx, .markdown, etc. as source files
2066        let temp_dir = tempdir().unwrap();
2067        let base_path = temp_dir.path();
2068
2069        File::create(base_path.join("doc.md")).unwrap();
2070        File::create(base_path.join("tutorial.mdx")).unwrap();
2071        File::create(base_path.join("guide.markdown")).unwrap();
2072
2073        let content = r#"
2074[Doc](doc.html)
2075[Tutorial](tutorial.html)
2076[Guide](guide.html)
2077"#;
2078
2079        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2080        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2081        let result = rule.check(&ctx).unwrap();
2082
2083        assert!(
2084            result.is_empty(),
2085            "Should find all markdown variants as source files. Got: {result:?}"
2086        );
2087    }
2088
2089    #[test]
2090    fn test_html_link_in_subdirectory() {
2091        // Should find markdown source in subdirectories
2092        let temp_dir = tempdir().unwrap();
2093        let base_path = temp_dir.path();
2094
2095        let docs_dir = base_path.join("docs");
2096        std::fs::create_dir(&docs_dir).unwrap();
2097        File::create(docs_dir.join("guide.md"))
2098            .unwrap()
2099            .write_all(b"# Guide")
2100            .unwrap();
2101
2102        let content = "[Guide](docs/guide.html)";
2103
2104        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2105        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2106        let result = rule.check(&ctx).unwrap();
2107
2108        assert!(result.is_empty(), "Should find markdown source in subdirectory");
2109    }
2110
2111    #[test]
2112    fn test_absolute_path_skipped_in_check() {
2113        // Test that absolute paths are skipped during link validation
2114        // This fixes the bug where /pkg/runtime was being flagged
2115        let temp_dir = tempdir().unwrap();
2116        let base_path = temp_dir.path();
2117
2118        let content = r#"
2119# Test Document
2120
2121[Go Runtime](/pkg/runtime)
2122[Go Runtime with Fragment](/pkg/runtime#section)
2123[API Docs](/api/v1/users)
2124[Blog Post](/blog/2024/release.html)
2125[React Hook](/react/hooks/use-state.html)
2126"#;
2127
2128        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2129        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2130        let result = rule.check(&ctx).unwrap();
2131
2132        // Should have NO warnings - all absolute paths should be skipped
2133        assert!(
2134            result.is_empty(),
2135            "Absolute paths should be skipped. Got warnings: {result:?}"
2136        );
2137    }
2138
2139    #[test]
2140    fn test_absolute_path_skipped_in_cross_file_check() {
2141        // Test that absolute paths are skipped in cross_file_check()
2142        use crate::workspace_index::WorkspaceIndex;
2143
2144        let rule = MD057ExistingRelativeLinks::new();
2145
2146        // Create an empty workspace index (no files exist)
2147        let workspace_index = WorkspaceIndex::new();
2148
2149        // Create file index with absolute path links (should be skipped)
2150        let mut file_index = FileIndex::new();
2151        file_index.add_cross_file_link(CrossFileLinkIndex {
2152            target_path: "/pkg/runtime.md".to_string(),
2153            fragment: "".to_string(),
2154            line: 5,
2155            column: 1,
2156        });
2157        file_index.add_cross_file_link(CrossFileLinkIndex {
2158            target_path: "/api/v1/users.md".to_string(),
2159            fragment: "section".to_string(),
2160            line: 10,
2161            column: 1,
2162        });
2163
2164        // Run cross-file check
2165        let warnings = rule
2166            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
2167            .unwrap();
2168
2169        // Should have NO warnings - absolute paths should be skipped
2170        assert!(
2171            warnings.is_empty(),
2172            "Absolute paths should be skipped in cross_file_check. Got warnings: {warnings:?}"
2173        );
2174    }
2175
2176    #[test]
2177    fn test_protocol_relative_url_not_skipped() {
2178        // Test that protocol-relative URLs (//example.com) are NOT skipped as absolute paths
2179        // They should still be caught by is_external_url() though
2180        let temp_dir = tempdir().unwrap();
2181        let base_path = temp_dir.path();
2182
2183        let content = r#"
2184# Test Document
2185
2186[External](//example.com/page)
2187[Another](//cdn.example.com/asset.js)
2188"#;
2189
2190        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2191        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2192        let result = rule.check(&ctx).unwrap();
2193
2194        // Should have NO warnings - protocol-relative URLs are external and should be skipped
2195        assert!(
2196            result.is_empty(),
2197            "Protocol-relative URLs should be skipped. Got warnings: {result:?}"
2198        );
2199    }
2200
2201    #[test]
2202    fn test_email_addresses_skipped() {
2203        // Test that email addresses without mailto: are skipped
2204        // These are clearly not file links (the @ symbol is definitive)
2205        let temp_dir = tempdir().unwrap();
2206        let base_path = temp_dir.path();
2207
2208        let content = r#"
2209# Test Document
2210
2211[Contact](user@example.com)
2212[Steering](steering@kubernetes.io)
2213[Support](john.doe+filter@company.co.uk)
2214[User](user_name@sub.domain.com)
2215"#;
2216
2217        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2218        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2219        let result = rule.check(&ctx).unwrap();
2220
2221        // Should have NO warnings - email addresses are clearly not file links and should be skipped
2222        assert!(
2223            result.is_empty(),
2224            "Email addresses should be skipped. Got warnings: {result:?}"
2225        );
2226    }
2227
2228    #[test]
2229    fn test_email_addresses_vs_file_paths() {
2230        // Test that email addresses (anything with @) are skipped
2231        // Note: File paths with @ are extremely rare, so we treat anything with @ as an email
2232        let temp_dir = tempdir().unwrap();
2233        let base_path = temp_dir.path();
2234
2235        let content = r#"
2236# Test Document
2237
2238[Email](user@example.com)  <!-- Should be skipped (email) -->
2239[Email2](steering@kubernetes.io)  <!-- Should be skipped (email) -->
2240[Email3](user@file.md)  <!-- Should be skipped (has @, treated as email) -->
2241"#;
2242
2243        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2244        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2245        let result = rule.check(&ctx).unwrap();
2246
2247        // All should be skipped - anything with @ is treated as an email
2248        assert!(
2249            result.is_empty(),
2250            "All email addresses should be skipped. Got: {result:?}"
2251        );
2252    }
2253
2254    #[test]
2255    fn test_diagnostic_position_accuracy() {
2256        // Test that diagnostics point to the URL, not the link text
2257        let temp_dir = tempdir().unwrap();
2258        let base_path = temp_dir.path();
2259
2260        // Position markers:     0         1         2         3
2261        //                       0123456789012345678901234567890123456789
2262        let content = "prefix [text](missing.md) suffix";
2263        //             The URL "missing.md" starts at 0-indexed position 14
2264        //             which is 1-indexed column 15, and ends at 0-indexed 24 (1-indexed column 25)
2265
2266        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2267        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2268        let result = rule.check(&ctx).unwrap();
2269
2270        assert_eq!(result.len(), 1, "Should have exactly one warning");
2271        assert_eq!(result[0].line, 1, "Should be on line 1");
2272        assert_eq!(result[0].column, 15, "Should point to start of URL 'missing.md'");
2273        assert_eq!(result[0].end_column, 25, "Should point past end of URL 'missing.md'");
2274    }
2275
2276    #[test]
2277    fn test_diagnostic_position_angle_brackets() {
2278        // Test position accuracy with angle bracket links
2279        let temp_dir = tempdir().unwrap();
2280        let base_path = temp_dir.path();
2281
2282        // Position markers:     0         1         2
2283        //                       012345678901234567890
2284        let content = "[link](<missing.md>)";
2285        //             The URL "missing.md" starts at 0-indexed position 8 (1-indexed column 9)
2286
2287        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2288        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2289        let result = rule.check(&ctx).unwrap();
2290
2291        assert_eq!(result.len(), 1, "Should have exactly one warning");
2292        assert_eq!(result[0].line, 1, "Should be on line 1");
2293        assert_eq!(result[0].column, 9, "Should point to start of URL in angle brackets");
2294    }
2295
2296    #[test]
2297    fn test_diagnostic_position_multiline() {
2298        // Test that line numbers are correct for links on different lines
2299        let temp_dir = tempdir().unwrap();
2300        let base_path = temp_dir.path();
2301
2302        let content = r#"# Title
2303Some text on line 2
2304[link on line 3](missing1.md)
2305More text
2306[link on line 5](missing2.md)"#;
2307
2308        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2309        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2310        let result = rule.check(&ctx).unwrap();
2311
2312        assert_eq!(result.len(), 2, "Should have two warnings");
2313
2314        // First warning should be on line 3
2315        assert_eq!(result[0].line, 3, "First warning should be on line 3");
2316        assert!(result[0].message.contains("missing1.md"));
2317
2318        // Second warning should be on line 5
2319        assert_eq!(result[1].line, 5, "Second warning should be on line 5");
2320        assert!(result[1].message.contains("missing2.md"));
2321    }
2322
2323    #[test]
2324    fn test_diagnostic_position_with_spaces() {
2325        // Test position with URLs that have spaces in parentheses
2326        let temp_dir = tempdir().unwrap();
2327        let base_path = temp_dir.path();
2328
2329        let content = "[link]( missing.md )";
2330        //             0123456789012345678901
2331        //             0-indexed position 8 is 'm' in 'missing.md' (after space and paren)
2332        //             which is 1-indexed column 9
2333
2334        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2335        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2336        let result = rule.check(&ctx).unwrap();
2337
2338        assert_eq!(result.len(), 1, "Should have exactly one warning");
2339        // The regex captures the URL without leading/trailing spaces
2340        assert_eq!(result[0].column, 9, "Should point to URL after stripping spaces");
2341    }
2342
2343    #[test]
2344    fn test_diagnostic_position_image() {
2345        // Test that image diagnostics also have correct positions
2346        let temp_dir = tempdir().unwrap();
2347        let base_path = temp_dir.path();
2348
2349        let content = "![alt text](missing.jpg)";
2350
2351        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2352        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2353        let result = rule.check(&ctx).unwrap();
2354
2355        assert_eq!(result.len(), 1, "Should have exactly one warning for image");
2356        assert_eq!(result[0].line, 1);
2357        // Images use start_col from the parser, which should point to the URL
2358        assert!(result[0].column > 0, "Should have valid column position");
2359        assert!(result[0].message.contains("missing.jpg"));
2360    }
2361
2362    #[test]
2363    fn test_wikilinks_skipped() {
2364        // Wikilinks should not trigger MD057 warnings
2365        // They use a different linking system (e.g., Obsidian, wiki software)
2366        let temp_dir = tempdir().unwrap();
2367        let base_path = temp_dir.path();
2368
2369        let content = r#"# Test Document
2370
2371[[Microsoft#Windows OS]]
2372[[SomePage]]
2373[[Page With Spaces]]
2374[[path/to/page#section]]
2375[[page|Display Text]]
2376
2377This is a [real missing link](missing.md) that should be flagged.
2378"#;
2379
2380        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2381        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2382        let result = rule.check(&ctx).unwrap();
2383
2384        // Should only warn about the regular markdown link, not wikilinks
2385        assert_eq!(
2386            result.len(),
2387            1,
2388            "Should only warn about missing.md, not wikilinks. Got: {result:?}"
2389        );
2390        assert!(
2391            result[0].message.contains("missing.md"),
2392            "Warning should be for missing.md, not wikilinks"
2393        );
2394    }
2395
2396    #[test]
2397    fn test_wikilinks_not_added_to_index() {
2398        // Wikilinks should not be added to the cross-file link index
2399        let temp_dir = tempdir().unwrap();
2400        let base_path = temp_dir.path();
2401
2402        let content = r#"# Test Document
2403
2404[[Microsoft#Windows OS]]
2405[[SomePage#section]]
2406[Regular Link](other.md)
2407"#;
2408
2409        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2410        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2411
2412        let mut file_index = FileIndex::new();
2413        rule.contribute_to_index(&ctx, &mut file_index);
2414
2415        // Should only have the regular markdown link (if it's a markdown file)
2416        // Wikilinks should not be added
2417        let cross_file_links = &file_index.cross_file_links;
2418        assert_eq!(
2419            cross_file_links.len(),
2420            1,
2421            "Only regular markdown links should be indexed, not wikilinks. Got: {cross_file_links:?}"
2422        );
2423        assert_eq!(file_index.cross_file_links[0].target_path, "other.md");
2424    }
2425
2426    #[test]
2427    fn test_reference_definition_missing_file() {
2428        // Reference definitions [ref]: ./path.md should be checked
2429        let temp_dir = tempdir().unwrap();
2430        let base_path = temp_dir.path();
2431
2432        let content = r#"# Test Document
2433
2434[test]: ./missing.md
2435[example]: ./nonexistent.html
2436
2437Use [test] and [example] here.
2438"#;
2439
2440        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2441        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2442        let result = rule.check(&ctx).unwrap();
2443
2444        // Should have warnings for both reference definitions
2445        assert_eq!(
2446            result.len(),
2447            2,
2448            "Should have warnings for missing reference definition targets. Got: {result:?}"
2449        );
2450        assert!(
2451            result.iter().any(|w| w.message.contains("missing.md")),
2452            "Should warn about missing.md"
2453        );
2454        assert!(
2455            result.iter().any(|w| w.message.contains("nonexistent.html")),
2456            "Should warn about nonexistent.html"
2457        );
2458    }
2459
2460    #[test]
2461    fn test_reference_definition_existing_file() {
2462        // Reference definitions to existing files should NOT trigger warnings
2463        let temp_dir = tempdir().unwrap();
2464        let base_path = temp_dir.path();
2465
2466        // Create an existing file
2467        let exists_path = base_path.join("exists.md");
2468        File::create(&exists_path)
2469            .unwrap()
2470            .write_all(b"# Existing file")
2471            .unwrap();
2472
2473        let content = r#"# Test Document
2474
2475[test]: ./exists.md
2476
2477Use [test] here.
2478"#;
2479
2480        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2481        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2482        let result = rule.check(&ctx).unwrap();
2483
2484        // Should have NO warnings since the file exists
2485        assert!(
2486            result.is_empty(),
2487            "Should not warn about existing file. Got: {result:?}"
2488        );
2489    }
2490
2491    #[test]
2492    fn test_reference_definition_external_url_skipped() {
2493        // Reference definitions with external URLs should be skipped
2494        let temp_dir = tempdir().unwrap();
2495        let base_path = temp_dir.path();
2496
2497        let content = r#"# Test Document
2498
2499[google]: https://google.com
2500[example]: http://example.org
2501[mail]: mailto:test@example.com
2502[ftp]: ftp://files.example.com
2503[local]: ./missing.md
2504
2505Use [google], [example], [mail], [ftp], [local] here.
2506"#;
2507
2508        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2509        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2510        let result = rule.check(&ctx).unwrap();
2511
2512        // Should only warn about the local missing file, not external URLs
2513        assert_eq!(
2514            result.len(),
2515            1,
2516            "Should only warn about local missing file. Got: {result:?}"
2517        );
2518        assert!(
2519            result[0].message.contains("missing.md"),
2520            "Warning should be for missing.md"
2521        );
2522    }
2523
2524    #[test]
2525    fn test_reference_definition_fragment_only_skipped() {
2526        // Reference definitions with fragment-only URLs should be skipped
2527        let temp_dir = tempdir().unwrap();
2528        let base_path = temp_dir.path();
2529
2530        let content = r#"# Test Document
2531
2532[section]: #my-section
2533
2534Use [section] here.
2535"#;
2536
2537        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2538        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2539        let result = rule.check(&ctx).unwrap();
2540
2541        // Should have NO warnings for fragment-only links
2542        assert!(
2543            result.is_empty(),
2544            "Should not warn about fragment-only reference. Got: {result:?}"
2545        );
2546    }
2547
2548    #[test]
2549    fn test_reference_definition_column_position() {
2550        // Test that column position points to the URL in the reference definition
2551        let temp_dir = tempdir().unwrap();
2552        let base_path = temp_dir.path();
2553
2554        // Position markers:     0         1         2
2555        //                       0123456789012345678901
2556        let content = "[ref]: ./missing.md";
2557        //             The URL "./missing.md" starts at 0-indexed position 7
2558        //             which is 1-indexed column 8
2559
2560        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2561        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2562        let result = rule.check(&ctx).unwrap();
2563
2564        assert_eq!(result.len(), 1, "Should have exactly one warning");
2565        assert_eq!(result[0].line, 1, "Should be on line 1");
2566        assert_eq!(result[0].column, 8, "Should point to start of URL './missing.md'");
2567    }
2568
2569    #[test]
2570    fn test_reference_definition_html_with_md_source() {
2571        // Reference definitions to .html files should pass if corresponding .md source exists
2572        let temp_dir = tempdir().unwrap();
2573        let base_path = temp_dir.path();
2574
2575        // Create guide.md (source file)
2576        let md_file = base_path.join("guide.md");
2577        File::create(&md_file).unwrap().write_all(b"# Guide").unwrap();
2578
2579        let content = r#"# Test Document
2580
2581[guide]: ./guide.html
2582[missing]: ./missing.html
2583
2584Use [guide] and [missing] here.
2585"#;
2586
2587        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2588        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2589        let result = rule.check(&ctx).unwrap();
2590
2591        // guide.html passes (guide.md exists), missing.html fails
2592        assert_eq!(
2593            result.len(),
2594            1,
2595            "Should only warn about missing source. Got: {result:?}"
2596        );
2597        assert!(result[0].message.contains("missing.html"));
2598    }
2599
2600    #[test]
2601    fn test_reference_definition_url_encoded() {
2602        // Reference definitions with URL-encoded paths should be decoded before checking
2603        let temp_dir = tempdir().unwrap();
2604        let base_path = temp_dir.path();
2605
2606        // Create a file with spaces in the name
2607        let file_with_spaces = base_path.join("file with spaces.md");
2608        File::create(&file_with_spaces).unwrap().write_all(b"# Spaces").unwrap();
2609
2610        let content = r#"# Test Document
2611
2612[spaces]: ./file%20with%20spaces.md
2613[missing]: ./missing%20file.md
2614
2615Use [spaces] and [missing] here.
2616"#;
2617
2618        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2619        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2620        let result = rule.check(&ctx).unwrap();
2621
2622        // Should only warn about the missing file
2623        assert_eq!(
2624            result.len(),
2625            1,
2626            "Should only warn about missing URL-encoded file. Got: {result:?}"
2627        );
2628        assert!(result[0].message.contains("missing%20file.md"));
2629    }
2630
2631    #[test]
2632    fn test_inline_and_reference_both_checked() {
2633        // Both inline links and reference definitions should be checked
2634        let temp_dir = tempdir().unwrap();
2635        let base_path = temp_dir.path();
2636
2637        let content = r#"# Test Document
2638
2639[inline link](./inline-missing.md)
2640[ref]: ./ref-missing.md
2641
2642Use [ref] here.
2643"#;
2644
2645        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2646        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2647        let result = rule.check(&ctx).unwrap();
2648
2649        // Should warn about both the inline link and the reference definition
2650        assert_eq!(
2651            result.len(),
2652            2,
2653            "Should warn about both inline and reference links. Got: {result:?}"
2654        );
2655        assert!(
2656            result.iter().any(|w| w.message.contains("inline-missing.md")),
2657            "Should warn about inline-missing.md"
2658        );
2659        assert!(
2660            result.iter().any(|w| w.message.contains("ref-missing.md")),
2661            "Should warn about ref-missing.md"
2662        );
2663    }
2664
2665    #[test]
2666    fn test_footnote_definitions_not_flagged() {
2667        // Regression test for issue #286: footnote definitions should not be
2668        // treated as reference definitions and flagged as broken links
2669        let rule = MD057ExistingRelativeLinks::default();
2670
2671        let content = r#"# Title
2672
2673A footnote[^1].
2674
2675[^1]: [link](https://www.google.com).
2676"#;
2677
2678        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2679        let result = rule.check(&ctx).unwrap();
2680
2681        assert!(
2682            result.is_empty(),
2683            "Footnote definitions should not trigger MD057 warnings. Got: {result:?}"
2684        );
2685    }
2686
2687    #[test]
2688    fn test_footnote_with_relative_link_inside() {
2689        // Footnotes containing relative links should not be checked
2690        // (the footnote content is not a URL, it's content that may contain links)
2691        let rule = MD057ExistingRelativeLinks::default();
2692
2693        let content = r#"# Title
2694
2695See the footnote[^1].
2696
2697[^1]: Check out [this file](./existing.md) for more info.
2698[^2]: Also see [missing](./does-not-exist.md).
2699"#;
2700
2701        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2702        let result = rule.check(&ctx).unwrap();
2703
2704        // The inline links INSIDE footnotes should be checked (./existing.md, ./does-not-exist.md)
2705        // but the footnote definition itself should not be treated as a reference definition
2706        // Note: This test verifies that [^1]: and [^2]: are not parsed as ref defs with
2707        // URLs like "[this file](./existing.md)" or "[missing](./does-not-exist.md)"
2708        for warning in &result {
2709            assert!(
2710                !warning.message.contains("[this file]"),
2711                "Footnote content should not be treated as URL: {warning:?}"
2712            );
2713            assert!(
2714                !warning.message.contains("[missing]"),
2715                "Footnote content should not be treated as URL: {warning:?}"
2716            );
2717        }
2718    }
2719
2720    #[test]
2721    fn test_mixed_footnotes_and_reference_definitions() {
2722        // Ensure regular reference definitions are still checked while footnotes are skipped
2723        let temp_dir = tempdir().unwrap();
2724        let base_path = temp_dir.path();
2725
2726        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2727
2728        let content = r#"# Title
2729
2730A footnote[^1] and a [ref link][myref].
2731
2732[^1]: This is a footnote with [link](https://example.com).
2733
2734[myref]: ./missing-file.md "This should be checked"
2735"#;
2736
2737        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2738        let result = rule.check(&ctx).unwrap();
2739
2740        // Should only warn about the regular reference definition, not the footnote
2741        assert_eq!(
2742            result.len(),
2743            1,
2744            "Should only warn about the regular reference definition. Got: {result:?}"
2745        );
2746        assert!(
2747            result[0].message.contains("missing-file.md"),
2748            "Should warn about missing-file.md in reference definition"
2749        );
2750    }
2751
2752    #[test]
2753    fn test_absolute_links_ignore_by_default() {
2754        // By default, absolute links are ignored (not validated)
2755        let temp_dir = tempdir().unwrap();
2756        let base_path = temp_dir.path();
2757
2758        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2759
2760        let content = r#"# Links
2761
2762[API docs](/api/v1/users)
2763[Blog post](/blog/2024/release.html)
2764![Logo](/assets/logo.png)
2765
2766[ref]: /docs/reference.md
2767"#;
2768
2769        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2770        let result = rule.check(&ctx).unwrap();
2771
2772        // No warnings - absolute links are ignored by default
2773        assert!(
2774            result.is_empty(),
2775            "Absolute links should be ignored by default. Got: {result:?}"
2776        );
2777    }
2778
2779    #[test]
2780    fn test_absolute_links_warn_config() {
2781        // When configured to warn, absolute links should generate warnings
2782        let temp_dir = tempdir().unwrap();
2783        let base_path = temp_dir.path();
2784
2785        let config = MD057Config {
2786            absolute_links: AbsoluteLinksOption::Warn,
2787            ..Default::default()
2788        };
2789        let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(base_path);
2790
2791        let content = r#"# Links
2792
2793[API docs](/api/v1/users)
2794[Blog post](/blog/2024/release.html)
2795"#;
2796
2797        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2798        let result = rule.check(&ctx).unwrap();
2799
2800        // Should have 2 warnings for the 2 absolute links
2801        assert_eq!(
2802            result.len(),
2803            2,
2804            "Should warn about both absolute links. Got: {result:?}"
2805        );
2806        assert!(
2807            result[0].message.contains("cannot be validated locally"),
2808            "Warning should explain why: {}",
2809            result[0].message
2810        );
2811        assert!(
2812            result[0].message.contains("/api/v1/users"),
2813            "Warning should include the link path"
2814        );
2815    }
2816
2817    #[test]
2818    fn test_absolute_links_warn_images() {
2819        // Images with absolute paths should also warn when configured
2820        let temp_dir = tempdir().unwrap();
2821        let base_path = temp_dir.path();
2822
2823        let config = MD057Config {
2824            absolute_links: AbsoluteLinksOption::Warn,
2825            ..Default::default()
2826        };
2827        let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(base_path);
2828
2829        let content = r#"# Images
2830
2831![Logo](/assets/logo.png)
2832"#;
2833
2834        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2835        let result = rule.check(&ctx).unwrap();
2836
2837        assert_eq!(
2838            result.len(),
2839            1,
2840            "Should warn about absolute image path. Got: {result:?}"
2841        );
2842        assert!(
2843            result[0].message.contains("/assets/logo.png"),
2844            "Warning should include the image path"
2845        );
2846    }
2847
2848    #[test]
2849    fn test_absolute_links_warn_reference_definitions() {
2850        // Reference definitions with absolute paths should also warn when configured
2851        let temp_dir = tempdir().unwrap();
2852        let base_path = temp_dir.path();
2853
2854        let config = MD057Config {
2855            absolute_links: AbsoluteLinksOption::Warn,
2856            ..Default::default()
2857        };
2858        let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(base_path);
2859
2860        let content = r#"# Reference
2861
2862See the [docs][ref].
2863
2864[ref]: /docs/reference.md
2865"#;
2866
2867        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2868        let result = rule.check(&ctx).unwrap();
2869
2870        assert_eq!(
2871            result.len(),
2872            1,
2873            "Should warn about absolute reference definition. Got: {result:?}"
2874        );
2875        assert!(
2876            result[0].message.contains("/docs/reference.md"),
2877            "Warning should include the reference path"
2878        );
2879    }
2880}