rumdl_lib/rules/
md057_existing_relative_links.rs

//!
//! Rule MD057: Existing relative links
//!
//! See [docs/md057.md](../../docs/md057.md) for full documentation, configuration, and examples.
5
6use crate::rule::{CrossFileScope, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::element_cache::ElementCache;
8use crate::workspace_index::{FileIndex, extract_cross_file_links};
9use regex::Regex;
10use std::collections::HashMap;
11use std::env;
12use std::path::{Path, PathBuf};
13use std::sync::LazyLock;
14use std::sync::{Arc, Mutex};
15
16mod md057_config;
17use md057_config::MD057Config;
18
/// Process-wide memo of filesystem existence checks, keyed by the exact path probed.
/// The map sits behind a mutex so callers on any thread can read and extend it.
static FILE_EXISTENCE_CACHE: LazyLock<Arc<Mutex<HashMap<PathBuf, bool>>>> = LazyLock::new(Default::default);
22
23// Reset the file existence cache (typically between rule runs)
24fn reset_file_existence_cache() {
25    if let Ok(mut cache) = FILE_EXISTENCE_CACHE.lock() {
26        cache.clear();
27    }
28}
29
30// Check if a file exists with caching
31fn file_exists_with_cache(path: &Path) -> bool {
32    match FILE_EXISTENCE_CACHE.lock() {
33        Ok(mut cache) => *cache.entry(path.to_path_buf()).or_insert_with(|| path.exists()),
34        Err(_) => path.exists(), // Fallback to uncached check on mutex poison
35    }
36}
37
38/// Check if a file exists, also trying markdown extensions for extensionless links.
39/// This supports wiki-style links like `[Link](page)` that resolve to `page.md`.
40fn file_exists_or_markdown_extension(path: &Path) -> bool {
41    // First, check exact path
42    if file_exists_with_cache(path) {
43        return true;
44    }
45
46    // If the path has no extension, try adding markdown extensions
47    if path.extension().is_none() {
48        for ext in MARKDOWN_EXTENSIONS {
49            // MARKDOWN_EXTENSIONS includes the dot, e.g., ".md"
50            let path_with_ext = path.with_extension(&ext[1..]);
51            if file_exists_with_cache(&path_with_ext) {
52                return true;
53            }
54        }
55    }
56
57    false
58}
59
60// Regex to match the start of a link - simplified for performance
61static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());
62
63/// Regex to extract the URL from an angle-bracketed markdown link
64/// Format: `](<URL>)` or `](<URL> "title")`
65/// This handles URLs with parentheses like `](<path/(with)/parens.md>)`
66static URL_EXTRACT_ANGLE_BRACKET_REGEX: LazyLock<Regex> =
67    LazyLock::new(|| Regex::new(r#"\]\(\s*<([^>]+)>(#[^\)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());
68
69/// Regex to extract the URL from a normal markdown link (without angle brackets)
70/// Format: `](URL)` or `](URL "title")`
71static URL_EXTRACT_REGEX: LazyLock<Regex> =
72    LazyLock::new(|| Regex::new("\\]\\(\\s*([^>\\)\\s#]+)(#[^)\\s]*)?\\s*(?:\"[^\"]*\")?\\s*\\)").unwrap());
73
74/// Regex to detect URLs with explicit schemes (should not be checked as relative links)
75/// Matches: scheme:// or scheme: (per RFC 3986)
76/// This covers http, https, ftp, file, smb, mailto, tel, data, macappstores, etc.
77static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
78    LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());
79
/// Working directory, captured on first use; falls back to `.` when it cannot be determined.
static CURRENT_DIR: LazyLock<PathBuf> = LazyLock::new(|| match env::current_dir() {
    Ok(dir) => dir,
    Err(_) => PathBuf::from("."),
});
82
/// Map an ASCII hex digit (`0-9`, `a-f`, `A-F`) to its value in `0..=15`.
///
/// Any other byte yields `None`.
#[inline]
fn hex_digit_to_value(byte: u8) -> Option<u8> {
    char::from(byte)
        .to_digit(16)
        .and_then(|digit| u8::try_from(digit).ok())
}
94
/// File extensions recognized as markdown sources. Each entry includes the leading dot.
#[rustfmt::skip]
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md", ".markdown", ".mdx",
    ".mkd", ".mkdn", ".mdown",
    ".mdwn", ".qmd", ".rmd",
];
107
/// Rule MD057: relative links should point to files or directories that exist.
///
/// The optional base path lives in an `Arc<Mutex<..>>`, so clones of a rule share
/// the same base-path slot.
#[derive(Clone, Debug, Default)]
pub struct MD057ExistingRelativeLinks {
    /// Directory against which relative links are resolved; `None` until set explicitly.
    base_path: Arc<Mutex<Option<PathBuf>>>,
}
114
115impl MD057ExistingRelativeLinks {
116    /// Create a new instance with default settings
117    pub fn new() -> Self {
118        Self::default()
119    }
120
121    /// Set the base path for resolving relative links
122    pub fn with_path<P: AsRef<Path>>(self, path: P) -> Self {
123        let path = path.as_ref();
124        let dir_path = if path.is_file() {
125            path.parent().map(|p| p.to_path_buf())
126        } else {
127            Some(path.to_path_buf())
128        };
129
130        if let Ok(mut guard) = self.base_path.lock() {
131            *guard = dir_path;
132        }
133        self
134    }
135
136    #[allow(unused_variables)]
137    pub fn from_config_struct(config: MD057Config) -> Self {
138        Self::default()
139    }
140
141    /// Check if a URL is external or should be skipped for validation.
142    ///
143    /// Returns `true` (skip validation) for:
144    /// - URLs with protocols: `https://`, `http://`, `ftp://`, `mailto:`, etc.
145    /// - Bare domains: `www.example.com`, `example.com`
146    /// - Email addresses: `user@example.com` (without `mailto:`)
147    /// - Template variables: `{{URL}}`, `{{% include %}}`
148    /// - Absolute web URL paths: `/api/docs`, `/blog/post.html`
149    ///
150    /// Returns `false` (validate) for:
151    /// - Relative filesystem paths: `./file.md`, `../parent/file.md`, `file.md`
152    #[inline]
153    fn is_external_url(&self, url: &str) -> bool {
154        if url.is_empty() {
155            return false;
156        }
157
158        // Quick checks for common external URL patterns
159        if PROTOCOL_DOMAIN_REGEX.is_match(url) || url.starts_with("www.") {
160            return true;
161        }
162
163        // Skip template variables (Handlebars/Mustache/Jinja2 syntax)
164        // Examples: {{URL}}, {{#URL}}, {{> partial}}, {{% include %}}, {{ variable }}
165        if url.starts_with("{{") || url.starts_with("{%") {
166            return true;
167        }
168
169        // Simple check: if URL contains @, it's almost certainly an email address
170        // File paths with @ are extremely rare, so this is a safe heuristic
171        if url.contains('@') {
172            return true; // It's an email address, skip it
173        }
174
175        // Bare domain check (e.g., "example.com")
176        // Note: We intentionally DON'T skip all TLDs like .org, .net, etc.
177        // Links like [text](nodejs.org/path) without a protocol are broken -
178        // they'll be treated as relative paths by markdown renderers.
179        // Flagging them helps users find missing protocols.
180        // We only skip .com as a minimal safety net for the most common case.
181        if url.ends_with(".com") {
182            return true;
183        }
184
185        // Absolute URL paths (e.g., /api/docs, /blog/post.html) are treated as web paths
186        // and skipped. These are typically routes for published documentation sites,
187        // not filesystem paths that can be validated locally.
188        if url.starts_with('/') {
189            return true;
190        }
191
192        // Framework path aliases (resolved by build tools like Vite, webpack, etc.)
193        // These are not filesystem paths but module/asset aliases
194        // Examples: ~/assets/image.png, @images/photo.jpg, @/components/Button.vue
195        if url.starts_with('~') || url.starts_with('@') {
196            return true;
197        }
198
199        // All other cases (relative paths, etc.) are not external
200        false
201    }
202
203    /// Check if the URL is a fragment-only link (internal document link)
204    #[inline]
205    fn is_fragment_only_link(&self, url: &str) -> bool {
206        url.starts_with('#')
207    }
208
209    /// Decode URL percent-encoded sequences in a path.
210    /// Converts `%20` to space, `%2F` to `/`, etc.
211    /// Returns the original string if decoding fails or produces invalid UTF-8.
212    fn url_decode(path: &str) -> String {
213        // Quick check: if no percent sign, return as-is
214        if !path.contains('%') {
215            return path.to_string();
216        }
217
218        let bytes = path.as_bytes();
219        let mut result = Vec::with_capacity(bytes.len());
220        let mut i = 0;
221
222        while i < bytes.len() {
223            if bytes[i] == b'%' && i + 2 < bytes.len() {
224                // Try to parse the two hex digits following %
225                let hex1 = bytes[i + 1];
226                let hex2 = bytes[i + 2];
227                if let (Some(d1), Some(d2)) = (hex_digit_to_value(hex1), hex_digit_to_value(hex2)) {
228                    result.push(d1 * 16 + d2);
229                    i += 3;
230                    continue;
231                }
232            }
233            result.push(bytes[i]);
234            i += 1;
235        }
236
237        // Convert to UTF-8, falling back to original if invalid
238        String::from_utf8(result).unwrap_or_else(|_| path.to_string())
239    }
240
241    /// Strip query parameters and fragments from a URL for file existence checking.
242    /// URLs like `path/to/image.png?raw=true` or `file.md#section` should check
243    /// for `path/to/image.png` or `file.md` respectively.
244    ///
245    /// Note: In standard URLs, query parameters (`?`) come before fragments (`#`),
246    /// so we check for `?` first. If a URL has both, only the query is stripped here
247    /// (fragments are handled separately by the regex in `contribute_to_index`).
248    fn strip_query_and_fragment(url: &str) -> &str {
249        // Find the first occurrence of '?' or '#', whichever comes first
250        // This handles both standard URLs (? before #) and edge cases (# before ?)
251        let query_pos = url.find('?');
252        let fragment_pos = url.find('#');
253
254        match (query_pos, fragment_pos) {
255            (Some(q), Some(f)) => {
256                // Both exist - strip at whichever comes first
257                &url[..q.min(f)]
258            }
259            (Some(q), None) => &url[..q],
260            (None, Some(f)) => &url[..f],
261            (None, None) => url,
262        }
263    }
264
265    /// Resolve a relative link against a provided base path
266    fn resolve_link_path_with_base(link: &str, base_path: &Path) -> PathBuf {
267        base_path.join(link)
268    }
269}
270
271impl Rule for MD057ExistingRelativeLinks {
272    fn name(&self) -> &'static str {
273        "MD057"
274    }
275
276    fn description(&self) -> &'static str {
277        "Relative links should point to existing files"
278    }
279
280    fn category(&self) -> RuleCategory {
281        RuleCategory::Link
282    }
283
284    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
285        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
286    }
287
288    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
289        let content = ctx.content;
290
291        // Early returns for performance
292        if content.is_empty() || !content.contains('[') {
293            return Ok(Vec::new());
294        }
295
296        // Quick check for any potential links before expensive operations
297        // Check for inline links "](", reference definitions "]:", or images "!["
298        if !content.contains("](") && !content.contains("]:") {
299            return Ok(Vec::new());
300        }
301
302        // Reset the file existence cache for a fresh run
303        reset_file_existence_cache();
304
305        let mut warnings = Vec::new();
306
307        // Determine base path for resolving relative links
308        // ALWAYS compute from ctx.source_file for each file - do not reuse cached base_path
309        // This ensures each file resolves links relative to its own directory
310        let base_path: Option<PathBuf> = {
311            // First check if base_path was explicitly set via with_path() (for tests)
312            let explicit_base = self.base_path.lock().ok().and_then(|g| g.clone());
313            if explicit_base.is_some() {
314                explicit_base
315            } else if let Some(ref source_file) = ctx.source_file {
316                // Resolve symlinks to get the actual file location
317                // This ensures relative links are resolved from the target's directory,
318                // not the symlink's directory
319                let resolved_file = source_file.canonicalize().unwrap_or_else(|_| source_file.clone());
320                resolved_file
321                    .parent()
322                    .map(|p| p.to_path_buf())
323                    .or_else(|| Some(CURRENT_DIR.clone()))
324            } else {
325                // No source file available - cannot validate relative links
326                None
327            }
328        };
329
330        // If we still don't have a base path, we can't validate relative links
331        let Some(base_path) = base_path else {
332            return Ok(warnings);
333        };
334
335        // Use LintContext links instead of expensive regex parsing
336        if !ctx.links.is_empty() {
337            // Use LineIndex for correct position calculation across all line ending types
338            let line_index = &ctx.line_index;
339
340            // Create element cache once for all links
341            let element_cache = ElementCache::new(content);
342
343            // Pre-collect lines to avoid repeated line iteration
344            let lines: Vec<&str> = content.lines().collect();
345
346            // Track which lines we've already processed to avoid duplicates
347            // (ctx.links may have multiple entries for the same line, especially with malformed markdown)
348            let mut processed_lines = std::collections::HashSet::new();
349
350            for link in &ctx.links {
351                let line_idx = link.line - 1;
352                if line_idx >= lines.len() {
353                    continue;
354                }
355
356                // Skip if we've already processed this line
357                if !processed_lines.insert(line_idx) {
358                    continue;
359                }
360
361                let line = lines[line_idx];
362
363                // Quick check for link pattern in this line
364                if !line.contains("](") {
365                    continue;
366                }
367
368                // Find all links in this line using optimized regex
369                for link_match in LINK_START_REGEX.find_iter(line) {
370                    let start_pos = link_match.start();
371                    let end_pos = link_match.end();
372
373                    // Calculate absolute position using LineIndex
374                    let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
375                    let absolute_start_pos = line_start_byte + start_pos;
376
377                    // Skip if this link is in a code span
378                    if element_cache.is_in_code_span(absolute_start_pos) {
379                        continue;
380                    }
381
382                    // Find the URL part after the link text
383                    // Try angle-bracket regex first (handles URLs with parens like `<path/(with)/parens.md>`)
384                    // Then fall back to normal URL regex
385                    let caps_and_url = URL_EXTRACT_ANGLE_BRACKET_REGEX
386                        .captures_at(line, end_pos - 1)
387                        .and_then(|caps| caps.get(1).map(|g| (caps, g)))
388                        .or_else(|| {
389                            URL_EXTRACT_REGEX
390                                .captures_at(line, end_pos - 1)
391                                .and_then(|caps| caps.get(1).map(|g| (caps, g)))
392                        });
393
394                    if let Some((_caps, url_group)) = caps_and_url {
395                        let url = url_group.as_str().trim();
396
397                        // Skip empty URLs
398                        if url.is_empty() {
399                            continue;
400                        }
401
402                        // Skip rustdoc intra-doc links (backtick-wrapped URLs)
403                        // These are Rust API references, not file paths
404                        // Example: [`f32::is_subnormal`], [`Vec::push`]
405                        if url.starts_with('`') && url.ends_with('`') {
406                            continue;
407                        }
408
409                        // Skip external URLs, absolute paths, and fragment-only links
410                        if self.is_external_url(url) || self.is_fragment_only_link(url) {
411                            continue;
412                        }
413
414                        // Strip query parameters and fragments before checking file existence
415                        let file_path = Self::strip_query_and_fragment(url);
416
417                        // URL-decode the path to handle percent-encoded characters
418                        let decoded_path = Self::url_decode(file_path);
419
420                        // Resolve the relative link against the base path
421                        let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
422
423                        // Check if the file exists, also trying markdown extensions for extensionless links
424                        if file_exists_or_markdown_extension(&resolved_path) {
425                            continue; // File exists, no warning needed
426                        }
427
428                        // For .html/.htm links, check if a corresponding markdown source exists
429                        let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
430                            && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
431                            && let (Some(stem), Some(parent)) = (
432                                resolved_path.file_stem().and_then(|s| s.to_str()),
433                                resolved_path.parent(),
434                            ) {
435                            MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
436                                let source_path = parent.join(format!("{stem}{md_ext}"));
437                                file_exists_with_cache(&source_path)
438                            })
439                        } else {
440                            false
441                        };
442
443                        if has_md_source {
444                            continue; // Markdown source exists, link is valid
445                        }
446
447                        // File doesn't exist and no source file found
448                        // Use actual URL position from regex capture group
449                        // Note: capture group positions are absolute within the line string
450                        let url_start = url_group.start();
451                        let url_end = url_group.end();
452
453                        warnings.push(LintWarning {
454                            rule_name: Some(self.name().to_string()),
455                            line: link.line,
456                            column: url_start + 1, // 1-indexed
457                            end_line: link.line,
458                            end_column: url_end + 1, // 1-indexed
459                            message: format!("Relative link '{url}' does not exist"),
460                            severity: Severity::Error,
461                            fix: None,
462                        });
463                    }
464                }
465            }
466        }
467
468        // Also process images - they have URLs already parsed
469        for image in &ctx.images {
470            let url = image.url.as_ref();
471
472            // Skip empty URLs
473            if url.is_empty() {
474                continue;
475            }
476
477            // Skip external URLs, absolute paths, and fragment-only links
478            if self.is_external_url(url) || self.is_fragment_only_link(url) {
479                continue;
480            }
481
482            // Strip query parameters and fragments before checking file existence
483            let file_path = Self::strip_query_and_fragment(url);
484
485            // URL-decode the path to handle percent-encoded characters
486            let decoded_path = Self::url_decode(file_path);
487
488            // Resolve the relative link against the base path
489            let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
490
491            // Check if the file exists, also trying markdown extensions for extensionless links
492            if file_exists_or_markdown_extension(&resolved_path) {
493                continue; // File exists, no warning needed
494            }
495
496            // For .html/.htm links, check if a corresponding markdown source exists
497            let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
498                && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
499                && let (Some(stem), Some(parent)) = (
500                    resolved_path.file_stem().and_then(|s| s.to_str()),
501                    resolved_path.parent(),
502                ) {
503                MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
504                    let source_path = parent.join(format!("{stem}{md_ext}"));
505                    file_exists_with_cache(&source_path)
506                })
507            } else {
508                false
509            };
510
511            if has_md_source {
512                continue; // Markdown source exists, link is valid
513            }
514
515            // File doesn't exist and no source file found
516            // Images already have correct position from parser
517            warnings.push(LintWarning {
518                rule_name: Some(self.name().to_string()),
519                line: image.line,
520                column: image.start_col + 1,
521                end_line: image.line,
522                end_column: image.start_col + 1 + url.len(),
523                message: format!("Relative link '{url}' does not exist"),
524                severity: Severity::Error,
525                fix: None,
526            });
527        }
528
529        // Also process reference definitions: [ref]: ./path.md
530        for ref_def in &ctx.reference_defs {
531            let url = &ref_def.url;
532
533            // Skip empty URLs
534            if url.is_empty() {
535                continue;
536            }
537
538            // Skip external URLs, absolute paths, and fragment-only links
539            if self.is_external_url(url) || self.is_fragment_only_link(url) {
540                continue;
541            }
542
543            // Strip query parameters and fragments before checking file existence
544            let file_path = Self::strip_query_and_fragment(url);
545
546            // URL-decode the path to handle percent-encoded characters
547            let decoded_path = Self::url_decode(file_path);
548
549            // Resolve the relative link against the base path
550            let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
551
552            // Check if the file exists, also trying markdown extensions for extensionless links
553            if file_exists_or_markdown_extension(&resolved_path) {
554                continue; // File exists, no warning needed
555            }
556
557            // For .html/.htm links, check if a corresponding markdown source exists
558            let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
559                && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
560                && let (Some(stem), Some(parent)) = (
561                    resolved_path.file_stem().and_then(|s| s.to_str()),
562                    resolved_path.parent(),
563                ) {
564                MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
565                    let source_path = parent.join(format!("{stem}{md_ext}"));
566                    file_exists_with_cache(&source_path)
567                })
568            } else {
569                false
570            };
571
572            if has_md_source {
573                continue; // Markdown source exists, link is valid
574            }
575
576            // File doesn't exist and no source file found
577            // Calculate column position: find URL within the line
578            let line_idx = ref_def.line - 1;
579            let column = content.lines().nth(line_idx).map_or(1, |line_content| {
580                // Find URL position in line (after ]: )
581                line_content.find(url.as_str()).map_or(1, |url_pos| url_pos + 1)
582            });
583
584            warnings.push(LintWarning {
585                rule_name: Some(self.name().to_string()),
586                line: ref_def.line,
587                column,
588                end_line: ref_def.line,
589                end_column: column + url.len(),
590                message: format!("Relative link '{url}' does not exist"),
591                severity: Severity::Error,
592                fix: None,
593            });
594        }
595
596        Ok(warnings)
597    }
598
599    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
600        Ok(ctx.content.to_string())
601    }
602
603    fn as_any(&self) -> &dyn std::any::Any {
604        self
605    }
606
607    fn default_config_section(&self) -> Option<(String, toml::Value)> {
608        // No configurable options for this rule
609        None
610    }
611
612    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
613    where
614        Self: Sized,
615    {
616        let rule_config = crate::rule_config_serde::load_rule_config::<MD057Config>(config);
617        Box::new(Self::from_config_struct(rule_config))
618    }
619
620    fn cross_file_scope(&self) -> CrossFileScope {
621        CrossFileScope::Workspace
622    }
623
624    fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, index: &mut FileIndex) {
625        // Use the shared utility for cross-file link extraction
626        // This ensures consistent position tracking between CLI and LSP
627        for link in extract_cross_file_links(ctx) {
628            index.add_cross_file_link(link);
629        }
630    }
631
632    fn cross_file_check(
633        &self,
634        file_path: &Path,
635        file_index: &FileIndex,
636        workspace_index: &crate::workspace_index::WorkspaceIndex,
637    ) -> LintResult {
638        let mut warnings = Vec::new();
639
640        // Get the directory containing this file for resolving relative links
641        let file_dir = file_path.parent();
642
643        for cross_link in &file_index.cross_file_links {
644            // URL-decode the path for filesystem operations
645            // The stored path is URL-encoded (e.g., "%F0%9F%91%A4" for emoji 👤)
646            let decoded_target = Self::url_decode(&cross_link.target_path);
647
648            // Skip absolute/protocol-relative paths (web paths, not filesystem paths)
649            if decoded_target.starts_with('/') {
650                continue;
651            }
652
653            // Resolve relative path
654            let target_path = if let Some(dir) = file_dir {
655                dir.join(&decoded_target)
656            } else {
657                Path::new(&decoded_target).to_path_buf()
658            };
659
660            // Normalize the path (handle .., ., etc.)
661            let target_path = normalize_path(&target_path);
662
663            // Check if the target file exists, also trying markdown extensions for extensionless links
664            let file_exists =
665                workspace_index.contains_file(&target_path) || file_exists_or_markdown_extension(&target_path);
666
667            if !file_exists {
668                // For .html/.htm links, check if a corresponding markdown source exists
669                // This handles doc sites (mdBook, etc.) where .md is compiled to .html
670                let has_md_source = if let Some(ext) = target_path.extension().and_then(|e| e.to_str())
671                    && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
672                    && let (Some(stem), Some(parent)) =
673                        (target_path.file_stem().and_then(|s| s.to_str()), target_path.parent())
674                {
675                    MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
676                        let source_path = parent.join(format!("{stem}{md_ext}"));
677                        workspace_index.contains_file(&source_path) || source_path.exists()
678                    })
679                } else {
680                    false
681                };
682
683                if !has_md_source {
684                    warnings.push(LintWarning {
685                        rule_name: Some(self.name().to_string()),
686                        line: cross_link.line,
687                        column: cross_link.column,
688                        end_line: cross_link.line,
689                        end_column: cross_link.column + cross_link.target_path.len(),
690                        message: format!("Relative link '{}' does not exist", cross_link.target_path),
691                        severity: Severity::Error,
692                        fix: None,
693                    });
694                }
695            }
696        }
697
698        Ok(warnings)
699    }
700}
701
/// Lexically normalize `path` by resolving `.` and `..` components without touching the
/// filesystem.
///
/// - `.` components are dropped.
/// - `..` removes the preceding normal component (`a/b/../c` becomes `a/c`).
/// - `..` directly under a root or prefix is dropped, since there is nothing above the root
///   (`/../a` becomes `/a`).
/// - `..` with nothing left to remove is kept, so a relative path that climbs out of its
///   starting directory keeps doing so (`../a` stays `../a`, `a/../../b` becomes `../b`).
fn normalize_path(path: &Path) -> PathBuf {
    let mut components: Vec<std::path::Component<'_>> = Vec::new();

    for component in path.components() {
        match component {
            // Current-directory markers carry no information.
            std::path::Component::CurDir => {}
            std::path::Component::ParentDir => match components.last() {
                // Cancel out the directory we just descended into.
                Some(std::path::Component::Normal(_)) => {
                    components.pop();
                }
                // Already at the root: cannot go higher, and the root must not be removed.
                Some(std::path::Component::RootDir | std::path::Component::Prefix(_)) => {}
                // Nothing to cancel (empty, or only leading `..` so far): keep climbing.
                _ => components.push(component),
            },
            std::path::Component::Normal(_)
            | std::path::Component::RootDir
            | std::path::Component::Prefix(_) => components.push(component),
        }
    }

    components.iter().collect()
}
725
726#[cfg(test)]
727mod tests {
728    use super::*;
729    use crate::workspace_index::CrossFileLinkIndex;
730    use std::fs::File;
731    use std::io::Write;
732    use tempfile::tempdir;
733
734    #[test]
735    fn test_strip_query_and_fragment() {
736        // Test query parameter stripping
737        assert_eq!(
738            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true"),
739            "file.png"
740        );
741        assert_eq!(
742            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true&version=1"),
743            "file.png"
744        );
745        assert_eq!(
746            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?"),
747            "file.png"
748        );
749
750        // Test fragment stripping
751        assert_eq!(
752            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section"),
753            "file.md"
754        );
755        assert_eq!(
756            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#"),
757            "file.md"
758        );
759
760        // Test both query and fragment (query comes first, per RFC 3986)
761        assert_eq!(
762            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md?raw=true#section"),
763            "file.md"
764        );
765
766        // Test no query or fragment
767        assert_eq!(
768            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png"),
769            "file.png"
770        );
771
772        // Test with path
773        assert_eq!(
774            MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true"),
775            "path/to/image.png"
776        );
777        assert_eq!(
778            MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true#anchor"),
779            "path/to/image.png"
780        );
781
782        // Edge case: fragment before query (non-standard but possible)
783        assert_eq!(
784            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section?query"),
785            "file.md"
786        );
787    }
788
789    #[test]
790    fn test_url_decode() {
791        // Simple space encoding
792        assert_eq!(
793            MD057ExistingRelativeLinks::url_decode("penguin%20with%20space.jpg"),
794            "penguin with space.jpg"
795        );
796
797        // Path with encoded spaces
798        assert_eq!(
799            MD057ExistingRelativeLinks::url_decode("assets/my%20file%20name.png"),
800            "assets/my file name.png"
801        );
802
803        // Multiple encoded characters
804        assert_eq!(
805            MD057ExistingRelativeLinks::url_decode("hello%20world%21.md"),
806            "hello world!.md"
807        );
808
809        // Lowercase hex
810        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2f%2e%2e"), "/..");
811
812        // Uppercase hex
813        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2F%2E%2E"), "/..");
814
815        // Mixed case hex
816        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2f%2E%2e"), "/..");
817
818        // No encoding - return as-is
819        assert_eq!(
820            MD057ExistingRelativeLinks::url_decode("normal-file.md"),
821            "normal-file.md"
822        );
823
824        // Incomplete percent encoding - leave as-is
825        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%2.txt"), "file%2.txt");
826
827        // Percent at end - leave as-is
828        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%"), "file%");
829
830        // Invalid hex digits - leave as-is
831        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%GG.txt"), "file%GG.txt");
832
833        // Plus sign (should NOT be decoded - that's form encoding, not URL encoding)
834        assert_eq!(MD057ExistingRelativeLinks::url_decode("file+name.txt"), "file+name.txt");
835
836        // Empty string
837        assert_eq!(MD057ExistingRelativeLinks::url_decode(""), "");
838
839        // UTF-8 multi-byte characters (é = C3 A9 in UTF-8)
840        assert_eq!(MD057ExistingRelativeLinks::url_decode("caf%C3%A9.md"), "café.md");
841
842        // Multiple consecutive encoded characters
843        assert_eq!(MD057ExistingRelativeLinks::url_decode("%20%20%20"), "   ");
844
845        // Encoded path separators
846        assert_eq!(
847            MD057ExistingRelativeLinks::url_decode("path%2Fto%2Ffile.md"),
848            "path/to/file.md"
849        );
850
851        // Mixed encoded and non-encoded
852        assert_eq!(
853            MD057ExistingRelativeLinks::url_decode("hello%20world/foo%20bar.md"),
854            "hello world/foo bar.md"
855        );
856
857        // Special characters that are commonly encoded
858        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%5B1%5D.md"), "file[1].md");
859
860        // Percent at position that looks like encoding but isn't valid
861        assert_eq!(MD057ExistingRelativeLinks::url_decode("100%pure.md"), "100%pure.md");
862    }
863
864    #[test]
865    fn test_url_encoded_filenames() {
866        // Create a temporary directory for test files
867        let temp_dir = tempdir().unwrap();
868        let base_path = temp_dir.path();
869
870        // Create a file with spaces in the name
871        let file_with_spaces = base_path.join("penguin with space.jpg");
872        File::create(&file_with_spaces)
873            .unwrap()
874            .write_all(b"image data")
875            .unwrap();
876
877        // Create a subdirectory with spaces
878        let subdir = base_path.join("my images");
879        std::fs::create_dir(&subdir).unwrap();
880        let nested_file = subdir.join("photo 1.png");
881        File::create(&nested_file).unwrap().write_all(b"photo data").unwrap();
882
883        // Test content with URL-encoded links
884        let content = r#"
885# Test Document with URL-Encoded Links
886
887![Penguin](penguin%20with%20space.jpg)
888![Photo](my%20images/photo%201.png)
889![Missing](missing%20file.jpg)
890"#;
891
892        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
893
894        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
895        let result = rule.check(&ctx).unwrap();
896
897        // Should only have one warning for the missing file
898        assert_eq!(
899            result.len(),
900            1,
901            "Should only warn about missing%20file.jpg. Got: {result:?}"
902        );
903        assert!(
904            result[0].message.contains("missing%20file.jpg"),
905            "Warning should mention the URL-encoded filename"
906        );
907    }
908
909    #[test]
910    fn test_external_urls() {
911        let rule = MD057ExistingRelativeLinks::new();
912
913        // Common web protocols
914        assert!(rule.is_external_url("https://example.com"));
915        assert!(rule.is_external_url("http://example.com"));
916        assert!(rule.is_external_url("ftp://example.com"));
917        assert!(rule.is_external_url("www.example.com"));
918        assert!(rule.is_external_url("example.com"));
919
920        // Special URI schemes
921        assert!(rule.is_external_url("file:///path/to/file"));
922        assert!(rule.is_external_url("smb://server/share"));
923        assert!(rule.is_external_url("macappstores://apps.apple.com/"));
924        assert!(rule.is_external_url("mailto:user@example.com"));
925        assert!(rule.is_external_url("tel:+1234567890"));
926        assert!(rule.is_external_url("data:text/plain;base64,SGVsbG8="));
927        assert!(rule.is_external_url("javascript:void(0)"));
928        assert!(rule.is_external_url("ssh://git@github.com/repo"));
929        assert!(rule.is_external_url("git://github.com/repo.git"));
930
931        // Email addresses without mailto: protocol
932        // These are clearly not file links and should be skipped
933        assert!(rule.is_external_url("user@example.com"));
934        assert!(rule.is_external_url("steering@kubernetes.io"));
935        assert!(rule.is_external_url("john.doe+filter@company.co.uk"));
936        assert!(rule.is_external_url("user_name@sub.domain.com"));
937        assert!(rule.is_external_url("firstname.lastname+tag@really.long.domain.example.org"));
938
939        // Template variables should be skipped (not checked as relative links)
940        assert!(rule.is_external_url("{{URL}}")); // Handlebars/Mustache
941        assert!(rule.is_external_url("{{#URL}}")); // Handlebars block helper
942        assert!(rule.is_external_url("{{> partial}}")); // Handlebars partial
943        assert!(rule.is_external_url("{{ variable }}")); // Mustache with spaces
944        assert!(rule.is_external_url("{{% include %}}")); // Jinja2/Hugo shortcode
945        assert!(rule.is_external_url("{{")); // Even partial matches (regex edge case)
946
947        // Absolute web URL paths should be skipped (not validated)
948        // These are typically routes for published documentation sites
949        assert!(rule.is_external_url("/api/v1/users"));
950        assert!(rule.is_external_url("/blog/2024/release.html"));
951        assert!(rule.is_external_url("/react/hooks/use-state.html"));
952        assert!(rule.is_external_url("/pkg/runtime"));
953        assert!(rule.is_external_url("/doc/go1compat"));
954        assert!(rule.is_external_url("/index.html"));
955        assert!(rule.is_external_url("/assets/logo.png"));
956
957        // Framework path aliases should be skipped (resolved by build tools)
958        // Tilde prefix (common in Vite, Nuxt, Astro for project root)
959        assert!(rule.is_external_url("~/assets/image.png"));
960        assert!(rule.is_external_url("~/components/Button.vue"));
961        assert!(rule.is_external_url("~assets/logo.svg")); // Nuxt style without /
962
963        // @ prefix (common in Vue, webpack, Vite aliases)
964        assert!(rule.is_external_url("@/components/Header.vue"));
965        assert!(rule.is_external_url("@images/photo.jpg"));
966        assert!(rule.is_external_url("@assets/styles.css"));
967
968        // Relative paths should NOT be external (should be validated)
969        assert!(!rule.is_external_url("./relative/path.md"));
970        assert!(!rule.is_external_url("relative/path.md"));
971        assert!(!rule.is_external_url("../parent/path.md"));
972    }
973
974    #[test]
975    fn test_framework_path_aliases() {
976        // Create a temporary directory for test files
977        let temp_dir = tempdir().unwrap();
978        let base_path = temp_dir.path();
979
980        // Test content with framework path aliases (should all be skipped)
981        let content = r#"
982# Framework Path Aliases
983
984![Image 1](~/assets/penguin.jpg)
985![Image 2](~assets/logo.svg)
986![Image 3](@images/photo.jpg)
987![Image 4](@/components/icon.svg)
988[Link](@/pages/about.md)
989
990This is a [real missing link](missing.md) that should be flagged.
991"#;
992
993        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
994
995        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
996        let result = rule.check(&ctx).unwrap();
997
998        // Should only have one warning for the real missing link
999        assert_eq!(
1000            result.len(),
1001            1,
1002            "Should only warn about missing.md, not framework aliases. Got: {result:?}"
1003        );
1004        assert!(
1005            result[0].message.contains("missing.md"),
1006            "Warning should be for missing.md"
1007        );
1008    }
1009
1010    #[test]
1011    fn test_url_decode_security_path_traversal() {
1012        // Ensure URL decoding doesn't enable path traversal attacks
1013        // The decoded path is still validated against the base path
1014        let temp_dir = tempdir().unwrap();
1015        let base_path = temp_dir.path();
1016
1017        // Create a file in the temp directory
1018        let file_in_base = base_path.join("safe.md");
1019        File::create(&file_in_base).unwrap().write_all(b"# Safe").unwrap();
1020
1021        // Test with encoded path traversal attempt
1022        // Use a path that definitely won't exist on any platform (not /etc/passwd which exists on Linux)
1023        // %2F = /, so ..%2F..%2Fnonexistent%2Ffile = ../../nonexistent/file
1024        // %252F = %2F (double encoded), so ..%252F..%252F = ..%2F..%2F (literal, won't decode to ..)
1025        let content = r#"
1026[Traversal attempt](..%2F..%2Fnonexistent_dir_12345%2Fmissing.md)
1027[Double encoded](..%252F..%252Fnonexistent%252Ffile.md)
1028[Safe link](safe.md)
1029"#;
1030
1031        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1032
1033        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1034        let result = rule.check(&ctx).unwrap();
1035
1036        // The traversal attempts should still be flagged as missing
1037        // (they don't exist relative to base_path after decoding)
1038        assert_eq!(
1039            result.len(),
1040            2,
1041            "Should have warnings for traversal attempts. Got: {result:?}"
1042        );
1043    }
1044
1045    #[test]
1046    fn test_url_encoded_utf8_filenames() {
1047        // Test with actual UTF-8 encoded filenames
1048        let temp_dir = tempdir().unwrap();
1049        let base_path = temp_dir.path();
1050
1051        // Create files with unicode names
1052        let cafe_file = base_path.join("café.md");
1053        File::create(&cafe_file).unwrap().write_all(b"# Cafe").unwrap();
1054
1055        let content = r#"
1056[Café link](caf%C3%A9.md)
1057[Missing unicode](r%C3%A9sum%C3%A9.md)
1058"#;
1059
1060        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1061
1062        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1063        let result = rule.check(&ctx).unwrap();
1064
1065        // Should only warn about the missing file
1066        assert_eq!(
1067            result.len(),
1068            1,
1069            "Should only warn about missing résumé.md. Got: {result:?}"
1070        );
1071        assert!(
1072            result[0].message.contains("r%C3%A9sum%C3%A9.md"),
1073            "Warning should mention the URL-encoded filename"
1074        );
1075    }
1076
1077    #[test]
1078    fn test_url_encoded_emoji_filenames() {
1079        // URL-encoded emoji paths should be correctly resolved
1080        // 👤 = U+1F464 = F0 9F 91 A4 in UTF-8
1081        let temp_dir = tempdir().unwrap();
1082        let base_path = temp_dir.path();
1083
1084        // Create directory with emoji in name: 👤 Personal
1085        let emoji_dir = base_path.join("👤 Personal");
1086        std::fs::create_dir(&emoji_dir).unwrap();
1087
1088        // Create file in that directory: TV Shows.md
1089        let file_path = emoji_dir.join("TV Shows.md");
1090        File::create(&file_path)
1091            .unwrap()
1092            .write_all(b"# TV Shows\n\nContent here.")
1093            .unwrap();
1094
1095        // Test content with URL-encoded emoji link
1096        // %F0%9F%91%A4 = 👤, %20 = space
1097        let content = r#"
1098# Test Document
1099
1100[TV Shows](./%F0%9F%91%A4%20Personal/TV%20Shows.md)
1101[Missing](./%F0%9F%91%A4%20Personal/Missing.md)
1102"#;
1103
1104        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1105
1106        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1107        let result = rule.check(&ctx).unwrap();
1108
1109        // Should only warn about the missing file, not the valid emoji path
1110        assert_eq!(result.len(), 1, "Should only warn about missing file. Got: {result:?}");
1111        assert!(
1112            result[0].message.contains("Missing.md"),
1113            "Warning should be for Missing.md, got: {}",
1114            result[0].message
1115        );
1116    }
1117
1118    #[test]
1119    fn test_no_warnings_without_base_path() {
1120        let rule = MD057ExistingRelativeLinks::new();
1121        let content = "[Link](missing.md)";
1122
1123        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1124        let result = rule.check(&ctx).unwrap();
1125        assert!(result.is_empty(), "Should have no warnings without base path");
1126    }
1127
1128    #[test]
1129    fn test_existing_and_missing_links() {
1130        // Create a temporary directory for test files
1131        let temp_dir = tempdir().unwrap();
1132        let base_path = temp_dir.path();
1133
1134        // Create an existing file
1135        let exists_path = base_path.join("exists.md");
1136        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
1137
1138        // Verify the file exists
1139        assert!(exists_path.exists(), "exists.md should exist for this test");
1140
1141        // Create test content with both existing and missing links
1142        let content = r#"
1143# Test Document
1144
1145[Valid Link](exists.md)
1146[Invalid Link](missing.md)
1147[External Link](https://example.com)
1148[Media Link](image.jpg)
1149        "#;
1150
1151        // Initialize rule with the base path (default: check all files including media)
1152        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1153
1154        // Test the rule
1155        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1156        let result = rule.check(&ctx).unwrap();
1157
1158        // Should have two warnings: missing.md and image.jpg (both don't exist)
1159        assert_eq!(result.len(), 2);
1160        let messages: Vec<_> = result.iter().map(|w| w.message.as_str()).collect();
1161        assert!(messages.iter().any(|m| m.contains("missing.md")));
1162        assert!(messages.iter().any(|m| m.contains("image.jpg")));
1163    }
1164
1165    #[test]
1166    fn test_angle_bracket_links() {
1167        // Create a temporary directory for test files
1168        let temp_dir = tempdir().unwrap();
1169        let base_path = temp_dir.path();
1170
1171        // Create an existing file
1172        let exists_path = base_path.join("exists.md");
1173        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
1174
1175        // Create test content with angle bracket links
1176        let content = r#"
1177# Test Document
1178
1179[Valid Link](<exists.md>)
1180[Invalid Link](<missing.md>)
1181[External Link](<https://example.com>)
1182    "#;
1183
1184        // Test with default settings
1185        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1186
1187        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1188        let result = rule.check(&ctx).unwrap();
1189
1190        // Should have one warning for missing.md
1191        assert_eq!(result.len(), 1, "Should have exactly one warning");
1192        assert!(
1193            result[0].message.contains("missing.md"),
1194            "Warning should mention missing.md"
1195        );
1196    }
1197
1198    #[test]
1199    fn test_angle_bracket_links_with_parens() {
1200        // Create a temporary directory for test files
1201        let temp_dir = tempdir().unwrap();
1202        let base_path = temp_dir.path();
1203
1204        // Create directory structure with parentheses in path
1205        let app_dir = base_path.join("app");
1206        std::fs::create_dir(&app_dir).unwrap();
1207        let upload_dir = app_dir.join("(upload)");
1208        std::fs::create_dir(&upload_dir).unwrap();
1209        let page_file = upload_dir.join("page.tsx");
1210        File::create(&page_file)
1211            .unwrap()
1212            .write_all(b"export default function Page() {}")
1213            .unwrap();
1214
1215        // Create test content with angle bracket links containing parentheses
1216        let content = r#"
1217# Test Document with Paths Containing Parens
1218
1219[Upload Page](<app/(upload)/page.tsx>)
1220[Unix pipe](<https://en.wikipedia.org/wiki/Pipeline_(Unix)>)
1221[Missing](<app/(missing)/file.md>)
1222"#;
1223
1224        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1225
1226        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1227        let result = rule.check(&ctx).unwrap();
1228
1229        // Should only have one warning for the missing file
1230        assert_eq!(
1231            result.len(),
1232            1,
1233            "Should have exactly one warning for missing file. Got: {result:?}"
1234        );
1235        assert!(
1236            result[0].message.contains("app/(missing)/file.md"),
1237            "Warning should mention app/(missing)/file.md"
1238        );
1239    }
1240
1241    #[test]
1242    fn test_all_file_types_checked() {
1243        // Create a temporary directory for test files
1244        let temp_dir = tempdir().unwrap();
1245        let base_path = temp_dir.path();
1246
1247        // Create a test with various file types - all should be checked
1248        let content = r#"
1249[Image Link](image.jpg)
1250[Video Link](video.mp4)
1251[Markdown Link](document.md)
1252[PDF Link](file.pdf)
1253"#;
1254
1255        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1256
1257        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1258        let result = rule.check(&ctx).unwrap();
1259
1260        // Should warn about all missing files regardless of extension
1261        assert_eq!(result.len(), 4, "Should have warnings for all missing files");
1262    }
1263
1264    #[test]
1265    fn test_code_span_detection() {
1266        let rule = MD057ExistingRelativeLinks::new();
1267
1268        // Create a temporary directory for test files
1269        let temp_dir = tempdir().unwrap();
1270        let base_path = temp_dir.path();
1271
1272        let rule = rule.with_path(base_path);
1273
1274        // Test with document structure
1275        let content = "This is a [link](nonexistent.md) and `[not a link](not-checked.md)` in code.";
1276
1277        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1278        let result = rule.check(&ctx).unwrap();
1279
1280        // Should only find the real link, not the one in code
1281        assert_eq!(result.len(), 1, "Should only flag the real link");
1282        assert!(result[0].message.contains("nonexistent.md"));
1283    }
1284
1285    #[test]
1286    fn test_inline_code_spans() {
1287        // Create a temporary directory for test files
1288        let temp_dir = tempdir().unwrap();
1289        let base_path = temp_dir.path();
1290
1291        // Create test content with links in inline code spans
1292        let content = r#"
1293# Test Document
1294
1295This is a normal link: [Link](missing.md)
1296
1297This is a code span with a link: `[Link](another-missing.md)`
1298
1299Some more text with `inline code [Link](yet-another-missing.md) embedded`.
1300
1301    "#;
1302
1303        // Initialize rule with the base path
1304        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1305
1306        // Test the rule
1307        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1308        let result = rule.check(&ctx).unwrap();
1309
1310        // Should only have warning for the normal link, not for links in code spans
1311        assert_eq!(result.len(), 1, "Should have exactly one warning");
1312        assert!(
1313            result[0].message.contains("missing.md"),
1314            "Warning should be for missing.md"
1315        );
1316        assert!(
1317            !result.iter().any(|w| w.message.contains("another-missing.md")),
1318            "Should not warn about link in code span"
1319        );
1320        assert!(
1321            !result.iter().any(|w| w.message.contains("yet-another-missing.md")),
1322            "Should not warn about link in inline code"
1323        );
1324    }
1325
1326    #[test]
1327    fn test_extensionless_link_resolution() {
1328        // Create a temporary directory for test files
1329        let temp_dir = tempdir().unwrap();
1330        let base_path = temp_dir.path();
1331
1332        // Create a markdown file WITHOUT specifying .md extension in the link
1333        let page_path = base_path.join("page.md");
1334        File::create(&page_path).unwrap().write_all(b"# Page").unwrap();
1335
1336        // Test content with extensionless link that should resolve to page.md
1337        let content = r#"
1338# Test Document
1339
1340[Link without extension](page)
1341[Link with extension](page.md)
1342[Missing link](nonexistent)
1343"#;
1344
1345        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1346
1347        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1348        let result = rule.check(&ctx).unwrap();
1349
1350        // Should only have warning for nonexistent link
1351        // Both "page" and "page.md" should resolve to the same file
1352        assert_eq!(result.len(), 1, "Should only warn about nonexistent link");
1353        assert!(
1354            result[0].message.contains("nonexistent"),
1355            "Warning should be for 'nonexistent' not 'page'"
1356        );
1357    }
1358
1359    // Cross-file validation tests
1360    #[test]
1361    fn test_cross_file_scope() {
1362        let rule = MD057ExistingRelativeLinks::new();
1363        assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
1364    }
1365
1366    #[test]
1367    fn test_contribute_to_index_extracts_markdown_links() {
1368        let rule = MD057ExistingRelativeLinks::new();
1369        let content = r#"
1370# Document
1371
1372[Link to docs](./docs/guide.md)
1373[Link with fragment](./other.md#section)
1374[External link](https://example.com)
1375[Image link](image.png)
1376[Media file](video.mp4)
1377"#;
1378
1379        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1380        let mut index = FileIndex::new();
1381        rule.contribute_to_index(&ctx, &mut index);
1382
1383        // Should only index markdown file links
1384        assert_eq!(index.cross_file_links.len(), 2);
1385
1386        // Check first link
1387        assert_eq!(index.cross_file_links[0].target_path, "./docs/guide.md");
1388        assert_eq!(index.cross_file_links[0].fragment, "");
1389
1390        // Check second link (with fragment)
1391        assert_eq!(index.cross_file_links[1].target_path, "./other.md");
1392        assert_eq!(index.cross_file_links[1].fragment, "section");
1393    }
1394
1395    #[test]
1396    fn test_contribute_to_index_skips_external_and_anchors() {
1397        let rule = MD057ExistingRelativeLinks::new();
1398        let content = r#"
1399# Document
1400
1401[External](https://example.com)
1402[Another external](http://example.org)
1403[Fragment only](#section)
1404[FTP link](ftp://files.example.com)
1405[Mail link](mailto:test@example.com)
1406[WWW link](www.example.com)
1407"#;
1408
1409        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1410        let mut index = FileIndex::new();
1411        rule.contribute_to_index(&ctx, &mut index);
1412
1413        // Should not index any of these
1414        assert_eq!(index.cross_file_links.len(), 0);
1415    }
1416
1417    #[test]
1418    fn test_cross_file_check_valid_link() {
1419        use crate::workspace_index::WorkspaceIndex;
1420
1421        let rule = MD057ExistingRelativeLinks::new();
1422
1423        // Create a workspace index with the target file
1424        let mut workspace_index = WorkspaceIndex::new();
1425        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());
1426
1427        // Create file index with a link to an existing file
1428        let mut file_index = FileIndex::new();
1429        file_index.add_cross_file_link(CrossFileLinkIndex {
1430            target_path: "guide.md".to_string(),
1431            fragment: "".to_string(),
1432            line: 5,
1433            column: 1,
1434        });
1435
1436        // Run cross-file check from docs/index.md
1437        let warnings = rule
1438            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1439            .unwrap();
1440
1441        // Should have no warnings - file exists
1442        assert!(warnings.is_empty());
1443    }
1444
1445    #[test]
1446    fn test_cross_file_check_missing_link() {
1447        use crate::workspace_index::WorkspaceIndex;
1448
1449        let rule = MD057ExistingRelativeLinks::new();
1450
1451        // Create an empty workspace index
1452        let workspace_index = WorkspaceIndex::new();
1453
1454        // Create file index with a link to a missing file
1455        let mut file_index = FileIndex::new();
1456        file_index.add_cross_file_link(CrossFileLinkIndex {
1457            target_path: "missing.md".to_string(),
1458            fragment: "".to_string(),
1459            line: 5,
1460            column: 1,
1461        });
1462
1463        // Run cross-file check
1464        let warnings = rule
1465            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1466            .unwrap();
1467
1468        // Should have one warning for the missing file
1469        assert_eq!(warnings.len(), 1);
1470        assert!(warnings[0].message.contains("missing.md"));
1471        assert!(warnings[0].message.contains("does not exist"));
1472    }
1473
1474    #[test]
1475    fn test_cross_file_check_parent_path() {
1476        use crate::workspace_index::WorkspaceIndex;
1477
1478        let rule = MD057ExistingRelativeLinks::new();
1479
1480        // Create a workspace index with the target file at the root
1481        let mut workspace_index = WorkspaceIndex::new();
1482        workspace_index.insert_file(PathBuf::from("readme.md"), FileIndex::new());
1483
1484        // Create file index with a parent path link
1485        let mut file_index = FileIndex::new();
1486        file_index.add_cross_file_link(CrossFileLinkIndex {
1487            target_path: "../readme.md".to_string(),
1488            fragment: "".to_string(),
1489            line: 5,
1490            column: 1,
1491        });
1492
1493        // Run cross-file check from docs/guide.md
1494        let warnings = rule
1495            .cross_file_check(Path::new("docs/guide.md"), &file_index, &workspace_index)
1496            .unwrap();
1497
1498        // Should have no warnings - file exists at normalized path
1499        assert!(warnings.is_empty());
1500    }
1501
1502    #[test]
1503    fn test_cross_file_check_html_link_with_md_source() {
1504        // Test that .html links are accepted when corresponding .md source exists
1505        // This supports mdBook and similar doc generators that compile .md to .html
1506        use crate::workspace_index::WorkspaceIndex;
1507
1508        let rule = MD057ExistingRelativeLinks::new();
1509
1510        // Create a workspace index with the .md source file
1511        let mut workspace_index = WorkspaceIndex::new();
1512        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());
1513
1514        // Create file index with an .html link (from another rule like MD051)
1515        let mut file_index = FileIndex::new();
1516        file_index.add_cross_file_link(CrossFileLinkIndex {
1517            target_path: "guide.html".to_string(),
1518            fragment: "section".to_string(),
1519            line: 10,
1520            column: 5,
1521        });
1522
1523        // Run cross-file check from docs/index.md
1524        let warnings = rule
1525            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1526            .unwrap();
1527
1528        // Should have no warnings - .md source exists for the .html link
1529        assert!(
1530            warnings.is_empty(),
1531            "Expected no warnings for .html link with .md source, got: {warnings:?}"
1532        );
1533    }
1534
1535    #[test]
1536    fn test_cross_file_check_html_link_without_source() {
1537        // Test that .html links without corresponding .md source ARE flagged
1538        use crate::workspace_index::WorkspaceIndex;
1539
1540        let rule = MD057ExistingRelativeLinks::new();
1541
1542        // Create an empty workspace index
1543        let workspace_index = WorkspaceIndex::new();
1544
1545        // Create file index with an .html link to a non-existent file
1546        let mut file_index = FileIndex::new();
1547        file_index.add_cross_file_link(CrossFileLinkIndex {
1548            target_path: "missing.html".to_string(),
1549            fragment: "".to_string(),
1550            line: 10,
1551            column: 5,
1552        });
1553
1554        // Run cross-file check from docs/index.md
1555        let warnings = rule
1556            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1557            .unwrap();
1558
1559        // Should have one warning - no .md source exists
1560        assert_eq!(warnings.len(), 1, "Expected 1 warning for .html link without source");
1561        assert!(warnings[0].message.contains("missing.html"));
1562    }
1563
1564    #[test]
1565    fn test_normalize_path_function() {
1566        // Test simple cases
1567        assert_eq!(
1568            normalize_path(Path::new("docs/guide.md")),
1569            PathBuf::from("docs/guide.md")
1570        );
1571
1572        // Test current directory removal
1573        assert_eq!(
1574            normalize_path(Path::new("./docs/guide.md")),
1575            PathBuf::from("docs/guide.md")
1576        );
1577
1578        // Test parent directory resolution
1579        assert_eq!(
1580            normalize_path(Path::new("docs/sub/../guide.md")),
1581            PathBuf::from("docs/guide.md")
1582        );
1583
1584        // Test multiple parent directories
1585        assert_eq!(normalize_path(Path::new("a/b/c/../../d.md")), PathBuf::from("a/d.md"));
1586    }
1587
1588    #[test]
1589    fn test_html_link_with_md_source() {
1590        // Links to .html files should pass if corresponding .md source exists
1591        let temp_dir = tempdir().unwrap();
1592        let base_path = temp_dir.path();
1593
1594        // Create guide.md (source file)
1595        let md_file = base_path.join("guide.md");
1596        File::create(&md_file).unwrap().write_all(b"# Guide").unwrap();
1597
1598        let content = r#"
1599[Read the guide](guide.html)
1600[Also here](getting-started.html)
1601"#;
1602
1603        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1604        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1605        let result = rule.check(&ctx).unwrap();
1606
1607        // guide.html passes (guide.md exists), getting-started.html fails
1608        assert_eq!(
1609            result.len(),
1610            1,
1611            "Should only warn about missing source. Got: {result:?}"
1612        );
1613        assert!(result[0].message.contains("getting-started.html"));
1614    }
1615
1616    #[test]
1617    fn test_htm_link_with_md_source() {
1618        // .htm extension should also check for markdown source
1619        let temp_dir = tempdir().unwrap();
1620        let base_path = temp_dir.path();
1621
1622        let md_file = base_path.join("page.md");
1623        File::create(&md_file).unwrap().write_all(b"# Page").unwrap();
1624
1625        let content = "[Page](page.htm)";
1626
1627        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1628        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1629        let result = rule.check(&ctx).unwrap();
1630
1631        assert!(
1632            result.is_empty(),
1633            "Should not warn when .md source exists for .htm link"
1634        );
1635    }
1636
1637    #[test]
1638    fn test_html_link_finds_various_markdown_extensions() {
1639        // Should find .mdx, .markdown, etc. as source files
1640        let temp_dir = tempdir().unwrap();
1641        let base_path = temp_dir.path();
1642
1643        File::create(base_path.join("doc.md")).unwrap();
1644        File::create(base_path.join("tutorial.mdx")).unwrap();
1645        File::create(base_path.join("guide.markdown")).unwrap();
1646
1647        let content = r#"
1648[Doc](doc.html)
1649[Tutorial](tutorial.html)
1650[Guide](guide.html)
1651"#;
1652
1653        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1654        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1655        let result = rule.check(&ctx).unwrap();
1656
1657        assert!(
1658            result.is_empty(),
1659            "Should find all markdown variants as source files. Got: {result:?}"
1660        );
1661    }
1662
1663    #[test]
1664    fn test_html_link_in_subdirectory() {
1665        // Should find markdown source in subdirectories
1666        let temp_dir = tempdir().unwrap();
1667        let base_path = temp_dir.path();
1668
1669        let docs_dir = base_path.join("docs");
1670        std::fs::create_dir(&docs_dir).unwrap();
1671        File::create(docs_dir.join("guide.md"))
1672            .unwrap()
1673            .write_all(b"# Guide")
1674            .unwrap();
1675
1676        let content = "[Guide](docs/guide.html)";
1677
1678        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1679        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1680        let result = rule.check(&ctx).unwrap();
1681
1682        assert!(result.is_empty(), "Should find markdown source in subdirectory");
1683    }
1684
1685    #[test]
1686    fn test_absolute_path_skipped_in_check() {
1687        // Test that absolute paths are skipped during link validation
1688        // This fixes the bug where /pkg/runtime was being flagged
1689        let temp_dir = tempdir().unwrap();
1690        let base_path = temp_dir.path();
1691
1692        let content = r#"
1693# Test Document
1694
1695[Go Runtime](/pkg/runtime)
1696[Go Runtime with Fragment](/pkg/runtime#section)
1697[API Docs](/api/v1/users)
1698[Blog Post](/blog/2024/release.html)
1699[React Hook](/react/hooks/use-state.html)
1700"#;
1701
1702        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1703        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1704        let result = rule.check(&ctx).unwrap();
1705
1706        // Should have NO warnings - all absolute paths should be skipped
1707        assert!(
1708            result.is_empty(),
1709            "Absolute paths should be skipped. Got warnings: {result:?}"
1710        );
1711    }
1712
1713    #[test]
1714    fn test_absolute_path_skipped_in_cross_file_check() {
1715        // Test that absolute paths are skipped in cross_file_check()
1716        use crate::workspace_index::WorkspaceIndex;
1717
1718        let rule = MD057ExistingRelativeLinks::new();
1719
1720        // Create an empty workspace index (no files exist)
1721        let workspace_index = WorkspaceIndex::new();
1722
1723        // Create file index with absolute path links (should be skipped)
1724        let mut file_index = FileIndex::new();
1725        file_index.add_cross_file_link(CrossFileLinkIndex {
1726            target_path: "/pkg/runtime.md".to_string(),
1727            fragment: "".to_string(),
1728            line: 5,
1729            column: 1,
1730        });
1731        file_index.add_cross_file_link(CrossFileLinkIndex {
1732            target_path: "/api/v1/users.md".to_string(),
1733            fragment: "section".to_string(),
1734            line: 10,
1735            column: 1,
1736        });
1737
1738        // Run cross-file check
1739        let warnings = rule
1740            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1741            .unwrap();
1742
1743        // Should have NO warnings - absolute paths should be skipped
1744        assert!(
1745            warnings.is_empty(),
1746            "Absolute paths should be skipped in cross_file_check. Got warnings: {warnings:?}"
1747        );
1748    }
1749
1750    #[test]
1751    fn test_protocol_relative_url_not_skipped() {
1752        // Test that protocol-relative URLs (//example.com) are NOT skipped as absolute paths
1753        // They should still be caught by is_external_url() though
1754        let temp_dir = tempdir().unwrap();
1755        let base_path = temp_dir.path();
1756
1757        let content = r#"
1758# Test Document
1759
1760[External](//example.com/page)
1761[Another](//cdn.example.com/asset.js)
1762"#;
1763
1764        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1765        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1766        let result = rule.check(&ctx).unwrap();
1767
1768        // Should have NO warnings - protocol-relative URLs are external and should be skipped
1769        assert!(
1770            result.is_empty(),
1771            "Protocol-relative URLs should be skipped. Got warnings: {result:?}"
1772        );
1773    }
1774
1775    #[test]
1776    fn test_email_addresses_skipped() {
1777        // Test that email addresses without mailto: are skipped
1778        // These are clearly not file links (the @ symbol is definitive)
1779        let temp_dir = tempdir().unwrap();
1780        let base_path = temp_dir.path();
1781
1782        let content = r#"
1783# Test Document
1784
1785[Contact](user@example.com)
1786[Steering](steering@kubernetes.io)
1787[Support](john.doe+filter@company.co.uk)
1788[User](user_name@sub.domain.com)
1789"#;
1790
1791        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1792        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1793        let result = rule.check(&ctx).unwrap();
1794
1795        // Should have NO warnings - email addresses are clearly not file links and should be skipped
1796        assert!(
1797            result.is_empty(),
1798            "Email addresses should be skipped. Got warnings: {result:?}"
1799        );
1800    }
1801
1802    #[test]
1803    fn test_email_addresses_vs_file_paths() {
1804        // Test that email addresses (anything with @) are skipped
1805        // Note: File paths with @ are extremely rare, so we treat anything with @ as an email
1806        let temp_dir = tempdir().unwrap();
1807        let base_path = temp_dir.path();
1808
1809        let content = r#"
1810# Test Document
1811
1812[Email](user@example.com)  <!-- Should be skipped (email) -->
1813[Email2](steering@kubernetes.io)  <!-- Should be skipped (email) -->
1814[Email3](user@file.md)  <!-- Should be skipped (has @, treated as email) -->
1815"#;
1816
1817        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1818        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1819        let result = rule.check(&ctx).unwrap();
1820
1821        // All should be skipped - anything with @ is treated as an email
1822        assert!(
1823            result.is_empty(),
1824            "All email addresses should be skipped. Got: {result:?}"
1825        );
1826    }
1827
1828    #[test]
1829    fn test_diagnostic_position_accuracy() {
1830        // Test that diagnostics point to the URL, not the link text
1831        let temp_dir = tempdir().unwrap();
1832        let base_path = temp_dir.path();
1833
1834        // Position markers:     0         1         2         3
1835        //                       0123456789012345678901234567890123456789
1836        let content = "prefix [text](missing.md) suffix";
1837        //             The URL "missing.md" starts at 0-indexed position 14
1838        //             which is 1-indexed column 15, and ends at 0-indexed 24 (1-indexed column 25)
1839
1840        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1841        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1842        let result = rule.check(&ctx).unwrap();
1843
1844        assert_eq!(result.len(), 1, "Should have exactly one warning");
1845        assert_eq!(result[0].line, 1, "Should be on line 1");
1846        assert_eq!(result[0].column, 15, "Should point to start of URL 'missing.md'");
1847        assert_eq!(result[0].end_column, 25, "Should point past end of URL 'missing.md'");
1848    }
1849
1850    #[test]
1851    fn test_diagnostic_position_angle_brackets() {
1852        // Test position accuracy with angle bracket links
1853        let temp_dir = tempdir().unwrap();
1854        let base_path = temp_dir.path();
1855
1856        // Position markers:     0         1         2
1857        //                       012345678901234567890
1858        let content = "[link](<missing.md>)";
1859        //             The URL "missing.md" starts at 0-indexed position 8 (1-indexed column 9)
1860
1861        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1862        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1863        let result = rule.check(&ctx).unwrap();
1864
1865        assert_eq!(result.len(), 1, "Should have exactly one warning");
1866        assert_eq!(result[0].line, 1, "Should be on line 1");
1867        assert_eq!(result[0].column, 9, "Should point to start of URL in angle brackets");
1868    }
1869
1870    #[test]
1871    fn test_diagnostic_position_multiline() {
1872        // Test that line numbers are correct for links on different lines
1873        let temp_dir = tempdir().unwrap();
1874        let base_path = temp_dir.path();
1875
1876        let content = r#"# Title
1877Some text on line 2
1878[link on line 3](missing1.md)
1879More text
1880[link on line 5](missing2.md)"#;
1881
1882        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1883        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1884        let result = rule.check(&ctx).unwrap();
1885
1886        assert_eq!(result.len(), 2, "Should have two warnings");
1887
1888        // First warning should be on line 3
1889        assert_eq!(result[0].line, 3, "First warning should be on line 3");
1890        assert!(result[0].message.contains("missing1.md"));
1891
1892        // Second warning should be on line 5
1893        assert_eq!(result[1].line, 5, "Second warning should be on line 5");
1894        assert!(result[1].message.contains("missing2.md"));
1895    }
1896
1897    #[test]
1898    fn test_diagnostic_position_with_spaces() {
1899        // Test position with URLs that have spaces in parentheses
1900        let temp_dir = tempdir().unwrap();
1901        let base_path = temp_dir.path();
1902
1903        let content = "[link]( missing.md )";
1904        //             0123456789012345678901
1905        //             0-indexed position 8 is 'm' in 'missing.md' (after space and paren)
1906        //             which is 1-indexed column 9
1907
1908        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1909        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1910        let result = rule.check(&ctx).unwrap();
1911
1912        assert_eq!(result.len(), 1, "Should have exactly one warning");
1913        // The regex captures the URL without leading/trailing spaces
1914        assert_eq!(result[0].column, 9, "Should point to URL after stripping spaces");
1915    }
1916
1917    #[test]
1918    fn test_diagnostic_position_image() {
1919        // Test that image diagnostics also have correct positions
1920        let temp_dir = tempdir().unwrap();
1921        let base_path = temp_dir.path();
1922
1923        let content = "![alt text](missing.jpg)";
1924
1925        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1926        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1927        let result = rule.check(&ctx).unwrap();
1928
1929        assert_eq!(result.len(), 1, "Should have exactly one warning for image");
1930        assert_eq!(result[0].line, 1);
1931        // Images use start_col from the parser, which should point to the URL
1932        assert!(result[0].column > 0, "Should have valid column position");
1933        assert!(result[0].message.contains("missing.jpg"));
1934    }
1935
1936    #[test]
1937    fn test_wikilinks_skipped() {
1938        // Wikilinks should not trigger MD057 warnings
1939        // They use a different linking system (e.g., Obsidian, wiki software)
1940        let temp_dir = tempdir().unwrap();
1941        let base_path = temp_dir.path();
1942
1943        let content = r#"# Test Document
1944
1945[[Microsoft#Windows OS]]
1946[[SomePage]]
1947[[Page With Spaces]]
1948[[path/to/page#section]]
1949[[page|Display Text]]
1950
1951This is a [real missing link](missing.md) that should be flagged.
1952"#;
1953
1954        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1955        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1956        let result = rule.check(&ctx).unwrap();
1957
1958        // Should only warn about the regular markdown link, not wikilinks
1959        assert_eq!(
1960            result.len(),
1961            1,
1962            "Should only warn about missing.md, not wikilinks. Got: {result:?}"
1963        );
1964        assert!(
1965            result[0].message.contains("missing.md"),
1966            "Warning should be for missing.md, not wikilinks"
1967        );
1968    }
1969
1970    #[test]
1971    fn test_wikilinks_not_added_to_index() {
1972        // Wikilinks should not be added to the cross-file link index
1973        let temp_dir = tempdir().unwrap();
1974        let base_path = temp_dir.path();
1975
1976        let content = r#"# Test Document
1977
1978[[Microsoft#Windows OS]]
1979[[SomePage#section]]
1980[Regular Link](other.md)
1981"#;
1982
1983        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1984        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1985
1986        let mut file_index = FileIndex::new();
1987        rule.contribute_to_index(&ctx, &mut file_index);
1988
1989        // Should only have the regular markdown link (if it's a markdown file)
1990        // Wikilinks should not be added
1991        let cross_file_links = &file_index.cross_file_links;
1992        assert_eq!(
1993            cross_file_links.len(),
1994            1,
1995            "Only regular markdown links should be indexed, not wikilinks. Got: {cross_file_links:?}"
1996        );
1997        assert_eq!(file_index.cross_file_links[0].target_path, "other.md");
1998    }
1999
2000    #[test]
2001    fn test_reference_definition_missing_file() {
2002        // Reference definitions [ref]: ./path.md should be checked
2003        let temp_dir = tempdir().unwrap();
2004        let base_path = temp_dir.path();
2005
2006        let content = r#"# Test Document
2007
2008[test]: ./missing.md
2009[example]: ./nonexistent.html
2010
2011Use [test] and [example] here.
2012"#;
2013
2014        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2015        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2016        let result = rule.check(&ctx).unwrap();
2017
2018        // Should have warnings for both reference definitions
2019        assert_eq!(
2020            result.len(),
2021            2,
2022            "Should have warnings for missing reference definition targets. Got: {result:?}"
2023        );
2024        assert!(
2025            result.iter().any(|w| w.message.contains("missing.md")),
2026            "Should warn about missing.md"
2027        );
2028        assert!(
2029            result.iter().any(|w| w.message.contains("nonexistent.html")),
2030            "Should warn about nonexistent.html"
2031        );
2032    }
2033
2034    #[test]
2035    fn test_reference_definition_existing_file() {
2036        // Reference definitions to existing files should NOT trigger warnings
2037        let temp_dir = tempdir().unwrap();
2038        let base_path = temp_dir.path();
2039
2040        // Create an existing file
2041        let exists_path = base_path.join("exists.md");
2042        File::create(&exists_path)
2043            .unwrap()
2044            .write_all(b"# Existing file")
2045            .unwrap();
2046
2047        let content = r#"# Test Document
2048
2049[test]: ./exists.md
2050
2051Use [test] here.
2052"#;
2053
2054        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2055        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2056        let result = rule.check(&ctx).unwrap();
2057
2058        // Should have NO warnings since the file exists
2059        assert!(
2060            result.is_empty(),
2061            "Should not warn about existing file. Got: {result:?}"
2062        );
2063    }
2064
2065    #[test]
2066    fn test_reference_definition_external_url_skipped() {
2067        // Reference definitions with external URLs should be skipped
2068        let temp_dir = tempdir().unwrap();
2069        let base_path = temp_dir.path();
2070
2071        let content = r#"# Test Document
2072
2073[google]: https://google.com
2074[example]: http://example.org
2075[mail]: mailto:test@example.com
2076[ftp]: ftp://files.example.com
2077[local]: ./missing.md
2078
2079Use [google], [example], [mail], [ftp], [local] here.
2080"#;
2081
2082        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2083        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2084        let result = rule.check(&ctx).unwrap();
2085
2086        // Should only warn about the local missing file, not external URLs
2087        assert_eq!(
2088            result.len(),
2089            1,
2090            "Should only warn about local missing file. Got: {result:?}"
2091        );
2092        assert!(
2093            result[0].message.contains("missing.md"),
2094            "Warning should be for missing.md"
2095        );
2096    }
2097
2098    #[test]
2099    fn test_reference_definition_fragment_only_skipped() {
2100        // Reference definitions with fragment-only URLs should be skipped
2101        let temp_dir = tempdir().unwrap();
2102        let base_path = temp_dir.path();
2103
2104        let content = r#"# Test Document
2105
2106[section]: #my-section
2107
2108Use [section] here.
2109"#;
2110
2111        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2112        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2113        let result = rule.check(&ctx).unwrap();
2114
2115        // Should have NO warnings for fragment-only links
2116        assert!(
2117            result.is_empty(),
2118            "Should not warn about fragment-only reference. Got: {result:?}"
2119        );
2120    }
2121
2122    #[test]
2123    fn test_reference_definition_column_position() {
2124        // Test that column position points to the URL in the reference definition
2125        let temp_dir = tempdir().unwrap();
2126        let base_path = temp_dir.path();
2127
2128        // Position markers:     0         1         2
2129        //                       0123456789012345678901
2130        let content = "[ref]: ./missing.md";
2131        //             The URL "./missing.md" starts at 0-indexed position 7
2132        //             which is 1-indexed column 8
2133
2134        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2135        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2136        let result = rule.check(&ctx).unwrap();
2137
2138        assert_eq!(result.len(), 1, "Should have exactly one warning");
2139        assert_eq!(result[0].line, 1, "Should be on line 1");
2140        assert_eq!(result[0].column, 8, "Should point to start of URL './missing.md'");
2141    }
2142
2143    #[test]
2144    fn test_reference_definition_html_with_md_source() {
2145        // Reference definitions to .html files should pass if corresponding .md source exists
2146        let temp_dir = tempdir().unwrap();
2147        let base_path = temp_dir.path();
2148
2149        // Create guide.md (source file)
2150        let md_file = base_path.join("guide.md");
2151        File::create(&md_file).unwrap().write_all(b"# Guide").unwrap();
2152
2153        let content = r#"# Test Document
2154
2155[guide]: ./guide.html
2156[missing]: ./missing.html
2157
2158Use [guide] and [missing] here.
2159"#;
2160
2161        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2162        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2163        let result = rule.check(&ctx).unwrap();
2164
2165        // guide.html passes (guide.md exists), missing.html fails
2166        assert_eq!(
2167            result.len(),
2168            1,
2169            "Should only warn about missing source. Got: {result:?}"
2170        );
2171        assert!(result[0].message.contains("missing.html"));
2172    }
2173
2174    #[test]
2175    fn test_reference_definition_url_encoded() {
2176        // Reference definitions with URL-encoded paths should be decoded before checking
2177        let temp_dir = tempdir().unwrap();
2178        let base_path = temp_dir.path();
2179
2180        // Create a file with spaces in the name
2181        let file_with_spaces = base_path.join("file with spaces.md");
2182        File::create(&file_with_spaces).unwrap().write_all(b"# Spaces").unwrap();
2183
2184        let content = r#"# Test Document
2185
2186[spaces]: ./file%20with%20spaces.md
2187[missing]: ./missing%20file.md
2188
2189Use [spaces] and [missing] here.
2190"#;
2191
2192        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2193        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2194        let result = rule.check(&ctx).unwrap();
2195
2196        // Should only warn about the missing file
2197        assert_eq!(
2198            result.len(),
2199            1,
2200            "Should only warn about missing URL-encoded file. Got: {result:?}"
2201        );
2202        assert!(result[0].message.contains("missing%20file.md"));
2203    }
2204
2205    #[test]
2206    fn test_inline_and_reference_both_checked() {
2207        // Both inline links and reference definitions should be checked
2208        let temp_dir = tempdir().unwrap();
2209        let base_path = temp_dir.path();
2210
2211        let content = r#"# Test Document
2212
2213[inline link](./inline-missing.md)
2214[ref]: ./ref-missing.md
2215
2216Use [ref] here.
2217"#;
2218
2219        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2220        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2221        let result = rule.check(&ctx).unwrap();
2222
2223        // Should warn about both the inline link and the reference definition
2224        assert_eq!(
2225            result.len(),
2226            2,
2227            "Should warn about both inline and reference links. Got: {result:?}"
2228        );
2229        assert!(
2230            result.iter().any(|w| w.message.contains("inline-missing.md")),
2231            "Should warn about inline-missing.md"
2232        );
2233        assert!(
2234            result.iter().any(|w| w.message.contains("ref-missing.md")),
2235            "Should warn about ref-missing.md"
2236        );
2237    }
2238}