rumdl_lib/rules/
md057_existing_relative_links.rs

1//!
2//! Rule MD057: Existing relative links
3//!
4//! See [docs/md057.md](../../docs/md057.md) for full documentation, configuration, and examples.
5
6use crate::rule::{CrossFileScope, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::element_cache::ElementCache;
8use crate::workspace_index::{FileIndex, extract_cross_file_links};
9use regex::Regex;
10use std::collections::HashMap;
11use std::env;
12use std::path::{Path, PathBuf};
13use std::sync::LazyLock;
14use std::sync::{Arc, Mutex};
15
16mod md057_config;
17use md057_config::MD057Config;
18
// Process-wide, mutex-guarded cache of file existence checks.
// Maps a path to the result of a previous `Path::exists()` probe so that repeated
// links to the same target do not hit the filesystem again.
static FILE_EXISTENCE_CACHE: LazyLock<Arc<Mutex<HashMap<PathBuf, bool>>>> =
    LazyLock::new(|| Arc::new(Mutex::new(HashMap::new())));
22
23// Reset the file existence cache (typically between rule runs)
24fn reset_file_existence_cache() {
25    if let Ok(mut cache) = FILE_EXISTENCE_CACHE.lock() {
26        cache.clear();
27    }
28}
29
30// Check if a file exists with caching
31fn file_exists_with_cache(path: &Path) -> bool {
32    match FILE_EXISTENCE_CACHE.lock() {
33        Ok(mut cache) => *cache.entry(path.to_path_buf()).or_insert_with(|| path.exists()),
34        Err(_) => path.exists(), // Fallback to uncached check on mutex poison
35    }
36}
37
38/// Check if a file exists, also trying markdown extensions for extensionless links.
39/// This supports wiki-style links like `[Link](page)` that resolve to `page.md`.
40fn file_exists_or_markdown_extension(path: &Path) -> bool {
41    // First, check exact path
42    if file_exists_with_cache(path) {
43        return true;
44    }
45
46    // If the path has no extension, try adding markdown extensions
47    if path.extension().is_none() {
48        for ext in MARKDOWN_EXTENSIONS {
49            // MARKDOWN_EXTENSIONS includes the dot, e.g., ".md"
50            let path_with_ext = path.with_extension(&ext[1..]);
51            if file_exists_with_cache(&path_with_ext) {
52                return true;
53            }
54        }
55    }
56
57    false
58}
59
// Regex matching the bracketed text part of a link or image: an optional `!`,
// then `[`, any run of characters other than `]`, then `]`.
// It deliberately does not look at what follows - simplified for performance.
static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());

/// Regex to extract the URL from an angle-bracketed markdown link
/// Format: `](<URL>)` or `](<URL> "title")`
/// This handles URLs with parentheses like `](<path/(with)/parens.md>)`
/// Capture group 1 is the URL between `<` and `>`; group 2 is an optional `#fragment`
/// that follows the closing `>`.
static URL_EXTRACT_ANGLE_BRACKET_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\]\(\s*<([^>]+)>(#[^\)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());

/// Regex to extract the URL from a normal markdown link (without angle brackets)
/// Format: `](URL)` or `](URL "title")`
/// Capture group 1 is the URL up to the first `>`, `)`, whitespace or `#`;
/// group 2 is an optional `#fragment`.
static URL_EXTRACT_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new("\\]\\(\\s*([^>\\)\\s#]+)(#[^)\\s]*)?\\s*(?:\"[^\"]*\")?\\s*\\)").unwrap());

/// Regex to detect URLs with explicit schemes (should not be checked as relative links)
/// Matches at the start of the string: `scheme://`, `scheme:` (per RFC 3986), or `www.`
/// This covers http, https, ftp, file, smb, mailto, tel, data, macappstores, etc.
static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());

// Current working directory, captured on first use; falls back to "." if it cannot be read
static CURRENT_DIR: LazyLock<PathBuf> = LazyLock::new(|| env::current_dir().unwrap_or_else(|_| PathBuf::from(".")));
82
/// Map an ASCII hex digit (`0-9`, `a-f`, `A-F`) to its value in `0..=15`.
///
/// Any other byte, including every non-ASCII byte, yields `None`.
#[inline]
fn hex_digit_to_value(byte: u8) -> Option<u8> {
    // `to_digit(16)` accepts exactly the ASCII hex digits in either case.
    char::from(byte)
        .to_digit(16)
        .and_then(|digit| u8::try_from(digit).ok())
}
94
/// Supported markdown file extensions.
///
/// Every entry includes the leading dot. The list is used both to resolve
/// extensionless links (`page` -> `page.md`) and to find the markdown source
/// behind an `.html`/`.htm` link; entries are tried in the order listed here.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md",
    ".markdown",
    ".mdx",
    ".mkd",
    ".mkdn",
    ".mdown",
    ".mdwn",
    ".qmd",
    ".rmd",
];
107
/// Rule MD057: Existing relative links should point to valid files or directories.
///
/// Clones share the same `base_path` cell, because the field is held in an `Arc`.
#[derive(Debug, Clone, Default)]
pub struct MD057ExistingRelativeLinks {
    /// Optional base directory override for resolving relative links.
    /// Set through `with_path`; when present, `check` uses it in preference to
    /// the directory of the file being linted.
    base_path: Arc<Mutex<Option<PathBuf>>>,
}
114
115impl MD057ExistingRelativeLinks {
    /// Create a new instance with default settings (no explicit base path).
    pub fn new() -> Self {
        Self::default()
    }
120
121    /// Set the base path for resolving relative links
122    pub fn with_path<P: AsRef<Path>>(self, path: P) -> Self {
123        let path = path.as_ref();
124        let dir_path = if path.is_file() {
125            path.parent().map(|p| p.to_path_buf())
126        } else {
127            Some(path.to_path_buf())
128        };
129
130        if let Ok(mut guard) = self.base_path.lock() {
131            *guard = dir_path;
132        }
133        self
134    }
135
    /// Build the rule from its deserialized configuration.
    ///
    /// The configuration is currently ignored and a default instance is returned,
    /// hence the `unused_variables` allowance.
    #[allow(unused_variables)]
    pub fn from_config_struct(config: MD057Config) -> Self {
        Self::default()
    }
140
141    /// Check if a URL is external or should be skipped for validation.
142    ///
143    /// Returns `true` (skip validation) for:
144    /// - URLs with protocols: `https://`, `http://`, `ftp://`, `mailto:`, etc.
145    /// - Bare domains: `www.example.com`, `example.com`
146    /// - Email addresses: `user@example.com` (without `mailto:`)
147    /// - Template variables: `{{URL}}`, `{{% include %}}`
148    /// - Absolute web URL paths: `/api/docs`, `/blog/post.html`
149    ///
150    /// Returns `false` (validate) for:
151    /// - Relative filesystem paths: `./file.md`, `../parent/file.md`, `file.md`
152    #[inline]
153    fn is_external_url(&self, url: &str) -> bool {
154        if url.is_empty() {
155            return false;
156        }
157
158        // Quick checks for common external URL patterns
159        if PROTOCOL_DOMAIN_REGEX.is_match(url) || url.starts_with("www.") {
160            return true;
161        }
162
163        // Skip template variables (Handlebars/Mustache/Jinja2 syntax)
164        // Examples: {{URL}}, {{#URL}}, {{> partial}}, {{% include %}}, {{ variable }}
165        if url.starts_with("{{") || url.starts_with("{%") {
166            return true;
167        }
168
169        // Simple check: if URL contains @, it's almost certainly an email address
170        // File paths with @ are extremely rare, so this is a safe heuristic
171        if url.contains('@') {
172            return true; // It's an email address, skip it
173        }
174
175        // Bare domain check (e.g., "example.com")
176        // Note: We intentionally DON'T skip all TLDs like .org, .net, etc.
177        // Links like [text](nodejs.org/path) without a protocol are broken -
178        // they'll be treated as relative paths by markdown renderers.
179        // Flagging them helps users find missing protocols.
180        // We only skip .com as a minimal safety net for the most common case.
181        if url.ends_with(".com") {
182            return true;
183        }
184
185        // Absolute URL paths (e.g., /api/docs, /blog/post.html) are treated as web paths
186        // and skipped. These are typically routes for published documentation sites,
187        // not filesystem paths that can be validated locally.
188        if url.starts_with('/') {
189            return true;
190        }
191
192        // Framework path aliases (resolved by build tools like Vite, webpack, etc.)
193        // These are not filesystem paths but module/asset aliases
194        // Examples: ~/assets/image.png, @images/photo.jpg, @/components/Button.vue
195        if url.starts_with('~') || url.starts_with('@') {
196            return true;
197        }
198
199        // All other cases (relative paths, etc.) are not external
200        false
201    }
202
203    /// Check if the URL is a fragment-only link (internal document link)
204    #[inline]
205    fn is_fragment_only_link(&self, url: &str) -> bool {
206        url.starts_with('#')
207    }
208
209    /// Decode URL percent-encoded sequences in a path.
210    /// Converts `%20` to space, `%2F` to `/`, etc.
211    /// Returns the original string if decoding fails or produces invalid UTF-8.
212    fn url_decode(path: &str) -> String {
213        // Quick check: if no percent sign, return as-is
214        if !path.contains('%') {
215            return path.to_string();
216        }
217
218        let bytes = path.as_bytes();
219        let mut result = Vec::with_capacity(bytes.len());
220        let mut i = 0;
221
222        while i < bytes.len() {
223            if bytes[i] == b'%' && i + 2 < bytes.len() {
224                // Try to parse the two hex digits following %
225                let hex1 = bytes[i + 1];
226                let hex2 = bytes[i + 2];
227                if let (Some(d1), Some(d2)) = (hex_digit_to_value(hex1), hex_digit_to_value(hex2)) {
228                    result.push(d1 * 16 + d2);
229                    i += 3;
230                    continue;
231                }
232            }
233            result.push(bytes[i]);
234            i += 1;
235        }
236
237        // Convert to UTF-8, falling back to original if invalid
238        String::from_utf8(result).unwrap_or_else(|_| path.to_string())
239    }
240
241    /// Strip query parameters and fragments from a URL for file existence checking.
242    /// URLs like `path/to/image.png?raw=true` or `file.md#section` should check
243    /// for `path/to/image.png` or `file.md` respectively.
244    ///
245    /// Note: In standard URLs, query parameters (`?`) come before fragments (`#`),
246    /// so we check for `?` first. If a URL has both, only the query is stripped here
247    /// (fragments are handled separately by the regex in `contribute_to_index`).
248    fn strip_query_and_fragment(url: &str) -> &str {
249        // Find the first occurrence of '?' or '#', whichever comes first
250        // This handles both standard URLs (? before #) and edge cases (# before ?)
251        let query_pos = url.find('?');
252        let fragment_pos = url.find('#');
253
254        match (query_pos, fragment_pos) {
255            (Some(q), Some(f)) => {
256                // Both exist - strip at whichever comes first
257                &url[..q.min(f)]
258            }
259            (Some(q), None) => &url[..q],
260            (None, Some(f)) => &url[..f],
261            (None, None) => url,
262        }
263    }
264
265    /// Resolve a relative link against a provided base path
266    fn resolve_link_path_with_base(link: &str, base_path: &Path) -> PathBuf {
267        base_path.join(link)
268    }
269}
270
271impl Rule for MD057ExistingRelativeLinks {
    /// Rule identifier, also used as `rule_name` in every warning this rule emits.
    fn name(&self) -> &'static str {
        "MD057"
    }
275
    /// One-line human-readable summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Relative links should point to existing files"
    }
279
    /// This rule belongs to the link category.
    fn category(&self) -> RuleCategory {
        RuleCategory::Link
    }
283
284    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
285        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
286    }
287
288    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
289        let content = ctx.content;
290
291        // Early returns for performance
292        if content.is_empty() || !content.contains('[') {
293            return Ok(Vec::new());
294        }
295
296        // Quick check for any potential links before expensive operations
297        // Check for inline links "](", reference definitions "]:", or images "!["
298        if !content.contains("](") && !content.contains("]:") {
299            return Ok(Vec::new());
300        }
301
302        // Reset the file existence cache for a fresh run
303        reset_file_existence_cache();
304
305        let mut warnings = Vec::new();
306
307        // Determine base path for resolving relative links
308        // ALWAYS compute from ctx.source_file for each file - do not reuse cached base_path
309        // This ensures each file resolves links relative to its own directory
310        let base_path: Option<PathBuf> = {
311            // First check if base_path was explicitly set via with_path() (for tests)
312            let explicit_base = self.base_path.lock().ok().and_then(|g| g.clone());
313            if explicit_base.is_some() {
314                explicit_base
315            } else if let Some(ref source_file) = ctx.source_file {
316                // Resolve symlinks to get the actual file location
317                // This ensures relative links are resolved from the target's directory,
318                // not the symlink's directory
319                let resolved_file = source_file.canonicalize().unwrap_or_else(|_| source_file.clone());
320                resolved_file
321                    .parent()
322                    .map(|p| p.to_path_buf())
323                    .or_else(|| Some(CURRENT_DIR.clone()))
324            } else {
325                // No source file available - cannot validate relative links
326                None
327            }
328        };
329
330        // If we still don't have a base path, we can't validate relative links
331        let Some(base_path) = base_path else {
332            return Ok(warnings);
333        };
334
335        // Use LintContext links instead of expensive regex parsing
336        if !ctx.links.is_empty() {
337            // Use LineIndex for correct position calculation across all line ending types
338            let line_index = &ctx.line_index;
339
340            // Create element cache once for all links
341            let element_cache = ElementCache::new(content);
342
343            // Pre-collect lines to avoid repeated line iteration
344            let lines: Vec<&str> = content.lines().collect();
345
346            // Track which lines we've already processed to avoid duplicates
347            // (ctx.links may have multiple entries for the same line, especially with malformed markdown)
348            let mut processed_lines = std::collections::HashSet::new();
349
350            for link in &ctx.links {
351                let line_idx = link.line - 1;
352                if line_idx >= lines.len() {
353                    continue;
354                }
355
356                // Skip if we've already processed this line
357                if !processed_lines.insert(line_idx) {
358                    continue;
359                }
360
361                let line = lines[line_idx];
362
363                // Quick check for link pattern in this line
364                if !line.contains("](") {
365                    continue;
366                }
367
368                // Find all links in this line using optimized regex
369                for link_match in LINK_START_REGEX.find_iter(line) {
370                    let start_pos = link_match.start();
371                    let end_pos = link_match.end();
372
373                    // Calculate absolute position using LineIndex
374                    let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
375                    let absolute_start_pos = line_start_byte + start_pos;
376
377                    // Skip if this link is in a code span
378                    if element_cache.is_in_code_span(absolute_start_pos) {
379                        continue;
380                    }
381
382                    // Skip if this link is in a math span (LaTeX $...$ or $$...$$)
383                    if ctx.is_in_math_span(absolute_start_pos) {
384                        continue;
385                    }
386
387                    // Find the URL part after the link text
388                    // Try angle-bracket regex first (handles URLs with parens like `<path/(with)/parens.md>`)
389                    // Then fall back to normal URL regex
390                    let caps_and_url = URL_EXTRACT_ANGLE_BRACKET_REGEX
391                        .captures_at(line, end_pos - 1)
392                        .and_then(|caps| caps.get(1).map(|g| (caps, g)))
393                        .or_else(|| {
394                            URL_EXTRACT_REGEX
395                                .captures_at(line, end_pos - 1)
396                                .and_then(|caps| caps.get(1).map(|g| (caps, g)))
397                        });
398
399                    if let Some((_caps, url_group)) = caps_and_url {
400                        let url = url_group.as_str().trim();
401
402                        // Skip empty URLs
403                        if url.is_empty() {
404                            continue;
405                        }
406
407                        // Skip rustdoc intra-doc links (backtick-wrapped URLs)
408                        // These are Rust API references, not file paths
409                        // Example: [`f32::is_subnormal`], [`Vec::push`]
410                        if url.starts_with('`') && url.ends_with('`') {
411                            continue;
412                        }
413
414                        // Skip external URLs, absolute paths, and fragment-only links
415                        if self.is_external_url(url) || self.is_fragment_only_link(url) {
416                            continue;
417                        }
418
419                        // Strip query parameters and fragments before checking file existence
420                        let file_path = Self::strip_query_and_fragment(url);
421
422                        // URL-decode the path to handle percent-encoded characters
423                        let decoded_path = Self::url_decode(file_path);
424
425                        // Resolve the relative link against the base path
426                        let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
427
428                        // Check if the file exists, also trying markdown extensions for extensionless links
429                        if file_exists_or_markdown_extension(&resolved_path) {
430                            continue; // File exists, no warning needed
431                        }
432
433                        // For .html/.htm links, check if a corresponding markdown source exists
434                        let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
435                            && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
436                            && let (Some(stem), Some(parent)) = (
437                                resolved_path.file_stem().and_then(|s| s.to_str()),
438                                resolved_path.parent(),
439                            ) {
440                            MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
441                                let source_path = parent.join(format!("{stem}{md_ext}"));
442                                file_exists_with_cache(&source_path)
443                            })
444                        } else {
445                            false
446                        };
447
448                        if has_md_source {
449                            continue; // Markdown source exists, link is valid
450                        }
451
452                        // File doesn't exist and no source file found
453                        // Use actual URL position from regex capture group
454                        // Note: capture group positions are absolute within the line string
455                        let url_start = url_group.start();
456                        let url_end = url_group.end();
457
458                        warnings.push(LintWarning {
459                            rule_name: Some(self.name().to_string()),
460                            line: link.line,
461                            column: url_start + 1, // 1-indexed
462                            end_line: link.line,
463                            end_column: url_end + 1, // 1-indexed
464                            message: format!("Relative link '{url}' does not exist"),
465                            severity: Severity::Error,
466                            fix: None,
467                        });
468                    }
469                }
470            }
471        }
472
473        // Also process images - they have URLs already parsed
474        for image in &ctx.images {
475            let url = image.url.as_ref();
476
477            // Skip empty URLs
478            if url.is_empty() {
479                continue;
480            }
481
482            // Skip external URLs, absolute paths, and fragment-only links
483            if self.is_external_url(url) || self.is_fragment_only_link(url) {
484                continue;
485            }
486
487            // Strip query parameters and fragments before checking file existence
488            let file_path = Self::strip_query_and_fragment(url);
489
490            // URL-decode the path to handle percent-encoded characters
491            let decoded_path = Self::url_decode(file_path);
492
493            // Resolve the relative link against the base path
494            let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
495
496            // Check if the file exists, also trying markdown extensions for extensionless links
497            if file_exists_or_markdown_extension(&resolved_path) {
498                continue; // File exists, no warning needed
499            }
500
501            // For .html/.htm links, check if a corresponding markdown source exists
502            let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
503                && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
504                && let (Some(stem), Some(parent)) = (
505                    resolved_path.file_stem().and_then(|s| s.to_str()),
506                    resolved_path.parent(),
507                ) {
508                MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
509                    let source_path = parent.join(format!("{stem}{md_ext}"));
510                    file_exists_with_cache(&source_path)
511                })
512            } else {
513                false
514            };
515
516            if has_md_source {
517                continue; // Markdown source exists, link is valid
518            }
519
520            // File doesn't exist and no source file found
521            // Images already have correct position from parser
522            warnings.push(LintWarning {
523                rule_name: Some(self.name().to_string()),
524                line: image.line,
525                column: image.start_col + 1,
526                end_line: image.line,
527                end_column: image.start_col + 1 + url.len(),
528                message: format!("Relative link '{url}' does not exist"),
529                severity: Severity::Error,
530                fix: None,
531            });
532        }
533
534        // Also process reference definitions: [ref]: ./path.md
535        for ref_def in &ctx.reference_defs {
536            let url = &ref_def.url;
537
538            // Skip empty URLs
539            if url.is_empty() {
540                continue;
541            }
542
543            // Skip external URLs, absolute paths, and fragment-only links
544            if self.is_external_url(url) || self.is_fragment_only_link(url) {
545                continue;
546            }
547
548            // Strip query parameters and fragments before checking file existence
549            let file_path = Self::strip_query_and_fragment(url);
550
551            // URL-decode the path to handle percent-encoded characters
552            let decoded_path = Self::url_decode(file_path);
553
554            // Resolve the relative link against the base path
555            let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
556
557            // Check if the file exists, also trying markdown extensions for extensionless links
558            if file_exists_or_markdown_extension(&resolved_path) {
559                continue; // File exists, no warning needed
560            }
561
562            // For .html/.htm links, check if a corresponding markdown source exists
563            let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
564                && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
565                && let (Some(stem), Some(parent)) = (
566                    resolved_path.file_stem().and_then(|s| s.to_str()),
567                    resolved_path.parent(),
568                ) {
569                MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
570                    let source_path = parent.join(format!("{stem}{md_ext}"));
571                    file_exists_with_cache(&source_path)
572                })
573            } else {
574                false
575            };
576
577            if has_md_source {
578                continue; // Markdown source exists, link is valid
579            }
580
581            // File doesn't exist and no source file found
582            // Calculate column position: find URL within the line
583            let line_idx = ref_def.line - 1;
584            let column = content.lines().nth(line_idx).map_or(1, |line_content| {
585                // Find URL position in line (after ]: )
586                line_content.find(url.as_str()).map_or(1, |url_pos| url_pos + 1)
587            });
588
589            warnings.push(LintWarning {
590                rule_name: Some(self.name().to_string()),
591                line: ref_def.line,
592                column,
593                end_line: ref_def.line,
594                end_column: column + url.len(),
595                message: format!("Relative link '{url}' does not exist"),
596                severity: Severity::Error,
597                fix: None,
598            });
599        }
600
601        Ok(warnings)
602    }
603
    /// This rule has no automatic fix: the document content is returned unchanged.
    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
        Ok(ctx.content.to_string())
    }
607
    /// Expose the rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
611
    /// Returns `None`: this rule contributes no default configuration section.
    fn default_config_section(&self) -> Option<(String, toml::Value)> {
        // No configurable options for this rule
        None
    }
616
617    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
618    where
619        Self: Sized,
620    {
621        let rule_config = crate::rule_config_serde::load_rule_config::<MD057Config>(config);
622        Box::new(Self::from_config_struct(rule_config))
623    }
624
    /// Declares workspace scope for this rule's cross-file checks.
    fn cross_file_scope(&self) -> CrossFileScope {
        CrossFileScope::Workspace
    }
628
629    fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, index: &mut FileIndex) {
630        // Use the shared utility for cross-file link extraction
631        // This ensures consistent position tracking between CLI and LSP
632        for link in extract_cross_file_links(ctx) {
633            index.add_cross_file_link(link);
634        }
635    }
636
637    fn cross_file_check(
638        &self,
639        file_path: &Path,
640        file_index: &FileIndex,
641        workspace_index: &crate::workspace_index::WorkspaceIndex,
642    ) -> LintResult {
643        let mut warnings = Vec::new();
644
645        // Get the directory containing this file for resolving relative links
646        let file_dir = file_path.parent();
647
648        for cross_link in &file_index.cross_file_links {
649            // URL-decode the path for filesystem operations
650            // The stored path is URL-encoded (e.g., "%F0%9F%91%A4" for emoji 👤)
651            let decoded_target = Self::url_decode(&cross_link.target_path);
652
653            // Skip absolute/protocol-relative paths (web paths, not filesystem paths)
654            if decoded_target.starts_with('/') {
655                continue;
656            }
657
658            // Resolve relative path
659            let target_path = if let Some(dir) = file_dir {
660                dir.join(&decoded_target)
661            } else {
662                Path::new(&decoded_target).to_path_buf()
663            };
664
665            // Normalize the path (handle .., ., etc.)
666            let target_path = normalize_path(&target_path);
667
668            // Check if the target file exists, also trying markdown extensions for extensionless links
669            let file_exists =
670                workspace_index.contains_file(&target_path) || file_exists_or_markdown_extension(&target_path);
671
672            if !file_exists {
673                // For .html/.htm links, check if a corresponding markdown source exists
674                // This handles doc sites (mdBook, etc.) where .md is compiled to .html
675                let has_md_source = if let Some(ext) = target_path.extension().and_then(|e| e.to_str())
676                    && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
677                    && let (Some(stem), Some(parent)) =
678                        (target_path.file_stem().and_then(|s| s.to_str()), target_path.parent())
679                {
680                    MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
681                        let source_path = parent.join(format!("{stem}{md_ext}"));
682                        workspace_index.contains_file(&source_path) || source_path.exists()
683                    })
684                } else {
685                    false
686                };
687
688                if !has_md_source {
689                    warnings.push(LintWarning {
690                        rule_name: Some(self.name().to_string()),
691                        line: cross_link.line,
692                        column: cross_link.column,
693                        end_line: cross_link.line,
694                        end_column: cross_link.column + cross_link.target_path.len(),
695                        message: format!("Relative link '{}' does not exist", cross_link.target_path),
696                        severity: Severity::Error,
697                        fix: None,
698                    });
699                }
700            }
701        }
702
703        Ok(warnings)
704    }
705}
706
/// Normalize a path lexically by resolving `.` and `..` components.
///
/// No filesystem access is performed, so symlinks are not followed.
///
/// - `.` components are dropped.
/// - `..` removes the preceding normal component (`a/b/../c` -> `a/c`).
/// - `..` directly after the root (or a prefix) is dropped, since there is nothing
///   above the root (`/../a` -> `/a`).
/// - `..` with nothing left to remove is kept, so a relative path that climbs out of
///   its starting directory keeps its meaning (`../a` stays `../a`).
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut components: Vec<Component<'_>> = Vec::new();

    for component in path.components() {
        match component {
            // Current directory markers carry no information
            Component::CurDir => {}
            Component::ParentDir => match components.last().copied() {
                // Go up one level by cancelling the previous normal component
                Some(Component::Normal(_)) => {
                    components.pop();
                }
                // Already at the root/prefix: cannot go any higher
                Some(Component::RootDir | Component::Prefix(_)) => {}
                // Nothing to cancel (empty, or only leading `..` so far): keep it
                _ => components.push(component),
            },
            other => components.push(other),
        }
    }

    components.iter().collect()
}
730
731#[cfg(test)]
732mod tests {
733    use super::*;
734    use crate::workspace_index::CrossFileLinkIndex;
735    use std::fs::File;
736    use std::io::Write;
737    use tempfile::tempdir;
738
739    #[test]
740    fn test_strip_query_and_fragment() {
741        // Test query parameter stripping
742        assert_eq!(
743            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true"),
744            "file.png"
745        );
746        assert_eq!(
747            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true&version=1"),
748            "file.png"
749        );
750        assert_eq!(
751            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?"),
752            "file.png"
753        );
754
755        // Test fragment stripping
756        assert_eq!(
757            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section"),
758            "file.md"
759        );
760        assert_eq!(
761            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#"),
762            "file.md"
763        );
764
765        // Test both query and fragment (query comes first, per RFC 3986)
766        assert_eq!(
767            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md?raw=true#section"),
768            "file.md"
769        );
770
771        // Test no query or fragment
772        assert_eq!(
773            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png"),
774            "file.png"
775        );
776
777        // Test with path
778        assert_eq!(
779            MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true"),
780            "path/to/image.png"
781        );
782        assert_eq!(
783            MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true#anchor"),
784            "path/to/image.png"
785        );
786
787        // Edge case: fragment before query (non-standard but possible)
788        assert_eq!(
789            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section?query"),
790            "file.md"
791        );
792    }
793
794    #[test]
795    fn test_url_decode() {
796        // Simple space encoding
797        assert_eq!(
798            MD057ExistingRelativeLinks::url_decode("penguin%20with%20space.jpg"),
799            "penguin with space.jpg"
800        );
801
802        // Path with encoded spaces
803        assert_eq!(
804            MD057ExistingRelativeLinks::url_decode("assets/my%20file%20name.png"),
805            "assets/my file name.png"
806        );
807
808        // Multiple encoded characters
809        assert_eq!(
810            MD057ExistingRelativeLinks::url_decode("hello%20world%21.md"),
811            "hello world!.md"
812        );
813
814        // Lowercase hex
815        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2f%2e%2e"), "/..");
816
817        // Uppercase hex
818        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2F%2E%2E"), "/..");
819
820        // Mixed case hex
821        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2f%2E%2e"), "/..");
822
823        // No encoding - return as-is
824        assert_eq!(
825            MD057ExistingRelativeLinks::url_decode("normal-file.md"),
826            "normal-file.md"
827        );
828
829        // Incomplete percent encoding - leave as-is
830        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%2.txt"), "file%2.txt");
831
832        // Percent at end - leave as-is
833        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%"), "file%");
834
835        // Invalid hex digits - leave as-is
836        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%GG.txt"), "file%GG.txt");
837
838        // Plus sign (should NOT be decoded - that's form encoding, not URL encoding)
839        assert_eq!(MD057ExistingRelativeLinks::url_decode("file+name.txt"), "file+name.txt");
840
841        // Empty string
842        assert_eq!(MD057ExistingRelativeLinks::url_decode(""), "");
843
844        // UTF-8 multi-byte characters (é = C3 A9 in UTF-8)
845        assert_eq!(MD057ExistingRelativeLinks::url_decode("caf%C3%A9.md"), "café.md");
846
847        // Multiple consecutive encoded characters
848        assert_eq!(MD057ExistingRelativeLinks::url_decode("%20%20%20"), "   ");
849
850        // Encoded path separators
851        assert_eq!(
852            MD057ExistingRelativeLinks::url_decode("path%2Fto%2Ffile.md"),
853            "path/to/file.md"
854        );
855
856        // Mixed encoded and non-encoded
857        assert_eq!(
858            MD057ExistingRelativeLinks::url_decode("hello%20world/foo%20bar.md"),
859            "hello world/foo bar.md"
860        );
861
862        // Special characters that are commonly encoded
863        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%5B1%5D.md"), "file[1].md");
864
865        // Percent at position that looks like encoding but isn't valid
866        assert_eq!(MD057ExistingRelativeLinks::url_decode("100%pure.md"), "100%pure.md");
867    }
868
869    #[test]
870    fn test_url_encoded_filenames() {
871        // Create a temporary directory for test files
872        let temp_dir = tempdir().unwrap();
873        let base_path = temp_dir.path();
874
875        // Create a file with spaces in the name
876        let file_with_spaces = base_path.join("penguin with space.jpg");
877        File::create(&file_with_spaces)
878            .unwrap()
879            .write_all(b"image data")
880            .unwrap();
881
882        // Create a subdirectory with spaces
883        let subdir = base_path.join("my images");
884        std::fs::create_dir(&subdir).unwrap();
885        let nested_file = subdir.join("photo 1.png");
886        File::create(&nested_file).unwrap().write_all(b"photo data").unwrap();
887
888        // Test content with URL-encoded links
889        let content = r#"
890# Test Document with URL-Encoded Links
891
892![Penguin](penguin%20with%20space.jpg)
893![Photo](my%20images/photo%201.png)
894![Missing](missing%20file.jpg)
895"#;
896
897        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
898
899        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
900        let result = rule.check(&ctx).unwrap();
901
902        // Should only have one warning for the missing file
903        assert_eq!(
904            result.len(),
905            1,
906            "Should only warn about missing%20file.jpg. Got: {result:?}"
907        );
908        assert!(
909            result[0].message.contains("missing%20file.jpg"),
910            "Warning should mention the URL-encoded filename"
911        );
912    }
913
914    #[test]
915    fn test_external_urls() {
916        let rule = MD057ExistingRelativeLinks::new();
917
918        // Common web protocols
919        assert!(rule.is_external_url("https://example.com"));
920        assert!(rule.is_external_url("http://example.com"));
921        assert!(rule.is_external_url("ftp://example.com"));
922        assert!(rule.is_external_url("www.example.com"));
923        assert!(rule.is_external_url("example.com"));
924
925        // Special URI schemes
926        assert!(rule.is_external_url("file:///path/to/file"));
927        assert!(rule.is_external_url("smb://server/share"));
928        assert!(rule.is_external_url("macappstores://apps.apple.com/"));
929        assert!(rule.is_external_url("mailto:user@example.com"));
930        assert!(rule.is_external_url("tel:+1234567890"));
931        assert!(rule.is_external_url("data:text/plain;base64,SGVsbG8="));
932        assert!(rule.is_external_url("javascript:void(0)"));
933        assert!(rule.is_external_url("ssh://git@github.com/repo"));
934        assert!(rule.is_external_url("git://github.com/repo.git"));
935
936        // Email addresses without mailto: protocol
937        // These are clearly not file links and should be skipped
938        assert!(rule.is_external_url("user@example.com"));
939        assert!(rule.is_external_url("steering@kubernetes.io"));
940        assert!(rule.is_external_url("john.doe+filter@company.co.uk"));
941        assert!(rule.is_external_url("user_name@sub.domain.com"));
942        assert!(rule.is_external_url("firstname.lastname+tag@really.long.domain.example.org"));
943
944        // Template variables should be skipped (not checked as relative links)
945        assert!(rule.is_external_url("{{URL}}")); // Handlebars/Mustache
946        assert!(rule.is_external_url("{{#URL}}")); // Handlebars block helper
947        assert!(rule.is_external_url("{{> partial}}")); // Handlebars partial
948        assert!(rule.is_external_url("{{ variable }}")); // Mustache with spaces
949        assert!(rule.is_external_url("{{% include %}}")); // Jinja2/Hugo shortcode
950        assert!(rule.is_external_url("{{")); // Even partial matches (regex edge case)
951
952        // Absolute web URL paths should be skipped (not validated)
953        // These are typically routes for published documentation sites
954        assert!(rule.is_external_url("/api/v1/users"));
955        assert!(rule.is_external_url("/blog/2024/release.html"));
956        assert!(rule.is_external_url("/react/hooks/use-state.html"));
957        assert!(rule.is_external_url("/pkg/runtime"));
958        assert!(rule.is_external_url("/doc/go1compat"));
959        assert!(rule.is_external_url("/index.html"));
960        assert!(rule.is_external_url("/assets/logo.png"));
961
962        // Framework path aliases should be skipped (resolved by build tools)
963        // Tilde prefix (common in Vite, Nuxt, Astro for project root)
964        assert!(rule.is_external_url("~/assets/image.png"));
965        assert!(rule.is_external_url("~/components/Button.vue"));
966        assert!(rule.is_external_url("~assets/logo.svg")); // Nuxt style without /
967
968        // @ prefix (common in Vue, webpack, Vite aliases)
969        assert!(rule.is_external_url("@/components/Header.vue"));
970        assert!(rule.is_external_url("@images/photo.jpg"));
971        assert!(rule.is_external_url("@assets/styles.css"));
972
973        // Relative paths should NOT be external (should be validated)
974        assert!(!rule.is_external_url("./relative/path.md"));
975        assert!(!rule.is_external_url("relative/path.md"));
976        assert!(!rule.is_external_url("../parent/path.md"));
977    }
978
979    #[test]
980    fn test_framework_path_aliases() {
981        // Create a temporary directory for test files
982        let temp_dir = tempdir().unwrap();
983        let base_path = temp_dir.path();
984
985        // Test content with framework path aliases (should all be skipped)
986        let content = r#"
987# Framework Path Aliases
988
989![Image 1](~/assets/penguin.jpg)
990![Image 2](~assets/logo.svg)
991![Image 3](@images/photo.jpg)
992![Image 4](@/components/icon.svg)
993[Link](@/pages/about.md)
994
995This is a [real missing link](missing.md) that should be flagged.
996"#;
997
998        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
999
1000        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1001        let result = rule.check(&ctx).unwrap();
1002
1003        // Should only have one warning for the real missing link
1004        assert_eq!(
1005            result.len(),
1006            1,
1007            "Should only warn about missing.md, not framework aliases. Got: {result:?}"
1008        );
1009        assert!(
1010            result[0].message.contains("missing.md"),
1011            "Warning should be for missing.md"
1012        );
1013    }
1014
1015    #[test]
1016    fn test_url_decode_security_path_traversal() {
1017        // Ensure URL decoding doesn't enable path traversal attacks
1018        // The decoded path is still validated against the base path
1019        let temp_dir = tempdir().unwrap();
1020        let base_path = temp_dir.path();
1021
1022        // Create a file in the temp directory
1023        let file_in_base = base_path.join("safe.md");
1024        File::create(&file_in_base).unwrap().write_all(b"# Safe").unwrap();
1025
1026        // Test with encoded path traversal attempt
1027        // Use a path that definitely won't exist on any platform (not /etc/passwd which exists on Linux)
1028        // %2F = /, so ..%2F..%2Fnonexistent%2Ffile = ../../nonexistent/file
1029        // %252F = %2F (double encoded), so ..%252F..%252F = ..%2F..%2F (literal, won't decode to ..)
1030        let content = r#"
1031[Traversal attempt](..%2F..%2Fnonexistent_dir_12345%2Fmissing.md)
1032[Double encoded](..%252F..%252Fnonexistent%252Ffile.md)
1033[Safe link](safe.md)
1034"#;
1035
1036        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1037
1038        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1039        let result = rule.check(&ctx).unwrap();
1040
1041        // The traversal attempts should still be flagged as missing
1042        // (they don't exist relative to base_path after decoding)
1043        assert_eq!(
1044            result.len(),
1045            2,
1046            "Should have warnings for traversal attempts. Got: {result:?}"
1047        );
1048    }
1049
1050    #[test]
1051    fn test_url_encoded_utf8_filenames() {
1052        // Test with actual UTF-8 encoded filenames
1053        let temp_dir = tempdir().unwrap();
1054        let base_path = temp_dir.path();
1055
1056        // Create files with unicode names
1057        let cafe_file = base_path.join("café.md");
1058        File::create(&cafe_file).unwrap().write_all(b"# Cafe").unwrap();
1059
1060        let content = r#"
1061[Café link](caf%C3%A9.md)
1062[Missing unicode](r%C3%A9sum%C3%A9.md)
1063"#;
1064
1065        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1066
1067        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1068        let result = rule.check(&ctx).unwrap();
1069
1070        // Should only warn about the missing file
1071        assert_eq!(
1072            result.len(),
1073            1,
1074            "Should only warn about missing résumé.md. Got: {result:?}"
1075        );
1076        assert!(
1077            result[0].message.contains("r%C3%A9sum%C3%A9.md"),
1078            "Warning should mention the URL-encoded filename"
1079        );
1080    }
1081
1082    #[test]
1083    fn test_url_encoded_emoji_filenames() {
1084        // URL-encoded emoji paths should be correctly resolved
1085        // 👤 = U+1F464 = F0 9F 91 A4 in UTF-8
1086        let temp_dir = tempdir().unwrap();
1087        let base_path = temp_dir.path();
1088
1089        // Create directory with emoji in name: 👤 Personal
1090        let emoji_dir = base_path.join("👤 Personal");
1091        std::fs::create_dir(&emoji_dir).unwrap();
1092
1093        // Create file in that directory: TV Shows.md
1094        let file_path = emoji_dir.join("TV Shows.md");
1095        File::create(&file_path)
1096            .unwrap()
1097            .write_all(b"# TV Shows\n\nContent here.")
1098            .unwrap();
1099
1100        // Test content with URL-encoded emoji link
1101        // %F0%9F%91%A4 = 👤, %20 = space
1102        let content = r#"
1103# Test Document
1104
1105[TV Shows](./%F0%9F%91%A4%20Personal/TV%20Shows.md)
1106[Missing](./%F0%9F%91%A4%20Personal/Missing.md)
1107"#;
1108
1109        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1110
1111        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1112        let result = rule.check(&ctx).unwrap();
1113
1114        // Should only warn about the missing file, not the valid emoji path
1115        assert_eq!(result.len(), 1, "Should only warn about missing file. Got: {result:?}");
1116        assert!(
1117            result[0].message.contains("Missing.md"),
1118            "Warning should be for Missing.md, got: {}",
1119            result[0].message
1120        );
1121    }
1122
1123    #[test]
1124    fn test_no_warnings_without_base_path() {
1125        let rule = MD057ExistingRelativeLinks::new();
1126        let content = "[Link](missing.md)";
1127
1128        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1129        let result = rule.check(&ctx).unwrap();
1130        assert!(result.is_empty(), "Should have no warnings without base path");
1131    }
1132
1133    #[test]
1134    fn test_existing_and_missing_links() {
1135        // Create a temporary directory for test files
1136        let temp_dir = tempdir().unwrap();
1137        let base_path = temp_dir.path();
1138
1139        // Create an existing file
1140        let exists_path = base_path.join("exists.md");
1141        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
1142
1143        // Verify the file exists
1144        assert!(exists_path.exists(), "exists.md should exist for this test");
1145
1146        // Create test content with both existing and missing links
1147        let content = r#"
1148# Test Document
1149
1150[Valid Link](exists.md)
1151[Invalid Link](missing.md)
1152[External Link](https://example.com)
1153[Media Link](image.jpg)
1154        "#;
1155
1156        // Initialize rule with the base path (default: check all files including media)
1157        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1158
1159        // Test the rule
1160        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1161        let result = rule.check(&ctx).unwrap();
1162
1163        // Should have two warnings: missing.md and image.jpg (both don't exist)
1164        assert_eq!(result.len(), 2);
1165        let messages: Vec<_> = result.iter().map(|w| w.message.as_str()).collect();
1166        assert!(messages.iter().any(|m| m.contains("missing.md")));
1167        assert!(messages.iter().any(|m| m.contains("image.jpg")));
1168    }
1169
1170    #[test]
1171    fn test_angle_bracket_links() {
1172        // Create a temporary directory for test files
1173        let temp_dir = tempdir().unwrap();
1174        let base_path = temp_dir.path();
1175
1176        // Create an existing file
1177        let exists_path = base_path.join("exists.md");
1178        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
1179
1180        // Create test content with angle bracket links
1181        let content = r#"
1182# Test Document
1183
1184[Valid Link](<exists.md>)
1185[Invalid Link](<missing.md>)
1186[External Link](<https://example.com>)
1187    "#;
1188
1189        // Test with default settings
1190        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1191
1192        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1193        let result = rule.check(&ctx).unwrap();
1194
1195        // Should have one warning for missing.md
1196        assert_eq!(result.len(), 1, "Should have exactly one warning");
1197        assert!(
1198            result[0].message.contains("missing.md"),
1199            "Warning should mention missing.md"
1200        );
1201    }
1202
1203    #[test]
1204    fn test_angle_bracket_links_with_parens() {
1205        // Create a temporary directory for test files
1206        let temp_dir = tempdir().unwrap();
1207        let base_path = temp_dir.path();
1208
1209        // Create directory structure with parentheses in path
1210        let app_dir = base_path.join("app");
1211        std::fs::create_dir(&app_dir).unwrap();
1212        let upload_dir = app_dir.join("(upload)");
1213        std::fs::create_dir(&upload_dir).unwrap();
1214        let page_file = upload_dir.join("page.tsx");
1215        File::create(&page_file)
1216            .unwrap()
1217            .write_all(b"export default function Page() {}")
1218            .unwrap();
1219
1220        // Create test content with angle bracket links containing parentheses
1221        let content = r#"
1222# Test Document with Paths Containing Parens
1223
1224[Upload Page](<app/(upload)/page.tsx>)
1225[Unix pipe](<https://en.wikipedia.org/wiki/Pipeline_(Unix)>)
1226[Missing](<app/(missing)/file.md>)
1227"#;
1228
1229        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1230
1231        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1232        let result = rule.check(&ctx).unwrap();
1233
1234        // Should only have one warning for the missing file
1235        assert_eq!(
1236            result.len(),
1237            1,
1238            "Should have exactly one warning for missing file. Got: {result:?}"
1239        );
1240        assert!(
1241            result[0].message.contains("app/(missing)/file.md"),
1242            "Warning should mention app/(missing)/file.md"
1243        );
1244    }
1245
1246    #[test]
1247    fn test_all_file_types_checked() {
1248        // Create a temporary directory for test files
1249        let temp_dir = tempdir().unwrap();
1250        let base_path = temp_dir.path();
1251
1252        // Create a test with various file types - all should be checked
1253        let content = r#"
1254[Image Link](image.jpg)
1255[Video Link](video.mp4)
1256[Markdown Link](document.md)
1257[PDF Link](file.pdf)
1258"#;
1259
1260        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1261
1262        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1263        let result = rule.check(&ctx).unwrap();
1264
1265        // Should warn about all missing files regardless of extension
1266        assert_eq!(result.len(), 4, "Should have warnings for all missing files");
1267    }
1268
1269    #[test]
1270    fn test_code_span_detection() {
1271        let rule = MD057ExistingRelativeLinks::new();
1272
1273        // Create a temporary directory for test files
1274        let temp_dir = tempdir().unwrap();
1275        let base_path = temp_dir.path();
1276
1277        let rule = rule.with_path(base_path);
1278
1279        // Test with document structure
1280        let content = "This is a [link](nonexistent.md) and `[not a link](not-checked.md)` in code.";
1281
1282        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1283        let result = rule.check(&ctx).unwrap();
1284
1285        // Should only find the real link, not the one in code
1286        assert_eq!(result.len(), 1, "Should only flag the real link");
1287        assert!(result[0].message.contains("nonexistent.md"));
1288    }
1289
1290    #[test]
1291    fn test_inline_code_spans() {
1292        // Create a temporary directory for test files
1293        let temp_dir = tempdir().unwrap();
1294        let base_path = temp_dir.path();
1295
1296        // Create test content with links in inline code spans
1297        let content = r#"
1298# Test Document
1299
1300This is a normal link: [Link](missing.md)
1301
1302This is a code span with a link: `[Link](another-missing.md)`
1303
1304Some more text with `inline code [Link](yet-another-missing.md) embedded`.
1305
1306    "#;
1307
1308        // Initialize rule with the base path
1309        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1310
1311        // Test the rule
1312        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1313        let result = rule.check(&ctx).unwrap();
1314
1315        // Should only have warning for the normal link, not for links in code spans
1316        assert_eq!(result.len(), 1, "Should have exactly one warning");
1317        assert!(
1318            result[0].message.contains("missing.md"),
1319            "Warning should be for missing.md"
1320        );
1321        assert!(
1322            !result.iter().any(|w| w.message.contains("another-missing.md")),
1323            "Should not warn about link in code span"
1324        );
1325        assert!(
1326            !result.iter().any(|w| w.message.contains("yet-another-missing.md")),
1327            "Should not warn about link in inline code"
1328        );
1329    }
1330
1331    #[test]
1332    fn test_extensionless_link_resolution() {
1333        // Create a temporary directory for test files
1334        let temp_dir = tempdir().unwrap();
1335        let base_path = temp_dir.path();
1336
1337        // Create a markdown file WITHOUT specifying .md extension in the link
1338        let page_path = base_path.join("page.md");
1339        File::create(&page_path).unwrap().write_all(b"# Page").unwrap();
1340
1341        // Test content with extensionless link that should resolve to page.md
1342        let content = r#"
1343# Test Document
1344
1345[Link without extension](page)
1346[Link with extension](page.md)
1347[Missing link](nonexistent)
1348"#;
1349
1350        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1351
1352        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1353        let result = rule.check(&ctx).unwrap();
1354
1355        // Should only have warning for nonexistent link
1356        // Both "page" and "page.md" should resolve to the same file
1357        assert_eq!(result.len(), 1, "Should only warn about nonexistent link");
1358        assert!(
1359            result[0].message.contains("nonexistent"),
1360            "Warning should be for 'nonexistent' not 'page'"
1361        );
1362    }
1363
1364    // Cross-file validation tests
1365    #[test]
1366    fn test_cross_file_scope() {
1367        let rule = MD057ExistingRelativeLinks::new();
1368        assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
1369    }
1370
1371    #[test]
1372    fn test_contribute_to_index_extracts_markdown_links() {
1373        let rule = MD057ExistingRelativeLinks::new();
1374        let content = r#"
1375# Document
1376
1377[Link to docs](./docs/guide.md)
1378[Link with fragment](./other.md#section)
1379[External link](https://example.com)
1380[Image link](image.png)
1381[Media file](video.mp4)
1382"#;
1383
1384        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1385        let mut index = FileIndex::new();
1386        rule.contribute_to_index(&ctx, &mut index);
1387
1388        // Should only index markdown file links
1389        assert_eq!(index.cross_file_links.len(), 2);
1390
1391        // Check first link
1392        assert_eq!(index.cross_file_links[0].target_path, "./docs/guide.md");
1393        assert_eq!(index.cross_file_links[0].fragment, "");
1394
1395        // Check second link (with fragment)
1396        assert_eq!(index.cross_file_links[1].target_path, "./other.md");
1397        assert_eq!(index.cross_file_links[1].fragment, "section");
1398    }
1399
1400    #[test]
1401    fn test_contribute_to_index_skips_external_and_anchors() {
1402        let rule = MD057ExistingRelativeLinks::new();
1403        let content = r#"
1404# Document
1405
1406[External](https://example.com)
1407[Another external](http://example.org)
1408[Fragment only](#section)
1409[FTP link](ftp://files.example.com)
1410[Mail link](mailto:test@example.com)
1411[WWW link](www.example.com)
1412"#;
1413
1414        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1415        let mut index = FileIndex::new();
1416        rule.contribute_to_index(&ctx, &mut index);
1417
1418        // Should not index any of these
1419        assert_eq!(index.cross_file_links.len(), 0);
1420    }
1421
1422    #[test]
1423    fn test_cross_file_check_valid_link() {
1424        use crate::workspace_index::WorkspaceIndex;
1425
1426        let rule = MD057ExistingRelativeLinks::new();
1427
1428        // Create a workspace index with the target file
1429        let mut workspace_index = WorkspaceIndex::new();
1430        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());
1431
1432        // Create file index with a link to an existing file
1433        let mut file_index = FileIndex::new();
1434        file_index.add_cross_file_link(CrossFileLinkIndex {
1435            target_path: "guide.md".to_string(),
1436            fragment: "".to_string(),
1437            line: 5,
1438            column: 1,
1439        });
1440
1441        // Run cross-file check from docs/index.md
1442        let warnings = rule
1443            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1444            .unwrap();
1445
1446        // Should have no warnings - file exists
1447        assert!(warnings.is_empty());
1448    }
1449
1450    #[test]
1451    fn test_cross_file_check_missing_link() {
1452        use crate::workspace_index::WorkspaceIndex;
1453
1454        let rule = MD057ExistingRelativeLinks::new();
1455
1456        // Create an empty workspace index
1457        let workspace_index = WorkspaceIndex::new();
1458
1459        // Create file index with a link to a missing file
1460        let mut file_index = FileIndex::new();
1461        file_index.add_cross_file_link(CrossFileLinkIndex {
1462            target_path: "missing.md".to_string(),
1463            fragment: "".to_string(),
1464            line: 5,
1465            column: 1,
1466        });
1467
1468        // Run cross-file check
1469        let warnings = rule
1470            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1471            .unwrap();
1472
1473        // Should have one warning for the missing file
1474        assert_eq!(warnings.len(), 1);
1475        assert!(warnings[0].message.contains("missing.md"));
1476        assert!(warnings[0].message.contains("does not exist"));
1477    }
1478
1479    #[test]
1480    fn test_cross_file_check_parent_path() {
1481        use crate::workspace_index::WorkspaceIndex;
1482
1483        let rule = MD057ExistingRelativeLinks::new();
1484
1485        // Create a workspace index with the target file at the root
1486        let mut workspace_index = WorkspaceIndex::new();
1487        workspace_index.insert_file(PathBuf::from("readme.md"), FileIndex::new());
1488
1489        // Create file index with a parent path link
1490        let mut file_index = FileIndex::new();
1491        file_index.add_cross_file_link(CrossFileLinkIndex {
1492            target_path: "../readme.md".to_string(),
1493            fragment: "".to_string(),
1494            line: 5,
1495            column: 1,
1496        });
1497
1498        // Run cross-file check from docs/guide.md
1499        let warnings = rule
1500            .cross_file_check(Path::new("docs/guide.md"), &file_index, &workspace_index)
1501            .unwrap();
1502
1503        // Should have no warnings - file exists at normalized path
1504        assert!(warnings.is_empty());
1505    }
1506
1507    #[test]
1508    fn test_cross_file_check_html_link_with_md_source() {
1509        // Test that .html links are accepted when corresponding .md source exists
1510        // This supports mdBook and similar doc generators that compile .md to .html
1511        use crate::workspace_index::WorkspaceIndex;
1512
1513        let rule = MD057ExistingRelativeLinks::new();
1514
1515        // Create a workspace index with the .md source file
1516        let mut workspace_index = WorkspaceIndex::new();
1517        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());
1518
1519        // Create file index with an .html link (from another rule like MD051)
1520        let mut file_index = FileIndex::new();
1521        file_index.add_cross_file_link(CrossFileLinkIndex {
1522            target_path: "guide.html".to_string(),
1523            fragment: "section".to_string(),
1524            line: 10,
1525            column: 5,
1526        });
1527
1528        // Run cross-file check from docs/index.md
1529        let warnings = rule
1530            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1531            .unwrap();
1532
1533        // Should have no warnings - .md source exists for the .html link
1534        assert!(
1535            warnings.is_empty(),
1536            "Expected no warnings for .html link with .md source, got: {warnings:?}"
1537        );
1538    }
1539
1540    #[test]
1541    fn test_cross_file_check_html_link_without_source() {
1542        // Test that .html links without corresponding .md source ARE flagged
1543        use crate::workspace_index::WorkspaceIndex;
1544
1545        let rule = MD057ExistingRelativeLinks::new();
1546
1547        // Create an empty workspace index
1548        let workspace_index = WorkspaceIndex::new();
1549
1550        // Create file index with an .html link to a non-existent file
1551        let mut file_index = FileIndex::new();
1552        file_index.add_cross_file_link(CrossFileLinkIndex {
1553            target_path: "missing.html".to_string(),
1554            fragment: "".to_string(),
1555            line: 10,
1556            column: 5,
1557        });
1558
1559        // Run cross-file check from docs/index.md
1560        let warnings = rule
1561            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1562            .unwrap();
1563
1564        // Should have one warning - no .md source exists
1565        assert_eq!(warnings.len(), 1, "Expected 1 warning for .html link without source");
1566        assert!(warnings[0].message.contains("missing.html"));
1567    }
1568
1569    #[test]
1570    fn test_normalize_path_function() {
1571        // Test simple cases
1572        assert_eq!(
1573            normalize_path(Path::new("docs/guide.md")),
1574            PathBuf::from("docs/guide.md")
1575        );
1576
1577        // Test current directory removal
1578        assert_eq!(
1579            normalize_path(Path::new("./docs/guide.md")),
1580            PathBuf::from("docs/guide.md")
1581        );
1582
1583        // Test parent directory resolution
1584        assert_eq!(
1585            normalize_path(Path::new("docs/sub/../guide.md")),
1586            PathBuf::from("docs/guide.md")
1587        );
1588
1589        // Test multiple parent directories
1590        assert_eq!(normalize_path(Path::new("a/b/c/../../d.md")), PathBuf::from("a/d.md"));
1591    }
1592
1593    #[test]
1594    fn test_html_link_with_md_source() {
1595        // Links to .html files should pass if corresponding .md source exists
1596        let temp_dir = tempdir().unwrap();
1597        let base_path = temp_dir.path();
1598
1599        // Create guide.md (source file)
1600        let md_file = base_path.join("guide.md");
1601        File::create(&md_file).unwrap().write_all(b"# Guide").unwrap();
1602
1603        let content = r#"
1604[Read the guide](guide.html)
1605[Also here](getting-started.html)
1606"#;
1607
1608        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1609        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1610        let result = rule.check(&ctx).unwrap();
1611
1612        // guide.html passes (guide.md exists), getting-started.html fails
1613        assert_eq!(
1614            result.len(),
1615            1,
1616            "Should only warn about missing source. Got: {result:?}"
1617        );
1618        assert!(result[0].message.contains("getting-started.html"));
1619    }
1620
1621    #[test]
1622    fn test_htm_link_with_md_source() {
1623        // .htm extension should also check for markdown source
1624        let temp_dir = tempdir().unwrap();
1625        let base_path = temp_dir.path();
1626
1627        let md_file = base_path.join("page.md");
1628        File::create(&md_file).unwrap().write_all(b"# Page").unwrap();
1629
1630        let content = "[Page](page.htm)";
1631
1632        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1633        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1634        let result = rule.check(&ctx).unwrap();
1635
1636        assert!(
1637            result.is_empty(),
1638            "Should not warn when .md source exists for .htm link"
1639        );
1640    }
1641
1642    #[test]
1643    fn test_html_link_finds_various_markdown_extensions() {
1644        // Should find .mdx, .markdown, etc. as source files
1645        let temp_dir = tempdir().unwrap();
1646        let base_path = temp_dir.path();
1647
1648        File::create(base_path.join("doc.md")).unwrap();
1649        File::create(base_path.join("tutorial.mdx")).unwrap();
1650        File::create(base_path.join("guide.markdown")).unwrap();
1651
1652        let content = r#"
1653[Doc](doc.html)
1654[Tutorial](tutorial.html)
1655[Guide](guide.html)
1656"#;
1657
1658        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1659        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1660        let result = rule.check(&ctx).unwrap();
1661
1662        assert!(
1663            result.is_empty(),
1664            "Should find all markdown variants as source files. Got: {result:?}"
1665        );
1666    }
1667
1668    #[test]
1669    fn test_html_link_in_subdirectory() {
1670        // Should find markdown source in subdirectories
1671        let temp_dir = tempdir().unwrap();
1672        let base_path = temp_dir.path();
1673
1674        let docs_dir = base_path.join("docs");
1675        std::fs::create_dir(&docs_dir).unwrap();
1676        File::create(docs_dir.join("guide.md"))
1677            .unwrap()
1678            .write_all(b"# Guide")
1679            .unwrap();
1680
1681        let content = "[Guide](docs/guide.html)";
1682
1683        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1684        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1685        let result = rule.check(&ctx).unwrap();
1686
1687        assert!(result.is_empty(), "Should find markdown source in subdirectory");
1688    }
1689
1690    #[test]
1691    fn test_absolute_path_skipped_in_check() {
1692        // Test that absolute paths are skipped during link validation
1693        // This fixes the bug where /pkg/runtime was being flagged
1694        let temp_dir = tempdir().unwrap();
1695        let base_path = temp_dir.path();
1696
1697        let content = r#"
1698# Test Document
1699
1700[Go Runtime](/pkg/runtime)
1701[Go Runtime with Fragment](/pkg/runtime#section)
1702[API Docs](/api/v1/users)
1703[Blog Post](/blog/2024/release.html)
1704[React Hook](/react/hooks/use-state.html)
1705"#;
1706
1707        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1708        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1709        let result = rule.check(&ctx).unwrap();
1710
1711        // Should have NO warnings - all absolute paths should be skipped
1712        assert!(
1713            result.is_empty(),
1714            "Absolute paths should be skipped. Got warnings: {result:?}"
1715        );
1716    }
1717
1718    #[test]
1719    fn test_absolute_path_skipped_in_cross_file_check() {
1720        // Test that absolute paths are skipped in cross_file_check()
1721        use crate::workspace_index::WorkspaceIndex;
1722
1723        let rule = MD057ExistingRelativeLinks::new();
1724
1725        // Create an empty workspace index (no files exist)
1726        let workspace_index = WorkspaceIndex::new();
1727
1728        // Create file index with absolute path links (should be skipped)
1729        let mut file_index = FileIndex::new();
1730        file_index.add_cross_file_link(CrossFileLinkIndex {
1731            target_path: "/pkg/runtime.md".to_string(),
1732            fragment: "".to_string(),
1733            line: 5,
1734            column: 1,
1735        });
1736        file_index.add_cross_file_link(CrossFileLinkIndex {
1737            target_path: "/api/v1/users.md".to_string(),
1738            fragment: "section".to_string(),
1739            line: 10,
1740            column: 1,
1741        });
1742
1743        // Run cross-file check
1744        let warnings = rule
1745            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1746            .unwrap();
1747
1748        // Should have NO warnings - absolute paths should be skipped
1749        assert!(
1750            warnings.is_empty(),
1751            "Absolute paths should be skipped in cross_file_check. Got warnings: {warnings:?}"
1752        );
1753    }
1754
1755    #[test]
1756    fn test_protocol_relative_url_not_skipped() {
1757        // Test that protocol-relative URLs (//example.com) are NOT skipped as absolute paths
1758        // They should still be caught by is_external_url() though
1759        let temp_dir = tempdir().unwrap();
1760        let base_path = temp_dir.path();
1761
1762        let content = r#"
1763# Test Document
1764
1765[External](//example.com/page)
1766[Another](//cdn.example.com/asset.js)
1767"#;
1768
1769        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1770        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1771        let result = rule.check(&ctx).unwrap();
1772
1773        // Should have NO warnings - protocol-relative URLs are external and should be skipped
1774        assert!(
1775            result.is_empty(),
1776            "Protocol-relative URLs should be skipped. Got warnings: {result:?}"
1777        );
1778    }
1779
1780    #[test]
1781    fn test_email_addresses_skipped() {
1782        // Test that email addresses without mailto: are skipped
1783        // These are clearly not file links (the @ symbol is definitive)
1784        let temp_dir = tempdir().unwrap();
1785        let base_path = temp_dir.path();
1786
1787        let content = r#"
1788# Test Document
1789
1790[Contact](user@example.com)
1791[Steering](steering@kubernetes.io)
1792[Support](john.doe+filter@company.co.uk)
1793[User](user_name@sub.domain.com)
1794"#;
1795
1796        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1797        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1798        let result = rule.check(&ctx).unwrap();
1799
1800        // Should have NO warnings - email addresses are clearly not file links and should be skipped
1801        assert!(
1802            result.is_empty(),
1803            "Email addresses should be skipped. Got warnings: {result:?}"
1804        );
1805    }
1806
1807    #[test]
1808    fn test_email_addresses_vs_file_paths() {
1809        // Test that email addresses (anything with @) are skipped
1810        // Note: File paths with @ are extremely rare, so we treat anything with @ as an email
1811        let temp_dir = tempdir().unwrap();
1812        let base_path = temp_dir.path();
1813
1814        let content = r#"
1815# Test Document
1816
1817[Email](user@example.com)  <!-- Should be skipped (email) -->
1818[Email2](steering@kubernetes.io)  <!-- Should be skipped (email) -->
1819[Email3](user@file.md)  <!-- Should be skipped (has @, treated as email) -->
1820"#;
1821
1822        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1823        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1824        let result = rule.check(&ctx).unwrap();
1825
1826        // All should be skipped - anything with @ is treated as an email
1827        assert!(
1828            result.is_empty(),
1829            "All email addresses should be skipped. Got: {result:?}"
1830        );
1831    }
1832
1833    #[test]
1834    fn test_diagnostic_position_accuracy() {
1835        // Test that diagnostics point to the URL, not the link text
1836        let temp_dir = tempdir().unwrap();
1837        let base_path = temp_dir.path();
1838
1839        // Position markers:     0         1         2         3
1840        //                       0123456789012345678901234567890123456789
1841        let content = "prefix [text](missing.md) suffix";
1842        //             The URL "missing.md" starts at 0-indexed position 14
1843        //             which is 1-indexed column 15, and ends at 0-indexed 24 (1-indexed column 25)
1844
1845        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1846        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1847        let result = rule.check(&ctx).unwrap();
1848
1849        assert_eq!(result.len(), 1, "Should have exactly one warning");
1850        assert_eq!(result[0].line, 1, "Should be on line 1");
1851        assert_eq!(result[0].column, 15, "Should point to start of URL 'missing.md'");
1852        assert_eq!(result[0].end_column, 25, "Should point past end of URL 'missing.md'");
1853    }
1854
1855    #[test]
1856    fn test_diagnostic_position_angle_brackets() {
1857        // Test position accuracy with angle bracket links
1858        let temp_dir = tempdir().unwrap();
1859        let base_path = temp_dir.path();
1860
1861        // Position markers:     0         1         2
1862        //                       012345678901234567890
1863        let content = "[link](<missing.md>)";
1864        //             The URL "missing.md" starts at 0-indexed position 8 (1-indexed column 9)
1865
1866        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1867        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1868        let result = rule.check(&ctx).unwrap();
1869
1870        assert_eq!(result.len(), 1, "Should have exactly one warning");
1871        assert_eq!(result[0].line, 1, "Should be on line 1");
1872        assert_eq!(result[0].column, 9, "Should point to start of URL in angle brackets");
1873    }
1874
1875    #[test]
1876    fn test_diagnostic_position_multiline() {
1877        // Test that line numbers are correct for links on different lines
1878        let temp_dir = tempdir().unwrap();
1879        let base_path = temp_dir.path();
1880
1881        let content = r#"# Title
1882Some text on line 2
1883[link on line 3](missing1.md)
1884More text
1885[link on line 5](missing2.md)"#;
1886
1887        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1888        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1889        let result = rule.check(&ctx).unwrap();
1890
1891        assert_eq!(result.len(), 2, "Should have two warnings");
1892
1893        // First warning should be on line 3
1894        assert_eq!(result[0].line, 3, "First warning should be on line 3");
1895        assert!(result[0].message.contains("missing1.md"));
1896
1897        // Second warning should be on line 5
1898        assert_eq!(result[1].line, 5, "Second warning should be on line 5");
1899        assert!(result[1].message.contains("missing2.md"));
1900    }
1901
1902    #[test]
1903    fn test_diagnostic_position_with_spaces() {
1904        // Test position with URLs that have spaces in parentheses
1905        let temp_dir = tempdir().unwrap();
1906        let base_path = temp_dir.path();
1907
1908        let content = "[link]( missing.md )";
1909        //             0123456789012345678901
1910        //             0-indexed position 8 is 'm' in 'missing.md' (after space and paren)
1911        //             which is 1-indexed column 9
1912
1913        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1914        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1915        let result = rule.check(&ctx).unwrap();
1916
1917        assert_eq!(result.len(), 1, "Should have exactly one warning");
1918        // The regex captures the URL without leading/trailing spaces
1919        assert_eq!(result[0].column, 9, "Should point to URL after stripping spaces");
1920    }
1921
1922    #[test]
1923    fn test_diagnostic_position_image() {
1924        // Test that image diagnostics also have correct positions
1925        let temp_dir = tempdir().unwrap();
1926        let base_path = temp_dir.path();
1927
1928        let content = "![alt text](missing.jpg)";
1929
1930        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1931        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1932        let result = rule.check(&ctx).unwrap();
1933
1934        assert_eq!(result.len(), 1, "Should have exactly one warning for image");
1935        assert_eq!(result[0].line, 1);
1936        // Images use start_col from the parser, which should point to the URL
1937        assert!(result[0].column > 0, "Should have valid column position");
1938        assert!(result[0].message.contains("missing.jpg"));
1939    }
1940
1941    #[test]
1942    fn test_wikilinks_skipped() {
1943        // Wikilinks should not trigger MD057 warnings
1944        // They use a different linking system (e.g., Obsidian, wiki software)
1945        let temp_dir = tempdir().unwrap();
1946        let base_path = temp_dir.path();
1947
1948        let content = r#"# Test Document
1949
1950[[Microsoft#Windows OS]]
1951[[SomePage]]
1952[[Page With Spaces]]
1953[[path/to/page#section]]
1954[[page|Display Text]]
1955
1956This is a [real missing link](missing.md) that should be flagged.
1957"#;
1958
1959        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1960        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1961        let result = rule.check(&ctx).unwrap();
1962
1963        // Should only warn about the regular markdown link, not wikilinks
1964        assert_eq!(
1965            result.len(),
1966            1,
1967            "Should only warn about missing.md, not wikilinks. Got: {result:?}"
1968        );
1969        assert!(
1970            result[0].message.contains("missing.md"),
1971            "Warning should be for missing.md, not wikilinks"
1972        );
1973    }
1974
1975    #[test]
1976    fn test_wikilinks_not_added_to_index() {
1977        // Wikilinks should not be added to the cross-file link index
1978        let temp_dir = tempdir().unwrap();
1979        let base_path = temp_dir.path();
1980
1981        let content = r#"# Test Document
1982
1983[[Microsoft#Windows OS]]
1984[[SomePage#section]]
1985[Regular Link](other.md)
1986"#;
1987
1988        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1989        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1990
1991        let mut file_index = FileIndex::new();
1992        rule.contribute_to_index(&ctx, &mut file_index);
1993
1994        // Should only have the regular markdown link (if it's a markdown file)
1995        // Wikilinks should not be added
1996        let cross_file_links = &file_index.cross_file_links;
1997        assert_eq!(
1998            cross_file_links.len(),
1999            1,
2000            "Only regular markdown links should be indexed, not wikilinks. Got: {cross_file_links:?}"
2001        );
2002        assert_eq!(file_index.cross_file_links[0].target_path, "other.md");
2003    }
2004
2005    #[test]
2006    fn test_reference_definition_missing_file() {
2007        // Reference definitions [ref]: ./path.md should be checked
2008        let temp_dir = tempdir().unwrap();
2009        let base_path = temp_dir.path();
2010
2011        let content = r#"# Test Document
2012
2013[test]: ./missing.md
2014[example]: ./nonexistent.html
2015
2016Use [test] and [example] here.
2017"#;
2018
2019        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2020        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2021        let result = rule.check(&ctx).unwrap();
2022
2023        // Should have warnings for both reference definitions
2024        assert_eq!(
2025            result.len(),
2026            2,
2027            "Should have warnings for missing reference definition targets. Got: {result:?}"
2028        );
2029        assert!(
2030            result.iter().any(|w| w.message.contains("missing.md")),
2031            "Should warn about missing.md"
2032        );
2033        assert!(
2034            result.iter().any(|w| w.message.contains("nonexistent.html")),
2035            "Should warn about nonexistent.html"
2036        );
2037    }
2038
2039    #[test]
2040    fn test_reference_definition_existing_file() {
2041        // Reference definitions to existing files should NOT trigger warnings
2042        let temp_dir = tempdir().unwrap();
2043        let base_path = temp_dir.path();
2044
2045        // Create an existing file
2046        let exists_path = base_path.join("exists.md");
2047        File::create(&exists_path)
2048            .unwrap()
2049            .write_all(b"# Existing file")
2050            .unwrap();
2051
2052        let content = r#"# Test Document
2053
2054[test]: ./exists.md
2055
2056Use [test] here.
2057"#;
2058
2059        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2060        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2061        let result = rule.check(&ctx).unwrap();
2062
2063        // Should have NO warnings since the file exists
2064        assert!(
2065            result.is_empty(),
2066            "Should not warn about existing file. Got: {result:?}"
2067        );
2068    }
2069
2070    #[test]
2071    fn test_reference_definition_external_url_skipped() {
2072        // Reference definitions with external URLs should be skipped
2073        let temp_dir = tempdir().unwrap();
2074        let base_path = temp_dir.path();
2075
2076        let content = r#"# Test Document
2077
2078[google]: https://google.com
2079[example]: http://example.org
2080[mail]: mailto:test@example.com
2081[ftp]: ftp://files.example.com
2082[local]: ./missing.md
2083
2084Use [google], [example], [mail], [ftp], [local] here.
2085"#;
2086
2087        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2088        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2089        let result = rule.check(&ctx).unwrap();
2090
2091        // Should only warn about the local missing file, not external URLs
2092        assert_eq!(
2093            result.len(),
2094            1,
2095            "Should only warn about local missing file. Got: {result:?}"
2096        );
2097        assert!(
2098            result[0].message.contains("missing.md"),
2099            "Warning should be for missing.md"
2100        );
2101    }
2102
2103    #[test]
2104    fn test_reference_definition_fragment_only_skipped() {
2105        // Reference definitions with fragment-only URLs should be skipped
2106        let temp_dir = tempdir().unwrap();
2107        let base_path = temp_dir.path();
2108
2109        let content = r#"# Test Document
2110
2111[section]: #my-section
2112
2113Use [section] here.
2114"#;
2115
2116        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2117        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2118        let result = rule.check(&ctx).unwrap();
2119
2120        // Should have NO warnings for fragment-only links
2121        assert!(
2122            result.is_empty(),
2123            "Should not warn about fragment-only reference. Got: {result:?}"
2124        );
2125    }
2126
2127    #[test]
2128    fn test_reference_definition_column_position() {
2129        // Test that column position points to the URL in the reference definition
2130        let temp_dir = tempdir().unwrap();
2131        let base_path = temp_dir.path();
2132
2133        // Position markers:     0         1         2
2134        //                       0123456789012345678901
2135        let content = "[ref]: ./missing.md";
2136        //             The URL "./missing.md" starts at 0-indexed position 7
2137        //             which is 1-indexed column 8
2138
2139        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2140        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2141        let result = rule.check(&ctx).unwrap();
2142
2143        assert_eq!(result.len(), 1, "Should have exactly one warning");
2144        assert_eq!(result[0].line, 1, "Should be on line 1");
2145        assert_eq!(result[0].column, 8, "Should point to start of URL './missing.md'");
2146    }
2147
2148    #[test]
2149    fn test_reference_definition_html_with_md_source() {
2150        // Reference definitions to .html files should pass if corresponding .md source exists
2151        let temp_dir = tempdir().unwrap();
2152        let base_path = temp_dir.path();
2153
2154        // Create guide.md (source file)
2155        let md_file = base_path.join("guide.md");
2156        File::create(&md_file).unwrap().write_all(b"# Guide").unwrap();
2157
2158        let content = r#"# Test Document
2159
2160[guide]: ./guide.html
2161[missing]: ./missing.html
2162
2163Use [guide] and [missing] here.
2164"#;
2165
2166        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2167        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2168        let result = rule.check(&ctx).unwrap();
2169
2170        // guide.html passes (guide.md exists), missing.html fails
2171        assert_eq!(
2172            result.len(),
2173            1,
2174            "Should only warn about missing source. Got: {result:?}"
2175        );
2176        assert!(result[0].message.contains("missing.html"));
2177    }
2178
2179    #[test]
2180    fn test_reference_definition_url_encoded() {
2181        // Reference definitions with URL-encoded paths should be decoded before checking
2182        let temp_dir = tempdir().unwrap();
2183        let base_path = temp_dir.path();
2184
2185        // Create a file with spaces in the name
2186        let file_with_spaces = base_path.join("file with spaces.md");
2187        File::create(&file_with_spaces).unwrap().write_all(b"# Spaces").unwrap();
2188
2189        let content = r#"# Test Document
2190
2191[spaces]: ./file%20with%20spaces.md
2192[missing]: ./missing%20file.md
2193
2194Use [spaces] and [missing] here.
2195"#;
2196
2197        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2198        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2199        let result = rule.check(&ctx).unwrap();
2200
2201        // Should only warn about the missing file
2202        assert_eq!(
2203            result.len(),
2204            1,
2205            "Should only warn about missing URL-encoded file. Got: {result:?}"
2206        );
2207        assert!(result[0].message.contains("missing%20file.md"));
2208    }
2209
2210    #[test]
2211    fn test_inline_and_reference_both_checked() {
2212        // Both inline links and reference definitions should be checked
2213        let temp_dir = tempdir().unwrap();
2214        let base_path = temp_dir.path();
2215
2216        let content = r#"# Test Document
2217
2218[inline link](./inline-missing.md)
2219[ref]: ./ref-missing.md
2220
2221Use [ref] here.
2222"#;
2223
2224        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2225        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2226        let result = rule.check(&ctx).unwrap();
2227
2228        // Should warn about both the inline link and the reference definition
2229        assert_eq!(
2230            result.len(),
2231            2,
2232            "Should warn about both inline and reference links. Got: {result:?}"
2233        );
2234        assert!(
2235            result.iter().any(|w| w.message.contains("inline-missing.md")),
2236            "Should warn about inline-missing.md"
2237        );
2238        assert!(
2239            result.iter().any(|w| w.message.contains("ref-missing.md")),
2240            "Should warn about ref-missing.md"
2241        );
2242    }
2243
2244    #[test]
2245    fn test_footnote_definitions_not_flagged() {
2246        // Regression test for issue #286: footnote definitions should not be
2247        // treated as reference definitions and flagged as broken links
2248        let rule = MD057ExistingRelativeLinks::default();
2249
2250        let content = r#"# Title
2251
2252A footnote[^1].
2253
2254[^1]: [link](https://www.google.com).
2255"#;
2256
2257        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2258        let result = rule.check(&ctx).unwrap();
2259
2260        assert!(
2261            result.is_empty(),
2262            "Footnote definitions should not trigger MD057 warnings. Got: {result:?}"
2263        );
2264    }
2265
2266    #[test]
2267    fn test_footnote_with_relative_link_inside() {
2268        // Footnotes containing relative links should not be checked
2269        // (the footnote content is not a URL, it's content that may contain links)
2270        let rule = MD057ExistingRelativeLinks::default();
2271
2272        let content = r#"# Title
2273
2274See the footnote[^1].
2275
2276[^1]: Check out [this file](./existing.md) for more info.
2277[^2]: Also see [missing](./does-not-exist.md).
2278"#;
2279
2280        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2281        let result = rule.check(&ctx).unwrap();
2282
2283        // The inline links INSIDE footnotes should be checked (./existing.md, ./does-not-exist.md)
2284        // but the footnote definition itself should not be treated as a reference definition
2285        // Note: This test verifies that [^1]: and [^2]: are not parsed as ref defs with
2286        // URLs like "[this file](./existing.md)" or "[missing](./does-not-exist.md)"
2287        for warning in &result {
2288            assert!(
2289                !warning.message.contains("[this file]"),
2290                "Footnote content should not be treated as URL: {warning:?}"
2291            );
2292            assert!(
2293                !warning.message.contains("[missing]"),
2294                "Footnote content should not be treated as URL: {warning:?}"
2295            );
2296        }
2297    }
2298
2299    #[test]
2300    fn test_mixed_footnotes_and_reference_definitions() {
2301        // Ensure regular reference definitions are still checked while footnotes are skipped
2302        let temp_dir = tempdir().unwrap();
2303        let base_path = temp_dir.path();
2304
2305        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2306
2307        let content = r#"# Title
2308
2309A footnote[^1] and a [ref link][myref].
2310
2311[^1]: This is a footnote with [link](https://example.com).
2312
2313[myref]: ./missing-file.md "This should be checked"
2314"#;
2315
2316        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2317        let result = rule.check(&ctx).unwrap();
2318
2319        // Should only warn about the regular reference definition, not the footnote
2320        assert_eq!(
2321            result.len(),
2322            1,
2323            "Should only warn about the regular reference definition. Got: {result:?}"
2324        );
2325        assert!(
2326            result[0].message.contains("missing-file.md"),
2327            "Should warn about missing-file.md in reference definition"
2328        );
2329    }
2330}