Skip to main content

rumdl_lib/rules/
md057_existing_relative_links.rs

1//!
2//! Rule MD057: Existing relative links
3//!
4//! See [docs/md057.md](../../docs/md057.md) for full documentation, configuration, and examples.
5
6use crate::rule::{CrossFileScope, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::element_cache::ElementCache;
8use crate::workspace_index::{FileIndex, extract_cross_file_links};
9use regex::Regex;
10use std::collections::HashMap;
11use std::env;
12use std::path::{Path, PathBuf};
13use std::sync::LazyLock;
14use std::sync::{Arc, Mutex};
15
16mod md057_config;
17use md057_config::MD057Config;
18
// Process-wide, thread-safe cache for file existence checks to avoid redundant
// filesystem operations. Keys are the exact paths that were probed; values are
// the result of `Path::exists` at the time of the probe.
// `reset_file_existence_cache` empties it.
static FILE_EXISTENCE_CACHE: LazyLock<Arc<Mutex<HashMap<PathBuf, bool>>>> =
    LazyLock::new(|| Arc::new(Mutex::new(HashMap::new())));
22
23// Reset the file existence cache (typically between rule runs)
24fn reset_file_existence_cache() {
25    if let Ok(mut cache) = FILE_EXISTENCE_CACHE.lock() {
26        cache.clear();
27    }
28}
29
30// Check if a file exists with caching
31fn file_exists_with_cache(path: &Path) -> bool {
32    match FILE_EXISTENCE_CACHE.lock() {
33        Ok(mut cache) => *cache.entry(path.to_path_buf()).or_insert_with(|| path.exists()),
34        Err(_) => path.exists(), // Fallback to uncached check on mutex poison
35    }
36}
37
38/// Check if a file exists, also trying markdown extensions for extensionless links.
39/// This supports wiki-style links like `[Link](page)` that resolve to `page.md`.
40fn file_exists_or_markdown_extension(path: &Path) -> bool {
41    // First, check exact path
42    if file_exists_with_cache(path) {
43        return true;
44    }
45
46    // If the path has no extension, try adding markdown extensions
47    if path.extension().is_none() {
48        for ext in MARKDOWN_EXTENSIONS {
49            // MARKDOWN_EXTENSIONS includes the dot, e.g., ".md"
50            let path_with_ext = path.with_extension(&ext[1..]);
51            if file_exists_with_cache(&path_with_ext) {
52                return true;
53            }
54        }
55    }
56
57    false
58}
59
// Regex to match the start of a link or image - simplified for performance.
// It matches an optional `!` followed by bracketed text (`[text]`) and does not
// look at what follows the closing bracket.
static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());

/// Regex to extract the URL from an angle-bracketed markdown link
/// Format: `](<URL>)` or `](<URL> "title")`
/// This handles URLs with parentheses like `](<path/(with)/parens.md>)`
/// Capture group 1 is the text between `<` and `>`; optional group 2 is a
/// `#fragment` placed directly after the closing `>`.
static URL_EXTRACT_ANGLE_BRACKET_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\]\(\s*<([^>]+)>(#[^\)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());

/// Regex to extract the URL from a normal markdown link (without angle brackets)
/// Format: `](URL)` or `](URL "title")`
/// Capture group 1 is the URL up to the first `#`, `)`, `>` or whitespace;
/// optional group 2 is the `#fragment` that follows it.
static URL_EXTRACT_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new("\\]\\(\\s*([^>\\)\\s#]+)(#[^)\\s]*)?\\s*(?:\"[^\"]*\")?\\s*\\)").unwrap());

/// Regex to detect URLs with explicit schemes (should not be checked as relative links)
/// Matches at the start of the URL: scheme:// or scheme: (per RFC 3986), or a leading `www.`
/// This covers http, https, ftp, file, smb, mailto, tel, data, macappstores, etc.
static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());

// Current working directory, read once on first use; falls back to "." when it
// cannot be determined.
static CURRENT_DIR: LazyLock<PathBuf> = LazyLock::new(|| env::current_dir().unwrap_or_else(|_| PathBuf::from(".")));
82
/// Map an ASCII hex digit (`0-9`, `a-f`, `A-F`) to the number it denotes (0-15).
/// Any other byte yields `None`.
#[inline]
fn hex_digit_to_value(byte: u8) -> Option<u8> {
    // `to_digit(16)` accepts exactly the ASCII hex digits in either case; the
    // result is always below 16, so narrowing to `u8` cannot truncate.
    char::from(byte).to_digit(16).map(|digit| digit as u8)
}
94
/// Supported markdown file extensions.
///
/// Every entry includes the leading dot. The list is used both to resolve
/// extensionless links (`page` -> `page.md`) and to find the markdown source
/// behind a `.html`/`.htm` link.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md",
    ".markdown",
    ".mdx",
    ".mkd",
    ".mkdn",
    ".mdown",
    ".mdwn",
    ".qmd",
    ".rmd",
];
107
/// Rule MD057: Existing relative links should point to valid files or directories.
///
/// `Clone` copies the `Arc`, so clones share the same `base_path` slot.
#[derive(Debug, Clone, Default)]
pub struct MD057ExistingRelativeLinks {
    /// Base directory for resolving relative links.
    /// `None` by default; `with_path` sets it. While it is `None`, `check`
    /// derives the directory from the linted file instead.
    base_path: Arc<Mutex<Option<PathBuf>>>,
}
114
115impl MD057ExistingRelativeLinks {
116    /// Create a new instance with default settings
117    pub fn new() -> Self {
118        Self::default()
119    }
120
121    /// Set the base path for resolving relative links
122    pub fn with_path<P: AsRef<Path>>(self, path: P) -> Self {
123        let path = path.as_ref();
124        let dir_path = if path.is_file() {
125            path.parent().map(|p| p.to_path_buf())
126        } else {
127            Some(path.to_path_buf())
128        };
129
130        if let Ok(mut guard) = self.base_path.lock() {
131            *guard = dir_path;
132        }
133        self
134    }
135
136    #[allow(unused_variables)]
137    pub fn from_config_struct(config: MD057Config) -> Self {
138        Self::default()
139    }
140
141    /// Check if a URL is external or should be skipped for validation.
142    ///
143    /// Returns `true` (skip validation) for:
144    /// - URLs with protocols: `https://`, `http://`, `ftp://`, `mailto:`, etc.
145    /// - Bare domains: `www.example.com`, `example.com`
146    /// - Email addresses: `user@example.com` (without `mailto:`)
147    /// - Template variables: `{{URL}}`, `{{% include %}}`
148    /// - Absolute web URL paths: `/api/docs`, `/blog/post.html`
149    ///
150    /// Returns `false` (validate) for:
151    /// - Relative filesystem paths: `./file.md`, `../parent/file.md`, `file.md`
152    #[inline]
153    fn is_external_url(&self, url: &str) -> bool {
154        if url.is_empty() {
155            return false;
156        }
157
158        // Quick checks for common external URL patterns
159        if PROTOCOL_DOMAIN_REGEX.is_match(url) || url.starts_with("www.") {
160            return true;
161        }
162
163        // Skip template variables (Handlebars/Mustache/Jinja2 syntax)
164        // Examples: {{URL}}, {{#URL}}, {{> partial}}, {{% include %}}, {{ variable }}
165        if url.starts_with("{{") || url.starts_with("{%") {
166            return true;
167        }
168
169        // Simple check: if URL contains @, it's almost certainly an email address
170        // File paths with @ are extremely rare, so this is a safe heuristic
171        if url.contains('@') {
172            return true; // It's an email address, skip it
173        }
174
175        // Bare domain check (e.g., "example.com")
176        // Note: We intentionally DON'T skip all TLDs like .org, .net, etc.
177        // Links like [text](nodejs.org/path) without a protocol are broken -
178        // they'll be treated as relative paths by markdown renderers.
179        // Flagging them helps users find missing protocols.
180        // We only skip .com as a minimal safety net for the most common case.
181        if url.ends_with(".com") {
182            return true;
183        }
184
185        // Absolute URL paths (e.g., /api/docs, /blog/post.html) are treated as web paths
186        // and skipped. These are typically routes for published documentation sites,
187        // not filesystem paths that can be validated locally.
188        if url.starts_with('/') {
189            return true;
190        }
191
192        // Framework path aliases (resolved by build tools like Vite, webpack, etc.)
193        // These are not filesystem paths but module/asset aliases
194        // Examples: ~/assets/image.png, @images/photo.jpg, @/components/Button.vue
195        if url.starts_with('~') || url.starts_with('@') {
196            return true;
197        }
198
199        // All other cases (relative paths, etc.) are not external
200        false
201    }
202
203    /// Check if the URL is a fragment-only link (internal document link)
204    #[inline]
205    fn is_fragment_only_link(&self, url: &str) -> bool {
206        url.starts_with('#')
207    }
208
209    /// Decode URL percent-encoded sequences in a path.
210    /// Converts `%20` to space, `%2F` to `/`, etc.
211    /// Returns the original string if decoding fails or produces invalid UTF-8.
212    fn url_decode(path: &str) -> String {
213        // Quick check: if no percent sign, return as-is
214        if !path.contains('%') {
215            return path.to_string();
216        }
217
218        let bytes = path.as_bytes();
219        let mut result = Vec::with_capacity(bytes.len());
220        let mut i = 0;
221
222        while i < bytes.len() {
223            if bytes[i] == b'%' && i + 2 < bytes.len() {
224                // Try to parse the two hex digits following %
225                let hex1 = bytes[i + 1];
226                let hex2 = bytes[i + 2];
227                if let (Some(d1), Some(d2)) = (hex_digit_to_value(hex1), hex_digit_to_value(hex2)) {
228                    result.push(d1 * 16 + d2);
229                    i += 3;
230                    continue;
231                }
232            }
233            result.push(bytes[i]);
234            i += 1;
235        }
236
237        // Convert to UTF-8, falling back to original if invalid
238        String::from_utf8(result).unwrap_or_else(|_| path.to_string())
239    }
240
241    /// Strip query parameters and fragments from a URL for file existence checking.
242    /// URLs like `path/to/image.png?raw=true` or `file.md#section` should check
243    /// for `path/to/image.png` or `file.md` respectively.
244    ///
245    /// Note: In standard URLs, query parameters (`?`) come before fragments (`#`),
246    /// so we check for `?` first. If a URL has both, only the query is stripped here
247    /// (fragments are handled separately by the regex in `contribute_to_index`).
248    fn strip_query_and_fragment(url: &str) -> &str {
249        // Find the first occurrence of '?' or '#', whichever comes first
250        // This handles both standard URLs (? before #) and edge cases (# before ?)
251        let query_pos = url.find('?');
252        let fragment_pos = url.find('#');
253
254        match (query_pos, fragment_pos) {
255            (Some(q), Some(f)) => {
256                // Both exist - strip at whichever comes first
257                &url[..q.min(f)]
258            }
259            (Some(q), None) => &url[..q],
260            (None, Some(f)) => &url[..f],
261            (None, None) => url,
262        }
263    }
264
265    /// Resolve a relative link against a provided base path
266    fn resolve_link_path_with_base(link: &str, base_path: &Path) -> PathBuf {
267        base_path.join(link)
268    }
269}
270
271impl Rule for MD057ExistingRelativeLinks {
    /// Rule identifier; it is attached to every warning this rule emits.
    fn name(&self) -> &'static str {
        "MD057"
    }
275
    /// One-line, human-readable summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Relative links should point to existing files"
    }
279
    /// This rule belongs to the link category.
    fn category(&self) -> RuleCategory {
        RuleCategory::Link
    }
283
284    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
285        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
286    }
287
288    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
289        let content = ctx.content;
290
291        // Early returns for performance
292        if content.is_empty() || !content.contains('[') {
293            return Ok(Vec::new());
294        }
295
296        // Quick check for any potential links before expensive operations
297        // Check for inline links "](", reference definitions "]:", or images "!["
298        if !content.contains("](") && !content.contains("]:") {
299            return Ok(Vec::new());
300        }
301
302        // Reset the file existence cache for a fresh run
303        reset_file_existence_cache();
304
305        let mut warnings = Vec::new();
306
307        // Determine base path for resolving relative links
308        // ALWAYS compute from ctx.source_file for each file - do not reuse cached base_path
309        // This ensures each file resolves links relative to its own directory
310        let base_path: Option<PathBuf> = {
311            // First check if base_path was explicitly set via with_path() (for tests)
312            let explicit_base = self.base_path.lock().ok().and_then(|g| g.clone());
313            if explicit_base.is_some() {
314                explicit_base
315            } else if let Some(ref source_file) = ctx.source_file {
316                // Resolve symlinks to get the actual file location
317                // This ensures relative links are resolved from the target's directory,
318                // not the symlink's directory
319                let resolved_file = source_file.canonicalize().unwrap_or_else(|_| source_file.clone());
320                resolved_file
321                    .parent()
322                    .map(|p| p.to_path_buf())
323                    .or_else(|| Some(CURRENT_DIR.clone()))
324            } else {
325                // No source file available - cannot validate relative links
326                None
327            }
328        };
329
330        // If we still don't have a base path, we can't validate relative links
331        let Some(base_path) = base_path else {
332            return Ok(warnings);
333        };
334
335        // Use LintContext links instead of expensive regex parsing
336        if !ctx.links.is_empty() {
337            // Use LineIndex for correct position calculation across all line ending types
338            let line_index = &ctx.line_index;
339
340            // Create element cache once for all links
341            let element_cache = ElementCache::new(content);
342
343            // Pre-collect lines to avoid repeated line iteration
344            let lines: Vec<&str> = content.lines().collect();
345
346            // Track which lines we've already processed to avoid duplicates
347            // (ctx.links may have multiple entries for the same line, especially with malformed markdown)
348            let mut processed_lines = std::collections::HashSet::new();
349
350            for link in &ctx.links {
351                let line_idx = link.line - 1;
352                if line_idx >= lines.len() {
353                    continue;
354                }
355
356                // Skip lines inside PyMdown blocks (MkDocs flavor)
357                // This must be checked BEFORE processed_lines to skip the entire line
358                if ctx.line_info(link.line).is_some_and(|info| info.in_pymdown_block) {
359                    continue;
360                }
361
362                // Skip if we've already processed this line
363                if !processed_lines.insert(line_idx) {
364                    continue;
365                }
366
367                let line = lines[line_idx];
368
369                // Quick check for link pattern in this line
370                if !line.contains("](") {
371                    continue;
372                }
373
374                // Find all links in this line using optimized regex
375                for link_match in LINK_START_REGEX.find_iter(line) {
376                    let start_pos = link_match.start();
377                    let end_pos = link_match.end();
378
379                    // Calculate absolute position using LineIndex
380                    let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
381                    let absolute_start_pos = line_start_byte + start_pos;
382
383                    // Skip if this link is in a code span
384                    if element_cache.is_in_code_span(absolute_start_pos) {
385                        continue;
386                    }
387
388                    // Skip if this link is in a math span (LaTeX $...$ or $$...$$)
389                    if ctx.is_in_math_span(absolute_start_pos) {
390                        continue;
391                    }
392
393                    // Find the URL part after the link text
394                    // Try angle-bracket regex first (handles URLs with parens like `<path/(with)/parens.md>`)
395                    // Then fall back to normal URL regex
396                    let caps_and_url = URL_EXTRACT_ANGLE_BRACKET_REGEX
397                        .captures_at(line, end_pos - 1)
398                        .and_then(|caps| caps.get(1).map(|g| (caps, g)))
399                        .or_else(|| {
400                            URL_EXTRACT_REGEX
401                                .captures_at(line, end_pos - 1)
402                                .and_then(|caps| caps.get(1).map(|g| (caps, g)))
403                        });
404
405                    if let Some((_caps, url_group)) = caps_and_url {
406                        let url = url_group.as_str().trim();
407
408                        // Skip empty URLs
409                        if url.is_empty() {
410                            continue;
411                        }
412
413                        // Skip rustdoc intra-doc links (backtick-wrapped URLs)
414                        // These are Rust API references, not file paths
415                        // Example: [`f32::is_subnormal`], [`Vec::push`]
416                        if url.starts_with('`') && url.ends_with('`') {
417                            continue;
418                        }
419
420                        // Skip external URLs, absolute paths, and fragment-only links
421                        if self.is_external_url(url) || self.is_fragment_only_link(url) {
422                            continue;
423                        }
424
425                        // Strip query parameters and fragments before checking file existence
426                        let file_path = Self::strip_query_and_fragment(url);
427
428                        // URL-decode the path to handle percent-encoded characters
429                        let decoded_path = Self::url_decode(file_path);
430
431                        // Resolve the relative link against the base path
432                        let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
433
434                        // Check if the file exists, also trying markdown extensions for extensionless links
435                        if file_exists_or_markdown_extension(&resolved_path) {
436                            continue; // File exists, no warning needed
437                        }
438
439                        // For .html/.htm links, check if a corresponding markdown source exists
440                        let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
441                            && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
442                            && let (Some(stem), Some(parent)) = (
443                                resolved_path.file_stem().and_then(|s| s.to_str()),
444                                resolved_path.parent(),
445                            ) {
446                            MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
447                                let source_path = parent.join(format!("{stem}{md_ext}"));
448                                file_exists_with_cache(&source_path)
449                            })
450                        } else {
451                            false
452                        };
453
454                        if has_md_source {
455                            continue; // Markdown source exists, link is valid
456                        }
457
458                        // File doesn't exist and no source file found
459                        // Use actual URL position from regex capture group
460                        // Note: capture group positions are absolute within the line string
461                        let url_start = url_group.start();
462                        let url_end = url_group.end();
463
464                        warnings.push(LintWarning {
465                            rule_name: Some(self.name().to_string()),
466                            line: link.line,
467                            column: url_start + 1, // 1-indexed
468                            end_line: link.line,
469                            end_column: url_end + 1, // 1-indexed
470                            message: format!("Relative link '{url}' does not exist"),
471                            severity: Severity::Error,
472                            fix: None,
473                        });
474                    }
475                }
476            }
477        }
478
479        // Also process images - they have URLs already parsed
480        for image in &ctx.images {
481            // Skip images inside PyMdown blocks (MkDocs flavor)
482            if ctx.line_info(image.line).is_some_and(|info| info.in_pymdown_block) {
483                continue;
484            }
485
486            let url = image.url.as_ref();
487
488            // Skip empty URLs
489            if url.is_empty() {
490                continue;
491            }
492
493            // Skip external URLs, absolute paths, and fragment-only links
494            if self.is_external_url(url) || self.is_fragment_only_link(url) {
495                continue;
496            }
497
498            // Strip query parameters and fragments before checking file existence
499            let file_path = Self::strip_query_and_fragment(url);
500
501            // URL-decode the path to handle percent-encoded characters
502            let decoded_path = Self::url_decode(file_path);
503
504            // Resolve the relative link against the base path
505            let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
506
507            // Check if the file exists, also trying markdown extensions for extensionless links
508            if file_exists_or_markdown_extension(&resolved_path) {
509                continue; // File exists, no warning needed
510            }
511
512            // For .html/.htm links, check if a corresponding markdown source exists
513            let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
514                && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
515                && let (Some(stem), Some(parent)) = (
516                    resolved_path.file_stem().and_then(|s| s.to_str()),
517                    resolved_path.parent(),
518                ) {
519                MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
520                    let source_path = parent.join(format!("{stem}{md_ext}"));
521                    file_exists_with_cache(&source_path)
522                })
523            } else {
524                false
525            };
526
527            if has_md_source {
528                continue; // Markdown source exists, link is valid
529            }
530
531            // File doesn't exist and no source file found
532            // Images already have correct position from parser
533            warnings.push(LintWarning {
534                rule_name: Some(self.name().to_string()),
535                line: image.line,
536                column: image.start_col + 1,
537                end_line: image.line,
538                end_column: image.start_col + 1 + url.len(),
539                message: format!("Relative link '{url}' does not exist"),
540                severity: Severity::Error,
541                fix: None,
542            });
543        }
544
545        // Also process reference definitions: [ref]: ./path.md
546        for ref_def in &ctx.reference_defs {
547            let url = &ref_def.url;
548
549            // Skip empty URLs
550            if url.is_empty() {
551                continue;
552            }
553
554            // Skip external URLs, absolute paths, and fragment-only links
555            if self.is_external_url(url) || self.is_fragment_only_link(url) {
556                continue;
557            }
558
559            // Strip query parameters and fragments before checking file existence
560            let file_path = Self::strip_query_and_fragment(url);
561
562            // URL-decode the path to handle percent-encoded characters
563            let decoded_path = Self::url_decode(file_path);
564
565            // Resolve the relative link against the base path
566            let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
567
568            // Check if the file exists, also trying markdown extensions for extensionless links
569            if file_exists_or_markdown_extension(&resolved_path) {
570                continue; // File exists, no warning needed
571            }
572
573            // For .html/.htm links, check if a corresponding markdown source exists
574            let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
575                && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
576                && let (Some(stem), Some(parent)) = (
577                    resolved_path.file_stem().and_then(|s| s.to_str()),
578                    resolved_path.parent(),
579                ) {
580                MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
581                    let source_path = parent.join(format!("{stem}{md_ext}"));
582                    file_exists_with_cache(&source_path)
583                })
584            } else {
585                false
586            };
587
588            if has_md_source {
589                continue; // Markdown source exists, link is valid
590            }
591
592            // File doesn't exist and no source file found
593            // Calculate column position: find URL within the line
594            let line_idx = ref_def.line - 1;
595            let column = content.lines().nth(line_idx).map_or(1, |line_content| {
596                // Find URL position in line (after ]: )
597                line_content.find(url.as_str()).map_or(1, |url_pos| url_pos + 1)
598            });
599
600            warnings.push(LintWarning {
601                rule_name: Some(self.name().to_string()),
602                line: ref_def.line,
603                column,
604                end_line: ref_def.line,
605                end_column: column + url.len(),
606                message: format!("Relative link '{url}' does not exist"),
607                severity: Severity::Error,
608                fix: None,
609            });
610        }
611
612        Ok(warnings)
613    }
614
    /// No automatic fix is offered for missing link targets: the document
    /// content is returned unchanged.
    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
        Ok(ctx.content.to_string())
    }
618
    /// Expose the rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
622
    /// Default configuration section for this rule; always `None`.
    fn default_config_section(&self) -> Option<(String, toml::Value)> {
        // No configurable options for this rule
        None
    }
627
628    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
629    where
630        Self: Sized,
631    {
632        let rule_config = crate::rule_config_serde::load_rule_config::<MD057Config>(config);
633        Box::new(Self::from_config_struct(rule_config))
634    }
635
    /// This rule takes part in cross-file analysis at workspace scope.
    fn cross_file_scope(&self) -> CrossFileScope {
        CrossFileScope::Workspace
    }
639
640    fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, index: &mut FileIndex) {
641        // Use the shared utility for cross-file link extraction
642        // This ensures consistent position tracking between CLI and LSP
643        for link in extract_cross_file_links(ctx) {
644            index.add_cross_file_link(link);
645        }
646    }
647
648    fn cross_file_check(
649        &self,
650        file_path: &Path,
651        file_index: &FileIndex,
652        workspace_index: &crate::workspace_index::WorkspaceIndex,
653    ) -> LintResult {
654        let mut warnings = Vec::new();
655
656        // Get the directory containing this file for resolving relative links
657        let file_dir = file_path.parent();
658
659        for cross_link in &file_index.cross_file_links {
660            // URL-decode the path for filesystem operations
661            // The stored path is URL-encoded (e.g., "%F0%9F%91%A4" for emoji 👤)
662            let decoded_target = Self::url_decode(&cross_link.target_path);
663
664            // Skip absolute/protocol-relative paths (web paths, not filesystem paths)
665            if decoded_target.starts_with('/') {
666                continue;
667            }
668
669            // Resolve relative path
670            let target_path = if let Some(dir) = file_dir {
671                dir.join(&decoded_target)
672            } else {
673                Path::new(&decoded_target).to_path_buf()
674            };
675
676            // Normalize the path (handle .., ., etc.)
677            let target_path = normalize_path(&target_path);
678
679            // Check if the target file exists, also trying markdown extensions for extensionless links
680            let file_exists =
681                workspace_index.contains_file(&target_path) || file_exists_or_markdown_extension(&target_path);
682
683            if !file_exists {
684                // For .html/.htm links, check if a corresponding markdown source exists
685                // This handles doc sites (mdBook, etc.) where .md is compiled to .html
686                let has_md_source = if let Some(ext) = target_path.extension().and_then(|e| e.to_str())
687                    && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
688                    && let (Some(stem), Some(parent)) =
689                        (target_path.file_stem().and_then(|s| s.to_str()), target_path.parent())
690                {
691                    MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
692                        let source_path = parent.join(format!("{stem}{md_ext}"));
693                        workspace_index.contains_file(&source_path) || source_path.exists()
694                    })
695                } else {
696                    false
697                };
698
699                if !has_md_source {
700                    warnings.push(LintWarning {
701                        rule_name: Some(self.name().to_string()),
702                        line: cross_link.line,
703                        column: cross_link.column,
704                        end_line: cross_link.line,
705                        end_column: cross_link.column + cross_link.target_path.len(),
706                        message: format!("Relative link '{}' does not exist", cross_link.target_path),
707                        severity: Severity::Error,
708                        fix: None,
709                    });
710                }
711            }
712        }
713
714        Ok(warnings)
715    }
716}
717
/// Lexically normalize a path by resolving `.` and `..` components.
///
/// No filesystem access is performed, so symlinks are not followed.
/// - `.` components are dropped.
/// - `..` removes the preceding normal component (`a/b/../c` -> `a/c`).
/// - `..` directly under the root is dropped, because the root has no parent
///   (`/../a` -> `/a`); the root itself is never removed.
/// - `..` with nothing left to remove is kept, so a relative path that climbs
///   above its starting point keeps its meaning (`a/../../b` -> `../b`).
fn normalize_path(path: &Path) -> PathBuf {
    let mut components: Vec<std::path::Component<'_>> = Vec::new();

    for component in path.components() {
        match component {
            std::path::Component::CurDir => {
                // Skip current directory markers
            }
            std::path::Component::ParentDir => match components.last() {
                // Go up one level by cancelling the previous normal component
                Some(std::path::Component::Normal(_)) => {
                    components.pop();
                }
                // Cannot go above the filesystem root
                Some(std::path::Component::RootDir) => {}
                // Nothing to cancel (empty, leading `..`, or a bare drive prefix):
                // keep the `..` so the path still points outside its start
                _ => components.push(component),
            },
            other => components.push(other),
        }
    }

    components.iter().collect()
}
741
742#[cfg(test)]
743mod tests {
744    use super::*;
745    use crate::workspace_index::CrossFileLinkIndex;
746    use std::fs::File;
747    use std::io::Write;
748    use tempfile::tempdir;
749
750    #[test]
751    fn test_strip_query_and_fragment() {
752        // Test query parameter stripping
753        assert_eq!(
754            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true"),
755            "file.png"
756        );
757        assert_eq!(
758            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true&version=1"),
759            "file.png"
760        );
761        assert_eq!(
762            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?"),
763            "file.png"
764        );
765
766        // Test fragment stripping
767        assert_eq!(
768            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section"),
769            "file.md"
770        );
771        assert_eq!(
772            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#"),
773            "file.md"
774        );
775
776        // Test both query and fragment (query comes first, per RFC 3986)
777        assert_eq!(
778            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md?raw=true#section"),
779            "file.md"
780        );
781
782        // Test no query or fragment
783        assert_eq!(
784            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png"),
785            "file.png"
786        );
787
788        // Test with path
789        assert_eq!(
790            MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true"),
791            "path/to/image.png"
792        );
793        assert_eq!(
794            MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true#anchor"),
795            "path/to/image.png"
796        );
797
798        // Edge case: fragment before query (non-standard but possible)
799        assert_eq!(
800            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section?query"),
801            "file.md"
802        );
803    }
804
805    #[test]
806    fn test_url_decode() {
807        // Simple space encoding
808        assert_eq!(
809            MD057ExistingRelativeLinks::url_decode("penguin%20with%20space.jpg"),
810            "penguin with space.jpg"
811        );
812
813        // Path with encoded spaces
814        assert_eq!(
815            MD057ExistingRelativeLinks::url_decode("assets/my%20file%20name.png"),
816            "assets/my file name.png"
817        );
818
819        // Multiple encoded characters
820        assert_eq!(
821            MD057ExistingRelativeLinks::url_decode("hello%20world%21.md"),
822            "hello world!.md"
823        );
824
825        // Lowercase hex
826        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2f%2e%2e"), "/..");
827
828        // Uppercase hex
829        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2F%2E%2E"), "/..");
830
831        // Mixed case hex
832        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2f%2E%2e"), "/..");
833
834        // No encoding - return as-is
835        assert_eq!(
836            MD057ExistingRelativeLinks::url_decode("normal-file.md"),
837            "normal-file.md"
838        );
839
840        // Incomplete percent encoding - leave as-is
841        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%2.txt"), "file%2.txt");
842
843        // Percent at end - leave as-is
844        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%"), "file%");
845
846        // Invalid hex digits - leave as-is
847        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%GG.txt"), "file%GG.txt");
848
849        // Plus sign (should NOT be decoded - that's form encoding, not URL encoding)
850        assert_eq!(MD057ExistingRelativeLinks::url_decode("file+name.txt"), "file+name.txt");
851
852        // Empty string
853        assert_eq!(MD057ExistingRelativeLinks::url_decode(""), "");
854
855        // UTF-8 multi-byte characters (é = C3 A9 in UTF-8)
856        assert_eq!(MD057ExistingRelativeLinks::url_decode("caf%C3%A9.md"), "café.md");
857
858        // Multiple consecutive encoded characters
859        assert_eq!(MD057ExistingRelativeLinks::url_decode("%20%20%20"), "   ");
860
861        // Encoded path separators
862        assert_eq!(
863            MD057ExistingRelativeLinks::url_decode("path%2Fto%2Ffile.md"),
864            "path/to/file.md"
865        );
866
867        // Mixed encoded and non-encoded
868        assert_eq!(
869            MD057ExistingRelativeLinks::url_decode("hello%20world/foo%20bar.md"),
870            "hello world/foo bar.md"
871        );
872
873        // Special characters that are commonly encoded
874        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%5B1%5D.md"), "file[1].md");
875
876        // Percent at position that looks like encoding but isn't valid
877        assert_eq!(MD057ExistingRelativeLinks::url_decode("100%pure.md"), "100%pure.md");
878    }
879
880    #[test]
881    fn test_url_encoded_filenames() {
882        // Create a temporary directory for test files
883        let temp_dir = tempdir().unwrap();
884        let base_path = temp_dir.path();
885
886        // Create a file with spaces in the name
887        let file_with_spaces = base_path.join("penguin with space.jpg");
888        File::create(&file_with_spaces)
889            .unwrap()
890            .write_all(b"image data")
891            .unwrap();
892
893        // Create a subdirectory with spaces
894        let subdir = base_path.join("my images");
895        std::fs::create_dir(&subdir).unwrap();
896        let nested_file = subdir.join("photo 1.png");
897        File::create(&nested_file).unwrap().write_all(b"photo data").unwrap();
898
899        // Test content with URL-encoded links
900        let content = r#"
901# Test Document with URL-Encoded Links
902
903![Penguin](penguin%20with%20space.jpg)
904![Photo](my%20images/photo%201.png)
905![Missing](missing%20file.jpg)
906"#;
907
908        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
909
910        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
911        let result = rule.check(&ctx).unwrap();
912
913        // Should only have one warning for the missing file
914        assert_eq!(
915            result.len(),
916            1,
917            "Should only warn about missing%20file.jpg. Got: {result:?}"
918        );
919        assert!(
920            result[0].message.contains("missing%20file.jpg"),
921            "Warning should mention the URL-encoded filename"
922        );
923    }
924
925    #[test]
926    fn test_external_urls() {
927        let rule = MD057ExistingRelativeLinks::new();
928
929        // Common web protocols
930        assert!(rule.is_external_url("https://example.com"));
931        assert!(rule.is_external_url("http://example.com"));
932        assert!(rule.is_external_url("ftp://example.com"));
933        assert!(rule.is_external_url("www.example.com"));
934        assert!(rule.is_external_url("example.com"));
935
936        // Special URI schemes
937        assert!(rule.is_external_url("file:///path/to/file"));
938        assert!(rule.is_external_url("smb://server/share"));
939        assert!(rule.is_external_url("macappstores://apps.apple.com/"));
940        assert!(rule.is_external_url("mailto:user@example.com"));
941        assert!(rule.is_external_url("tel:+1234567890"));
942        assert!(rule.is_external_url("data:text/plain;base64,SGVsbG8="));
943        assert!(rule.is_external_url("javascript:void(0)"));
944        assert!(rule.is_external_url("ssh://git@github.com/repo"));
945        assert!(rule.is_external_url("git://github.com/repo.git"));
946
947        // Email addresses without mailto: protocol
948        // These are clearly not file links and should be skipped
949        assert!(rule.is_external_url("user@example.com"));
950        assert!(rule.is_external_url("steering@kubernetes.io"));
951        assert!(rule.is_external_url("john.doe+filter@company.co.uk"));
952        assert!(rule.is_external_url("user_name@sub.domain.com"));
953        assert!(rule.is_external_url("firstname.lastname+tag@really.long.domain.example.org"));
954
955        // Template variables should be skipped (not checked as relative links)
956        assert!(rule.is_external_url("{{URL}}")); // Handlebars/Mustache
957        assert!(rule.is_external_url("{{#URL}}")); // Handlebars block helper
958        assert!(rule.is_external_url("{{> partial}}")); // Handlebars partial
959        assert!(rule.is_external_url("{{ variable }}")); // Mustache with spaces
960        assert!(rule.is_external_url("{{% include %}}")); // Jinja2/Hugo shortcode
961        assert!(rule.is_external_url("{{")); // Even partial matches (regex edge case)
962
963        // Absolute web URL paths should be skipped (not validated)
964        // These are typically routes for published documentation sites
965        assert!(rule.is_external_url("/api/v1/users"));
966        assert!(rule.is_external_url("/blog/2024/release.html"));
967        assert!(rule.is_external_url("/react/hooks/use-state.html"));
968        assert!(rule.is_external_url("/pkg/runtime"));
969        assert!(rule.is_external_url("/doc/go1compat"));
970        assert!(rule.is_external_url("/index.html"));
971        assert!(rule.is_external_url("/assets/logo.png"));
972
973        // Framework path aliases should be skipped (resolved by build tools)
974        // Tilde prefix (common in Vite, Nuxt, Astro for project root)
975        assert!(rule.is_external_url("~/assets/image.png"));
976        assert!(rule.is_external_url("~/components/Button.vue"));
977        assert!(rule.is_external_url("~assets/logo.svg")); // Nuxt style without /
978
979        // @ prefix (common in Vue, webpack, Vite aliases)
980        assert!(rule.is_external_url("@/components/Header.vue"));
981        assert!(rule.is_external_url("@images/photo.jpg"));
982        assert!(rule.is_external_url("@assets/styles.css"));
983
984        // Relative paths should NOT be external (should be validated)
985        assert!(!rule.is_external_url("./relative/path.md"));
986        assert!(!rule.is_external_url("relative/path.md"));
987        assert!(!rule.is_external_url("../parent/path.md"));
988    }
989
990    #[test]
991    fn test_framework_path_aliases() {
992        // Create a temporary directory for test files
993        let temp_dir = tempdir().unwrap();
994        let base_path = temp_dir.path();
995
996        // Test content with framework path aliases (should all be skipped)
997        let content = r#"
998# Framework Path Aliases
999
1000![Image 1](~/assets/penguin.jpg)
1001![Image 2](~assets/logo.svg)
1002![Image 3](@images/photo.jpg)
1003![Image 4](@/components/icon.svg)
1004[Link](@/pages/about.md)
1005
1006This is a [real missing link](missing.md) that should be flagged.
1007"#;
1008
1009        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1010
1011        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1012        let result = rule.check(&ctx).unwrap();
1013
1014        // Should only have one warning for the real missing link
1015        assert_eq!(
1016            result.len(),
1017            1,
1018            "Should only warn about missing.md, not framework aliases. Got: {result:?}"
1019        );
1020        assert!(
1021            result[0].message.contains("missing.md"),
1022            "Warning should be for missing.md"
1023        );
1024    }
1025
1026    #[test]
1027    fn test_url_decode_security_path_traversal() {
1028        // Ensure URL decoding doesn't enable path traversal attacks
1029        // The decoded path is still validated against the base path
1030        let temp_dir = tempdir().unwrap();
1031        let base_path = temp_dir.path();
1032
1033        // Create a file in the temp directory
1034        let file_in_base = base_path.join("safe.md");
1035        File::create(&file_in_base).unwrap().write_all(b"# Safe").unwrap();
1036
1037        // Test with encoded path traversal attempt
1038        // Use a path that definitely won't exist on any platform (not /etc/passwd which exists on Linux)
1039        // %2F = /, so ..%2F..%2Fnonexistent%2Ffile = ../../nonexistent/file
1040        // %252F = %2F (double encoded), so ..%252F..%252F = ..%2F..%2F (literal, won't decode to ..)
1041        let content = r#"
1042[Traversal attempt](..%2F..%2Fnonexistent_dir_12345%2Fmissing.md)
1043[Double encoded](..%252F..%252Fnonexistent%252Ffile.md)
1044[Safe link](safe.md)
1045"#;
1046
1047        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1048
1049        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1050        let result = rule.check(&ctx).unwrap();
1051
1052        // The traversal attempts should still be flagged as missing
1053        // (they don't exist relative to base_path after decoding)
1054        assert_eq!(
1055            result.len(),
1056            2,
1057            "Should have warnings for traversal attempts. Got: {result:?}"
1058        );
1059    }
1060
1061    #[test]
1062    fn test_url_encoded_utf8_filenames() {
1063        // Test with actual UTF-8 encoded filenames
1064        let temp_dir = tempdir().unwrap();
1065        let base_path = temp_dir.path();
1066
1067        // Create files with unicode names
1068        let cafe_file = base_path.join("café.md");
1069        File::create(&cafe_file).unwrap().write_all(b"# Cafe").unwrap();
1070
1071        let content = r#"
1072[Café link](caf%C3%A9.md)
1073[Missing unicode](r%C3%A9sum%C3%A9.md)
1074"#;
1075
1076        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1077
1078        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1079        let result = rule.check(&ctx).unwrap();
1080
1081        // Should only warn about the missing file
1082        assert_eq!(
1083            result.len(),
1084            1,
1085            "Should only warn about missing résumé.md. Got: {result:?}"
1086        );
1087        assert!(
1088            result[0].message.contains("r%C3%A9sum%C3%A9.md"),
1089            "Warning should mention the URL-encoded filename"
1090        );
1091    }
1092
1093    #[test]
1094    fn test_url_encoded_emoji_filenames() {
1095        // URL-encoded emoji paths should be correctly resolved
1096        // 👤 = U+1F464 = F0 9F 91 A4 in UTF-8
1097        let temp_dir = tempdir().unwrap();
1098        let base_path = temp_dir.path();
1099
1100        // Create directory with emoji in name: 👤 Personal
1101        let emoji_dir = base_path.join("👤 Personal");
1102        std::fs::create_dir(&emoji_dir).unwrap();
1103
1104        // Create file in that directory: TV Shows.md
1105        let file_path = emoji_dir.join("TV Shows.md");
1106        File::create(&file_path)
1107            .unwrap()
1108            .write_all(b"# TV Shows\n\nContent here.")
1109            .unwrap();
1110
1111        // Test content with URL-encoded emoji link
1112        // %F0%9F%91%A4 = 👤, %20 = space
1113        let content = r#"
1114# Test Document
1115
1116[TV Shows](./%F0%9F%91%A4%20Personal/TV%20Shows.md)
1117[Missing](./%F0%9F%91%A4%20Personal/Missing.md)
1118"#;
1119
1120        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1121
1122        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1123        let result = rule.check(&ctx).unwrap();
1124
1125        // Should only warn about the missing file, not the valid emoji path
1126        assert_eq!(result.len(), 1, "Should only warn about missing file. Got: {result:?}");
1127        assert!(
1128            result[0].message.contains("Missing.md"),
1129            "Warning should be for Missing.md, got: {}",
1130            result[0].message
1131        );
1132    }
1133
1134    #[test]
1135    fn test_no_warnings_without_base_path() {
1136        let rule = MD057ExistingRelativeLinks::new();
1137        let content = "[Link](missing.md)";
1138
1139        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1140        let result = rule.check(&ctx).unwrap();
1141        assert!(result.is_empty(), "Should have no warnings without base path");
1142    }
1143
1144    #[test]
1145    fn test_existing_and_missing_links() {
1146        // Create a temporary directory for test files
1147        let temp_dir = tempdir().unwrap();
1148        let base_path = temp_dir.path();
1149
1150        // Create an existing file
1151        let exists_path = base_path.join("exists.md");
1152        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
1153
1154        // Verify the file exists
1155        assert!(exists_path.exists(), "exists.md should exist for this test");
1156
1157        // Create test content with both existing and missing links
1158        let content = r#"
1159# Test Document
1160
1161[Valid Link](exists.md)
1162[Invalid Link](missing.md)
1163[External Link](https://example.com)
1164[Media Link](image.jpg)
1165        "#;
1166
1167        // Initialize rule with the base path (default: check all files including media)
1168        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1169
1170        // Test the rule
1171        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1172        let result = rule.check(&ctx).unwrap();
1173
1174        // Should have two warnings: missing.md and image.jpg (both don't exist)
1175        assert_eq!(result.len(), 2);
1176        let messages: Vec<_> = result.iter().map(|w| w.message.as_str()).collect();
1177        assert!(messages.iter().any(|m| m.contains("missing.md")));
1178        assert!(messages.iter().any(|m| m.contains("image.jpg")));
1179    }
1180
1181    #[test]
1182    fn test_angle_bracket_links() {
1183        // Create a temporary directory for test files
1184        let temp_dir = tempdir().unwrap();
1185        let base_path = temp_dir.path();
1186
1187        // Create an existing file
1188        let exists_path = base_path.join("exists.md");
1189        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
1190
1191        // Create test content with angle bracket links
1192        let content = r#"
1193# Test Document
1194
1195[Valid Link](<exists.md>)
1196[Invalid Link](<missing.md>)
1197[External Link](<https://example.com>)
1198    "#;
1199
1200        // Test with default settings
1201        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1202
1203        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1204        let result = rule.check(&ctx).unwrap();
1205
1206        // Should have one warning for missing.md
1207        assert_eq!(result.len(), 1, "Should have exactly one warning");
1208        assert!(
1209            result[0].message.contains("missing.md"),
1210            "Warning should mention missing.md"
1211        );
1212    }
1213
1214    #[test]
1215    fn test_angle_bracket_links_with_parens() {
1216        // Create a temporary directory for test files
1217        let temp_dir = tempdir().unwrap();
1218        let base_path = temp_dir.path();
1219
1220        // Create directory structure with parentheses in path
1221        let app_dir = base_path.join("app");
1222        std::fs::create_dir(&app_dir).unwrap();
1223        let upload_dir = app_dir.join("(upload)");
1224        std::fs::create_dir(&upload_dir).unwrap();
1225        let page_file = upload_dir.join("page.tsx");
1226        File::create(&page_file)
1227            .unwrap()
1228            .write_all(b"export default function Page() {}")
1229            .unwrap();
1230
1231        // Create test content with angle bracket links containing parentheses
1232        let content = r#"
1233# Test Document with Paths Containing Parens
1234
1235[Upload Page](<app/(upload)/page.tsx>)
1236[Unix pipe](<https://en.wikipedia.org/wiki/Pipeline_(Unix)>)
1237[Missing](<app/(missing)/file.md>)
1238"#;
1239
1240        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1241
1242        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1243        let result = rule.check(&ctx).unwrap();
1244
1245        // Should only have one warning for the missing file
1246        assert_eq!(
1247            result.len(),
1248            1,
1249            "Should have exactly one warning for missing file. Got: {result:?}"
1250        );
1251        assert!(
1252            result[0].message.contains("app/(missing)/file.md"),
1253            "Warning should mention app/(missing)/file.md"
1254        );
1255    }
1256
1257    #[test]
1258    fn test_all_file_types_checked() {
1259        // Create a temporary directory for test files
1260        let temp_dir = tempdir().unwrap();
1261        let base_path = temp_dir.path();
1262
1263        // Create a test with various file types - all should be checked
1264        let content = r#"
1265[Image Link](image.jpg)
1266[Video Link](video.mp4)
1267[Markdown Link](document.md)
1268[PDF Link](file.pdf)
1269"#;
1270
1271        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1272
1273        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1274        let result = rule.check(&ctx).unwrap();
1275
1276        // Should warn about all missing files regardless of extension
1277        assert_eq!(result.len(), 4, "Should have warnings for all missing files");
1278    }
1279
1280    #[test]
1281    fn test_code_span_detection() {
1282        let rule = MD057ExistingRelativeLinks::new();
1283
1284        // Create a temporary directory for test files
1285        let temp_dir = tempdir().unwrap();
1286        let base_path = temp_dir.path();
1287
1288        let rule = rule.with_path(base_path);
1289
1290        // Test with document structure
1291        let content = "This is a [link](nonexistent.md) and `[not a link](not-checked.md)` in code.";
1292
1293        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1294        let result = rule.check(&ctx).unwrap();
1295
1296        // Should only find the real link, not the one in code
1297        assert_eq!(result.len(), 1, "Should only flag the real link");
1298        assert!(result[0].message.contains("nonexistent.md"));
1299    }
1300
1301    #[test]
1302    fn test_inline_code_spans() {
1303        // Create a temporary directory for test files
1304        let temp_dir = tempdir().unwrap();
1305        let base_path = temp_dir.path();
1306
1307        // Create test content with links in inline code spans
1308        let content = r#"
1309# Test Document
1310
1311This is a normal link: [Link](missing.md)
1312
1313This is a code span with a link: `[Link](another-missing.md)`
1314
1315Some more text with `inline code [Link](yet-another-missing.md) embedded`.
1316
1317    "#;
1318
1319        // Initialize rule with the base path
1320        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1321
1322        // Test the rule
1323        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1324        let result = rule.check(&ctx).unwrap();
1325
1326        // Should only have warning for the normal link, not for links in code spans
1327        assert_eq!(result.len(), 1, "Should have exactly one warning");
1328        assert!(
1329            result[0].message.contains("missing.md"),
1330            "Warning should be for missing.md"
1331        );
1332        assert!(
1333            !result.iter().any(|w| w.message.contains("another-missing.md")),
1334            "Should not warn about link in code span"
1335        );
1336        assert!(
1337            !result.iter().any(|w| w.message.contains("yet-another-missing.md")),
1338            "Should not warn about link in inline code"
1339        );
1340    }
1341
1342    #[test]
1343    fn test_extensionless_link_resolution() {
1344        // Create a temporary directory for test files
1345        let temp_dir = tempdir().unwrap();
1346        let base_path = temp_dir.path();
1347
1348        // Create a markdown file WITHOUT specifying .md extension in the link
1349        let page_path = base_path.join("page.md");
1350        File::create(&page_path).unwrap().write_all(b"# Page").unwrap();
1351
1352        // Test content with extensionless link that should resolve to page.md
1353        let content = r#"
1354# Test Document
1355
1356[Link without extension](page)
1357[Link with extension](page.md)
1358[Missing link](nonexistent)
1359"#;
1360
1361        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1362
1363        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1364        let result = rule.check(&ctx).unwrap();
1365
1366        // Should only have warning for nonexistent link
1367        // Both "page" and "page.md" should resolve to the same file
1368        assert_eq!(result.len(), 1, "Should only warn about nonexistent link");
1369        assert!(
1370            result[0].message.contains("nonexistent"),
1371            "Warning should be for 'nonexistent' not 'page'"
1372        );
1373    }
1374
1375    // Cross-file validation tests
1376    #[test]
1377    fn test_cross_file_scope() {
1378        let rule = MD057ExistingRelativeLinks::new();
1379        assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
1380    }
1381
1382    #[test]
1383    fn test_contribute_to_index_extracts_markdown_links() {
1384        let rule = MD057ExistingRelativeLinks::new();
1385        let content = r#"
1386# Document
1387
1388[Link to docs](./docs/guide.md)
1389[Link with fragment](./other.md#section)
1390[External link](https://example.com)
1391[Image link](image.png)
1392[Media file](video.mp4)
1393"#;
1394
1395        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1396        let mut index = FileIndex::new();
1397        rule.contribute_to_index(&ctx, &mut index);
1398
1399        // Should only index markdown file links
1400        assert_eq!(index.cross_file_links.len(), 2);
1401
1402        // Check first link
1403        assert_eq!(index.cross_file_links[0].target_path, "./docs/guide.md");
1404        assert_eq!(index.cross_file_links[0].fragment, "");
1405
1406        // Check second link (with fragment)
1407        assert_eq!(index.cross_file_links[1].target_path, "./other.md");
1408        assert_eq!(index.cross_file_links[1].fragment, "section");
1409    }
1410
1411    #[test]
1412    fn test_contribute_to_index_skips_external_and_anchors() {
1413        let rule = MD057ExistingRelativeLinks::new();
1414        let content = r#"
1415# Document
1416
1417[External](https://example.com)
1418[Another external](http://example.org)
1419[Fragment only](#section)
1420[FTP link](ftp://files.example.com)
1421[Mail link](mailto:test@example.com)
1422[WWW link](www.example.com)
1423"#;
1424
1425        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1426        let mut index = FileIndex::new();
1427        rule.contribute_to_index(&ctx, &mut index);
1428
1429        // Should not index any of these
1430        assert_eq!(index.cross_file_links.len(), 0);
1431    }
1432
1433    #[test]
1434    fn test_cross_file_check_valid_link() {
1435        use crate::workspace_index::WorkspaceIndex;
1436
1437        let rule = MD057ExistingRelativeLinks::new();
1438
1439        // Create a workspace index with the target file
1440        let mut workspace_index = WorkspaceIndex::new();
1441        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());
1442
1443        // Create file index with a link to an existing file
1444        let mut file_index = FileIndex::new();
1445        file_index.add_cross_file_link(CrossFileLinkIndex {
1446            target_path: "guide.md".to_string(),
1447            fragment: "".to_string(),
1448            line: 5,
1449            column: 1,
1450        });
1451
1452        // Run cross-file check from docs/index.md
1453        let warnings = rule
1454            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1455            .unwrap();
1456
1457        // Should have no warnings - file exists
1458        assert!(warnings.is_empty());
1459    }
1460
1461    #[test]
1462    fn test_cross_file_check_missing_link() {
1463        use crate::workspace_index::WorkspaceIndex;
1464
1465        let rule = MD057ExistingRelativeLinks::new();
1466
1467        // Create an empty workspace index
1468        let workspace_index = WorkspaceIndex::new();
1469
1470        // Create file index with a link to a missing file
1471        let mut file_index = FileIndex::new();
1472        file_index.add_cross_file_link(CrossFileLinkIndex {
1473            target_path: "missing.md".to_string(),
1474            fragment: "".to_string(),
1475            line: 5,
1476            column: 1,
1477        });
1478
1479        // Run cross-file check
1480        let warnings = rule
1481            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1482            .unwrap();
1483
1484        // Should have one warning for the missing file
1485        assert_eq!(warnings.len(), 1);
1486        assert!(warnings[0].message.contains("missing.md"));
1487        assert!(warnings[0].message.contains("does not exist"));
1488    }
1489
1490    #[test]
1491    fn test_cross_file_check_parent_path() {
1492        use crate::workspace_index::WorkspaceIndex;
1493
1494        let rule = MD057ExistingRelativeLinks::new();
1495
1496        // Create a workspace index with the target file at the root
1497        let mut workspace_index = WorkspaceIndex::new();
1498        workspace_index.insert_file(PathBuf::from("readme.md"), FileIndex::new());
1499
1500        // Create file index with a parent path link
1501        let mut file_index = FileIndex::new();
1502        file_index.add_cross_file_link(CrossFileLinkIndex {
1503            target_path: "../readme.md".to_string(),
1504            fragment: "".to_string(),
1505            line: 5,
1506            column: 1,
1507        });
1508
1509        // Run cross-file check from docs/guide.md
1510        let warnings = rule
1511            .cross_file_check(Path::new("docs/guide.md"), &file_index, &workspace_index)
1512            .unwrap();
1513
1514        // Should have no warnings - file exists at normalized path
1515        assert!(warnings.is_empty());
1516    }
1517
1518    #[test]
1519    fn test_cross_file_check_html_link_with_md_source() {
1520        // Test that .html links are accepted when corresponding .md source exists
1521        // This supports mdBook and similar doc generators that compile .md to .html
1522        use crate::workspace_index::WorkspaceIndex;
1523
1524        let rule = MD057ExistingRelativeLinks::new();
1525
1526        // Create a workspace index with the .md source file
1527        let mut workspace_index = WorkspaceIndex::new();
1528        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());
1529
1530        // Create file index with an .html link (from another rule like MD051)
1531        let mut file_index = FileIndex::new();
1532        file_index.add_cross_file_link(CrossFileLinkIndex {
1533            target_path: "guide.html".to_string(),
1534            fragment: "section".to_string(),
1535            line: 10,
1536            column: 5,
1537        });
1538
1539        // Run cross-file check from docs/index.md
1540        let warnings = rule
1541            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1542            .unwrap();
1543
1544        // Should have no warnings - .md source exists for the .html link
1545        assert!(
1546            warnings.is_empty(),
1547            "Expected no warnings for .html link with .md source, got: {warnings:?}"
1548        );
1549    }
1550
1551    #[test]
1552    fn test_cross_file_check_html_link_without_source() {
1553        // Test that .html links without corresponding .md source ARE flagged
1554        use crate::workspace_index::WorkspaceIndex;
1555
1556        let rule = MD057ExistingRelativeLinks::new();
1557
1558        // Create an empty workspace index
1559        let workspace_index = WorkspaceIndex::new();
1560
1561        // Create file index with an .html link to a non-existent file
1562        let mut file_index = FileIndex::new();
1563        file_index.add_cross_file_link(CrossFileLinkIndex {
1564            target_path: "missing.html".to_string(),
1565            fragment: "".to_string(),
1566            line: 10,
1567            column: 5,
1568        });
1569
1570        // Run cross-file check from docs/index.md
1571        let warnings = rule
1572            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1573            .unwrap();
1574
1575        // Should have one warning - no .md source exists
1576        assert_eq!(warnings.len(), 1, "Expected 1 warning for .html link without source");
1577        assert!(warnings[0].message.contains("missing.html"));
1578    }
1579
1580    #[test]
1581    fn test_normalize_path_function() {
1582        // Test simple cases
1583        assert_eq!(
1584            normalize_path(Path::new("docs/guide.md")),
1585            PathBuf::from("docs/guide.md")
1586        );
1587
1588        // Test current directory removal
1589        assert_eq!(
1590            normalize_path(Path::new("./docs/guide.md")),
1591            PathBuf::from("docs/guide.md")
1592        );
1593
1594        // Test parent directory resolution
1595        assert_eq!(
1596            normalize_path(Path::new("docs/sub/../guide.md")),
1597            PathBuf::from("docs/guide.md")
1598        );
1599
1600        // Test multiple parent directories
1601        assert_eq!(normalize_path(Path::new("a/b/c/../../d.md")), PathBuf::from("a/d.md"));
1602    }
1603
1604    #[test]
1605    fn test_html_link_with_md_source() {
1606        // Links to .html files should pass if corresponding .md source exists
1607        let temp_dir = tempdir().unwrap();
1608        let base_path = temp_dir.path();
1609
1610        // Create guide.md (source file)
1611        let md_file = base_path.join("guide.md");
1612        File::create(&md_file).unwrap().write_all(b"# Guide").unwrap();
1613
1614        let content = r#"
1615[Read the guide](guide.html)
1616[Also here](getting-started.html)
1617"#;
1618
1619        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1620        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1621        let result = rule.check(&ctx).unwrap();
1622
1623        // guide.html passes (guide.md exists), getting-started.html fails
1624        assert_eq!(
1625            result.len(),
1626            1,
1627            "Should only warn about missing source. Got: {result:?}"
1628        );
1629        assert!(result[0].message.contains("getting-started.html"));
1630    }
1631
1632    #[test]
1633    fn test_htm_link_with_md_source() {
1634        // .htm extension should also check for markdown source
1635        let temp_dir = tempdir().unwrap();
1636        let base_path = temp_dir.path();
1637
1638        let md_file = base_path.join("page.md");
1639        File::create(&md_file).unwrap().write_all(b"# Page").unwrap();
1640
1641        let content = "[Page](page.htm)";
1642
1643        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1644        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1645        let result = rule.check(&ctx).unwrap();
1646
1647        assert!(
1648            result.is_empty(),
1649            "Should not warn when .md source exists for .htm link"
1650        );
1651    }
1652
1653    #[test]
1654    fn test_html_link_finds_various_markdown_extensions() {
1655        // Should find .mdx, .markdown, etc. as source files
1656        let temp_dir = tempdir().unwrap();
1657        let base_path = temp_dir.path();
1658
1659        File::create(base_path.join("doc.md")).unwrap();
1660        File::create(base_path.join("tutorial.mdx")).unwrap();
1661        File::create(base_path.join("guide.markdown")).unwrap();
1662
1663        let content = r#"
1664[Doc](doc.html)
1665[Tutorial](tutorial.html)
1666[Guide](guide.html)
1667"#;
1668
1669        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1670        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1671        let result = rule.check(&ctx).unwrap();
1672
1673        assert!(
1674            result.is_empty(),
1675            "Should find all markdown variants as source files. Got: {result:?}"
1676        );
1677    }
1678
1679    #[test]
1680    fn test_html_link_in_subdirectory() {
1681        // Should find markdown source in subdirectories
1682        let temp_dir = tempdir().unwrap();
1683        let base_path = temp_dir.path();
1684
1685        let docs_dir = base_path.join("docs");
1686        std::fs::create_dir(&docs_dir).unwrap();
1687        File::create(docs_dir.join("guide.md"))
1688            .unwrap()
1689            .write_all(b"# Guide")
1690            .unwrap();
1691
1692        let content = "[Guide](docs/guide.html)";
1693
1694        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1695        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1696        let result = rule.check(&ctx).unwrap();
1697
1698        assert!(result.is_empty(), "Should find markdown source in subdirectory");
1699    }
1700
1701    #[test]
1702    fn test_absolute_path_skipped_in_check() {
1703        // Test that absolute paths are skipped during link validation
1704        // This fixes the bug where /pkg/runtime was being flagged
1705        let temp_dir = tempdir().unwrap();
1706        let base_path = temp_dir.path();
1707
1708        let content = r#"
1709# Test Document
1710
1711[Go Runtime](/pkg/runtime)
1712[Go Runtime with Fragment](/pkg/runtime#section)
1713[API Docs](/api/v1/users)
1714[Blog Post](/blog/2024/release.html)
1715[React Hook](/react/hooks/use-state.html)
1716"#;
1717
1718        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1719        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1720        let result = rule.check(&ctx).unwrap();
1721
1722        // Should have NO warnings - all absolute paths should be skipped
1723        assert!(
1724            result.is_empty(),
1725            "Absolute paths should be skipped. Got warnings: {result:?}"
1726        );
1727    }
1728
1729    #[test]
1730    fn test_absolute_path_skipped_in_cross_file_check() {
1731        // Test that absolute paths are skipped in cross_file_check()
1732        use crate::workspace_index::WorkspaceIndex;
1733
1734        let rule = MD057ExistingRelativeLinks::new();
1735
1736        // Create an empty workspace index (no files exist)
1737        let workspace_index = WorkspaceIndex::new();
1738
1739        // Create file index with absolute path links (should be skipped)
1740        let mut file_index = FileIndex::new();
1741        file_index.add_cross_file_link(CrossFileLinkIndex {
1742            target_path: "/pkg/runtime.md".to_string(),
1743            fragment: "".to_string(),
1744            line: 5,
1745            column: 1,
1746        });
1747        file_index.add_cross_file_link(CrossFileLinkIndex {
1748            target_path: "/api/v1/users.md".to_string(),
1749            fragment: "section".to_string(),
1750            line: 10,
1751            column: 1,
1752        });
1753
1754        // Run cross-file check
1755        let warnings = rule
1756            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1757            .unwrap();
1758
1759        // Should have NO warnings - absolute paths should be skipped
1760        assert!(
1761            warnings.is_empty(),
1762            "Absolute paths should be skipped in cross_file_check. Got warnings: {warnings:?}"
1763        );
1764    }
1765
1766    #[test]
1767    fn test_protocol_relative_url_not_skipped() {
1768        // Test that protocol-relative URLs (//example.com) are NOT skipped as absolute paths
1769        // They should still be caught by is_external_url() though
1770        let temp_dir = tempdir().unwrap();
1771        let base_path = temp_dir.path();
1772
1773        let content = r#"
1774# Test Document
1775
1776[External](//example.com/page)
1777[Another](//cdn.example.com/asset.js)
1778"#;
1779
1780        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1781        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1782        let result = rule.check(&ctx).unwrap();
1783
1784        // Should have NO warnings - protocol-relative URLs are external and should be skipped
1785        assert!(
1786            result.is_empty(),
1787            "Protocol-relative URLs should be skipped. Got warnings: {result:?}"
1788        );
1789    }
1790
1791    #[test]
1792    fn test_email_addresses_skipped() {
1793        // Test that email addresses without mailto: are skipped
1794        // These are clearly not file links (the @ symbol is definitive)
1795        let temp_dir = tempdir().unwrap();
1796        let base_path = temp_dir.path();
1797
1798        let content = r#"
1799# Test Document
1800
1801[Contact](user@example.com)
1802[Steering](steering@kubernetes.io)
1803[Support](john.doe+filter@company.co.uk)
1804[User](user_name@sub.domain.com)
1805"#;
1806
1807        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1808        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1809        let result = rule.check(&ctx).unwrap();
1810
1811        // Should have NO warnings - email addresses are clearly not file links and should be skipped
1812        assert!(
1813            result.is_empty(),
1814            "Email addresses should be skipped. Got warnings: {result:?}"
1815        );
1816    }
1817
1818    #[test]
1819    fn test_email_addresses_vs_file_paths() {
1820        // Test that email addresses (anything with @) are skipped
1821        // Note: File paths with @ are extremely rare, so we treat anything with @ as an email
1822        let temp_dir = tempdir().unwrap();
1823        let base_path = temp_dir.path();
1824
1825        let content = r#"
1826# Test Document
1827
1828[Email](user@example.com)  <!-- Should be skipped (email) -->
1829[Email2](steering@kubernetes.io)  <!-- Should be skipped (email) -->
1830[Email3](user@file.md)  <!-- Should be skipped (has @, treated as email) -->
1831"#;
1832
1833        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1834        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1835        let result = rule.check(&ctx).unwrap();
1836
1837        // All should be skipped - anything with @ is treated as an email
1838        assert!(
1839            result.is_empty(),
1840            "All email addresses should be skipped. Got: {result:?}"
1841        );
1842    }
1843
1844    #[test]
1845    fn test_diagnostic_position_accuracy() {
1846        // Test that diagnostics point to the URL, not the link text
1847        let temp_dir = tempdir().unwrap();
1848        let base_path = temp_dir.path();
1849
1850        // Position markers:     0         1         2         3
1851        //                       0123456789012345678901234567890123456789
1852        let content = "prefix [text](missing.md) suffix";
1853        //             The URL "missing.md" starts at 0-indexed position 14
1854        //             which is 1-indexed column 15, and ends at 0-indexed 24 (1-indexed column 25)
1855
1856        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1857        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1858        let result = rule.check(&ctx).unwrap();
1859
1860        assert_eq!(result.len(), 1, "Should have exactly one warning");
1861        assert_eq!(result[0].line, 1, "Should be on line 1");
1862        assert_eq!(result[0].column, 15, "Should point to start of URL 'missing.md'");
1863        assert_eq!(result[0].end_column, 25, "Should point past end of URL 'missing.md'");
1864    }
1865
1866    #[test]
1867    fn test_diagnostic_position_angle_brackets() {
1868        // Test position accuracy with angle bracket links
1869        let temp_dir = tempdir().unwrap();
1870        let base_path = temp_dir.path();
1871
1872        // Position markers:     0         1         2
1873        //                       012345678901234567890
1874        let content = "[link](<missing.md>)";
1875        //             The URL "missing.md" starts at 0-indexed position 8 (1-indexed column 9)
1876
1877        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1878        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1879        let result = rule.check(&ctx).unwrap();
1880
1881        assert_eq!(result.len(), 1, "Should have exactly one warning");
1882        assert_eq!(result[0].line, 1, "Should be on line 1");
1883        assert_eq!(result[0].column, 9, "Should point to start of URL in angle brackets");
1884    }
1885
1886    #[test]
1887    fn test_diagnostic_position_multiline() {
1888        // Test that line numbers are correct for links on different lines
1889        let temp_dir = tempdir().unwrap();
1890        let base_path = temp_dir.path();
1891
1892        let content = r#"# Title
1893Some text on line 2
1894[link on line 3](missing1.md)
1895More text
1896[link on line 5](missing2.md)"#;
1897
1898        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1899        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1900        let result = rule.check(&ctx).unwrap();
1901
1902        assert_eq!(result.len(), 2, "Should have two warnings");
1903
1904        // First warning should be on line 3
1905        assert_eq!(result[0].line, 3, "First warning should be on line 3");
1906        assert!(result[0].message.contains("missing1.md"));
1907
1908        // Second warning should be on line 5
1909        assert_eq!(result[1].line, 5, "Second warning should be on line 5");
1910        assert!(result[1].message.contains("missing2.md"));
1911    }
1912
1913    #[test]
1914    fn test_diagnostic_position_with_spaces() {
1915        // Test position with URLs that have spaces in parentheses
1916        let temp_dir = tempdir().unwrap();
1917        let base_path = temp_dir.path();
1918
1919        let content = "[link]( missing.md )";
1920        //             0123456789012345678901
1921        //             0-indexed position 8 is 'm' in 'missing.md' (after space and paren)
1922        //             which is 1-indexed column 9
1923
1924        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1925        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1926        let result = rule.check(&ctx).unwrap();
1927
1928        assert_eq!(result.len(), 1, "Should have exactly one warning");
1929        // The regex captures the URL without leading/trailing spaces
1930        assert_eq!(result[0].column, 9, "Should point to URL after stripping spaces");
1931    }
1932
1933    #[test]
1934    fn test_diagnostic_position_image() {
1935        // Test that image diagnostics also have correct positions
1936        let temp_dir = tempdir().unwrap();
1937        let base_path = temp_dir.path();
1938
1939        let content = "![alt text](missing.jpg)";
1940
1941        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1942        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1943        let result = rule.check(&ctx).unwrap();
1944
1945        assert_eq!(result.len(), 1, "Should have exactly one warning for image");
1946        assert_eq!(result[0].line, 1);
1947        // Images use start_col from the parser, which should point to the URL
1948        assert!(result[0].column > 0, "Should have valid column position");
1949        assert!(result[0].message.contains("missing.jpg"));
1950    }
1951
1952    #[test]
1953    fn test_wikilinks_skipped() {
1954        // Wikilinks should not trigger MD057 warnings
1955        // They use a different linking system (e.g., Obsidian, wiki software)
1956        let temp_dir = tempdir().unwrap();
1957        let base_path = temp_dir.path();
1958
1959        let content = r#"# Test Document
1960
1961[[Microsoft#Windows OS]]
1962[[SomePage]]
1963[[Page With Spaces]]
1964[[path/to/page#section]]
1965[[page|Display Text]]
1966
1967This is a [real missing link](missing.md) that should be flagged.
1968"#;
1969
1970        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1971        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1972        let result = rule.check(&ctx).unwrap();
1973
1974        // Should only warn about the regular markdown link, not wikilinks
1975        assert_eq!(
1976            result.len(),
1977            1,
1978            "Should only warn about missing.md, not wikilinks. Got: {result:?}"
1979        );
1980        assert!(
1981            result[0].message.contains("missing.md"),
1982            "Warning should be for missing.md, not wikilinks"
1983        );
1984    }
1985
1986    #[test]
1987    fn test_wikilinks_not_added_to_index() {
1988        // Wikilinks should not be added to the cross-file link index
1989        let temp_dir = tempdir().unwrap();
1990        let base_path = temp_dir.path();
1991
1992        let content = r#"# Test Document
1993
1994[[Microsoft#Windows OS]]
1995[[SomePage#section]]
1996[Regular Link](other.md)
1997"#;
1998
1999        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2000        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2001
2002        let mut file_index = FileIndex::new();
2003        rule.contribute_to_index(&ctx, &mut file_index);
2004
2005        // Should only have the regular markdown link (if it's a markdown file)
2006        // Wikilinks should not be added
2007        let cross_file_links = &file_index.cross_file_links;
2008        assert_eq!(
2009            cross_file_links.len(),
2010            1,
2011            "Only regular markdown links should be indexed, not wikilinks. Got: {cross_file_links:?}"
2012        );
2013        assert_eq!(file_index.cross_file_links[0].target_path, "other.md");
2014    }
2015
2016    #[test]
2017    fn test_reference_definition_missing_file() {
2018        // Reference definitions [ref]: ./path.md should be checked
2019        let temp_dir = tempdir().unwrap();
2020        let base_path = temp_dir.path();
2021
2022        let content = r#"# Test Document
2023
2024[test]: ./missing.md
2025[example]: ./nonexistent.html
2026
2027Use [test] and [example] here.
2028"#;
2029
2030        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2031        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2032        let result = rule.check(&ctx).unwrap();
2033
2034        // Should have warnings for both reference definitions
2035        assert_eq!(
2036            result.len(),
2037            2,
2038            "Should have warnings for missing reference definition targets. Got: {result:?}"
2039        );
2040        assert!(
2041            result.iter().any(|w| w.message.contains("missing.md")),
2042            "Should warn about missing.md"
2043        );
2044        assert!(
2045            result.iter().any(|w| w.message.contains("nonexistent.html")),
2046            "Should warn about nonexistent.html"
2047        );
2048    }
2049
2050    #[test]
2051    fn test_reference_definition_existing_file() {
2052        // Reference definitions to existing files should NOT trigger warnings
2053        let temp_dir = tempdir().unwrap();
2054        let base_path = temp_dir.path();
2055
2056        // Create an existing file
2057        let exists_path = base_path.join("exists.md");
2058        File::create(&exists_path)
2059            .unwrap()
2060            .write_all(b"# Existing file")
2061            .unwrap();
2062
2063        let content = r#"# Test Document
2064
2065[test]: ./exists.md
2066
2067Use [test] here.
2068"#;
2069
2070        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2071        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2072        let result = rule.check(&ctx).unwrap();
2073
2074        // Should have NO warnings since the file exists
2075        assert!(
2076            result.is_empty(),
2077            "Should not warn about existing file. Got: {result:?}"
2078        );
2079    }
2080
2081    #[test]
2082    fn test_reference_definition_external_url_skipped() {
2083        // Reference definitions with external URLs should be skipped
2084        let temp_dir = tempdir().unwrap();
2085        let base_path = temp_dir.path();
2086
2087        let content = r#"# Test Document
2088
2089[google]: https://google.com
2090[example]: http://example.org
2091[mail]: mailto:test@example.com
2092[ftp]: ftp://files.example.com
2093[local]: ./missing.md
2094
2095Use [google], [example], [mail], [ftp], [local] here.
2096"#;
2097
2098        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2099        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2100        let result = rule.check(&ctx).unwrap();
2101
2102        // Should only warn about the local missing file, not external URLs
2103        assert_eq!(
2104            result.len(),
2105            1,
2106            "Should only warn about local missing file. Got: {result:?}"
2107        );
2108        assert!(
2109            result[0].message.contains("missing.md"),
2110            "Warning should be for missing.md"
2111        );
2112    }
2113
2114    #[test]
2115    fn test_reference_definition_fragment_only_skipped() {
2116        // Reference definitions with fragment-only URLs should be skipped
2117        let temp_dir = tempdir().unwrap();
2118        let base_path = temp_dir.path();
2119
2120        let content = r#"# Test Document
2121
2122[section]: #my-section
2123
2124Use [section] here.
2125"#;
2126
2127        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2128        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2129        let result = rule.check(&ctx).unwrap();
2130
2131        // Should have NO warnings for fragment-only links
2132        assert!(
2133            result.is_empty(),
2134            "Should not warn about fragment-only reference. Got: {result:?}"
2135        );
2136    }
2137
2138    #[test]
2139    fn test_reference_definition_column_position() {
2140        // Test that column position points to the URL in the reference definition
2141        let temp_dir = tempdir().unwrap();
2142        let base_path = temp_dir.path();
2143
2144        // Position markers:     0         1         2
2145        //                       0123456789012345678901
2146        let content = "[ref]: ./missing.md";
2147        //             The URL "./missing.md" starts at 0-indexed position 7
2148        //             which is 1-indexed column 8
2149
2150        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2151        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2152        let result = rule.check(&ctx).unwrap();
2153
2154        assert_eq!(result.len(), 1, "Should have exactly one warning");
2155        assert_eq!(result[0].line, 1, "Should be on line 1");
2156        assert_eq!(result[0].column, 8, "Should point to start of URL './missing.md'");
2157    }
2158
2159    #[test]
2160    fn test_reference_definition_html_with_md_source() {
2161        // Reference definitions to .html files should pass if corresponding .md source exists
2162        let temp_dir = tempdir().unwrap();
2163        let base_path = temp_dir.path();
2164
2165        // Create guide.md (source file)
2166        let md_file = base_path.join("guide.md");
2167        File::create(&md_file).unwrap().write_all(b"# Guide").unwrap();
2168
2169        let content = r#"# Test Document
2170
2171[guide]: ./guide.html
2172[missing]: ./missing.html
2173
2174Use [guide] and [missing] here.
2175"#;
2176
2177        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2178        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2179        let result = rule.check(&ctx).unwrap();
2180
2181        // guide.html passes (guide.md exists), missing.html fails
2182        assert_eq!(
2183            result.len(),
2184            1,
2185            "Should only warn about missing source. Got: {result:?}"
2186        );
2187        assert!(result[0].message.contains("missing.html"));
2188    }
2189
2190    #[test]
2191    fn test_reference_definition_url_encoded() {
2192        // Reference definitions with URL-encoded paths should be decoded before checking
2193        let temp_dir = tempdir().unwrap();
2194        let base_path = temp_dir.path();
2195
2196        // Create a file with spaces in the name
2197        let file_with_spaces = base_path.join("file with spaces.md");
2198        File::create(&file_with_spaces).unwrap().write_all(b"# Spaces").unwrap();
2199
2200        let content = r#"# Test Document
2201
2202[spaces]: ./file%20with%20spaces.md
2203[missing]: ./missing%20file.md
2204
2205Use [spaces] and [missing] here.
2206"#;
2207
2208        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2209        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2210        let result = rule.check(&ctx).unwrap();
2211
2212        // Should only warn about the missing file
2213        assert_eq!(
2214            result.len(),
2215            1,
2216            "Should only warn about missing URL-encoded file. Got: {result:?}"
2217        );
2218        assert!(result[0].message.contains("missing%20file.md"));
2219    }
2220
2221    #[test]
2222    fn test_inline_and_reference_both_checked() {
2223        // Both inline links and reference definitions should be checked
2224        let temp_dir = tempdir().unwrap();
2225        let base_path = temp_dir.path();
2226
2227        let content = r#"# Test Document
2228
2229[inline link](./inline-missing.md)
2230[ref]: ./ref-missing.md
2231
2232Use [ref] here.
2233"#;
2234
2235        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2236        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2237        let result = rule.check(&ctx).unwrap();
2238
2239        // Should warn about both the inline link and the reference definition
2240        assert_eq!(
2241            result.len(),
2242            2,
2243            "Should warn about both inline and reference links. Got: {result:?}"
2244        );
2245        assert!(
2246            result.iter().any(|w| w.message.contains("inline-missing.md")),
2247            "Should warn about inline-missing.md"
2248        );
2249        assert!(
2250            result.iter().any(|w| w.message.contains("ref-missing.md")),
2251            "Should warn about ref-missing.md"
2252        );
2253    }
2254
2255    #[test]
2256    fn test_footnote_definitions_not_flagged() {
2257        // Regression test for issue #286: footnote definitions should not be
2258        // treated as reference definitions and flagged as broken links
2259        let rule = MD057ExistingRelativeLinks::default();
2260
2261        let content = r#"# Title
2262
2263A footnote[^1].
2264
2265[^1]: [link](https://www.google.com).
2266"#;
2267
2268        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2269        let result = rule.check(&ctx).unwrap();
2270
2271        assert!(
2272            result.is_empty(),
2273            "Footnote definitions should not trigger MD057 warnings. Got: {result:?}"
2274        );
2275    }
2276
2277    #[test]
2278    fn test_footnote_with_relative_link_inside() {
2279        // Footnotes containing relative links should not be checked
2280        // (the footnote content is not a URL, it's content that may contain links)
2281        let rule = MD057ExistingRelativeLinks::default();
2282
2283        let content = r#"# Title
2284
2285See the footnote[^1].
2286
2287[^1]: Check out [this file](./existing.md) for more info.
2288[^2]: Also see [missing](./does-not-exist.md).
2289"#;
2290
2291        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2292        let result = rule.check(&ctx).unwrap();
2293
2294        // The inline links INSIDE footnotes should be checked (./existing.md, ./does-not-exist.md)
2295        // but the footnote definition itself should not be treated as a reference definition
2296        // Note: This test verifies that [^1]: and [^2]: are not parsed as ref defs with
2297        // URLs like "[this file](./existing.md)" or "[missing](./does-not-exist.md)"
2298        for warning in &result {
2299            assert!(
2300                !warning.message.contains("[this file]"),
2301                "Footnote content should not be treated as URL: {warning:?}"
2302            );
2303            assert!(
2304                !warning.message.contains("[missing]"),
2305                "Footnote content should not be treated as URL: {warning:?}"
2306            );
2307        }
2308    }
2309
2310    #[test]
2311    fn test_mixed_footnotes_and_reference_definitions() {
2312        // Ensure regular reference definitions are still checked while footnotes are skipped
2313        let temp_dir = tempdir().unwrap();
2314        let base_path = temp_dir.path();
2315
2316        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2317
2318        let content = r#"# Title
2319
2320A footnote[^1] and a [ref link][myref].
2321
2322[^1]: This is a footnote with [link](https://example.com).
2323
2324[myref]: ./missing-file.md "This should be checked"
2325"#;
2326
2327        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2328        let result = rule.check(&ctx).unwrap();
2329
2330        // Should only warn about the regular reference definition, not the footnote
2331        assert_eq!(
2332            result.len(),
2333            1,
2334            "Should only warn about the regular reference definition. Got: {result:?}"
2335        );
2336        assert!(
2337            result[0].message.contains("missing-file.md"),
2338            "Should warn about missing-file.md in reference definition"
2339        );
2340    }
2341}