Skip to main content

rumdl_lib/rules/
md057_existing_relative_links.rs

1//!
2//! Rule MD057: Existing relative links
3//!
4//! See [docs/md057.md](../../docs/md057.md) for full documentation, configuration, and examples.
5
6use crate::rule::{CrossFileScope, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::element_cache::ElementCache;
8use crate::workspace_index::{FileIndex, extract_cross_file_links};
9use regex::Regex;
10use std::collections::HashMap;
11use std::env;
12use std::path::{Path, PathBuf};
13use std::sync::LazyLock;
14use std::sync::{Arc, Mutex};
15
16mod md057_config;
17use crate::rule_config_serde::RuleConfig;
18use md057_config::{AbsoluteLinksOption, MD057Config};
19
/// Process-wide memo of `Path::exists` results, keyed by the exact path that was probed.
///
/// The map sits behind a `Mutex` so lookups from several threads are serialized; it starts
/// out empty and is emptied again by `reset_file_existence_cache`.
static FILE_EXISTENCE_CACHE: LazyLock<Arc<Mutex<HashMap<PathBuf, bool>>>> = LazyLock::new(Default::default);
23
24// Reset the file existence cache (typically between rule runs)
25fn reset_file_existence_cache() {
26    if let Ok(mut cache) = FILE_EXISTENCE_CACHE.lock() {
27        cache.clear();
28    }
29}
30
31// Check if a file exists with caching
32fn file_exists_with_cache(path: &Path) -> bool {
33    match FILE_EXISTENCE_CACHE.lock() {
34        Ok(mut cache) => *cache.entry(path.to_path_buf()).or_insert_with(|| path.exists()),
35        Err(_) => path.exists(), // Fallback to uncached check on mutex poison
36    }
37}
38
39/// Check if a file exists, also trying markdown extensions for extensionless links.
40/// This supports wiki-style links like `[Link](page)` that resolve to `page.md`.
41fn file_exists_or_markdown_extension(path: &Path) -> bool {
42    // First, check exact path
43    if file_exists_with_cache(path) {
44        return true;
45    }
46
47    // If the path has no extension, try adding markdown extensions
48    if path.extension().is_none() {
49        for ext in MARKDOWN_EXTENSIONS {
50            // MARKDOWN_EXTENSIONS includes the dot, e.g., ".md"
51            let path_with_ext = path.with_extension(&ext[1..]);
52            if file_exists_with_cache(&path_with_ext) {
53                return true;
54            }
55        }
56    }
57
58    false
59}
60
61// Regex to match the start of a link - simplified for performance
62static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());
63
64/// Regex to extract the URL from an angle-bracketed markdown link
65/// Format: `](<URL>)` or `](<URL> "title")`
66/// This handles URLs with parentheses like `](<path/(with)/parens.md>)`
67static URL_EXTRACT_ANGLE_BRACKET_REGEX: LazyLock<Regex> =
68    LazyLock::new(|| Regex::new(r#"\]\(\s*<([^>]+)>(#[^\)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());
69
70/// Regex to extract the URL from a normal markdown link (without angle brackets)
71/// Format: `](URL)` or `](URL "title")`
72static URL_EXTRACT_REGEX: LazyLock<Regex> =
73    LazyLock::new(|| Regex::new("\\]\\(\\s*([^>\\)\\s#]+)(#[^)\\s]*)?\\s*(?:\"[^\"]*\")?\\s*\\)").unwrap());
74
75/// Regex to detect URLs with explicit schemes (should not be checked as relative links)
76/// Matches: scheme:// or scheme: (per RFC 3986)
77/// This covers http, https, ftp, file, smb, mailto, tel, data, macappstores, etc.
78static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
79    LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());
80
/// Working directory of the process, captured on first use; `.` when it cannot be determined.
static CURRENT_DIR: LazyLock<PathBuf> = LazyLock::new(|| match env::current_dir() {
    Ok(dir) => dir,
    Err(_) => PathBuf::from("."),
});
83
/// Map an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) to its value in `0..=15`.
///
/// Any other byte yields `None`.
#[inline]
fn hex_digit_to_value(byte: u8) -> Option<u8> {
    // `to_digit(16)` accepts exactly the ASCII hex digits in either case
    char::from(byte)
        .to_digit(16)
        .and_then(|digit| u8::try_from(digit).ok())
}
95
/// File extensions recognized as markdown sources.
///
/// Each entry includes its leading dot; callers that need the bare extension slice it off.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md",
    ".markdown",
    ".mdx",
    ".mkd",
    ".mkdn",
    ".mdown",
    ".mdwn",
    ".qmd",
    ".rmd",
];
108
109/// Rule MD057: Existing relative links should point to valid files or directories.
110#[derive(Debug, Clone)]
111pub struct MD057ExistingRelativeLinks {
112    /// Base directory for resolving relative links
113    base_path: Arc<Mutex<Option<PathBuf>>>,
114    /// Configuration for the rule
115    config: MD057Config,
116}
117
118impl Default for MD057ExistingRelativeLinks {
119    fn default() -> Self {
120        Self {
121            base_path: Arc::new(Mutex::new(None)),
122            config: MD057Config::default(),
123        }
124    }
125}
126
127impl MD057ExistingRelativeLinks {
128    /// Create a new instance with default settings
129    pub fn new() -> Self {
130        Self::default()
131    }
132
133    /// Set the base path for resolving relative links
134    pub fn with_path<P: AsRef<Path>>(self, path: P) -> Self {
135        let path = path.as_ref();
136        let dir_path = if path.is_file() {
137            path.parent().map(|p| p.to_path_buf())
138        } else {
139            Some(path.to_path_buf())
140        };
141
142        if let Ok(mut guard) = self.base_path.lock() {
143            *guard = dir_path;
144        }
145        self
146    }
147
148    pub fn from_config_struct(config: MD057Config) -> Self {
149        Self {
150            base_path: Arc::new(Mutex::new(None)),
151            config,
152        }
153    }
154
155    /// Check if a URL is external or should be skipped for validation.
156    ///
157    /// Returns `true` (skip validation) for:
158    /// - URLs with protocols: `https://`, `http://`, `ftp://`, `mailto:`, etc.
159    /// - Bare domains: `www.example.com`, `example.com`
160    /// - Email addresses: `user@example.com` (without `mailto:`)
161    /// - Template variables: `{{URL}}`, `{{% include %}}`
162    /// - Absolute web URL paths: `/api/docs`, `/blog/post.html`
163    ///
164    /// Returns `false` (validate) for:
165    /// - Relative filesystem paths: `./file.md`, `../parent/file.md`, `file.md`
166    #[inline]
167    fn is_external_url(&self, url: &str) -> bool {
168        if url.is_empty() {
169            return false;
170        }
171
172        // Quick checks for common external URL patterns
173        if PROTOCOL_DOMAIN_REGEX.is_match(url) || url.starts_with("www.") {
174            return true;
175        }
176
177        // Skip template variables (Handlebars/Mustache/Jinja2 syntax)
178        // Examples: {{URL}}, {{#URL}}, {{> partial}}, {{% include %}}, {{ variable }}
179        if url.starts_with("{{") || url.starts_with("{%") {
180            return true;
181        }
182
183        // Simple check: if URL contains @, it's almost certainly an email address
184        // File paths with @ are extremely rare, so this is a safe heuristic
185        if url.contains('@') {
186            return true; // It's an email address, skip it
187        }
188
189        // Bare domain check (e.g., "example.com")
190        // Note: We intentionally DON'T skip all TLDs like .org, .net, etc.
191        // Links like [text](nodejs.org/path) without a protocol are broken -
192        // they'll be treated as relative paths by markdown renderers.
193        // Flagging them helps users find missing protocols.
194        // We only skip .com as a minimal safety net for the most common case.
195        if url.ends_with(".com") {
196            return true;
197        }
198
199        // Framework path aliases (resolved by build tools like Vite, webpack, etc.)
200        // These are not filesystem paths but module/asset aliases
201        // Examples: ~/assets/image.png, @images/photo.jpg, @/components/Button.vue
202        if url.starts_with('~') || url.starts_with('@') {
203            return true;
204        }
205
206        // All other cases (relative paths, etc.) are not external
207        false
208    }
209
210    /// Check if the URL is a fragment-only link (internal document link)
211    #[inline]
212    fn is_fragment_only_link(&self, url: &str) -> bool {
213        url.starts_with('#')
214    }
215
216    /// Check if the URL is an absolute path (starts with /)
217    /// These are typically routes for published documentation sites.
218    #[inline]
219    fn is_absolute_path(url: &str) -> bool {
220        url.starts_with('/')
221    }
222
223    /// Decode URL percent-encoded sequences in a path.
224    /// Converts `%20` to space, `%2F` to `/`, etc.
225    /// Returns the original string if decoding fails or produces invalid UTF-8.
226    fn url_decode(path: &str) -> String {
227        // Quick check: if no percent sign, return as-is
228        if !path.contains('%') {
229            return path.to_string();
230        }
231
232        let bytes = path.as_bytes();
233        let mut result = Vec::with_capacity(bytes.len());
234        let mut i = 0;
235
236        while i < bytes.len() {
237            if bytes[i] == b'%' && i + 2 < bytes.len() {
238                // Try to parse the two hex digits following %
239                let hex1 = bytes[i + 1];
240                let hex2 = bytes[i + 2];
241                if let (Some(d1), Some(d2)) = (hex_digit_to_value(hex1), hex_digit_to_value(hex2)) {
242                    result.push(d1 * 16 + d2);
243                    i += 3;
244                    continue;
245                }
246            }
247            result.push(bytes[i]);
248            i += 1;
249        }
250
251        // Convert to UTF-8, falling back to original if invalid
252        String::from_utf8(result).unwrap_or_else(|_| path.to_string())
253    }
254
255    /// Strip query parameters and fragments from a URL for file existence checking.
256    /// URLs like `path/to/image.png?raw=true` or `file.md#section` should check
257    /// for `path/to/image.png` or `file.md` respectively.
258    ///
259    /// Note: In standard URLs, query parameters (`?`) come before fragments (`#`),
260    /// so we check for `?` first. If a URL has both, only the query is stripped here
261    /// (fragments are handled separately by the regex in `contribute_to_index`).
262    fn strip_query_and_fragment(url: &str) -> &str {
263        // Find the first occurrence of '?' or '#', whichever comes first
264        // This handles both standard URLs (? before #) and edge cases (# before ?)
265        let query_pos = url.find('?');
266        let fragment_pos = url.find('#');
267
268        match (query_pos, fragment_pos) {
269            (Some(q), Some(f)) => {
270                // Both exist - strip at whichever comes first
271                &url[..q.min(f)]
272            }
273            (Some(q), None) => &url[..q],
274            (None, Some(f)) => &url[..f],
275            (None, None) => url,
276        }
277    }
278
279    /// Resolve a relative link against a provided base path
280    fn resolve_link_path_with_base(link: &str, base_path: &Path) -> PathBuf {
281        base_path.join(link)
282    }
283}
284
285impl Rule for MD057ExistingRelativeLinks {
286    fn name(&self) -> &'static str {
287        "MD057"
288    }
289
290    fn description(&self) -> &'static str {
291        "Relative links should point to existing files"
292    }
293
294    fn category(&self) -> RuleCategory {
295        RuleCategory::Link
296    }
297
298    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
299        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
300    }
301
302    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
303        let content = ctx.content;
304
305        // Early returns for performance
306        if content.is_empty() || !content.contains('[') {
307            return Ok(Vec::new());
308        }
309
310        // Quick check for any potential links before expensive operations
311        // Check for inline links "](", reference definitions "]:", or images "!["
312        if !content.contains("](") && !content.contains("]:") {
313            return Ok(Vec::new());
314        }
315
316        // Reset the file existence cache for a fresh run
317        reset_file_existence_cache();
318
319        let mut warnings = Vec::new();
320
321        // Determine base path for resolving relative links
322        // ALWAYS compute from ctx.source_file for each file - do not reuse cached base_path
323        // This ensures each file resolves links relative to its own directory
324        let base_path: Option<PathBuf> = {
325            // First check if base_path was explicitly set via with_path() (for tests)
326            let explicit_base = self.base_path.lock().ok().and_then(|g| g.clone());
327            if explicit_base.is_some() {
328                explicit_base
329            } else if let Some(ref source_file) = ctx.source_file {
330                // Resolve symlinks to get the actual file location
331                // This ensures relative links are resolved from the target's directory,
332                // not the symlink's directory
333                let resolved_file = source_file.canonicalize().unwrap_or_else(|_| source_file.clone());
334                resolved_file
335                    .parent()
336                    .map(|p| p.to_path_buf())
337                    .or_else(|| Some(CURRENT_DIR.clone()))
338            } else {
339                // No source file available - cannot validate relative links
340                None
341            }
342        };
343
344        // If we still don't have a base path, we can't validate relative links
345        let Some(base_path) = base_path else {
346            return Ok(warnings);
347        };
348
349        // Use LintContext links instead of expensive regex parsing
350        if !ctx.links.is_empty() {
351            // Use LineIndex for correct position calculation across all line ending types
352            let line_index = &ctx.line_index;
353
354            // Create element cache once for all links
355            let element_cache = ElementCache::new(content);
356
357            // Pre-collect lines to avoid repeated line iteration
358            let lines: Vec<&str> = content.lines().collect();
359
360            // Track which lines we've already processed to avoid duplicates
361            // (ctx.links may have multiple entries for the same line, especially with malformed markdown)
362            let mut processed_lines = std::collections::HashSet::new();
363
364            for link in &ctx.links {
365                let line_idx = link.line - 1;
366                if line_idx >= lines.len() {
367                    continue;
368                }
369
370                // Skip lines inside PyMdown blocks (MkDocs flavor)
371                // This must be checked BEFORE processed_lines to skip the entire line
372                if ctx.line_info(link.line).is_some_and(|info| info.in_pymdown_block) {
373                    continue;
374                }
375
376                // Skip if we've already processed this line
377                if !processed_lines.insert(line_idx) {
378                    continue;
379                }
380
381                let line = lines[line_idx];
382
383                // Quick check for link pattern in this line
384                if !line.contains("](") {
385                    continue;
386                }
387
388                // Find all links in this line using optimized regex
389                for link_match in LINK_START_REGEX.find_iter(line) {
390                    let start_pos = link_match.start();
391                    let end_pos = link_match.end();
392
393                    // Calculate absolute position using LineIndex
394                    let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
395                    let absolute_start_pos = line_start_byte + start_pos;
396
397                    // Skip if this link is in a code span
398                    if element_cache.is_in_code_span(absolute_start_pos) {
399                        continue;
400                    }
401
402                    // Skip if this link is in a math span (LaTeX $...$ or $$...$$)
403                    if ctx.is_in_math_span(absolute_start_pos) {
404                        continue;
405                    }
406
407                    // Find the URL part after the link text
408                    // Try angle-bracket regex first (handles URLs with parens like `<path/(with)/parens.md>`)
409                    // Then fall back to normal URL regex
410                    let caps_and_url = URL_EXTRACT_ANGLE_BRACKET_REGEX
411                        .captures_at(line, end_pos - 1)
412                        .and_then(|caps| caps.get(1).map(|g| (caps, g)))
413                        .or_else(|| {
414                            URL_EXTRACT_REGEX
415                                .captures_at(line, end_pos - 1)
416                                .and_then(|caps| caps.get(1).map(|g| (caps, g)))
417                        });
418
419                    if let Some((_caps, url_group)) = caps_and_url {
420                        let url = url_group.as_str().trim();
421
422                        // Skip empty URLs
423                        if url.is_empty() {
424                            continue;
425                        }
426
427                        // Skip rustdoc intra-doc links (backtick-wrapped URLs)
428                        // These are Rust API references, not file paths
429                        // Example: [`f32::is_subnormal`], [`Vec::push`]
430                        if url.starts_with('`') && url.ends_with('`') {
431                            continue;
432                        }
433
434                        // Skip external URLs and fragment-only links
435                        if self.is_external_url(url) || self.is_fragment_only_link(url) {
436                            continue;
437                        }
438
439                        // Handle absolute paths based on config
440                        if Self::is_absolute_path(url) {
441                            match self.config.absolute_links {
442                                AbsoluteLinksOption::Warn => {
443                                    let url_start = url_group.start();
444                                    let url_end = url_group.end();
445                                    warnings.push(LintWarning {
446                                        rule_name: Some(self.name().to_string()),
447                                        line: link.line,
448                                        column: url_start + 1,
449                                        end_line: link.line,
450                                        end_column: url_end + 1,
451                                        message: format!("Absolute link '{url}' cannot be validated locally"),
452                                        severity: Severity::Warning,
453                                        fix: None,
454                                    });
455                                }
456                                AbsoluteLinksOption::Ignore => {}
457                            }
458                            continue;
459                        }
460
461                        // Strip query parameters and fragments before checking file existence
462                        let file_path = Self::strip_query_and_fragment(url);
463
464                        // URL-decode the path to handle percent-encoded characters
465                        let decoded_path = Self::url_decode(file_path);
466
467                        // Resolve the relative link against the base path
468                        let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
469
470                        // Check if the file exists, also trying markdown extensions for extensionless links
471                        if file_exists_or_markdown_extension(&resolved_path) {
472                            continue; // File exists, no warning needed
473                        }
474
475                        // For .html/.htm links, check if a corresponding markdown source exists
476                        let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
477                            && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
478                            && let (Some(stem), Some(parent)) = (
479                                resolved_path.file_stem().and_then(|s| s.to_str()),
480                                resolved_path.parent(),
481                            ) {
482                            MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
483                                let source_path = parent.join(format!("{stem}{md_ext}"));
484                                file_exists_with_cache(&source_path)
485                            })
486                        } else {
487                            false
488                        };
489
490                        if has_md_source {
491                            continue; // Markdown source exists, link is valid
492                        }
493
494                        // File doesn't exist and no source file found
495                        // Use actual URL position from regex capture group
496                        // Note: capture group positions are absolute within the line string
497                        let url_start = url_group.start();
498                        let url_end = url_group.end();
499
500                        warnings.push(LintWarning {
501                            rule_name: Some(self.name().to_string()),
502                            line: link.line,
503                            column: url_start + 1, // 1-indexed
504                            end_line: link.line,
505                            end_column: url_end + 1, // 1-indexed
506                            message: format!("Relative link '{url}' does not exist"),
507                            severity: Severity::Error,
508                            fix: None,
509                        });
510                    }
511                }
512            }
513        }
514
515        // Also process images - they have URLs already parsed
516        for image in &ctx.images {
517            // Skip images inside PyMdown blocks (MkDocs flavor)
518            if ctx.line_info(image.line).is_some_and(|info| info.in_pymdown_block) {
519                continue;
520            }
521
522            let url = image.url.as_ref();
523
524            // Skip empty URLs
525            if url.is_empty() {
526                continue;
527            }
528
529            // Skip external URLs and fragment-only links
530            if self.is_external_url(url) || self.is_fragment_only_link(url) {
531                continue;
532            }
533
534            // Handle absolute paths based on config
535            if Self::is_absolute_path(url) {
536                match self.config.absolute_links {
537                    AbsoluteLinksOption::Warn => {
538                        warnings.push(LintWarning {
539                            rule_name: Some(self.name().to_string()),
540                            line: image.line,
541                            column: image.start_col + 1,
542                            end_line: image.line,
543                            end_column: image.start_col + 1 + url.len(),
544                            message: format!("Absolute link '{url}' cannot be validated locally"),
545                            severity: Severity::Warning,
546                            fix: None,
547                        });
548                    }
549                    AbsoluteLinksOption::Ignore => {}
550                }
551                continue;
552            }
553
554            // Strip query parameters and fragments before checking file existence
555            let file_path = Self::strip_query_and_fragment(url);
556
557            // URL-decode the path to handle percent-encoded characters
558            let decoded_path = Self::url_decode(file_path);
559
560            // Resolve the relative link against the base path
561            let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
562
563            // Check if the file exists, also trying markdown extensions for extensionless links
564            if file_exists_or_markdown_extension(&resolved_path) {
565                continue; // File exists, no warning needed
566            }
567
568            // For .html/.htm links, check if a corresponding markdown source exists
569            let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
570                && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
571                && let (Some(stem), Some(parent)) = (
572                    resolved_path.file_stem().and_then(|s| s.to_str()),
573                    resolved_path.parent(),
574                ) {
575                MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
576                    let source_path = parent.join(format!("{stem}{md_ext}"));
577                    file_exists_with_cache(&source_path)
578                })
579            } else {
580                false
581            };
582
583            if has_md_source {
584                continue; // Markdown source exists, link is valid
585            }
586
587            // File doesn't exist and no source file found
588            // Images already have correct position from parser
589            warnings.push(LintWarning {
590                rule_name: Some(self.name().to_string()),
591                line: image.line,
592                column: image.start_col + 1,
593                end_line: image.line,
594                end_column: image.start_col + 1 + url.len(),
595                message: format!("Relative link '{url}' does not exist"),
596                severity: Severity::Error,
597                fix: None,
598            });
599        }
600
601        // Also process reference definitions: [ref]: ./path.md
602        for ref_def in &ctx.reference_defs {
603            let url = &ref_def.url;
604
605            // Skip empty URLs
606            if url.is_empty() {
607                continue;
608            }
609
610            // Skip external URLs and fragment-only links
611            if self.is_external_url(url) || self.is_fragment_only_link(url) {
612                continue;
613            }
614
615            // Handle absolute paths based on config
616            if Self::is_absolute_path(url) {
617                match self.config.absolute_links {
618                    AbsoluteLinksOption::Warn => {
619                        let line_idx = ref_def.line - 1;
620                        let column = content.lines().nth(line_idx).map_or(1, |line_content| {
621                            line_content.find(url.as_str()).map_or(1, |url_pos| url_pos + 1)
622                        });
623                        warnings.push(LintWarning {
624                            rule_name: Some(self.name().to_string()),
625                            line: ref_def.line,
626                            column,
627                            end_line: ref_def.line,
628                            end_column: column + url.len(),
629                            message: format!("Absolute link '{url}' cannot be validated locally"),
630                            severity: Severity::Warning,
631                            fix: None,
632                        });
633                    }
634                    AbsoluteLinksOption::Ignore => {}
635                }
636                continue;
637            }
638
639            // Strip query parameters and fragments before checking file existence
640            let file_path = Self::strip_query_and_fragment(url);
641
642            // URL-decode the path to handle percent-encoded characters
643            let decoded_path = Self::url_decode(file_path);
644
645            // Resolve the relative link against the base path
646            let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
647
648            // Check if the file exists, also trying markdown extensions for extensionless links
649            if file_exists_or_markdown_extension(&resolved_path) {
650                continue; // File exists, no warning needed
651            }
652
653            // For .html/.htm links, check if a corresponding markdown source exists
654            let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
655                && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
656                && let (Some(stem), Some(parent)) = (
657                    resolved_path.file_stem().and_then(|s| s.to_str()),
658                    resolved_path.parent(),
659                ) {
660                MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
661                    let source_path = parent.join(format!("{stem}{md_ext}"));
662                    file_exists_with_cache(&source_path)
663                })
664            } else {
665                false
666            };
667
668            if has_md_source {
669                continue; // Markdown source exists, link is valid
670            }
671
672            // File doesn't exist and no source file found
673            // Calculate column position: find URL within the line
674            let line_idx = ref_def.line - 1;
675            let column = content.lines().nth(line_idx).map_or(1, |line_content| {
676                // Find URL position in line (after ]: )
677                line_content.find(url.as_str()).map_or(1, |url_pos| url_pos + 1)
678            });
679
680            warnings.push(LintWarning {
681                rule_name: Some(self.name().to_string()),
682                line: ref_def.line,
683                column,
684                end_line: ref_def.line,
685                end_column: column + url.len(),
686                message: format!("Relative link '{url}' does not exist"),
687                severity: Severity::Error,
688                fix: None,
689            });
690        }
691
692        Ok(warnings)
693    }
694
695    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
696        Ok(ctx.content.to_string())
697    }
698
699    fn as_any(&self) -> &dyn std::any::Any {
700        self
701    }
702
703    fn default_config_section(&self) -> Option<(String, toml::Value)> {
704        let default_config = MD057Config::default();
705        let json_value = serde_json::to_value(&default_config).ok()?;
706        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
707
708        if let toml::Value::Table(table) = toml_value {
709            if !table.is_empty() {
710                Some((MD057Config::RULE_NAME.to_string(), toml::Value::Table(table)))
711            } else {
712                None
713            }
714        } else {
715            None
716        }
717    }
718
719    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
720    where
721        Self: Sized,
722    {
723        let rule_config = crate::rule_config_serde::load_rule_config::<MD057Config>(config);
724        Box::new(Self::from_config_struct(rule_config))
725    }
726
727    fn cross_file_scope(&self) -> CrossFileScope {
728        CrossFileScope::Workspace
729    }
730
731    fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, index: &mut FileIndex) {
732        // Use the shared utility for cross-file link extraction
733        // This ensures consistent position tracking between CLI and LSP
734        for link in extract_cross_file_links(ctx) {
735            index.add_cross_file_link(link);
736        }
737    }
738
739    fn cross_file_check(
740        &self,
741        file_path: &Path,
742        file_index: &FileIndex,
743        workspace_index: &crate::workspace_index::WorkspaceIndex,
744    ) -> LintResult {
745        let mut warnings = Vec::new();
746
747        // Get the directory containing this file for resolving relative links
748        let file_dir = file_path.parent();
749
750        for cross_link in &file_index.cross_file_links {
751            // URL-decode the path for filesystem operations
752            // The stored path is URL-encoded (e.g., "%F0%9F%91%A4" for emoji 👤)
753            let decoded_target = Self::url_decode(&cross_link.target_path);
754
755            // Skip absolute/protocol-relative paths (web paths, not filesystem paths)
756            if decoded_target.starts_with('/') {
757                continue;
758            }
759
760            // Resolve relative path
761            let target_path = if let Some(dir) = file_dir {
762                dir.join(&decoded_target)
763            } else {
764                Path::new(&decoded_target).to_path_buf()
765            };
766
767            // Normalize the path (handle .., ., etc.)
768            let target_path = normalize_path(&target_path);
769
770            // Check if the target file exists, also trying markdown extensions for extensionless links
771            let file_exists =
772                workspace_index.contains_file(&target_path) || file_exists_or_markdown_extension(&target_path);
773
774            if !file_exists {
775                // For .html/.htm links, check if a corresponding markdown source exists
776                // This handles doc sites (mdBook, etc.) where .md is compiled to .html
777                let has_md_source = if let Some(ext) = target_path.extension().and_then(|e| e.to_str())
778                    && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
779                    && let (Some(stem), Some(parent)) =
780                        (target_path.file_stem().and_then(|s| s.to_str()), target_path.parent())
781                {
782                    MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
783                        let source_path = parent.join(format!("{stem}{md_ext}"));
784                        workspace_index.contains_file(&source_path) || source_path.exists()
785                    })
786                } else {
787                    false
788                };
789
790                if !has_md_source {
791                    warnings.push(LintWarning {
792                        rule_name: Some(self.name().to_string()),
793                        line: cross_link.line,
794                        column: cross_link.column,
795                        end_line: cross_link.line,
796                        end_column: cross_link.column + cross_link.target_path.len(),
797                        message: format!("Relative link '{}' does not exist", cross_link.target_path),
798                        severity: Severity::Error,
799                        fix: None,
800                    });
801                }
802            }
803        }
804
805        Ok(warnings)
806    }
807}
808
/// Lexically normalize a path by resolving `.` and `..` components.
///
/// The filesystem is never consulted, so symlinks are not followed.
///
/// * `.` components are dropped.
/// * `..` removes the preceding normal component when there is one.
/// * `..` that would climb above the start of a relative path is kept
///   (`../a` stays `../a`), so the result still names the same location.
/// * `..` directly below a root or prefix is dropped because the root has no
///   parent; the root/prefix itself is never removed, so an absolute path stays absolute.
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut components: Vec<Component<'_>> = Vec::new();

    for component in path.components() {
        match component {
            // Current-directory markers carry no information
            Component::CurDir => {}
            Component::ParentDir => match components.last().copied() {
                // A real directory precedes us: step back out of it
                Some(Component::Normal(_)) => {
                    components.pop();
                }
                // Cannot go above the root; popping it would turn the path relative
                Some(Component::RootDir | Component::Prefix(_)) => {}
                // Nothing left to cancel: keep the `..` so the path keeps pointing upward
                Some(Component::ParentDir | Component::CurDir) | None => components.push(component),
            },
            Component::Normal(_) | Component::RootDir | Component::Prefix(_) => components.push(component),
        }
    }

    components.iter().collect()
}
832
833#[cfg(test)]
834mod tests {
835    use super::*;
836    use crate::workspace_index::CrossFileLinkIndex;
837    use std::fs::File;
838    use std::io::Write;
839    use tempfile::tempdir;
840
841    #[test]
842    fn test_strip_query_and_fragment() {
843        // Test query parameter stripping
844        assert_eq!(
845            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true"),
846            "file.png"
847        );
848        assert_eq!(
849            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true&version=1"),
850            "file.png"
851        );
852        assert_eq!(
853            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?"),
854            "file.png"
855        );
856
857        // Test fragment stripping
858        assert_eq!(
859            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section"),
860            "file.md"
861        );
862        assert_eq!(
863            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#"),
864            "file.md"
865        );
866
867        // Test both query and fragment (query comes first, per RFC 3986)
868        assert_eq!(
869            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md?raw=true#section"),
870            "file.md"
871        );
872
873        // Test no query or fragment
874        assert_eq!(
875            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png"),
876            "file.png"
877        );
878
879        // Test with path
880        assert_eq!(
881            MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true"),
882            "path/to/image.png"
883        );
884        assert_eq!(
885            MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true#anchor"),
886            "path/to/image.png"
887        );
888
889        // Edge case: fragment before query (non-standard but possible)
890        assert_eq!(
891            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section?query"),
892            "file.md"
893        );
894    }
895
896    #[test]
897    fn test_url_decode() {
898        // Simple space encoding
899        assert_eq!(
900            MD057ExistingRelativeLinks::url_decode("penguin%20with%20space.jpg"),
901            "penguin with space.jpg"
902        );
903
904        // Path with encoded spaces
905        assert_eq!(
906            MD057ExistingRelativeLinks::url_decode("assets/my%20file%20name.png"),
907            "assets/my file name.png"
908        );
909
910        // Multiple encoded characters
911        assert_eq!(
912            MD057ExistingRelativeLinks::url_decode("hello%20world%21.md"),
913            "hello world!.md"
914        );
915
916        // Lowercase hex
917        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2f%2e%2e"), "/..");
918
919        // Uppercase hex
920        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2F%2E%2E"), "/..");
921
922        // Mixed case hex
923        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2f%2E%2e"), "/..");
924
925        // No encoding - return as-is
926        assert_eq!(
927            MD057ExistingRelativeLinks::url_decode("normal-file.md"),
928            "normal-file.md"
929        );
930
931        // Incomplete percent encoding - leave as-is
932        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%2.txt"), "file%2.txt");
933
934        // Percent at end - leave as-is
935        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%"), "file%");
936
937        // Invalid hex digits - leave as-is
938        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%GG.txt"), "file%GG.txt");
939
940        // Plus sign (should NOT be decoded - that's form encoding, not URL encoding)
941        assert_eq!(MD057ExistingRelativeLinks::url_decode("file+name.txt"), "file+name.txt");
942
943        // Empty string
944        assert_eq!(MD057ExistingRelativeLinks::url_decode(""), "");
945
946        // UTF-8 multi-byte characters (é = C3 A9 in UTF-8)
947        assert_eq!(MD057ExistingRelativeLinks::url_decode("caf%C3%A9.md"), "café.md");
948
949        // Multiple consecutive encoded characters
950        assert_eq!(MD057ExistingRelativeLinks::url_decode("%20%20%20"), "   ");
951
952        // Encoded path separators
953        assert_eq!(
954            MD057ExistingRelativeLinks::url_decode("path%2Fto%2Ffile.md"),
955            "path/to/file.md"
956        );
957
958        // Mixed encoded and non-encoded
959        assert_eq!(
960            MD057ExistingRelativeLinks::url_decode("hello%20world/foo%20bar.md"),
961            "hello world/foo bar.md"
962        );
963
964        // Special characters that are commonly encoded
965        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%5B1%5D.md"), "file[1].md");
966
967        // Percent at position that looks like encoding but isn't valid
968        assert_eq!(MD057ExistingRelativeLinks::url_decode("100%pure.md"), "100%pure.md");
969    }
970
971    #[test]
972    fn test_url_encoded_filenames() {
973        // Create a temporary directory for test files
974        let temp_dir = tempdir().unwrap();
975        let base_path = temp_dir.path();
976
977        // Create a file with spaces in the name
978        let file_with_spaces = base_path.join("penguin with space.jpg");
979        File::create(&file_with_spaces)
980            .unwrap()
981            .write_all(b"image data")
982            .unwrap();
983
984        // Create a subdirectory with spaces
985        let subdir = base_path.join("my images");
986        std::fs::create_dir(&subdir).unwrap();
987        let nested_file = subdir.join("photo 1.png");
988        File::create(&nested_file).unwrap().write_all(b"photo data").unwrap();
989
990        // Test content with URL-encoded links
991        let content = r#"
992# Test Document with URL-Encoded Links
993
994![Penguin](penguin%20with%20space.jpg)
995![Photo](my%20images/photo%201.png)
996![Missing](missing%20file.jpg)
997"#;
998
999        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1000
1001        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1002        let result = rule.check(&ctx).unwrap();
1003
1004        // Should only have one warning for the missing file
1005        assert_eq!(
1006            result.len(),
1007            1,
1008            "Should only warn about missing%20file.jpg. Got: {result:?}"
1009        );
1010        assert!(
1011            result[0].message.contains("missing%20file.jpg"),
1012            "Warning should mention the URL-encoded filename"
1013        );
1014    }
1015
1016    #[test]
1017    fn test_external_urls() {
1018        let rule = MD057ExistingRelativeLinks::new();
1019
1020        // Common web protocols
1021        assert!(rule.is_external_url("https://example.com"));
1022        assert!(rule.is_external_url("http://example.com"));
1023        assert!(rule.is_external_url("ftp://example.com"));
1024        assert!(rule.is_external_url("www.example.com"));
1025        assert!(rule.is_external_url("example.com"));
1026
1027        // Special URI schemes
1028        assert!(rule.is_external_url("file:///path/to/file"));
1029        assert!(rule.is_external_url("smb://server/share"));
1030        assert!(rule.is_external_url("macappstores://apps.apple.com/"));
1031        assert!(rule.is_external_url("mailto:user@example.com"));
1032        assert!(rule.is_external_url("tel:+1234567890"));
1033        assert!(rule.is_external_url("data:text/plain;base64,SGVsbG8="));
1034        assert!(rule.is_external_url("javascript:void(0)"));
1035        assert!(rule.is_external_url("ssh://git@github.com/repo"));
1036        assert!(rule.is_external_url("git://github.com/repo.git"));
1037
1038        // Email addresses without mailto: protocol
1039        // These are clearly not file links and should be skipped
1040        assert!(rule.is_external_url("user@example.com"));
1041        assert!(rule.is_external_url("steering@kubernetes.io"));
1042        assert!(rule.is_external_url("john.doe+filter@company.co.uk"));
1043        assert!(rule.is_external_url("user_name@sub.domain.com"));
1044        assert!(rule.is_external_url("firstname.lastname+tag@really.long.domain.example.org"));
1045
1046        // Template variables should be skipped (not checked as relative links)
1047        assert!(rule.is_external_url("{{URL}}")); // Handlebars/Mustache
1048        assert!(rule.is_external_url("{{#URL}}")); // Handlebars block helper
1049        assert!(rule.is_external_url("{{> partial}}")); // Handlebars partial
1050        assert!(rule.is_external_url("{{ variable }}")); // Mustache with spaces
1051        assert!(rule.is_external_url("{{% include %}}")); // Jinja2/Hugo shortcode
1052        assert!(rule.is_external_url("{{")); // Even partial matches (regex edge case)
1053
1054        // Absolute paths are NOT external (handled separately via is_absolute_path)
1055        // By default they are ignored, but can be configured to warn
1056        assert!(!rule.is_external_url("/api/v1/users"));
1057        assert!(!rule.is_external_url("/blog/2024/release.html"));
1058        assert!(!rule.is_external_url("/react/hooks/use-state.html"));
1059        assert!(!rule.is_external_url("/pkg/runtime"));
1060        assert!(!rule.is_external_url("/doc/go1compat"));
1061        assert!(!rule.is_external_url("/index.html"));
1062        assert!(!rule.is_external_url("/assets/logo.png"));
1063
1064        // But is_absolute_path should detect them
1065        assert!(MD057ExistingRelativeLinks::is_absolute_path("/api/v1/users"));
1066        assert!(MD057ExistingRelativeLinks::is_absolute_path("/blog/2024/release.html"));
1067        assert!(MD057ExistingRelativeLinks::is_absolute_path("/index.html"));
1068        assert!(!MD057ExistingRelativeLinks::is_absolute_path("./relative.md"));
1069        assert!(!MD057ExistingRelativeLinks::is_absolute_path("relative.md"));
1070
1071        // Framework path aliases should be skipped (resolved by build tools)
1072        // Tilde prefix (common in Vite, Nuxt, Astro for project root)
1073        assert!(rule.is_external_url("~/assets/image.png"));
1074        assert!(rule.is_external_url("~/components/Button.vue"));
1075        assert!(rule.is_external_url("~assets/logo.svg")); // Nuxt style without /
1076
1077        // @ prefix (common in Vue, webpack, Vite aliases)
1078        assert!(rule.is_external_url("@/components/Header.vue"));
1079        assert!(rule.is_external_url("@images/photo.jpg"));
1080        assert!(rule.is_external_url("@assets/styles.css"));
1081
1082        // Relative paths should NOT be external (should be validated)
1083        assert!(!rule.is_external_url("./relative/path.md"));
1084        assert!(!rule.is_external_url("relative/path.md"));
1085        assert!(!rule.is_external_url("../parent/path.md"));
1086    }
1087
1088    #[test]
1089    fn test_framework_path_aliases() {
1090        // Create a temporary directory for test files
1091        let temp_dir = tempdir().unwrap();
1092        let base_path = temp_dir.path();
1093
1094        // Test content with framework path aliases (should all be skipped)
1095        let content = r#"
1096# Framework Path Aliases
1097
1098![Image 1](~/assets/penguin.jpg)
1099![Image 2](~assets/logo.svg)
1100![Image 3](@images/photo.jpg)
1101![Image 4](@/components/icon.svg)
1102[Link](@/pages/about.md)
1103
1104This is a [real missing link](missing.md) that should be flagged.
1105"#;
1106
1107        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1108
1109        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1110        let result = rule.check(&ctx).unwrap();
1111
1112        // Should only have one warning for the real missing link
1113        assert_eq!(
1114            result.len(),
1115            1,
1116            "Should only warn about missing.md, not framework aliases. Got: {result:?}"
1117        );
1118        assert!(
1119            result[0].message.contains("missing.md"),
1120            "Warning should be for missing.md"
1121        );
1122    }
1123
1124    #[test]
1125    fn test_url_decode_security_path_traversal() {
1126        // Ensure URL decoding doesn't enable path traversal attacks
1127        // The decoded path is still validated against the base path
1128        let temp_dir = tempdir().unwrap();
1129        let base_path = temp_dir.path();
1130
1131        // Create a file in the temp directory
1132        let file_in_base = base_path.join("safe.md");
1133        File::create(&file_in_base).unwrap().write_all(b"# Safe").unwrap();
1134
1135        // Test with encoded path traversal attempt
1136        // Use a path that definitely won't exist on any platform (not /etc/passwd which exists on Linux)
1137        // %2F = /, so ..%2F..%2Fnonexistent%2Ffile = ../../nonexistent/file
1138        // %252F = %2F (double encoded), so ..%252F..%252F = ..%2F..%2F (literal, won't decode to ..)
1139        let content = r#"
1140[Traversal attempt](..%2F..%2Fnonexistent_dir_12345%2Fmissing.md)
1141[Double encoded](..%252F..%252Fnonexistent%252Ffile.md)
1142[Safe link](safe.md)
1143"#;
1144
1145        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1146
1147        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1148        let result = rule.check(&ctx).unwrap();
1149
1150        // The traversal attempts should still be flagged as missing
1151        // (they don't exist relative to base_path after decoding)
1152        assert_eq!(
1153            result.len(),
1154            2,
1155            "Should have warnings for traversal attempts. Got: {result:?}"
1156        );
1157    }
1158
1159    #[test]
1160    fn test_url_encoded_utf8_filenames() {
1161        // Test with actual UTF-8 encoded filenames
1162        let temp_dir = tempdir().unwrap();
1163        let base_path = temp_dir.path();
1164
1165        // Create files with unicode names
1166        let cafe_file = base_path.join("café.md");
1167        File::create(&cafe_file).unwrap().write_all(b"# Cafe").unwrap();
1168
1169        let content = r#"
1170[Café link](caf%C3%A9.md)
1171[Missing unicode](r%C3%A9sum%C3%A9.md)
1172"#;
1173
1174        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1175
1176        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1177        let result = rule.check(&ctx).unwrap();
1178
1179        // Should only warn about the missing file
1180        assert_eq!(
1181            result.len(),
1182            1,
1183            "Should only warn about missing résumé.md. Got: {result:?}"
1184        );
1185        assert!(
1186            result[0].message.contains("r%C3%A9sum%C3%A9.md"),
1187            "Warning should mention the URL-encoded filename"
1188        );
1189    }
1190
1191    #[test]
1192    fn test_url_encoded_emoji_filenames() {
1193        // URL-encoded emoji paths should be correctly resolved
1194        // 👤 = U+1F464 = F0 9F 91 A4 in UTF-8
1195        let temp_dir = tempdir().unwrap();
1196        let base_path = temp_dir.path();
1197
1198        // Create directory with emoji in name: 👤 Personal
1199        let emoji_dir = base_path.join("👤 Personal");
1200        std::fs::create_dir(&emoji_dir).unwrap();
1201
1202        // Create file in that directory: TV Shows.md
1203        let file_path = emoji_dir.join("TV Shows.md");
1204        File::create(&file_path)
1205            .unwrap()
1206            .write_all(b"# TV Shows\n\nContent here.")
1207            .unwrap();
1208
1209        // Test content with URL-encoded emoji link
1210        // %F0%9F%91%A4 = 👤, %20 = space
1211        let content = r#"
1212# Test Document
1213
1214[TV Shows](./%F0%9F%91%A4%20Personal/TV%20Shows.md)
1215[Missing](./%F0%9F%91%A4%20Personal/Missing.md)
1216"#;
1217
1218        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1219
1220        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1221        let result = rule.check(&ctx).unwrap();
1222
1223        // Should only warn about the missing file, not the valid emoji path
1224        assert_eq!(result.len(), 1, "Should only warn about missing file. Got: {result:?}");
1225        assert!(
1226            result[0].message.contains("Missing.md"),
1227            "Warning should be for Missing.md, got: {}",
1228            result[0].message
1229        );
1230    }
1231
1232    #[test]
1233    fn test_no_warnings_without_base_path() {
1234        let rule = MD057ExistingRelativeLinks::new();
1235        let content = "[Link](missing.md)";
1236
1237        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1238        let result = rule.check(&ctx).unwrap();
1239        assert!(result.is_empty(), "Should have no warnings without base path");
1240    }
1241
1242    #[test]
1243    fn test_existing_and_missing_links() {
1244        // Create a temporary directory for test files
1245        let temp_dir = tempdir().unwrap();
1246        let base_path = temp_dir.path();
1247
1248        // Create an existing file
1249        let exists_path = base_path.join("exists.md");
1250        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
1251
1252        // Verify the file exists
1253        assert!(exists_path.exists(), "exists.md should exist for this test");
1254
1255        // Create test content with both existing and missing links
1256        let content = r#"
1257# Test Document
1258
1259[Valid Link](exists.md)
1260[Invalid Link](missing.md)
1261[External Link](https://example.com)
1262[Media Link](image.jpg)
1263        "#;
1264
1265        // Initialize rule with the base path (default: check all files including media)
1266        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1267
1268        // Test the rule
1269        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1270        let result = rule.check(&ctx).unwrap();
1271
1272        // Should have two warnings: missing.md and image.jpg (both don't exist)
1273        assert_eq!(result.len(), 2);
1274        let messages: Vec<_> = result.iter().map(|w| w.message.as_str()).collect();
1275        assert!(messages.iter().any(|m| m.contains("missing.md")));
1276        assert!(messages.iter().any(|m| m.contains("image.jpg")));
1277    }
1278
1279    #[test]
1280    fn test_angle_bracket_links() {
1281        // Create a temporary directory for test files
1282        let temp_dir = tempdir().unwrap();
1283        let base_path = temp_dir.path();
1284
1285        // Create an existing file
1286        let exists_path = base_path.join("exists.md");
1287        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
1288
1289        // Create test content with angle bracket links
1290        let content = r#"
1291# Test Document
1292
1293[Valid Link](<exists.md>)
1294[Invalid Link](<missing.md>)
1295[External Link](<https://example.com>)
1296    "#;
1297
1298        // Test with default settings
1299        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1300
1301        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1302        let result = rule.check(&ctx).unwrap();
1303
1304        // Should have one warning for missing.md
1305        assert_eq!(result.len(), 1, "Should have exactly one warning");
1306        assert!(
1307            result[0].message.contains("missing.md"),
1308            "Warning should mention missing.md"
1309        );
1310    }
1311
1312    #[test]
1313    fn test_angle_bracket_links_with_parens() {
1314        // Create a temporary directory for test files
1315        let temp_dir = tempdir().unwrap();
1316        let base_path = temp_dir.path();
1317
1318        // Create directory structure with parentheses in path
1319        let app_dir = base_path.join("app");
1320        std::fs::create_dir(&app_dir).unwrap();
1321        let upload_dir = app_dir.join("(upload)");
1322        std::fs::create_dir(&upload_dir).unwrap();
1323        let page_file = upload_dir.join("page.tsx");
1324        File::create(&page_file)
1325            .unwrap()
1326            .write_all(b"export default function Page() {}")
1327            .unwrap();
1328
1329        // Create test content with angle bracket links containing parentheses
1330        let content = r#"
1331# Test Document with Paths Containing Parens
1332
1333[Upload Page](<app/(upload)/page.tsx>)
1334[Unix pipe](<https://en.wikipedia.org/wiki/Pipeline_(Unix)>)
1335[Missing](<app/(missing)/file.md>)
1336"#;
1337
1338        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1339
1340        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1341        let result = rule.check(&ctx).unwrap();
1342
1343        // Should only have one warning for the missing file
1344        assert_eq!(
1345            result.len(),
1346            1,
1347            "Should have exactly one warning for missing file. Got: {result:?}"
1348        );
1349        assert!(
1350            result[0].message.contains("app/(missing)/file.md"),
1351            "Warning should mention app/(missing)/file.md"
1352        );
1353    }
1354
1355    #[test]
1356    fn test_all_file_types_checked() {
1357        // Create a temporary directory for test files
1358        let temp_dir = tempdir().unwrap();
1359        let base_path = temp_dir.path();
1360
1361        // Create a test with various file types - all should be checked
1362        let content = r#"
1363[Image Link](image.jpg)
1364[Video Link](video.mp4)
1365[Markdown Link](document.md)
1366[PDF Link](file.pdf)
1367"#;
1368
1369        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1370
1371        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1372        let result = rule.check(&ctx).unwrap();
1373
1374        // Should warn about all missing files regardless of extension
1375        assert_eq!(result.len(), 4, "Should have warnings for all missing files");
1376    }
1377
1378    #[test]
1379    fn test_code_span_detection() {
1380        let rule = MD057ExistingRelativeLinks::new();
1381
1382        // Create a temporary directory for test files
1383        let temp_dir = tempdir().unwrap();
1384        let base_path = temp_dir.path();
1385
1386        let rule = rule.with_path(base_path);
1387
1388        // Test with document structure
1389        let content = "This is a [link](nonexistent.md) and `[not a link](not-checked.md)` in code.";
1390
1391        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1392        let result = rule.check(&ctx).unwrap();
1393
1394        // Should only find the real link, not the one in code
1395        assert_eq!(result.len(), 1, "Should only flag the real link");
1396        assert!(result[0].message.contains("nonexistent.md"));
1397    }
1398
1399    #[test]
1400    fn test_inline_code_spans() {
1401        // Create a temporary directory for test files
1402        let temp_dir = tempdir().unwrap();
1403        let base_path = temp_dir.path();
1404
1405        // Create test content with links in inline code spans
1406        let content = r#"
1407# Test Document
1408
1409This is a normal link: [Link](missing.md)
1410
1411This is a code span with a link: `[Link](another-missing.md)`
1412
1413Some more text with `inline code [Link](yet-another-missing.md) embedded`.
1414
1415    "#;
1416
1417        // Initialize rule with the base path
1418        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1419
1420        // Test the rule
1421        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1422        let result = rule.check(&ctx).unwrap();
1423
1424        // Should only have warning for the normal link, not for links in code spans
1425        assert_eq!(result.len(), 1, "Should have exactly one warning");
1426        assert!(
1427            result[0].message.contains("missing.md"),
1428            "Warning should be for missing.md"
1429        );
1430        assert!(
1431            !result.iter().any(|w| w.message.contains("another-missing.md")),
1432            "Should not warn about link in code span"
1433        );
1434        assert!(
1435            !result.iter().any(|w| w.message.contains("yet-another-missing.md")),
1436            "Should not warn about link in inline code"
1437        );
1438    }
1439
1440    #[test]
1441    fn test_extensionless_link_resolution() {
1442        // Create a temporary directory for test files
1443        let temp_dir = tempdir().unwrap();
1444        let base_path = temp_dir.path();
1445
1446        // Create a markdown file WITHOUT specifying .md extension in the link
1447        let page_path = base_path.join("page.md");
1448        File::create(&page_path).unwrap().write_all(b"# Page").unwrap();
1449
1450        // Test content with extensionless link that should resolve to page.md
1451        let content = r#"
1452# Test Document
1453
1454[Link without extension](page)
1455[Link with extension](page.md)
1456[Missing link](nonexistent)
1457"#;
1458
1459        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1460
1461        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1462        let result = rule.check(&ctx).unwrap();
1463
1464        // Should only have warning for nonexistent link
1465        // Both "page" and "page.md" should resolve to the same file
1466        assert_eq!(result.len(), 1, "Should only warn about nonexistent link");
1467        assert!(
1468            result[0].message.contains("nonexistent"),
1469            "Warning should be for 'nonexistent' not 'page'"
1470        );
1471    }
1472
1473    // Cross-file validation tests
1474    #[test]
1475    fn test_cross_file_scope() {
1476        let rule = MD057ExistingRelativeLinks::new();
1477        assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
1478    }
1479
1480    #[test]
1481    fn test_contribute_to_index_extracts_markdown_links() {
1482        let rule = MD057ExistingRelativeLinks::new();
1483        let content = r#"
1484# Document
1485
1486[Link to docs](./docs/guide.md)
1487[Link with fragment](./other.md#section)
1488[External link](https://example.com)
1489[Image link](image.png)
1490[Media file](video.mp4)
1491"#;
1492
1493        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1494        let mut index = FileIndex::new();
1495        rule.contribute_to_index(&ctx, &mut index);
1496
1497        // Should only index markdown file links
1498        assert_eq!(index.cross_file_links.len(), 2);
1499
1500        // Check first link
1501        assert_eq!(index.cross_file_links[0].target_path, "./docs/guide.md");
1502        assert_eq!(index.cross_file_links[0].fragment, "");
1503
1504        // Check second link (with fragment)
1505        assert_eq!(index.cross_file_links[1].target_path, "./other.md");
1506        assert_eq!(index.cross_file_links[1].fragment, "section");
1507    }
1508
1509    #[test]
1510    fn test_contribute_to_index_skips_external_and_anchors() {
1511        let rule = MD057ExistingRelativeLinks::new();
1512        let content = r#"
1513# Document
1514
1515[External](https://example.com)
1516[Another external](http://example.org)
1517[Fragment only](#section)
1518[FTP link](ftp://files.example.com)
1519[Mail link](mailto:test@example.com)
1520[WWW link](www.example.com)
1521"#;
1522
1523        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1524        let mut index = FileIndex::new();
1525        rule.contribute_to_index(&ctx, &mut index);
1526
1527        // Should not index any of these
1528        assert_eq!(index.cross_file_links.len(), 0);
1529    }
1530
1531    #[test]
1532    fn test_cross_file_check_valid_link() {
1533        use crate::workspace_index::WorkspaceIndex;
1534
1535        let rule = MD057ExistingRelativeLinks::new();
1536
1537        // Create a workspace index with the target file
1538        let mut workspace_index = WorkspaceIndex::new();
1539        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());
1540
1541        // Create file index with a link to an existing file
1542        let mut file_index = FileIndex::new();
1543        file_index.add_cross_file_link(CrossFileLinkIndex {
1544            target_path: "guide.md".to_string(),
1545            fragment: "".to_string(),
1546            line: 5,
1547            column: 1,
1548        });
1549
1550        // Run cross-file check from docs/index.md
1551        let warnings = rule
1552            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1553            .unwrap();
1554
1555        // Should have no warnings - file exists
1556        assert!(warnings.is_empty());
1557    }
1558
1559    #[test]
1560    fn test_cross_file_check_missing_link() {
1561        use crate::workspace_index::WorkspaceIndex;
1562
1563        let rule = MD057ExistingRelativeLinks::new();
1564
1565        // Create an empty workspace index
1566        let workspace_index = WorkspaceIndex::new();
1567
1568        // Create file index with a link to a missing file
1569        let mut file_index = FileIndex::new();
1570        file_index.add_cross_file_link(CrossFileLinkIndex {
1571            target_path: "missing.md".to_string(),
1572            fragment: "".to_string(),
1573            line: 5,
1574            column: 1,
1575        });
1576
1577        // Run cross-file check
1578        let warnings = rule
1579            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1580            .unwrap();
1581
1582        // Should have one warning for the missing file
1583        assert_eq!(warnings.len(), 1);
1584        assert!(warnings[0].message.contains("missing.md"));
1585        assert!(warnings[0].message.contains("does not exist"));
1586    }
1587
1588    #[test]
1589    fn test_cross_file_check_parent_path() {
1590        use crate::workspace_index::WorkspaceIndex;
1591
1592        let rule = MD057ExistingRelativeLinks::new();
1593
1594        // Create a workspace index with the target file at the root
1595        let mut workspace_index = WorkspaceIndex::new();
1596        workspace_index.insert_file(PathBuf::from("readme.md"), FileIndex::new());
1597
1598        // Create file index with a parent path link
1599        let mut file_index = FileIndex::new();
1600        file_index.add_cross_file_link(CrossFileLinkIndex {
1601            target_path: "../readme.md".to_string(),
1602            fragment: "".to_string(),
1603            line: 5,
1604            column: 1,
1605        });
1606
1607        // Run cross-file check from docs/guide.md
1608        let warnings = rule
1609            .cross_file_check(Path::new("docs/guide.md"), &file_index, &workspace_index)
1610            .unwrap();
1611
1612        // Should have no warnings - file exists at normalized path
1613        assert!(warnings.is_empty());
1614    }
1615
1616    #[test]
1617    fn test_cross_file_check_html_link_with_md_source() {
1618        // Test that .html links are accepted when corresponding .md source exists
1619        // This supports mdBook and similar doc generators that compile .md to .html
1620        use crate::workspace_index::WorkspaceIndex;
1621
1622        let rule = MD057ExistingRelativeLinks::new();
1623
1624        // Create a workspace index with the .md source file
1625        let mut workspace_index = WorkspaceIndex::new();
1626        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());
1627
1628        // Create file index with an .html link (from another rule like MD051)
1629        let mut file_index = FileIndex::new();
1630        file_index.add_cross_file_link(CrossFileLinkIndex {
1631            target_path: "guide.html".to_string(),
1632            fragment: "section".to_string(),
1633            line: 10,
1634            column: 5,
1635        });
1636
1637        // Run cross-file check from docs/index.md
1638        let warnings = rule
1639            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1640            .unwrap();
1641
1642        // Should have no warnings - .md source exists for the .html link
1643        assert!(
1644            warnings.is_empty(),
1645            "Expected no warnings for .html link with .md source, got: {warnings:?}"
1646        );
1647    }
1648
1649    #[test]
1650    fn test_cross_file_check_html_link_without_source() {
1651        // Test that .html links without corresponding .md source ARE flagged
1652        use crate::workspace_index::WorkspaceIndex;
1653
1654        let rule = MD057ExistingRelativeLinks::new();
1655
1656        // Create an empty workspace index
1657        let workspace_index = WorkspaceIndex::new();
1658
1659        // Create file index with an .html link to a non-existent file
1660        let mut file_index = FileIndex::new();
1661        file_index.add_cross_file_link(CrossFileLinkIndex {
1662            target_path: "missing.html".to_string(),
1663            fragment: "".to_string(),
1664            line: 10,
1665            column: 5,
1666        });
1667
1668        // Run cross-file check from docs/index.md
1669        let warnings = rule
1670            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1671            .unwrap();
1672
1673        // Should have one warning - no .md source exists
1674        assert_eq!(warnings.len(), 1, "Expected 1 warning for .html link without source");
1675        assert!(warnings[0].message.contains("missing.html"));
1676    }
1677
1678    #[test]
1679    fn test_normalize_path_function() {
1680        // Test simple cases
1681        assert_eq!(
1682            normalize_path(Path::new("docs/guide.md")),
1683            PathBuf::from("docs/guide.md")
1684        );
1685
1686        // Test current directory removal
1687        assert_eq!(
1688            normalize_path(Path::new("./docs/guide.md")),
1689            PathBuf::from("docs/guide.md")
1690        );
1691
1692        // Test parent directory resolution
1693        assert_eq!(
1694            normalize_path(Path::new("docs/sub/../guide.md")),
1695            PathBuf::from("docs/guide.md")
1696        );
1697
1698        // Test multiple parent directories
1699        assert_eq!(normalize_path(Path::new("a/b/c/../../d.md")), PathBuf::from("a/d.md"));
1700    }
1701
1702    #[test]
1703    fn test_html_link_with_md_source() {
1704        // Links to .html files should pass if corresponding .md source exists
1705        let temp_dir = tempdir().unwrap();
1706        let base_path = temp_dir.path();
1707
1708        // Create guide.md (source file)
1709        let md_file = base_path.join("guide.md");
1710        File::create(&md_file).unwrap().write_all(b"# Guide").unwrap();
1711
1712        let content = r#"
1713[Read the guide](guide.html)
1714[Also here](getting-started.html)
1715"#;
1716
1717        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1718        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1719        let result = rule.check(&ctx).unwrap();
1720
1721        // guide.html passes (guide.md exists), getting-started.html fails
1722        assert_eq!(
1723            result.len(),
1724            1,
1725            "Should only warn about missing source. Got: {result:?}"
1726        );
1727        assert!(result[0].message.contains("getting-started.html"));
1728    }
1729
1730    #[test]
1731    fn test_htm_link_with_md_source() {
1732        // .htm extension should also check for markdown source
1733        let temp_dir = tempdir().unwrap();
1734        let base_path = temp_dir.path();
1735
1736        let md_file = base_path.join("page.md");
1737        File::create(&md_file).unwrap().write_all(b"# Page").unwrap();
1738
1739        let content = "[Page](page.htm)";
1740
1741        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1742        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1743        let result = rule.check(&ctx).unwrap();
1744
1745        assert!(
1746            result.is_empty(),
1747            "Should not warn when .md source exists for .htm link"
1748        );
1749    }
1750
1751    #[test]
1752    fn test_html_link_finds_various_markdown_extensions() {
1753        // Should find .mdx, .markdown, etc. as source files
1754        let temp_dir = tempdir().unwrap();
1755        let base_path = temp_dir.path();
1756
1757        File::create(base_path.join("doc.md")).unwrap();
1758        File::create(base_path.join("tutorial.mdx")).unwrap();
1759        File::create(base_path.join("guide.markdown")).unwrap();
1760
1761        let content = r#"
1762[Doc](doc.html)
1763[Tutorial](tutorial.html)
1764[Guide](guide.html)
1765"#;
1766
1767        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1768        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1769        let result = rule.check(&ctx).unwrap();
1770
1771        assert!(
1772            result.is_empty(),
1773            "Should find all markdown variants as source files. Got: {result:?}"
1774        );
1775    }
1776
1777    #[test]
1778    fn test_html_link_in_subdirectory() {
1779        // Should find markdown source in subdirectories
1780        let temp_dir = tempdir().unwrap();
1781        let base_path = temp_dir.path();
1782
1783        let docs_dir = base_path.join("docs");
1784        std::fs::create_dir(&docs_dir).unwrap();
1785        File::create(docs_dir.join("guide.md"))
1786            .unwrap()
1787            .write_all(b"# Guide")
1788            .unwrap();
1789
1790        let content = "[Guide](docs/guide.html)";
1791
1792        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1793        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1794        let result = rule.check(&ctx).unwrap();
1795
1796        assert!(result.is_empty(), "Should find markdown source in subdirectory");
1797    }
1798
1799    #[test]
1800    fn test_absolute_path_skipped_in_check() {
1801        // Test that absolute paths are skipped during link validation
1802        // This fixes the bug where /pkg/runtime was being flagged
1803        let temp_dir = tempdir().unwrap();
1804        let base_path = temp_dir.path();
1805
1806        let content = r#"
1807# Test Document
1808
1809[Go Runtime](/pkg/runtime)
1810[Go Runtime with Fragment](/pkg/runtime#section)
1811[API Docs](/api/v1/users)
1812[Blog Post](/blog/2024/release.html)
1813[React Hook](/react/hooks/use-state.html)
1814"#;
1815
1816        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1817        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1818        let result = rule.check(&ctx).unwrap();
1819
1820        // Should have NO warnings - all absolute paths should be skipped
1821        assert!(
1822            result.is_empty(),
1823            "Absolute paths should be skipped. Got warnings: {result:?}"
1824        );
1825    }
1826
1827    #[test]
1828    fn test_absolute_path_skipped_in_cross_file_check() {
1829        // Test that absolute paths are skipped in cross_file_check()
1830        use crate::workspace_index::WorkspaceIndex;
1831
1832        let rule = MD057ExistingRelativeLinks::new();
1833
1834        // Create an empty workspace index (no files exist)
1835        let workspace_index = WorkspaceIndex::new();
1836
1837        // Create file index with absolute path links (should be skipped)
1838        let mut file_index = FileIndex::new();
1839        file_index.add_cross_file_link(CrossFileLinkIndex {
1840            target_path: "/pkg/runtime.md".to_string(),
1841            fragment: "".to_string(),
1842            line: 5,
1843            column: 1,
1844        });
1845        file_index.add_cross_file_link(CrossFileLinkIndex {
1846            target_path: "/api/v1/users.md".to_string(),
1847            fragment: "section".to_string(),
1848            line: 10,
1849            column: 1,
1850        });
1851
1852        // Run cross-file check
1853        let warnings = rule
1854            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
1855            .unwrap();
1856
1857        // Should have NO warnings - absolute paths should be skipped
1858        assert!(
1859            warnings.is_empty(),
1860            "Absolute paths should be skipped in cross_file_check. Got warnings: {warnings:?}"
1861        );
1862    }
1863
1864    #[test]
1865    fn test_protocol_relative_url_not_skipped() {
1866        // Test that protocol-relative URLs (//example.com) are NOT skipped as absolute paths
1867        // They should still be caught by is_external_url() though
1868        let temp_dir = tempdir().unwrap();
1869        let base_path = temp_dir.path();
1870
1871        let content = r#"
1872# Test Document
1873
1874[External](//example.com/page)
1875[Another](//cdn.example.com/asset.js)
1876"#;
1877
1878        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1879        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1880        let result = rule.check(&ctx).unwrap();
1881
1882        // Should have NO warnings - protocol-relative URLs are external and should be skipped
1883        assert!(
1884            result.is_empty(),
1885            "Protocol-relative URLs should be skipped. Got warnings: {result:?}"
1886        );
1887    }
1888
1889    #[test]
1890    fn test_email_addresses_skipped() {
1891        // Test that email addresses without mailto: are skipped
1892        // These are clearly not file links (the @ symbol is definitive)
1893        let temp_dir = tempdir().unwrap();
1894        let base_path = temp_dir.path();
1895
1896        let content = r#"
1897# Test Document
1898
1899[Contact](user@example.com)
1900[Steering](steering@kubernetes.io)
1901[Support](john.doe+filter@company.co.uk)
1902[User](user_name@sub.domain.com)
1903"#;
1904
1905        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1906        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1907        let result = rule.check(&ctx).unwrap();
1908
1909        // Should have NO warnings - email addresses are clearly not file links and should be skipped
1910        assert!(
1911            result.is_empty(),
1912            "Email addresses should be skipped. Got warnings: {result:?}"
1913        );
1914    }
1915
1916    #[test]
1917    fn test_email_addresses_vs_file_paths() {
1918        // Test that email addresses (anything with @) are skipped
1919        // Note: File paths with @ are extremely rare, so we treat anything with @ as an email
1920        let temp_dir = tempdir().unwrap();
1921        let base_path = temp_dir.path();
1922
1923        let content = r#"
1924# Test Document
1925
1926[Email](user@example.com)  <!-- Should be skipped (email) -->
1927[Email2](steering@kubernetes.io)  <!-- Should be skipped (email) -->
1928[Email3](user@file.md)  <!-- Should be skipped (has @, treated as email) -->
1929"#;
1930
1931        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1932        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1933        let result = rule.check(&ctx).unwrap();
1934
1935        // All should be skipped - anything with @ is treated as an email
1936        assert!(
1937            result.is_empty(),
1938            "All email addresses should be skipped. Got: {result:?}"
1939        );
1940    }
1941
1942    #[test]
1943    fn test_diagnostic_position_accuracy() {
1944        // Test that diagnostics point to the URL, not the link text
1945        let temp_dir = tempdir().unwrap();
1946        let base_path = temp_dir.path();
1947
1948        // Position markers:     0         1         2         3
1949        //                       0123456789012345678901234567890123456789
1950        let content = "prefix [text](missing.md) suffix";
1951        //             The URL "missing.md" starts at 0-indexed position 14
1952        //             which is 1-indexed column 15, and ends at 0-indexed 24 (1-indexed column 25)
1953
1954        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1955        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1956        let result = rule.check(&ctx).unwrap();
1957
1958        assert_eq!(result.len(), 1, "Should have exactly one warning");
1959        assert_eq!(result[0].line, 1, "Should be on line 1");
1960        assert_eq!(result[0].column, 15, "Should point to start of URL 'missing.md'");
1961        assert_eq!(result[0].end_column, 25, "Should point past end of URL 'missing.md'");
1962    }
1963
1964    #[test]
1965    fn test_diagnostic_position_angle_brackets() {
1966        // Test position accuracy with angle bracket links
1967        let temp_dir = tempdir().unwrap();
1968        let base_path = temp_dir.path();
1969
1970        // Position markers:     0         1         2
1971        //                       012345678901234567890
1972        let content = "[link](<missing.md>)";
1973        //             The URL "missing.md" starts at 0-indexed position 8 (1-indexed column 9)
1974
1975        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1976        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1977        let result = rule.check(&ctx).unwrap();
1978
1979        assert_eq!(result.len(), 1, "Should have exactly one warning");
1980        assert_eq!(result[0].line, 1, "Should be on line 1");
1981        assert_eq!(result[0].column, 9, "Should point to start of URL in angle brackets");
1982    }
1983
1984    #[test]
1985    fn test_diagnostic_position_multiline() {
1986        // Test that line numbers are correct for links on different lines
1987        let temp_dir = tempdir().unwrap();
1988        let base_path = temp_dir.path();
1989
1990        let content = r#"# Title
1991Some text on line 2
1992[link on line 3](missing1.md)
1993More text
1994[link on line 5](missing2.md)"#;
1995
1996        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1997        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1998        let result = rule.check(&ctx).unwrap();
1999
2000        assert_eq!(result.len(), 2, "Should have two warnings");
2001
2002        // First warning should be on line 3
2003        assert_eq!(result[0].line, 3, "First warning should be on line 3");
2004        assert!(result[0].message.contains("missing1.md"));
2005
2006        // Second warning should be on line 5
2007        assert_eq!(result[1].line, 5, "Second warning should be on line 5");
2008        assert!(result[1].message.contains("missing2.md"));
2009    }
2010
2011    #[test]
2012    fn test_diagnostic_position_with_spaces() {
2013        // Test position with URLs that have spaces in parentheses
2014        let temp_dir = tempdir().unwrap();
2015        let base_path = temp_dir.path();
2016
2017        let content = "[link]( missing.md )";
2018        //             0123456789012345678901
2019        //             0-indexed position 8 is 'm' in 'missing.md' (after space and paren)
2020        //             which is 1-indexed column 9
2021
2022        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2023        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2024        let result = rule.check(&ctx).unwrap();
2025
2026        assert_eq!(result.len(), 1, "Should have exactly one warning");
2027        // The regex captures the URL without leading/trailing spaces
2028        assert_eq!(result[0].column, 9, "Should point to URL after stripping spaces");
2029    }
2030
2031    #[test]
2032    fn test_diagnostic_position_image() {
2033        // Test that image diagnostics also have correct positions
2034        let temp_dir = tempdir().unwrap();
2035        let base_path = temp_dir.path();
2036
2037        let content = "![alt text](missing.jpg)";
2038
2039        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2040        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2041        let result = rule.check(&ctx).unwrap();
2042
2043        assert_eq!(result.len(), 1, "Should have exactly one warning for image");
2044        assert_eq!(result[0].line, 1);
2045        // Images use start_col from the parser, which should point to the URL
2046        assert!(result[0].column > 0, "Should have valid column position");
2047        assert!(result[0].message.contains("missing.jpg"));
2048    }
2049
2050    #[test]
2051    fn test_wikilinks_skipped() {
2052        // Wikilinks should not trigger MD057 warnings
2053        // They use a different linking system (e.g., Obsidian, wiki software)
2054        let temp_dir = tempdir().unwrap();
2055        let base_path = temp_dir.path();
2056
2057        let content = r#"# Test Document
2058
2059[[Microsoft#Windows OS]]
2060[[SomePage]]
2061[[Page With Spaces]]
2062[[path/to/page#section]]
2063[[page|Display Text]]
2064
2065This is a [real missing link](missing.md) that should be flagged.
2066"#;
2067
2068        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2069        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2070        let result = rule.check(&ctx).unwrap();
2071
2072        // Should only warn about the regular markdown link, not wikilinks
2073        assert_eq!(
2074            result.len(),
2075            1,
2076            "Should only warn about missing.md, not wikilinks. Got: {result:?}"
2077        );
2078        assert!(
2079            result[0].message.contains("missing.md"),
2080            "Warning should be for missing.md, not wikilinks"
2081        );
2082    }
2083
2084    #[test]
2085    fn test_wikilinks_not_added_to_index() {
2086        // Wikilinks should not be added to the cross-file link index
2087        let temp_dir = tempdir().unwrap();
2088        let base_path = temp_dir.path();
2089
2090        let content = r#"# Test Document
2091
2092[[Microsoft#Windows OS]]
2093[[SomePage#section]]
2094[Regular Link](other.md)
2095"#;
2096
2097        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2098        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2099
2100        let mut file_index = FileIndex::new();
2101        rule.contribute_to_index(&ctx, &mut file_index);
2102
2103        // Should only have the regular markdown link (if it's a markdown file)
2104        // Wikilinks should not be added
2105        let cross_file_links = &file_index.cross_file_links;
2106        assert_eq!(
2107            cross_file_links.len(),
2108            1,
2109            "Only regular markdown links should be indexed, not wikilinks. Got: {cross_file_links:?}"
2110        );
2111        assert_eq!(file_index.cross_file_links[0].target_path, "other.md");
2112    }
2113
2114    #[test]
2115    fn test_reference_definition_missing_file() {
2116        // Reference definitions [ref]: ./path.md should be checked
2117        let temp_dir = tempdir().unwrap();
2118        let base_path = temp_dir.path();
2119
2120        let content = r#"# Test Document
2121
2122[test]: ./missing.md
2123[example]: ./nonexistent.html
2124
2125Use [test] and [example] here.
2126"#;
2127
2128        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2129        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2130        let result = rule.check(&ctx).unwrap();
2131
2132        // Should have warnings for both reference definitions
2133        assert_eq!(
2134            result.len(),
2135            2,
2136            "Should have warnings for missing reference definition targets. Got: {result:?}"
2137        );
2138        assert!(
2139            result.iter().any(|w| w.message.contains("missing.md")),
2140            "Should warn about missing.md"
2141        );
2142        assert!(
2143            result.iter().any(|w| w.message.contains("nonexistent.html")),
2144            "Should warn about nonexistent.html"
2145        );
2146    }
2147
2148    #[test]
2149    fn test_reference_definition_existing_file() {
2150        // Reference definitions to existing files should NOT trigger warnings
2151        let temp_dir = tempdir().unwrap();
2152        let base_path = temp_dir.path();
2153
2154        // Create an existing file
2155        let exists_path = base_path.join("exists.md");
2156        File::create(&exists_path)
2157            .unwrap()
2158            .write_all(b"# Existing file")
2159            .unwrap();
2160
2161        let content = r#"# Test Document
2162
2163[test]: ./exists.md
2164
2165Use [test] here.
2166"#;
2167
2168        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2169        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2170        let result = rule.check(&ctx).unwrap();
2171
2172        // Should have NO warnings since the file exists
2173        assert!(
2174            result.is_empty(),
2175            "Should not warn about existing file. Got: {result:?}"
2176        );
2177    }
2178
2179    #[test]
2180    fn test_reference_definition_external_url_skipped() {
2181        // Reference definitions with external URLs should be skipped
2182        let temp_dir = tempdir().unwrap();
2183        let base_path = temp_dir.path();
2184
2185        let content = r#"# Test Document
2186
2187[google]: https://google.com
2188[example]: http://example.org
2189[mail]: mailto:test@example.com
2190[ftp]: ftp://files.example.com
2191[local]: ./missing.md
2192
2193Use [google], [example], [mail], [ftp], [local] here.
2194"#;
2195
2196        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2197        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2198        let result = rule.check(&ctx).unwrap();
2199
2200        // Should only warn about the local missing file, not external URLs
2201        assert_eq!(
2202            result.len(),
2203            1,
2204            "Should only warn about local missing file. Got: {result:?}"
2205        );
2206        assert!(
2207            result[0].message.contains("missing.md"),
2208            "Warning should be for missing.md"
2209        );
2210    }
2211
2212    #[test]
2213    fn test_reference_definition_fragment_only_skipped() {
2214        // Reference definitions with fragment-only URLs should be skipped
2215        let temp_dir = tempdir().unwrap();
2216        let base_path = temp_dir.path();
2217
2218        let content = r#"# Test Document
2219
2220[section]: #my-section
2221
2222Use [section] here.
2223"#;
2224
2225        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2226        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2227        let result = rule.check(&ctx).unwrap();
2228
2229        // Should have NO warnings for fragment-only links
2230        assert!(
2231            result.is_empty(),
2232            "Should not warn about fragment-only reference. Got: {result:?}"
2233        );
2234    }
2235
2236    #[test]
2237    fn test_reference_definition_column_position() {
2238        // Test that column position points to the URL in the reference definition
2239        let temp_dir = tempdir().unwrap();
2240        let base_path = temp_dir.path();
2241
2242        // Position markers:     0         1         2
2243        //                       0123456789012345678901
2244        let content = "[ref]: ./missing.md";
2245        //             The URL "./missing.md" starts at 0-indexed position 7
2246        //             which is 1-indexed column 8
2247
2248        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2249        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2250        let result = rule.check(&ctx).unwrap();
2251
2252        assert_eq!(result.len(), 1, "Should have exactly one warning");
2253        assert_eq!(result[0].line, 1, "Should be on line 1");
2254        assert_eq!(result[0].column, 8, "Should point to start of URL './missing.md'");
2255    }
2256
2257    #[test]
2258    fn test_reference_definition_html_with_md_source() {
2259        // Reference definitions to .html files should pass if corresponding .md source exists
2260        let temp_dir = tempdir().unwrap();
2261        let base_path = temp_dir.path();
2262
2263        // Create guide.md (source file)
2264        let md_file = base_path.join("guide.md");
2265        File::create(&md_file).unwrap().write_all(b"# Guide").unwrap();
2266
2267        let content = r#"# Test Document
2268
2269[guide]: ./guide.html
2270[missing]: ./missing.html
2271
2272Use [guide] and [missing] here.
2273"#;
2274
2275        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2276        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2277        let result = rule.check(&ctx).unwrap();
2278
2279        // guide.html passes (guide.md exists), missing.html fails
2280        assert_eq!(
2281            result.len(),
2282            1,
2283            "Should only warn about missing source. Got: {result:?}"
2284        );
2285        assert!(result[0].message.contains("missing.html"));
2286    }
2287
2288    #[test]
2289    fn test_reference_definition_url_encoded() {
2290        // Reference definitions with URL-encoded paths should be decoded before checking
2291        let temp_dir = tempdir().unwrap();
2292        let base_path = temp_dir.path();
2293
2294        // Create a file with spaces in the name
2295        let file_with_spaces = base_path.join("file with spaces.md");
2296        File::create(&file_with_spaces).unwrap().write_all(b"# Spaces").unwrap();
2297
2298        let content = r#"# Test Document
2299
2300[spaces]: ./file%20with%20spaces.md
2301[missing]: ./missing%20file.md
2302
2303Use [spaces] and [missing] here.
2304"#;
2305
2306        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2307        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2308        let result = rule.check(&ctx).unwrap();
2309
2310        // Should only warn about the missing file
2311        assert_eq!(
2312            result.len(),
2313            1,
2314            "Should only warn about missing URL-encoded file. Got: {result:?}"
2315        );
2316        assert!(result[0].message.contains("missing%20file.md"));
2317    }
2318
2319    #[test]
2320    fn test_inline_and_reference_both_checked() {
2321        // Both inline links and reference definitions should be checked
2322        let temp_dir = tempdir().unwrap();
2323        let base_path = temp_dir.path();
2324
2325        let content = r#"# Test Document
2326
2327[inline link](./inline-missing.md)
2328[ref]: ./ref-missing.md
2329
2330Use [ref] here.
2331"#;
2332
2333        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2334        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2335        let result = rule.check(&ctx).unwrap();
2336
2337        // Should warn about both the inline link and the reference definition
2338        assert_eq!(
2339            result.len(),
2340            2,
2341            "Should warn about both inline and reference links. Got: {result:?}"
2342        );
2343        assert!(
2344            result.iter().any(|w| w.message.contains("inline-missing.md")),
2345            "Should warn about inline-missing.md"
2346        );
2347        assert!(
2348            result.iter().any(|w| w.message.contains("ref-missing.md")),
2349            "Should warn about ref-missing.md"
2350        );
2351    }
2352
2353    #[test]
2354    fn test_footnote_definitions_not_flagged() {
2355        // Regression test for issue #286: footnote definitions should not be
2356        // treated as reference definitions and flagged as broken links
2357        let rule = MD057ExistingRelativeLinks::default();
2358
2359        let content = r#"# Title
2360
2361A footnote[^1].
2362
2363[^1]: [link](https://www.google.com).
2364"#;
2365
2366        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2367        let result = rule.check(&ctx).unwrap();
2368
2369        assert!(
2370            result.is_empty(),
2371            "Footnote definitions should not trigger MD057 warnings. Got: {result:?}"
2372        );
2373    }
2374
2375    #[test]
2376    fn test_footnote_with_relative_link_inside() {
2377        // Footnotes containing relative links should not be checked
2378        // (the footnote content is not a URL, it's content that may contain links)
2379        let rule = MD057ExistingRelativeLinks::default();
2380
2381        let content = r#"# Title
2382
2383See the footnote[^1].
2384
2385[^1]: Check out [this file](./existing.md) for more info.
2386[^2]: Also see [missing](./does-not-exist.md).
2387"#;
2388
2389        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2390        let result = rule.check(&ctx).unwrap();
2391
2392        // The inline links INSIDE footnotes should be checked (./existing.md, ./does-not-exist.md)
2393        // but the footnote definition itself should not be treated as a reference definition
2394        // Note: This test verifies that [^1]: and [^2]: are not parsed as ref defs with
2395        // URLs like "[this file](./existing.md)" or "[missing](./does-not-exist.md)"
2396        for warning in &result {
2397            assert!(
2398                !warning.message.contains("[this file]"),
2399                "Footnote content should not be treated as URL: {warning:?}"
2400            );
2401            assert!(
2402                !warning.message.contains("[missing]"),
2403                "Footnote content should not be treated as URL: {warning:?}"
2404            );
2405        }
2406    }
2407
2408    #[test]
2409    fn test_mixed_footnotes_and_reference_definitions() {
2410        // Ensure regular reference definitions are still checked while footnotes are skipped
2411        let temp_dir = tempdir().unwrap();
2412        let base_path = temp_dir.path();
2413
2414        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2415
2416        let content = r#"# Title
2417
2418A footnote[^1] and a [ref link][myref].
2419
2420[^1]: This is a footnote with [link](https://example.com).
2421
2422[myref]: ./missing-file.md "This should be checked"
2423"#;
2424
2425        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2426        let result = rule.check(&ctx).unwrap();
2427
2428        // Should only warn about the regular reference definition, not the footnote
2429        assert_eq!(
2430            result.len(),
2431            1,
2432            "Should only warn about the regular reference definition. Got: {result:?}"
2433        );
2434        assert!(
2435            result[0].message.contains("missing-file.md"),
2436            "Should warn about missing-file.md in reference definition"
2437        );
2438    }
2439
2440    #[test]
2441    fn test_absolute_links_ignore_by_default() {
2442        // By default, absolute links are ignored (not validated)
2443        let temp_dir = tempdir().unwrap();
2444        let base_path = temp_dir.path();
2445
2446        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2447
2448        let content = r#"# Links
2449
2450[API docs](/api/v1/users)
2451[Blog post](/blog/2024/release.html)
2452![Logo](/assets/logo.png)
2453
2454[ref]: /docs/reference.md
2455"#;
2456
2457        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2458        let result = rule.check(&ctx).unwrap();
2459
2460        // No warnings - absolute links are ignored by default
2461        assert!(
2462            result.is_empty(),
2463            "Absolute links should be ignored by default. Got: {result:?}"
2464        );
2465    }
2466
2467    #[test]
2468    fn test_absolute_links_warn_config() {
2469        // When configured to warn, absolute links should generate warnings
2470        let temp_dir = tempdir().unwrap();
2471        let base_path = temp_dir.path();
2472
2473        let config = MD057Config {
2474            absolute_links: AbsoluteLinksOption::Warn,
2475        };
2476        let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(base_path);
2477
2478        let content = r#"# Links
2479
2480[API docs](/api/v1/users)
2481[Blog post](/blog/2024/release.html)
2482"#;
2483
2484        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2485        let result = rule.check(&ctx).unwrap();
2486
2487        // Should have 2 warnings for the 2 absolute links
2488        assert_eq!(
2489            result.len(),
2490            2,
2491            "Should warn about both absolute links. Got: {result:?}"
2492        );
2493        assert!(
2494            result[0].message.contains("cannot be validated locally"),
2495            "Warning should explain why: {}",
2496            result[0].message
2497        );
2498        assert!(
2499            result[0].message.contains("/api/v1/users"),
2500            "Warning should include the link path"
2501        );
2502    }
2503
2504    #[test]
2505    fn test_absolute_links_warn_images() {
2506        // Images with absolute paths should also warn when configured
2507        let temp_dir = tempdir().unwrap();
2508        let base_path = temp_dir.path();
2509
2510        let config = MD057Config {
2511            absolute_links: AbsoluteLinksOption::Warn,
2512        };
2513        let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(base_path);
2514
2515        let content = r#"# Images
2516
2517![Logo](/assets/logo.png)
2518"#;
2519
2520        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2521        let result = rule.check(&ctx).unwrap();
2522
2523        assert_eq!(
2524            result.len(),
2525            1,
2526            "Should warn about absolute image path. Got: {result:?}"
2527        );
2528        assert!(
2529            result[0].message.contains("/assets/logo.png"),
2530            "Warning should include the image path"
2531        );
2532    }
2533
2534    #[test]
2535    fn test_absolute_links_warn_reference_definitions() {
2536        // Reference definitions with absolute paths should also warn when configured
2537        let temp_dir = tempdir().unwrap();
2538        let base_path = temp_dir.path();
2539
2540        let config = MD057Config {
2541            absolute_links: AbsoluteLinksOption::Warn,
2542        };
2543        let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(base_path);
2544
2545        let content = r#"# Reference
2546
2547See the [docs][ref].
2548
2549[ref]: /docs/reference.md
2550"#;
2551
2552        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2553        let result = rule.check(&ctx).unwrap();
2554
2555        assert_eq!(
2556            result.len(),
2557            1,
2558            "Should warn about absolute reference definition. Got: {result:?}"
2559        );
2560        assert!(
2561            result[0].message.contains("/docs/reference.md"),
2562            "Warning should include the reference path"
2563        );
2564    }
2565}