Skip to main content

rumdl_lib/rules/
md057_existing_relative_links.rs

1//!
2//! Rule MD057: Existing relative links
3//!
4//! See [docs/md057.md](../../docs/md057.md) for full documentation, configuration, and examples.
5
6use crate::rule::{
7    CrossFileScope, Fix, FixCapability, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity,
8};
9use crate::workspace_index::{FileIndex, extract_cross_file_links};
10use regex::Regex;
11use std::collections::HashMap;
12use std::env;
13use std::path::{Path, PathBuf};
14use std::sync::LazyLock;
15use std::sync::{Arc, Mutex};
16
17mod md057_config;
18use crate::rule_config_serde::RuleConfig;
19use crate::utils::mkdocs_config::resolve_docs_dir;
20use crate::utils::obsidian_config::resolve_attachment_folder;
21use crate::utils::project_root::discover_project_root_from;
22pub use md057_config::{AbsoluteLinksOption, MD057Config};
23
// Thread-safe cache for file existence checks to avoid redundant filesystem operations.
// Maps each queried path to the boolean recorded for it; entries stay until the
// cache is cleared. Lazily initialised to an empty map on first access.
static FILE_EXISTENCE_CACHE: LazyLock<Arc<Mutex<HashMap<PathBuf, bool>>>> =
    LazyLock::new(|| Arc::new(Mutex::new(HashMap::new())));
27
28// Reset the file existence cache (typically between rule runs)
29fn reset_file_existence_cache() {
30    if let Ok(mut cache) = FILE_EXISTENCE_CACHE.lock() {
31        cache.clear();
32    }
33}
34
35// Check if a file exists with caching
36fn file_exists_with_cache(path: &Path) -> bool {
37    match FILE_EXISTENCE_CACHE.lock() {
38        Ok(mut cache) => *cache.entry(path.to_path_buf()).or_insert_with(|| path.exists()),
39        Err(_) => path.exists(), // Fallback to uncached check on mutex poison
40    }
41}
42
43/// Check if a file exists, also trying markdown extensions for extensionless links.
44/// This supports wiki-style links like `[Link](page)` that resolve to `page.md`.
45fn file_exists_or_markdown_extension(path: &Path) -> bool {
46    // First, check exact path
47    if file_exists_with_cache(path) {
48        return true;
49    }
50
51    // If the path has no extension, try adding markdown extensions
52    if path.extension().is_none() {
53        for ext in MARKDOWN_EXTENSIONS {
54            // MARKDOWN_EXTENSIONS includes the dot, e.g., ".md"
55            let path_with_ext = path.with_extension(&ext[1..]);
56            if file_exists_with_cache(&path_with_ext) {
57                return true;
58            }
59        }
60    }
61
62    false
63}
64
// Regex to match the start of a link - simplified for performance.
// Matches an optional leading `!` followed by a bracketed text `[...]` that
// contains no `]`; the URL part after it is extracted by the regexes below.
static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());

/// Regex to extract the URL from an angle-bracketed markdown link
/// Format: `](<URL>)` or `](<URL> "title")`
/// This handles URLs with parentheses like `](<path/(with)/parens.md>)`
/// Capture group 1 is the text between `<` and `>`; optional group 2 is a
/// `#fragment` immediately following the closing `>`.
static URL_EXTRACT_ANGLE_BRACKET_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\]\(\s*<([^>]+)>(#[^\)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());

/// Regex to extract the URL from a normal markdown link (without angle brackets)
/// Format: `](URL)` or `](URL "title")`
/// Capture group 1 is the URL up to (not including) any `#`; optional group 2
/// is the `#fragment` that follows it.
static URL_EXTRACT_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new("\\]\\(\\s*([^>\\)\\s#]+)(#[^)\\s]*)?\\s*(?:\"[^\"]*\")?\\s*\\)").unwrap());

/// Regex to detect URLs with explicit schemes (should not be checked as relative links)
/// Matches: scheme:// or scheme: (per RFC 3986)
/// This covers http, https, ftp, file, smb, mailto, tel, data, macappstores, etc.
/// The pattern is anchored at the start of the URL and additionally matches a
/// leading `www.`.
static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());

// Current working directory, read once on first use.
// Falls back to "." when the working directory cannot be determined.
static CURRENT_DIR: LazyLock<PathBuf> = LazyLock::new(|| env::current_dir().unwrap_or_else(|_| PathBuf::from(".")));

/// Project root discovered once at process start by walking up from CWD looking
/// for `.git`, `.rumdl.toml`, `pyproject.toml`, or `.markdownlint.json`. Used as
/// the anchor for resolving non-absolute paths in `roots` and `search-paths`,
/// and as the implicit fallback root for absolute-link validation. Tests that
/// pass a path via `with_path()` bypass this discovery.
static PROJECT_ROOT: LazyLock<PathBuf> = LazyLock::new(|| discover_project_root_from(&CURRENT_DIR));
94
/// Map an ASCII hex digit (`0-9`, `a-f`, `A-F`) to its numeric value (0 to 15).
/// Any other byte yields `None`.
#[inline]
fn hex_digit_to_value(byte: u8) -> Option<u8> {
    // `to_digit(16)` only ever yields 0..=15, so narrowing to `u8` cannot truncate.
    char::from(byte).to_digit(16).map(|digit| digit as u8)
}
106
/// Supported markdown file extensions.
/// Every entry includes its leading dot (e.g. `".md"`); code that needs the
/// bare extension must strip the first character.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md",
    ".markdown",
    ".mdx",
    ".mkd",
    ".mkdn",
    ".mdown",
    ".mdwn",
    ".qmd",
    ".rmd",
];
119
/// Rule MD057: Existing relative links should point to valid files or directories.
#[derive(Debug, Clone)]
pub struct MD057ExistingRelativeLinks {
    /// Base directory for resolving relative links.
    /// `None` until set explicitly through `with_path()`. Held behind
    /// `Arc<Mutex<..>>`, so clones of the rule share the same slot.
    base_path: Arc<Mutex<Option<PathBuf>>>,
    /// Configuration for the rule
    config: MD057Config,
    /// Markdown flavor (used for Obsidian attachment folder auto-detection)
    flavor: crate::config::MarkdownFlavor,
}
130
131impl Default for MD057ExistingRelativeLinks {
132    fn default() -> Self {
133        Self {
134            base_path: Arc::new(Mutex::new(None)),
135            config: MD057Config::default(),
136            flavor: crate::config::MarkdownFlavor::default(),
137        }
138    }
139}
140
141impl MD057ExistingRelativeLinks {
    /// Create a new instance with default settings.
    ///
    /// Equivalent to `Self::default()`.
    pub fn new() -> Self {
        Self::default()
    }
146
147    /// Set the base path for resolving relative links
148    pub fn with_path<P: AsRef<Path>>(self, path: P) -> Self {
149        let path = path.as_ref();
150        let dir_path = if path.is_file() {
151            path.parent().map(std::path::Path::to_path_buf)
152        } else {
153            Some(path.to_path_buf())
154        };
155
156        if let Ok(mut guard) = self.base_path.lock() {
157            *guard = dir_path;
158        }
159        self
160    }
161
    /// Create an instance from an already-built configuration.
    ///
    /// The base path starts unset and the markdown flavor is the default one.
    pub fn from_config_struct(config: MD057Config) -> Self {
        Self {
            base_path: Arc::new(Mutex::new(None)),
            config,
            flavor: crate::config::MarkdownFlavor::default(),
        }
    }
169
170    /// Project root used for absolute-link resolution and `search-paths` anchoring.
171    ///
172    /// Returns the explicit base path when set via `with_path()` (used by tests
173    /// to isolate filesystem state to a temp dir); otherwise returns the
174    /// process-wide discovered project root.
175    fn project_root(&self) -> PathBuf {
176        self.base_path
177            .lock()
178            .ok()
179            .and_then(|g| g.clone())
180            .unwrap_or_else(|| PROJECT_ROOT.clone())
181    }
182
183    /// Resolve a config-supplied path string (from `roots` or `search-paths`)
184    /// against the project root: absolute strings are taken verbatim, relative
185    /// strings are joined onto `project_root`.
186    fn resolve_against_project_root(path_str: &str, project_root: &Path) -> PathBuf {
187        if Path::new(path_str).is_absolute() {
188            PathBuf::from(path_str)
189        } else {
190            project_root.join(path_str)
191        }
192    }
193
    /// Set the markdown flavor for Obsidian attachment auto-detection.
    ///
    /// Only compiled for tests; consumes `self` and returns it with the
    /// `flavor` field replaced.
    #[cfg(test)]
    fn with_flavor(mut self, flavor: crate::config::MarkdownFlavor) -> Self {
        self.flavor = flavor;
        self
    }
200
201    /// Check if a URL is external or should be skipped for validation.
202    ///
203    /// Returns `true` (skip validation) for:
204    /// - URLs with protocols: `https://`, `http://`, `ftp://`, `mailto:`, etc.
205    /// - Bare domains: `www.example.com`, `example.com`
206    /// - Email addresses: `user@example.com` (without `mailto:`)
207    /// - Template variables: `{{URL}}`, `{{% include %}}`
208    /// - Absolute web URL paths: `/api/docs`, `/blog/post.html`
209    ///
210    /// Returns `false` (validate) for:
211    /// - Relative filesystem paths: `./file.md`, `../parent/file.md`, `file.md`
212    #[inline]
213    fn is_external_url(&self, url: &str) -> bool {
214        if url.is_empty() {
215            return false;
216        }
217
218        // Quick checks for common external URL patterns
219        if PROTOCOL_DOMAIN_REGEX.is_match(url) || url.starts_with("www.") {
220            return true;
221        }
222
223        // Skip template variables (Handlebars/Mustache/Jinja2 syntax)
224        // Examples: {{URL}}, {{#URL}}, {{> partial}}, {{% include %}}, {{ variable }}
225        if url.starts_with("{{") || url.starts_with("{%") {
226            return true;
227        }
228
229        // Simple check: if URL contains @, it's almost certainly an email address
230        // File paths with @ are extremely rare, so this is a safe heuristic
231        if url.contains('@') {
232            return true; // It's an email address, skip it
233        }
234
235        // Bare domain check (e.g., "example.com")
236        // Note: We intentionally DON'T skip all TLDs like .org, .net, etc.
237        // Links like [text](nodejs.org/path) without a protocol are broken -
238        // they'll be treated as relative paths by markdown renderers.
239        // Flagging them helps users find missing protocols.
240        // We only skip .com as a minimal safety net for the most common case.
241        if url.ends_with(".com") {
242            return true;
243        }
244
245        // Framework path aliases (resolved by build tools like Vite, webpack, etc.)
246        // These are not filesystem paths but module/asset aliases
247        // Examples: ~/assets/image.png, @images/photo.jpg, @/components/Button.vue
248        if url.starts_with('~') || url.starts_with('@') {
249            return true;
250        }
251
252        // All other cases (relative paths, etc.) are not external
253        false
254    }
255
256    /// Check if the URL is a fragment-only link (internal document link)
257    #[inline]
258    fn is_fragment_only_link(&self, url: &str) -> bool {
259        url.starts_with('#')
260    }
261
262    /// Check if the URL is an absolute path (starts with /)
263    /// These are typically routes for published documentation sites.
264    #[inline]
265    fn is_absolute_path(url: &str) -> bool {
266        url.starts_with('/')
267    }
268
269    /// Decode URL percent-encoded sequences in a path.
270    /// Converts `%20` to space, `%2F` to `/`, etc.
271    /// Returns the original string if decoding fails or produces invalid UTF-8.
272    fn url_decode(path: &str) -> String {
273        // Quick check: if no percent sign, return as-is
274        if !path.contains('%') {
275            return path.to_string();
276        }
277
278        let bytes = path.as_bytes();
279        let mut result = Vec::with_capacity(bytes.len());
280        let mut i = 0;
281
282        while i < bytes.len() {
283            if bytes[i] == b'%' && i + 2 < bytes.len() {
284                // Try to parse the two hex digits following %
285                let hex1 = bytes[i + 1];
286                let hex2 = bytes[i + 2];
287                if let (Some(d1), Some(d2)) = (hex_digit_to_value(hex1), hex_digit_to_value(hex2)) {
288                    result.push(d1 * 16 + d2);
289                    i += 3;
290                    continue;
291                }
292            }
293            result.push(bytes[i]);
294            i += 1;
295        }
296
297        // Convert to UTF-8, falling back to original if invalid
298        String::from_utf8(result).unwrap_or_else(|_| path.to_string())
299    }
300
301    /// Strip query parameters and fragments from a URL for file existence checking.
302    /// URLs like `path/to/image.png?raw=true` or `file.md#section` should check
303    /// for `path/to/image.png` or `file.md` respectively.
304    ///
305    /// Note: In standard URLs, query parameters (`?`) come before fragments (`#`),
306    /// so we check for `?` first. If a URL has both, only the query is stripped here
307    /// (fragments are handled separately by the regex in `contribute_to_index`).
308    fn strip_query_and_fragment(url: &str) -> &str {
309        // Find the first occurrence of '?' or '#', whichever comes first
310        // This handles both standard URLs (? before #) and edge cases (# before ?)
311        let query_pos = url.find('?');
312        let fragment_pos = url.find('#');
313
314        match (query_pos, fragment_pos) {
315            (Some(q), Some(f)) => {
316                // Both exist - strip at whichever comes first
317                &url[..q.min(f)]
318            }
319            (Some(q), None) => &url[..q],
320            (None, Some(f)) => &url[..f],
321            (None, None) => url,
322        }
323    }
324
    /// Resolve a relative link against a provided base path.
    ///
    /// This is a plain `Path::join`; the result is not normalised and is not
    /// checked for existence.
    fn resolve_link_path_with_base(link: &str, base_path: &Path) -> PathBuf {
        base_path.join(link)
    }
329
330    /// Compute additional search paths for fallback link resolution.
331    ///
332    /// Combines Obsidian attachment folder auto-detection (when flavor is Obsidian)
333    /// with explicitly configured `search-paths`.
334    fn compute_search_paths(
335        &self,
336        flavor: crate::config::MarkdownFlavor,
337        source_file: Option<&Path>,
338        base_path: &Path,
339        project_root: &Path,
340    ) -> Vec<PathBuf> {
341        let mut paths = Vec::new();
342
343        // Auto-detect Obsidian attachment folder
344        if flavor == crate::config::MarkdownFlavor::Obsidian
345            && let Some(attachment_dir) = resolve_attachment_folder(source_file.unwrap_or(base_path), base_path)
346            && attachment_dir != *base_path
347        {
348            paths.push(attachment_dir);
349        }
350
351        // Add explicitly configured search paths. Resolved relative to the
352        // discovered project root so paths are stable regardless of which
353        // subdirectory rumdl is invoked from.
354        for search_path in &self.config.search_paths {
355            let resolved = Self::resolve_against_project_root(search_path, project_root);
356            if resolved != *base_path && !paths.contains(&resolved) {
357                paths.push(resolved);
358            }
359        }
360
361        paths
362    }
363
364    /// Check if a link target exists in any of the additional search paths.
365    fn exists_in_search_paths(decoded_path: &str, search_paths: &[PathBuf]) -> bool {
366        search_paths.iter().any(|dir| {
367            let candidate = dir.join(decoded_path);
368            file_exists_or_markdown_extension(&candidate)
369        })
370    }
371
372    /// Check if a relative link can be compacted and return the simplified form.
373    ///
374    /// Returns `None` if compact-paths is disabled, the link has no traversal,
375    /// or the link is already the shortest form.
376    /// Returns `Some(suggestion)` with the full compacted URL (including fragment/query suffix).
377    fn compact_path_suggestion(&self, url: &str, base_path: &Path) -> Option<String> {
378        if !self.config.compact_paths {
379            return None;
380        }
381
382        // Split URL into path and suffix (fragment/query)
383        let path_end = url
384            .find('?')
385            .unwrap_or(url.len())
386            .min(url.find('#').unwrap_or(url.len()));
387        let path_part = &url[..path_end];
388        let suffix = &url[path_end..];
389
390        // URL-decode the path portion for filesystem resolution
391        let decoded_path = Self::url_decode(path_part);
392
393        compute_compact_path(base_path, &decoded_path).map(|compact| format!("{compact}{suffix}"))
394    }
395
396    /// Validate an absolute link by resolving it relative to MkDocs docs_dir.
397    ///
398    /// Returns `Some(warning_message)` if the link is broken, `None` if valid.
399    /// Falls back to a generic warning if no mkdocs.yml is found.
400    /// Validate an absolute link against the MkDocs `docs_dir`.
401    fn validate_absolute_link_via_docs_dir(url: &str, source_path: &Path) -> Option<String> {
402        let Some(docs_dir) = resolve_docs_dir(source_path) else {
403            return Some(format!(
404                "Absolute link '{url}' cannot be validated locally (no mkdocs.yml found)"
405            ));
406        };
407
408        let (decoded, is_directory_link) = Self::prepare_absolute_url(url);
409
410        match Self::resolve_under_root(&docs_dir, &decoded, is_directory_link) {
411            Resolution::Found => None,
412            Resolution::DirectoryWithoutIndex { resolved } => Some(format!(
413                "Absolute link '{url}' resolves to directory '{}' which has no index.md",
414                resolved.display()
415            )),
416            Resolution::NotFound { resolved } => Some(format!(
417                "Absolute link '{url}' resolves to '{}' which does not exist",
418                resolved.display()
419            )),
420        }
421    }
422
423    /// Validate an absolute link by resolving it against each configured root and the project root.
424    ///
425    /// Configured `roots` are tried first (first match wins), then the project
426    /// root is tried as an implicit fallback. The fallback supports links
427    /// written as literal absolute paths from the project root (e.g.
428    /// `/content/en/foo.md`) alongside links written relative to a configured
429    /// root (e.g. `/foo.md` with `roots = ["content/en"]`). A warning is
430    /// emitted only when no root — configured or implicit — contains the target.
431    fn validate_absolute_link_via_roots(url: &str, roots: &[String], project_root: &Path) -> Option<String> {
432        let (decoded, is_directory_link) = Self::prepare_absolute_url(url);
433
434        for root in roots {
435            let root_path = Self::resolve_against_project_root(root, project_root);
436            if matches!(
437                Self::resolve_under_root(&root_path, &decoded, is_directory_link),
438                Resolution::Found
439            ) {
440                return None;
441            }
442        }
443
444        if matches!(
445            Self::resolve_under_root(project_root, &decoded, is_directory_link),
446            Resolution::Found
447        ) {
448            return None;
449        }
450
451        let msg = if roots.is_empty() {
452            format!("Absolute link '{url}' was not found under the project root")
453        } else {
454            format!("Absolute link '{url}' was not found under any configured root or the project root")
455        };
456        Some(msg)
457    }
458
459    /// Decode an absolute-link URL into a filesystem-relative path and a
460    /// directory-link flag. Strips the leading `/`, query/fragment suffix, and
461    /// percent-encoding.
462    fn prepare_absolute_url(url: &str) -> (String, bool) {
463        let relative_url = url.trim_start_matches('/');
464        let file_path = Self::strip_query_and_fragment(relative_url);
465        let decoded = Self::url_decode(file_path);
466        let is_directory_link = url.ends_with('/') || decoded.is_empty();
467        (decoded, is_directory_link)
468    }
469
470    /// Try to resolve a decoded absolute-link path under a single root directory.
471    ///
472    /// Applies four resolution strategies in order:
473    /// 1. Directory-style links: look for `<resolved>/index.md`.
474    /// 2. Direct existence (with markdown-extension fallback for extensionless links).
475    /// 3. `.html`/`.htm` links: look for a markdown source with the same stem in the same directory.
476    fn resolve_under_root(root_path: &Path, decoded: &str, is_directory_link: bool) -> Resolution {
477        let resolved = root_path.join(decoded);
478
479        // Directory-style links resolve via `index.md` inside the directory.
480        // Must be checked before `file_exists_or_markdown_extension` because
481        // `path.exists()` returns true for directories.
482        let is_dir = resolved.is_dir();
483        if is_directory_link || is_dir {
484            let index_path = resolved.join("index.md");
485            if file_exists_with_cache(&index_path) {
486                return Resolution::Found;
487            }
488            if is_dir {
489                return Resolution::DirectoryWithoutIndex { resolved };
490            }
491        }
492
493        if file_exists_or_markdown_extension(&resolved) {
494            return Resolution::Found;
495        }
496
497        // For .html/.htm links, accept a matching markdown source in the same
498        // directory — supports doc sites that compile .md to .html.
499        if let Some(ext) = resolved.extension().and_then(|e| e.to_str())
500            && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
501            && let (Some(stem), Some(parent)) = (resolved.file_stem().and_then(|s| s.to_str()), resolved.parent())
502        {
503            let has_md_source = MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
504                let source_path = parent.join(format!("{stem}{md_ext}"));
505                file_exists_with_cache(&source_path)
506            });
507            if has_md_source {
508                return Resolution::Found;
509            }
510        }
511
512        Resolution::NotFound { resolved }
513    }
514}
515
/// Outcome of trying to resolve an absolute link under a single root directory.
/// Carries the resolved path on the failure variants so callers can build
/// specific error messages without recomputing it.
enum Resolution {
    /// The link target was located under the root.
    Found,
    /// The resolved path is an existing directory that has no `index.md`.
    DirectoryWithoutIndex { resolved: PathBuf },
    /// None of the resolution strategies located the target.
    NotFound { resolved: PathBuf },
}
524
525impl Rule for MD057ExistingRelativeLinks {
    /// Rule identifier: `"MD057"`.
    fn name(&self) -> &'static str {
        "MD057"
    }
529
    /// One-line, human-readable summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Relative links should point to existing files"
    }
533
    /// This rule belongs to the link category.
    fn category(&self) -> RuleCategory {
        RuleCategory::Link
    }
537
    /// Skip the rule when the document is empty or the lint context reports
    /// that it is unlikely to contain any links or images.
    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
    }
541
542    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
543        let content = ctx.content;
544
545        // Early returns for performance
546        if content.is_empty() || !content.contains('[') {
547            return Ok(Vec::new());
548        }
549
550        // Quick check for any potential links before expensive operations
551        // Check for inline links "](", reference definitions "]:", or images "!["
552        if !content.contains("](") && !content.contains("]:") {
553            return Ok(Vec::new());
554        }
555
556        // Reset the file existence cache for a fresh run
557        reset_file_existence_cache();
558
559        let mut warnings = Vec::new();
560
561        // Read the explicit base path (set via `with_path()` in tests) once; it
562        // doubles as both the per-file base path and the project root override
563        // for absolute-link resolution.
564        let explicit_base = self.base_path.lock().ok().and_then(|g| g.clone());
565
566        // Project root used for absolute-link resolution against configured
567        // `roots` and as the implicit fallback root. The explicit base wins
568        // when set; otherwise the discovered project root is used.
569        let project_root: PathBuf = explicit_base.clone().unwrap_or_else(|| PROJECT_ROOT.clone());
570
571        // Determine base path for resolving relative links.
572        // ALWAYS compute from ctx.source_file for each file - do not reuse cached base_path
573        // This ensures each file resolves links relative to its own directory.
574        let base_path: Option<PathBuf> = {
575            if explicit_base.is_some() {
576                explicit_base
577            } else if let Some(ref source_file) = ctx.source_file {
578                // Resolve symlinks to get the actual file location
579                // This ensures relative links are resolved from the target's directory,
580                // not the symlink's directory
581                let resolved_file = source_file.canonicalize().unwrap_or_else(|_| source_file.clone());
582                resolved_file
583                    .parent()
584                    .map(std::path::Path::to_path_buf)
585                    .or_else(|| Some(CURRENT_DIR.clone()))
586            } else {
587                // No source file available - cannot validate relative links
588                None
589            }
590        };
591
592        // If we still don't have a base path, we can't validate relative links
593        let Some(base_path) = base_path else {
594            return Ok(warnings);
595        };
596
597        // Compute additional search paths for fallback link resolution
598        let extra_search_paths =
599            self.compute_search_paths(ctx.flavor, ctx.source_file.as_deref(), &base_path, &project_root);
600
601        // Use LintContext links instead of expensive regex parsing
602        if !ctx.links.is_empty() {
603            // Use LineIndex for correct position calculation across all line ending types
604            let line_index = &ctx.line_index;
605
606            // Pre-collected lines from context
607            let lines = ctx.raw_lines();
608
609            // Track which lines we've already processed to avoid duplicates
610            // (ctx.links may have multiple entries for the same line, especially with malformed markdown)
611            let mut processed_lines = std::collections::HashSet::new();
612
613            for link in &ctx.links {
614                let line_idx = link.line - 1;
615                if line_idx >= lines.len() {
616                    continue;
617                }
618
619                // Skip lines inside PyMdown blocks
620                if ctx.line_info(link.line).is_some_and(|info| info.in_pymdown_block) {
621                    continue;
622                }
623
624                // Skip if we've already processed this line
625                if !processed_lines.insert(line_idx) {
626                    continue;
627                }
628
629                let line = lines[line_idx];
630
631                // Quick check for link pattern in this line
632                if !line.contains("](") {
633                    continue;
634                }
635
636                // Find all links in this line using optimized regex
637                for link_match in LINK_START_REGEX.find_iter(line) {
638                    let start_pos = link_match.start();
639                    let end_pos = link_match.end();
640
641                    // Calculate absolute position using LineIndex
642                    let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
643                    let absolute_start_pos = line_start_byte + start_pos;
644
645                    // Skip if this link is in a code span
646                    if ctx.is_in_code_span_byte(absolute_start_pos) {
647                        continue;
648                    }
649
650                    // Skip if this link is in a math span (LaTeX $...$ or $$...$$)
651                    if ctx.is_in_math_span(absolute_start_pos) {
652                        continue;
653                    }
654
655                    // Find the URL part after the link text
656                    // Try angle-bracket regex first (handles URLs with parens like `<path/(with)/parens.md>`)
657                    // Then fall back to normal URL regex
658                    let caps_and_url = URL_EXTRACT_ANGLE_BRACKET_REGEX
659                        .captures_at(line, end_pos - 1)
660                        .and_then(|caps| caps.get(1).map(|g| (caps, g)))
661                        .or_else(|| {
662                            URL_EXTRACT_REGEX
663                                .captures_at(line, end_pos - 1)
664                                .and_then(|caps| caps.get(1).map(|g| (caps, g)))
665                        });
666
667                    if let Some((caps, url_group)) = caps_and_url {
668                        let url = url_group.as_str().trim();
669
670                        // Skip empty URLs
671                        if url.is_empty() {
672                            continue;
673                        }
674
675                        // Skip rustdoc intra-doc links (backtick-wrapped URLs)
676                        // These are Rust API references, not file paths
677                        // Example: [`f32::is_subnormal`], [`Vec::push`]
678                        if url.starts_with('`') && url.ends_with('`') {
679                            continue;
680                        }
681
682                        // Skip external URLs and fragment-only links
683                        if self.is_external_url(url) || self.is_fragment_only_link(url) {
684                            continue;
685                        }
686
687                        // Handle absolute paths based on config
688                        if Self::is_absolute_path(url) {
689                            match self.config.absolute_links {
690                                AbsoluteLinksOption::Warn => {
691                                    let url_start = url_group.start();
692                                    let url_end = url_group.end();
693                                    warnings.push(LintWarning {
694                                        rule_name: Some(self.name().to_string()),
695                                        line: link.line,
696                                        column: url_start + 1,
697                                        end_line: link.line,
698                                        end_column: url_end + 1,
699                                        message: format!("Absolute link '{url}' cannot be validated locally"),
700                                        severity: Severity::Warning,
701                                        fix: None,
702                                    });
703                                }
704                                AbsoluteLinksOption::RelativeToDocs => {
705                                    if let Some(msg) = Self::validate_absolute_link_via_docs_dir(url, &base_path) {
706                                        let url_start = url_group.start();
707                                        let url_end = url_group.end();
708                                        warnings.push(LintWarning {
709                                            rule_name: Some(self.name().to_string()),
710                                            line: link.line,
711                                            column: url_start + 1,
712                                            end_line: link.line,
713                                            end_column: url_end + 1,
714                                            message: msg,
715                                            severity: Severity::Warning,
716                                            fix: None,
717                                        });
718                                    }
719                                }
720                                AbsoluteLinksOption::RelativeToRoots => {
721                                    if let Some(msg) =
722                                        Self::validate_absolute_link_via_roots(url, &self.config.roots, &project_root)
723                                    {
724                                        let url_start = url_group.start();
725                                        let url_end = url_group.end();
726                                        warnings.push(LintWarning {
727                                            rule_name: Some(self.name().to_string()),
728                                            line: link.line,
729                                            column: url_start + 1,
730                                            end_line: link.line,
731                                            end_column: url_end + 1,
732                                            message: msg,
733                                            severity: Severity::Warning,
734                                            fix: None,
735                                        });
736                                    }
737                                }
738                                AbsoluteLinksOption::Ignore => {}
739                            }
740                            continue;
741                        }
742
743                        // Check for unnecessary path traversal (compact-paths)
744                        // Reconstruct full URL including fragment (regex group 2)
745                        // since url_group (group 1) contains only the path part
746                        let full_url_for_compact = if let Some(frag) = caps.get(2) {
747                            format!("{url}{}", frag.as_str())
748                        } else {
749                            url.to_string()
750                        };
751                        if let Some(suggestion) = self.compact_path_suggestion(&full_url_for_compact, &base_path) {
752                            let url_start = url_group.start();
753                            let url_end = caps.get(2).map_or(url_group.end(), |frag| frag.end());
754                            let fix_byte_start = line_start_byte + url_start;
755                            let fix_byte_end = line_start_byte + url_end;
756                            warnings.push(LintWarning {
757                                rule_name: Some(self.name().to_string()),
758                                line: link.line,
759                                column: url_start + 1,
760                                end_line: link.line,
761                                end_column: url_end + 1,
762                                message: format!(
763                                    "Relative link '{full_url_for_compact}' can be simplified to '{suggestion}'"
764                                ),
765                                severity: Severity::Warning,
766                                fix: Some(Fix {
767                                    range: fix_byte_start..fix_byte_end,
768                                    replacement: suggestion,
769                                }),
770                            });
771                        }
772
773                        // Strip query parameters and fragments before checking file existence
774                        let file_path = Self::strip_query_and_fragment(url);
775
776                        // URL-decode the path to handle percent-encoded characters
777                        let decoded_path = Self::url_decode(file_path);
778
779                        // Resolve the relative link against the base path
780                        let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
781
782                        // Check if the file exists, also trying markdown extensions for extensionless links
783                        if file_exists_or_markdown_extension(&resolved_path) {
784                            continue; // File exists, no warning needed
785                        }
786
787                        // For .html/.htm links, check if a corresponding markdown source exists
788                        let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
789                            && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
790                            && let (Some(stem), Some(parent)) = (
791                                resolved_path.file_stem().and_then(|s| s.to_str()),
792                                resolved_path.parent(),
793                            ) {
794                            MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
795                                let source_path = parent.join(format!("{stem}{md_ext}"));
796                                file_exists_with_cache(&source_path)
797                            })
798                        } else {
799                            false
800                        };
801
802                        if has_md_source {
803                            continue; // Markdown source exists, link is valid
804                        }
805
806                        // Try additional search paths (Obsidian attachment folder, configured paths)
807                        if Self::exists_in_search_paths(&decoded_path, &extra_search_paths) {
808                            continue;
809                        }
810
811                        // File doesn't exist and no source file found
812                        // Use actual URL position from regex capture group
813                        // Note: capture group positions are absolute within the line string
814                        let url_start = url_group.start();
815                        let url_end = url_group.end();
816
817                        warnings.push(LintWarning {
818                            rule_name: Some(self.name().to_string()),
819                            line: link.line,
820                            column: url_start + 1, // 1-indexed
821                            end_line: link.line,
822                            end_column: url_end + 1, // 1-indexed
823                            message: format!("Relative link '{url}' does not exist"),
824                            severity: Severity::Error,
825                            fix: None,
826                        });
827                    }
828                }
829            }
830        }
831
832        // Also process images - they have URLs already parsed
833        for image in &ctx.images {
834            // Skip images inside PyMdown blocks (MkDocs flavor)
835            if ctx.line_info(image.line).is_some_and(|info| info.in_pymdown_block) {
836                continue;
837            }
838
839            let url = image.url.as_ref();
840
841            // Skip empty URLs
842            if url.is_empty() {
843                continue;
844            }
845
846            // Skip external URLs and fragment-only links
847            if self.is_external_url(url) || self.is_fragment_only_link(url) {
848                continue;
849            }
850
851            // Handle absolute paths based on config
852            if Self::is_absolute_path(url) {
853                match self.config.absolute_links {
854                    AbsoluteLinksOption::Warn => {
855                        warnings.push(LintWarning {
856                            rule_name: Some(self.name().to_string()),
857                            line: image.line,
858                            column: image.start_col + 1,
859                            end_line: image.line,
860                            end_column: image.start_col + 1 + url.len(),
861                            message: format!("Absolute link '{url}' cannot be validated locally"),
862                            severity: Severity::Warning,
863                            fix: None,
864                        });
865                    }
866                    AbsoluteLinksOption::RelativeToDocs => {
867                        if let Some(msg) = Self::validate_absolute_link_via_docs_dir(url, &base_path) {
868                            warnings.push(LintWarning {
869                                rule_name: Some(self.name().to_string()),
870                                line: image.line,
871                                column: image.start_col + 1,
872                                end_line: image.line,
873                                end_column: image.start_col + 1 + url.len(),
874                                message: msg,
875                                severity: Severity::Warning,
876                                fix: None,
877                            });
878                        }
879                    }
880                    AbsoluteLinksOption::RelativeToRoots => {
881                        if let Some(msg) =
882                            Self::validate_absolute_link_via_roots(url, &self.config.roots, &project_root)
883                        {
884                            warnings.push(LintWarning {
885                                rule_name: Some(self.name().to_string()),
886                                line: image.line,
887                                column: image.start_col + 1,
888                                end_line: image.line,
889                                end_column: image.start_col + 1 + url.len(),
890                                message: msg,
891                                severity: Severity::Warning,
892                                fix: None,
893                            });
894                        }
895                    }
896                    AbsoluteLinksOption::Ignore => {}
897                }
898                continue;
899            }
900
901            // Check for unnecessary path traversal (compact-paths)
902            if let Some(suggestion) = self.compact_path_suggestion(url, &base_path) {
903                // Find the URL position within the image syntax using document byte offsets.
904                // Search from image.byte_offset (the `!` character) to locate the URL string.
905                let fix = content[image.byte_offset..image.byte_end].find(url).map(|url_offset| {
906                    let fix_byte_start = image.byte_offset + url_offset;
907                    let fix_byte_end = fix_byte_start + url.len();
908                    Fix {
909                        range: fix_byte_start..fix_byte_end,
910                        replacement: suggestion.clone(),
911                    }
912                });
913
914                let img_line_start_byte = ctx.line_index.get_line_start_byte(image.line).unwrap_or(0);
915                let url_col = fix
916                    .as_ref()
917                    .map_or(image.start_col + 1, |f| f.range.start - img_line_start_byte + 1);
918                warnings.push(LintWarning {
919                    rule_name: Some(self.name().to_string()),
920                    line: image.line,
921                    column: url_col,
922                    end_line: image.line,
923                    end_column: url_col + url.len(),
924                    message: format!("Relative link '{url}' can be simplified to '{suggestion}'"),
925                    severity: Severity::Warning,
926                    fix,
927                });
928            }
929
930            // Strip query parameters and fragments before checking file existence
931            let file_path = Self::strip_query_and_fragment(url);
932
933            // URL-decode the path to handle percent-encoded characters
934            let decoded_path = Self::url_decode(file_path);
935
936            // Resolve the relative link against the base path
937            let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
938
939            // Check if the file exists, also trying markdown extensions for extensionless links
940            if file_exists_or_markdown_extension(&resolved_path) {
941                continue; // File exists, no warning needed
942            }
943
944            // For .html/.htm links, check if a corresponding markdown source exists
945            let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
946                && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
947                && let (Some(stem), Some(parent)) = (
948                    resolved_path.file_stem().and_then(|s| s.to_str()),
949                    resolved_path.parent(),
950                ) {
951                MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
952                    let source_path = parent.join(format!("{stem}{md_ext}"));
953                    file_exists_with_cache(&source_path)
954                })
955            } else {
956                false
957            };
958
959            if has_md_source {
960                continue; // Markdown source exists, link is valid
961            }
962
963            // Try additional search paths (Obsidian attachment folder, configured paths)
964            if Self::exists_in_search_paths(&decoded_path, &extra_search_paths) {
965                continue;
966            }
967
968            // File doesn't exist and no source file found
969            // Images already have correct position from parser
970            warnings.push(LintWarning {
971                rule_name: Some(self.name().to_string()),
972                line: image.line,
973                column: image.start_col + 1,
974                end_line: image.line,
975                end_column: image.start_col + 1 + url.len(),
976                message: format!("Relative link '{url}' does not exist"),
977                severity: Severity::Error,
978                fix: None,
979            });
980        }
981
982        // Also process reference definitions: [ref]: ./path.md
983        for ref_def in &ctx.reference_defs {
984            let url = &ref_def.url;
985
986            // Skip empty URLs
987            if url.is_empty() {
988                continue;
989            }
990
991            // Skip external URLs and fragment-only links
992            if self.is_external_url(url) || self.is_fragment_only_link(url) {
993                continue;
994            }
995
996            // Handle absolute paths based on config
997            if Self::is_absolute_path(url) {
998                match self.config.absolute_links {
999                    AbsoluteLinksOption::Warn => {
1000                        let line_idx = ref_def.line - 1;
1001                        let column = content.lines().nth(line_idx).map_or(1, |line_content| {
1002                            line_content.find(url.as_str()).map_or(1, |url_pos| url_pos + 1)
1003                        });
1004                        warnings.push(LintWarning {
1005                            rule_name: Some(self.name().to_string()),
1006                            line: ref_def.line,
1007                            column,
1008                            end_line: ref_def.line,
1009                            end_column: column + url.len(),
1010                            message: format!("Absolute link '{url}' cannot be validated locally"),
1011                            severity: Severity::Warning,
1012                            fix: None,
1013                        });
1014                    }
1015                    AbsoluteLinksOption::RelativeToDocs => {
1016                        if let Some(msg) = Self::validate_absolute_link_via_docs_dir(url, &base_path) {
1017                            let line_idx = ref_def.line - 1;
1018                            let column = content.lines().nth(line_idx).map_or(1, |line_content| {
1019                                line_content.find(url.as_str()).map_or(1, |url_pos| url_pos + 1)
1020                            });
1021                            warnings.push(LintWarning {
1022                                rule_name: Some(self.name().to_string()),
1023                                line: ref_def.line,
1024                                column,
1025                                end_line: ref_def.line,
1026                                end_column: column + url.len(),
1027                                message: msg,
1028                                severity: Severity::Warning,
1029                                fix: None,
1030                            });
1031                        }
1032                    }
1033                    AbsoluteLinksOption::RelativeToRoots => {
1034                        if let Some(msg) =
1035                            Self::validate_absolute_link_via_roots(url, &self.config.roots, &project_root)
1036                        {
1037                            let line_idx = ref_def.line - 1;
1038                            let column = content.lines().nth(line_idx).map_or(1, |line_content| {
1039                                line_content.find(url.as_str()).map_or(1, |url_pos| url_pos + 1)
1040                            });
1041                            warnings.push(LintWarning {
1042                                rule_name: Some(self.name().to_string()),
1043                                line: ref_def.line,
1044                                column,
1045                                end_line: ref_def.line,
1046                                end_column: column + url.len(),
1047                                message: msg,
1048                                severity: Severity::Warning,
1049                                fix: None,
1050                            });
1051                        }
1052                    }
1053                    AbsoluteLinksOption::Ignore => {}
1054                }
1055                continue;
1056            }
1057
1058            // Check for unnecessary path traversal (compact-paths)
1059            if let Some(suggestion) = self.compact_path_suggestion(url, &base_path) {
1060                let ref_line_idx = ref_def.line - 1;
1061                let col = content.lines().nth(ref_line_idx).map_or(1, |line_content| {
1062                    line_content.find(url.as_str()).map_or(1, |url_pos| url_pos + 1)
1063                });
1064                let ref_line_start_byte = ctx.line_index.get_line_start_byte(ref_def.line).unwrap_or(0);
1065                let fix_byte_start = ref_line_start_byte + col - 1;
1066                let fix_byte_end = fix_byte_start + url.len();
1067                warnings.push(LintWarning {
1068                    rule_name: Some(self.name().to_string()),
1069                    line: ref_def.line,
1070                    column: col,
1071                    end_line: ref_def.line,
1072                    end_column: col + url.len(),
1073                    message: format!("Relative link '{url}' can be simplified to '{suggestion}'"),
1074                    severity: Severity::Warning,
1075                    fix: Some(Fix {
1076                        range: fix_byte_start..fix_byte_end,
1077                        replacement: suggestion,
1078                    }),
1079                });
1080            }
1081
1082            // Strip query parameters and fragments before checking file existence
1083            let file_path = Self::strip_query_and_fragment(url);
1084
1085            // URL-decode the path to handle percent-encoded characters
1086            let decoded_path = Self::url_decode(file_path);
1087
1088            // Resolve the relative link against the base path
1089            let resolved_path = Self::resolve_link_path_with_base(&decoded_path, &base_path);
1090
1091            // Check if the file exists, also trying markdown extensions for extensionless links
1092            if file_exists_or_markdown_extension(&resolved_path) {
1093                continue; // File exists, no warning needed
1094            }
1095
1096            // For .html/.htm links, check if a corresponding markdown source exists
1097            let has_md_source = if let Some(ext) = resolved_path.extension().and_then(|e| e.to_str())
1098                && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
1099                && let (Some(stem), Some(parent)) = (
1100                    resolved_path.file_stem().and_then(|s| s.to_str()),
1101                    resolved_path.parent(),
1102                ) {
1103                MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
1104                    let source_path = parent.join(format!("{stem}{md_ext}"));
1105                    file_exists_with_cache(&source_path)
1106                })
1107            } else {
1108                false
1109            };
1110
1111            if has_md_source {
1112                continue; // Markdown source exists, link is valid
1113            }
1114
1115            // Try additional search paths (Obsidian attachment folder, configured paths)
1116            if Self::exists_in_search_paths(&decoded_path, &extra_search_paths) {
1117                continue;
1118            }
1119
1120            // File doesn't exist and no source file found
1121            // Calculate column position: find URL within the line
1122            let line_idx = ref_def.line - 1;
1123            let column = content.lines().nth(line_idx).map_or(1, |line_content| {
1124                // Find URL position in line (after ]: )
1125                line_content.find(url.as_str()).map_or(1, |url_pos| url_pos + 1)
1126            });
1127
1128            warnings.push(LintWarning {
1129                rule_name: Some(self.name().to_string()),
1130                line: ref_def.line,
1131                column,
1132                end_line: ref_def.line,
1133                end_column: column + url.len(),
1134                message: format!("Relative link '{url}' does not exist"),
1135                severity: Severity::Error,
1136                fix: None,
1137            });
1138        }
1139
1140        Ok(warnings)
1141    }
1142
1143    fn fix_capability(&self) -> FixCapability {
1144        if self.config.compact_paths {
1145            FixCapability::ConditionallyFixable
1146        } else {
1147            FixCapability::Unfixable
1148        }
1149    }
1150
1151    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
1152        if !self.config.compact_paths {
1153            return Ok(ctx.content.to_string());
1154        }
1155
1156        let warnings = self.check(ctx)?;
1157        let warnings =
1158            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
1159        let mut content = ctx.content.to_string();
1160
1161        // Collect fixable warnings (compact-paths) sorted by byte offset descending
1162        let mut fixes: Vec<_> = warnings.iter().filter_map(|w| w.fix.as_ref()).collect();
1163        fixes.sort_by(|a, b| b.range.start.cmp(&a.range.start));
1164
1165        for fix in fixes {
1166            if fix.range.end <= content.len() {
1167                content.replace_range(fix.range.clone(), &fix.replacement);
1168            }
1169        }
1170
1171        Ok(content)
1172    }
1173
1174    fn as_any(&self) -> &dyn std::any::Any {
1175        self
1176    }
1177
1178    fn default_config_section(&self) -> Option<(String, toml::Value)> {
1179        let default_config = MD057Config::default();
1180        let json_value = serde_json::to_value(&default_config).ok()?;
1181        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
1182
1183        if let toml::Value::Table(table) = toml_value {
1184            if !table.is_empty() {
1185                Some((MD057Config::RULE_NAME.to_string(), toml::Value::Table(table)))
1186            } else {
1187                None
1188            }
1189        } else {
1190            None
1191        }
1192    }
1193
1194    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
1195    where
1196        Self: Sized,
1197    {
1198        let rule_config = crate::rule_config_serde::load_rule_config::<MD057Config>(config);
1199        let mut rule = Self::from_config_struct(rule_config);
1200        rule.flavor = config.global.flavor;
1201        Box::new(rule)
1202    }
1203
1204    fn cross_file_scope(&self) -> CrossFileScope {
1205        CrossFileScope::Workspace
1206    }
1207
1208    fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, index: &mut FileIndex) {
1209        // Use the shared utility for cross-file link extraction
1210        // This ensures consistent position tracking between CLI and LSP
1211        for link in extract_cross_file_links(ctx) {
1212            index.add_cross_file_link(link);
1213        }
1214    }
1215
1216    fn cross_file_check(
1217        &self,
1218        file_path: &Path,
1219        file_index: &FileIndex,
1220        workspace_index: &crate::workspace_index::WorkspaceIndex,
1221    ) -> LintResult {
1222        // Reset the file existence cache for a fresh run
1223        reset_file_existence_cache();
1224
1225        let mut warnings = Vec::new();
1226
1227        // Get the directory containing this file for resolving relative links
1228        let file_dir = file_path.parent();
1229
1230        // Compute additional search paths for fallback link resolution
1231        let base_path = file_dir.map_or_else(|| CURRENT_DIR.clone(), std::path::Path::to_path_buf);
1232        let project_root = self.project_root();
1233        let extra_search_paths = self.compute_search_paths(self.flavor, Some(file_path), &base_path, &project_root);
1234
1235        for cross_link in &file_index.cross_file_links {
1236            // URL-decode the path for filesystem operations
1237            // The stored path is URL-encoded (e.g., "%F0%9F%91%A4" for emoji 👤)
1238            let decoded_target = Self::url_decode(&cross_link.target_path);
1239
1240            // Skip absolute paths — they are already handled by check()
1241            // which validates them according to the absolute_links config.
1242            // Handling them here too would produce duplicate warnings.
1243            if decoded_target.starts_with('/') {
1244                continue;
1245            }
1246
1247            // Resolve relative path
1248            let target_path = if let Some(dir) = file_dir {
1249                dir.join(&decoded_target)
1250            } else {
1251                Path::new(&decoded_target).to_path_buf()
1252            };
1253
1254            // Normalize the path (handle .., ., etc.)
1255            let target_path = normalize_path(&target_path);
1256
1257            // Check if the target file exists, also trying markdown extensions for extensionless links
1258            let file_exists =
1259                workspace_index.contains_file(&target_path) || file_exists_or_markdown_extension(&target_path);
1260
1261            if !file_exists {
1262                // For .html/.htm links, check if a corresponding markdown source exists
1263                // This handles doc sites (mdBook, etc.) where .md is compiled to .html
1264                let has_md_source = if let Some(ext) = target_path.extension().and_then(|e| e.to_str())
1265                    && (ext.eq_ignore_ascii_case("html") || ext.eq_ignore_ascii_case("htm"))
1266                    && let (Some(stem), Some(parent)) =
1267                        (target_path.file_stem().and_then(|s| s.to_str()), target_path.parent())
1268                {
1269                    MARKDOWN_EXTENSIONS.iter().any(|md_ext| {
1270                        let source_path = parent.join(format!("{stem}{md_ext}"));
1271                        workspace_index.contains_file(&source_path) || source_path.exists()
1272                    })
1273                } else {
1274                    false
1275                };
1276
1277                if !has_md_source && !Self::exists_in_search_paths(&decoded_target, &extra_search_paths) {
1278                    warnings.push(LintWarning {
1279                        rule_name: Some(self.name().to_string()),
1280                        line: cross_link.line,
1281                        column: cross_link.column,
1282                        end_line: cross_link.line,
1283                        end_column: cross_link.column + cross_link.target_path.len(),
1284                        message: format!("Relative link '{}' does not exist", cross_link.target_path),
1285                        severity: Severity::Error,
1286                        fix: None,
1287                    });
1288                }
1289            }
1290        }
1291
1292        Ok(warnings)
1293    }
1294}
1295
/// Build the relative path that leads from the directory `from_dir` to `to_path`.
///
/// Both arguments are expected to be normalized already. The components the
/// two paths share at the front are dropped; the result climbs one `..` for
/// every remaining component of `from_dir` and then descends through the
/// remaining components of `to_path`.
fn shortest_relative_path(from_dir: &Path, to_path: &Path) -> PathBuf {
    let source: Vec<_> = from_dir.components().collect();
    let target: Vec<_> = to_path.components().collect();

    // Number of leading components shared by both paths
    let shared = source
        .iter()
        .zip(target.iter())
        .take_while(|(s, t)| s == t)
        .count();

    // One `..` per unshared source component, followed by the unshared target tail
    std::iter::repeat(std::path::Component::ParentDir)
        .take(source.len() - shared)
        .chain(target[shared..].iter().copied())
        .collect()
}
1325
1326/// Check if a relative link path can be shortened.
1327///
1328/// Given the source directory and the raw link path, computes whether there's
1329/// a shorter equivalent path. Returns `Some(compact_path)` if the link can
1330/// be simplified, `None` if it's already optimal.
1331fn compute_compact_path(source_dir: &Path, raw_link_path: &str) -> Option<String> {
1332    let link_path = Path::new(raw_link_path);
1333
1334    // Only check paths that contain traversal (../ or ./)
1335    let has_traversal = link_path
1336        .components()
1337        .any(|c| matches!(c, std::path::Component::ParentDir | std::path::Component::CurDir));
1338
1339    if !has_traversal {
1340        return None;
1341    }
1342
1343    // Resolve: source_dir + raw_link_path, then normalize
1344    let combined = source_dir.join(link_path);
1345    let normalized_target = normalize_path(&combined);
1346
1347    // Compute shortest path from source_dir back to the normalized target
1348    let normalized_source = normalize_path(source_dir);
1349    let shortest = shortest_relative_path(&normalized_source, &normalized_target);
1350
1351    // Compare against the raw link path — if it differs, the path can be compacted
1352    if shortest != link_path {
1353        let compact = shortest.to_string_lossy().to_string();
1354        // Avoid suggesting empty path
1355        if compact.is_empty() {
1356            return None;
1357        }
1358        // Markdown links always use forward slashes regardless of platform
1359        Some(compact.replace('\\', "/"))
1360    } else {
1361        None
1362    }
1363}
1364
/// Lexically normalize a path by resolving `.` and `..` components.
///
/// No filesystem access is performed and symlinks are not followed.
///
/// * `.` components are removed.
/// * `..` cancels the preceding normal component.
/// * `..` directly after the root is dropped, since the parent of the root
///   is the root itself; the path stays absolute.
/// * `..` with nothing left to cancel (at the start of a relative path, or
///   after another kept `..`) is preserved, because dropping it would make
///   the path point at a different location.
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut components: Vec<Component<'_>> = Vec::new();

    for component in path.components() {
        match component {
            // Skip current directory markers
            Component::CurDir => {}
            Component::ParentDir => match components.last().copied() {
                // Go up one level by cancelling the previous directory name
                Some(Component::Normal(_)) => {
                    components.pop();
                }
                // Cannot go above the root; never pop it
                Some(Component::RootDir) => {}
                // Nothing to cancel against: keep the traversal
                _ => components.push(component),
            },
            _ => components.push(component),
        }
    }

    components.into_iter().collect()
}
1388
1389#[cfg(test)]
1390mod tests {
1391    use super::*;
1392    use crate::workspace_index::CrossFileLinkIndex;
1393    use std::fs::File;
1394    use std::io::Write;
1395    use tempfile::tempdir;
1396
1397    #[test]
1398    fn test_strip_query_and_fragment() {
1399        // Test query parameter stripping
1400        assert_eq!(
1401            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true"),
1402            "file.png"
1403        );
1404        assert_eq!(
1405            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?raw=true&version=1"),
1406            "file.png"
1407        );
1408        assert_eq!(
1409            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png?"),
1410            "file.png"
1411        );
1412
1413        // Test fragment stripping
1414        assert_eq!(
1415            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section"),
1416            "file.md"
1417        );
1418        assert_eq!(
1419            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#"),
1420            "file.md"
1421        );
1422
1423        // Test both query and fragment (query comes first, per RFC 3986)
1424        assert_eq!(
1425            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md?raw=true#section"),
1426            "file.md"
1427        );
1428
1429        // Test no query or fragment
1430        assert_eq!(
1431            MD057ExistingRelativeLinks::strip_query_and_fragment("file.png"),
1432            "file.png"
1433        );
1434
1435        // Test with path
1436        assert_eq!(
1437            MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true"),
1438            "path/to/image.png"
1439        );
1440        assert_eq!(
1441            MD057ExistingRelativeLinks::strip_query_and_fragment("path/to/image.png?raw=true#anchor"),
1442            "path/to/image.png"
1443        );
1444
1445        // Edge case: fragment before query (non-standard but possible)
1446        assert_eq!(
1447            MD057ExistingRelativeLinks::strip_query_and_fragment("file.md#section?query"),
1448            "file.md"
1449        );
1450    }
1451
1452    #[test]
1453    fn test_url_decode() {
1454        // Simple space encoding
1455        assert_eq!(
1456            MD057ExistingRelativeLinks::url_decode("penguin%20with%20space.jpg"),
1457            "penguin with space.jpg"
1458        );
1459
1460        // Path with encoded spaces
1461        assert_eq!(
1462            MD057ExistingRelativeLinks::url_decode("assets/my%20file%20name.png"),
1463            "assets/my file name.png"
1464        );
1465
1466        // Multiple encoded characters
1467        assert_eq!(
1468            MD057ExistingRelativeLinks::url_decode("hello%20world%21.md"),
1469            "hello world!.md"
1470        );
1471
1472        // Lowercase hex
1473        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2f%2e%2e"), "/..");
1474
1475        // Uppercase hex
1476        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2F%2E%2E"), "/..");
1477
1478        // Mixed case hex
1479        assert_eq!(MD057ExistingRelativeLinks::url_decode("%2f%2E%2e"), "/..");
1480
1481        // No encoding - return as-is
1482        assert_eq!(
1483            MD057ExistingRelativeLinks::url_decode("normal-file.md"),
1484            "normal-file.md"
1485        );
1486
1487        // Incomplete percent encoding - leave as-is
1488        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%2.txt"), "file%2.txt");
1489
1490        // Percent at end - leave as-is
1491        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%"), "file%");
1492
1493        // Invalid hex digits - leave as-is
1494        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%GG.txt"), "file%GG.txt");
1495
1496        // Plus sign (should NOT be decoded - that's form encoding, not URL encoding)
1497        assert_eq!(MD057ExistingRelativeLinks::url_decode("file+name.txt"), "file+name.txt");
1498
1499        // Empty string
1500        assert_eq!(MD057ExistingRelativeLinks::url_decode(""), "");
1501
1502        // UTF-8 multi-byte characters (é = C3 A9 in UTF-8)
1503        assert_eq!(MD057ExistingRelativeLinks::url_decode("caf%C3%A9.md"), "café.md");
1504
1505        // Multiple consecutive encoded characters
1506        assert_eq!(MD057ExistingRelativeLinks::url_decode("%20%20%20"), "   ");
1507
1508        // Encoded path separators
1509        assert_eq!(
1510            MD057ExistingRelativeLinks::url_decode("path%2Fto%2Ffile.md"),
1511            "path/to/file.md"
1512        );
1513
1514        // Mixed encoded and non-encoded
1515        assert_eq!(
1516            MD057ExistingRelativeLinks::url_decode("hello%20world/foo%20bar.md"),
1517            "hello world/foo bar.md"
1518        );
1519
1520        // Special characters that are commonly encoded
1521        assert_eq!(MD057ExistingRelativeLinks::url_decode("file%5B1%5D.md"), "file[1].md");
1522
1523        // Percent at position that looks like encoding but isn't valid
1524        assert_eq!(MD057ExistingRelativeLinks::url_decode("100%pure.md"), "100%pure.md");
1525    }
1526
1527    #[test]
1528    fn test_url_encoded_filenames() {
1529        // Create a temporary directory for test files
1530        let temp_dir = tempdir().unwrap();
1531        let base_path = temp_dir.path();
1532
1533        // Create a file with spaces in the name
1534        let file_with_spaces = base_path.join("penguin with space.jpg");
1535        File::create(&file_with_spaces)
1536            .unwrap()
1537            .write_all(b"image data")
1538            .unwrap();
1539
1540        // Create a subdirectory with spaces
1541        let subdir = base_path.join("my images");
1542        std::fs::create_dir(&subdir).unwrap();
1543        let nested_file = subdir.join("photo 1.png");
1544        File::create(&nested_file).unwrap().write_all(b"photo data").unwrap();
1545
1546        // Test content with URL-encoded links
1547        let content = r#"
1548# Test Document with URL-Encoded Links
1549
1550![Penguin](penguin%20with%20space.jpg)
1551![Photo](my%20images/photo%201.png)
1552![Missing](missing%20file.jpg)
1553"#;
1554
1555        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1556
1557        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1558        let result = rule.check(&ctx).unwrap();
1559
1560        // Should only have one warning for the missing file
1561        assert_eq!(
1562            result.len(),
1563            1,
1564            "Should only warn about missing%20file.jpg. Got: {result:?}"
1565        );
1566        assert!(
1567            result[0].message.contains("missing%20file.jpg"),
1568            "Warning should mention the URL-encoded filename"
1569        );
1570    }
1571
1572    #[test]
1573    fn test_external_urls() {
1574        let rule = MD057ExistingRelativeLinks::new();
1575
1576        // Common web protocols
1577        assert!(rule.is_external_url("https://example.com"));
1578        assert!(rule.is_external_url("http://example.com"));
1579        assert!(rule.is_external_url("ftp://example.com"));
1580        assert!(rule.is_external_url("www.example.com"));
1581        assert!(rule.is_external_url("example.com"));
1582
1583        // Special URI schemes
1584        assert!(rule.is_external_url("file:///path/to/file"));
1585        assert!(rule.is_external_url("smb://server/share"));
1586        assert!(rule.is_external_url("macappstores://apps.apple.com/"));
1587        assert!(rule.is_external_url("mailto:user@example.com"));
1588        assert!(rule.is_external_url("tel:+1234567890"));
1589        assert!(rule.is_external_url("data:text/plain;base64,SGVsbG8="));
1590        assert!(rule.is_external_url("javascript:void(0)"));
1591        assert!(rule.is_external_url("ssh://git@github.com/repo"));
1592        assert!(rule.is_external_url("git://github.com/repo.git"));
1593
1594        // Email addresses without mailto: protocol
1595        // These are clearly not file links and should be skipped
1596        assert!(rule.is_external_url("user@example.com"));
1597        assert!(rule.is_external_url("steering@kubernetes.io"));
1598        assert!(rule.is_external_url("john.doe+filter@company.co.uk"));
1599        assert!(rule.is_external_url("user_name@sub.domain.com"));
1600        assert!(rule.is_external_url("firstname.lastname+tag@really.long.domain.example.org"));
1601
1602        // Template variables should be skipped (not checked as relative links)
1603        assert!(rule.is_external_url("{{URL}}")); // Handlebars/Mustache
1604        assert!(rule.is_external_url("{{#URL}}")); // Handlebars block helper
1605        assert!(rule.is_external_url("{{> partial}}")); // Handlebars partial
1606        assert!(rule.is_external_url("{{ variable }}")); // Mustache with spaces
1607        assert!(rule.is_external_url("{{% include %}}")); // Jinja2/Hugo shortcode
1608        assert!(rule.is_external_url("{{")); // Even partial matches (regex edge case)
1609
1610        // Absolute paths are NOT external (handled separately via is_absolute_path)
1611        // By default they are ignored, but can be configured to warn
1612        assert!(!rule.is_external_url("/api/v1/users"));
1613        assert!(!rule.is_external_url("/blog/2024/release.html"));
1614        assert!(!rule.is_external_url("/react/hooks/use-state.html"));
1615        assert!(!rule.is_external_url("/pkg/runtime"));
1616        assert!(!rule.is_external_url("/doc/go1compat"));
1617        assert!(!rule.is_external_url("/index.html"));
1618        assert!(!rule.is_external_url("/assets/logo.png"));
1619
1620        // But is_absolute_path should detect them
1621        assert!(MD057ExistingRelativeLinks::is_absolute_path("/api/v1/users"));
1622        assert!(MD057ExistingRelativeLinks::is_absolute_path("/blog/2024/release.html"));
1623        assert!(MD057ExistingRelativeLinks::is_absolute_path("/index.html"));
1624        assert!(!MD057ExistingRelativeLinks::is_absolute_path("./relative.md"));
1625        assert!(!MD057ExistingRelativeLinks::is_absolute_path("relative.md"));
1626
1627        // Framework path aliases should be skipped (resolved by build tools)
1628        // Tilde prefix (common in Vite, Nuxt, Astro for project root)
1629        assert!(rule.is_external_url("~/assets/image.png"));
1630        assert!(rule.is_external_url("~/components/Button.vue"));
1631        assert!(rule.is_external_url("~assets/logo.svg")); // Nuxt style without /
1632
1633        // @ prefix (common in Vue, webpack, Vite aliases)
1634        assert!(rule.is_external_url("@/components/Header.vue"));
1635        assert!(rule.is_external_url("@images/photo.jpg"));
1636        assert!(rule.is_external_url("@assets/styles.css"));
1637
1638        // Relative paths should NOT be external (should be validated)
1639        assert!(!rule.is_external_url("./relative/path.md"));
1640        assert!(!rule.is_external_url("relative/path.md"));
1641        assert!(!rule.is_external_url("../parent/path.md"));
1642    }
1643
1644    #[test]
1645    fn test_framework_path_aliases() {
1646        // Create a temporary directory for test files
1647        let temp_dir = tempdir().unwrap();
1648        let base_path = temp_dir.path();
1649
1650        // Test content with framework path aliases (should all be skipped)
1651        let content = r#"
1652# Framework Path Aliases
1653
1654![Image 1](~/assets/penguin.jpg)
1655![Image 2](~assets/logo.svg)
1656![Image 3](@images/photo.jpg)
1657![Image 4](@/components/icon.svg)
1658[Link](@/pages/about.md)
1659
1660This is a [real missing link](missing.md) that should be flagged.
1661"#;
1662
1663        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1664
1665        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1666        let result = rule.check(&ctx).unwrap();
1667
1668        // Should only have one warning for the real missing link
1669        assert_eq!(
1670            result.len(),
1671            1,
1672            "Should only warn about missing.md, not framework aliases. Got: {result:?}"
1673        );
1674        assert!(
1675            result[0].message.contains("missing.md"),
1676            "Warning should be for missing.md"
1677        );
1678    }
1679
1680    #[test]
1681    fn test_url_decode_security_path_traversal() {
1682        // Ensure URL decoding doesn't enable path traversal attacks
1683        // The decoded path is still validated against the base path
1684        let temp_dir = tempdir().unwrap();
1685        let base_path = temp_dir.path();
1686
1687        // Create a file in the temp directory
1688        let file_in_base = base_path.join("safe.md");
1689        File::create(&file_in_base).unwrap().write_all(b"# Safe").unwrap();
1690
1691        // Test with encoded path traversal attempt
1692        // Use a path that definitely won't exist on any platform (not /etc/passwd which exists on Linux)
1693        // %2F = /, so ..%2F..%2Fnonexistent%2Ffile = ../../nonexistent/file
1694        // %252F = %2F (double encoded), so ..%252F..%252F = ..%2F..%2F (literal, won't decode to ..)
1695        let content = r#"
1696[Traversal attempt](..%2F..%2Fnonexistent_dir_12345%2Fmissing.md)
1697[Double encoded](..%252F..%252Fnonexistent%252Ffile.md)
1698[Safe link](safe.md)
1699"#;
1700
1701        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1702
1703        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1704        let result = rule.check(&ctx).unwrap();
1705
1706        // The traversal attempts should still be flagged as missing
1707        // (they don't exist relative to base_path after decoding)
1708        assert_eq!(
1709            result.len(),
1710            2,
1711            "Should have warnings for traversal attempts. Got: {result:?}"
1712        );
1713    }
1714
1715    #[test]
1716    fn test_url_encoded_utf8_filenames() {
1717        // Test with actual UTF-8 encoded filenames
1718        let temp_dir = tempdir().unwrap();
1719        let base_path = temp_dir.path();
1720
1721        // Create files with unicode names
1722        let cafe_file = base_path.join("café.md");
1723        File::create(&cafe_file).unwrap().write_all(b"# Cafe").unwrap();
1724
1725        let content = r#"
1726[Café link](caf%C3%A9.md)
1727[Missing unicode](r%C3%A9sum%C3%A9.md)
1728"#;
1729
1730        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1731
1732        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1733        let result = rule.check(&ctx).unwrap();
1734
1735        // Should only warn about the missing file
1736        assert_eq!(
1737            result.len(),
1738            1,
1739            "Should only warn about missing résumé.md. Got: {result:?}"
1740        );
1741        assert!(
1742            result[0].message.contains("r%C3%A9sum%C3%A9.md"),
1743            "Warning should mention the URL-encoded filename"
1744        );
1745    }
1746
1747    #[test]
1748    fn test_url_encoded_emoji_filenames() {
1749        // URL-encoded emoji paths should be correctly resolved
1750        // 👤 = U+1F464 = F0 9F 91 A4 in UTF-8
1751        let temp_dir = tempdir().unwrap();
1752        let base_path = temp_dir.path();
1753
1754        // Create directory with emoji in name: 👤 Personal
1755        let emoji_dir = base_path.join("👤 Personal");
1756        std::fs::create_dir(&emoji_dir).unwrap();
1757
1758        // Create file in that directory: TV Shows.md
1759        let file_path = emoji_dir.join("TV Shows.md");
1760        File::create(&file_path)
1761            .unwrap()
1762            .write_all(b"# TV Shows\n\nContent here.")
1763            .unwrap();
1764
1765        // Test content with URL-encoded emoji link
1766        // %F0%9F%91%A4 = 👤, %20 = space
1767        let content = r#"
1768# Test Document
1769
1770[TV Shows](./%F0%9F%91%A4%20Personal/TV%20Shows.md)
1771[Missing](./%F0%9F%91%A4%20Personal/Missing.md)
1772"#;
1773
1774        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1775
1776        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1777        let result = rule.check(&ctx).unwrap();
1778
1779        // Should only warn about the missing file, not the valid emoji path
1780        assert_eq!(result.len(), 1, "Should only warn about missing file. Got: {result:?}");
1781        assert!(
1782            result[0].message.contains("Missing.md"),
1783            "Warning should be for Missing.md, got: {}",
1784            result[0].message
1785        );
1786    }
1787
1788    #[test]
1789    fn test_no_warnings_without_base_path() {
1790        let rule = MD057ExistingRelativeLinks::new();
1791        let content = "[Link](missing.md)";
1792
1793        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1794        let result = rule.check(&ctx).unwrap();
1795        assert!(result.is_empty(), "Should have no warnings without base path");
1796    }
1797
1798    #[test]
1799    fn test_existing_and_missing_links() {
1800        // Create a temporary directory for test files
1801        let temp_dir = tempdir().unwrap();
1802        let base_path = temp_dir.path();
1803
1804        // Create an existing file
1805        let exists_path = base_path.join("exists.md");
1806        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
1807
1808        // Verify the file exists
1809        assert!(exists_path.exists(), "exists.md should exist for this test");
1810
1811        // Create test content with both existing and missing links
1812        let content = r#"
1813# Test Document
1814
1815[Valid Link](exists.md)
1816[Invalid Link](missing.md)
1817[External Link](https://example.com)
1818[Media Link](image.jpg)
1819        "#;
1820
1821        // Initialize rule with the base path (default: check all files including media)
1822        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1823
1824        // Test the rule
1825        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1826        let result = rule.check(&ctx).unwrap();
1827
1828        // Should have two warnings: missing.md and image.jpg (both don't exist)
1829        assert_eq!(result.len(), 2);
1830        let messages: Vec<_> = result.iter().map(|w| w.message.as_str()).collect();
1831        assert!(messages.iter().any(|m| m.contains("missing.md")));
1832        assert!(messages.iter().any(|m| m.contains("image.jpg")));
1833    }
1834
1835    #[test]
1836    fn test_angle_bracket_links() {
1837        // Create a temporary directory for test files
1838        let temp_dir = tempdir().unwrap();
1839        let base_path = temp_dir.path();
1840
1841        // Create an existing file
1842        let exists_path = base_path.join("exists.md");
1843        File::create(&exists_path).unwrap().write_all(b"# Test File").unwrap();
1844
1845        // Create test content with angle bracket links
1846        let content = r#"
1847# Test Document
1848
1849[Valid Link](<exists.md>)
1850[Invalid Link](<missing.md>)
1851[External Link](<https://example.com>)
1852    "#;
1853
1854        // Test with default settings
1855        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1856
1857        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1858        let result = rule.check(&ctx).unwrap();
1859
1860        // Should have one warning for missing.md
1861        assert_eq!(result.len(), 1, "Should have exactly one warning");
1862        assert!(
1863            result[0].message.contains("missing.md"),
1864            "Warning should mention missing.md"
1865        );
1866    }
1867
1868    #[test]
1869    fn test_angle_bracket_links_with_parens() {
1870        // Create a temporary directory for test files
1871        let temp_dir = tempdir().unwrap();
1872        let base_path = temp_dir.path();
1873
1874        // Create directory structure with parentheses in path
1875        let app_dir = base_path.join("app");
1876        std::fs::create_dir(&app_dir).unwrap();
1877        let upload_dir = app_dir.join("(upload)");
1878        std::fs::create_dir(&upload_dir).unwrap();
1879        let page_file = upload_dir.join("page.tsx");
1880        File::create(&page_file)
1881            .unwrap()
1882            .write_all(b"export default function Page() {}")
1883            .unwrap();
1884
1885        // Create test content with angle bracket links containing parentheses
1886        let content = r#"
1887# Test Document with Paths Containing Parens
1888
1889[Upload Page](<app/(upload)/page.tsx>)
1890[Unix pipe](<https://en.wikipedia.org/wiki/Pipeline_(Unix)>)
1891[Missing](<app/(missing)/file.md>)
1892"#;
1893
1894        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1895
1896        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1897        let result = rule.check(&ctx).unwrap();
1898
1899        // Should only have one warning for the missing file
1900        assert_eq!(
1901            result.len(),
1902            1,
1903            "Should have exactly one warning for missing file. Got: {result:?}"
1904        );
1905        assert!(
1906            result[0].message.contains("app/(missing)/file.md"),
1907            "Warning should mention app/(missing)/file.md"
1908        );
1909    }
1910
1911    #[test]
1912    fn test_all_file_types_checked() {
1913        // Create a temporary directory for test files
1914        let temp_dir = tempdir().unwrap();
1915        let base_path = temp_dir.path();
1916
1917        // Create a test with various file types - all should be checked
1918        let content = r#"
1919[Image Link](image.jpg)
1920[Video Link](video.mp4)
1921[Markdown Link](document.md)
1922[PDF Link](file.pdf)
1923"#;
1924
1925        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1926
1927        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1928        let result = rule.check(&ctx).unwrap();
1929
1930        // Should warn about all missing files regardless of extension
1931        assert_eq!(result.len(), 4, "Should have warnings for all missing files");
1932    }
1933
1934    #[test]
1935    fn test_code_span_detection() {
1936        let rule = MD057ExistingRelativeLinks::new();
1937
1938        // Create a temporary directory for test files
1939        let temp_dir = tempdir().unwrap();
1940        let base_path = temp_dir.path();
1941
1942        let rule = rule.with_path(base_path);
1943
1944        // Test with document structure
1945        let content = "This is a [link](nonexistent.md) and `[not a link](not-checked.md)` in code.";
1946
1947        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1948        let result = rule.check(&ctx).unwrap();
1949
1950        // Should only find the real link, not the one in code
1951        assert_eq!(result.len(), 1, "Should only flag the real link");
1952        assert!(result[0].message.contains("nonexistent.md"));
1953    }
1954
1955    #[test]
1956    fn test_inline_code_spans() {
1957        // Create a temporary directory for test files
1958        let temp_dir = tempdir().unwrap();
1959        let base_path = temp_dir.path();
1960
1961        // Create test content with links in inline code spans
1962        let content = r#"
1963# Test Document
1964
1965This is a normal link: [Link](missing.md)
1966
1967This is a code span with a link: `[Link](another-missing.md)`
1968
1969Some more text with `inline code [Link](yet-another-missing.md) embedded`.
1970
1971    "#;
1972
1973        // Initialize rule with the base path
1974        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
1975
1976        // Test the rule
1977        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1978        let result = rule.check(&ctx).unwrap();
1979
1980        // Should only have warning for the normal link, not for links in code spans
1981        assert_eq!(result.len(), 1, "Should have exactly one warning");
1982        assert!(
1983            result[0].message.contains("missing.md"),
1984            "Warning should be for missing.md"
1985        );
1986        assert!(
1987            !result.iter().any(|w| w.message.contains("another-missing.md")),
1988            "Should not warn about link in code span"
1989        );
1990        assert!(
1991            !result.iter().any(|w| w.message.contains("yet-another-missing.md")),
1992            "Should not warn about link in inline code"
1993        );
1994    }
1995
1996    #[test]
1997    fn test_extensionless_link_resolution() {
1998        // Create a temporary directory for test files
1999        let temp_dir = tempdir().unwrap();
2000        let base_path = temp_dir.path();
2001
2002        // Create a markdown file WITHOUT specifying .md extension in the link
2003        let page_path = base_path.join("page.md");
2004        File::create(&page_path).unwrap().write_all(b"# Page").unwrap();
2005
2006        // Test content with extensionless link that should resolve to page.md
2007        let content = r#"
2008# Test Document
2009
2010[Link without extension](page)
2011[Link with extension](page.md)
2012[Missing link](nonexistent)
2013"#;
2014
2015        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2016
2017        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2018        let result = rule.check(&ctx).unwrap();
2019
2020        // Should only have warning for nonexistent link
2021        // Both "page" and "page.md" should resolve to the same file
2022        assert_eq!(result.len(), 1, "Should only warn about nonexistent link");
2023        assert!(
2024            result[0].message.contains("nonexistent"),
2025            "Warning should be for 'nonexistent' not 'page'"
2026        );
2027    }
2028
2029    // Cross-file validation tests
2030    #[test]
2031    fn test_cross_file_scope() {
2032        let rule = MD057ExistingRelativeLinks::new();
2033        assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
2034    }
2035
2036    #[test]
2037    fn test_contribute_to_index_extracts_markdown_links() {
2038        let rule = MD057ExistingRelativeLinks::new();
2039        let content = r#"
2040# Document
2041
2042[Link to docs](./docs/guide.md)
2043[Link with fragment](./other.md#section)
2044[External link](https://example.com)
2045[Image link](image.png)
2046[Media file](video.mp4)
2047"#;
2048
2049        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2050        let mut index = FileIndex::new();
2051        rule.contribute_to_index(&ctx, &mut index);
2052
2053        // Should only index markdown file links
2054        assert_eq!(index.cross_file_links.len(), 2);
2055
2056        // Check first link
2057        assert_eq!(index.cross_file_links[0].target_path, "./docs/guide.md");
2058        assert_eq!(index.cross_file_links[0].fragment, "");
2059
2060        // Check second link (with fragment)
2061        assert_eq!(index.cross_file_links[1].target_path, "./other.md");
2062        assert_eq!(index.cross_file_links[1].fragment, "section");
2063    }
2064
2065    #[test]
2066    fn test_contribute_to_index_skips_external_and_anchors() {
2067        let rule = MD057ExistingRelativeLinks::new();
2068        let content = r#"
2069# Document
2070
2071[External](https://example.com)
2072[Another external](http://example.org)
2073[Fragment only](#section)
2074[FTP link](ftp://files.example.com)
2075[Mail link](mailto:test@example.com)
2076[WWW link](www.example.com)
2077"#;
2078
2079        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2080        let mut index = FileIndex::new();
2081        rule.contribute_to_index(&ctx, &mut index);
2082
2083        // Should not index any of these
2084        assert_eq!(index.cross_file_links.len(), 0);
2085    }
2086
2087    #[test]
2088    fn test_cross_file_check_valid_link() {
2089        use crate::workspace_index::WorkspaceIndex;
2090
2091        let rule = MD057ExistingRelativeLinks::new();
2092
2093        // Create a workspace index with the target file
2094        let mut workspace_index = WorkspaceIndex::new();
2095        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());
2096
2097        // Create file index with a link to an existing file
2098        let mut file_index = FileIndex::new();
2099        file_index.add_cross_file_link(CrossFileLinkIndex {
2100            target_path: "guide.md".to_string(),
2101            fragment: "".to_string(),
2102            line: 5,
2103            column: 1,
2104        });
2105
2106        // Run cross-file check from docs/index.md
2107        let warnings = rule
2108            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
2109            .unwrap();
2110
2111        // Should have no warnings - file exists
2112        assert!(warnings.is_empty());
2113    }
2114
2115    #[test]
2116    fn test_cross_file_check_missing_link() {
2117        use crate::workspace_index::WorkspaceIndex;
2118
2119        let rule = MD057ExistingRelativeLinks::new();
2120
2121        // Create an empty workspace index
2122        let workspace_index = WorkspaceIndex::new();
2123
2124        // Create file index with a link to a missing file
2125        let mut file_index = FileIndex::new();
2126        file_index.add_cross_file_link(CrossFileLinkIndex {
2127            target_path: "missing.md".to_string(),
2128            fragment: "".to_string(),
2129            line: 5,
2130            column: 1,
2131        });
2132
2133        // Run cross-file check
2134        let warnings = rule
2135            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
2136            .unwrap();
2137
2138        // Should have one warning for the missing file
2139        assert_eq!(warnings.len(), 1);
2140        assert!(warnings[0].message.contains("missing.md"));
2141        assert!(warnings[0].message.contains("does not exist"));
2142    }
2143
2144    #[test]
2145    fn test_cross_file_check_parent_path() {
2146        use crate::workspace_index::WorkspaceIndex;
2147
2148        let rule = MD057ExistingRelativeLinks::new();
2149
2150        // Create a workspace index with the target file at the root
2151        let mut workspace_index = WorkspaceIndex::new();
2152        workspace_index.insert_file(PathBuf::from("readme.md"), FileIndex::new());
2153
2154        // Create file index with a parent path link
2155        let mut file_index = FileIndex::new();
2156        file_index.add_cross_file_link(CrossFileLinkIndex {
2157            target_path: "../readme.md".to_string(),
2158            fragment: "".to_string(),
2159            line: 5,
2160            column: 1,
2161        });
2162
2163        // Run cross-file check from docs/guide.md
2164        let warnings = rule
2165            .cross_file_check(Path::new("docs/guide.md"), &file_index, &workspace_index)
2166            .unwrap();
2167
2168        // Should have no warnings - file exists at normalized path
2169        assert!(warnings.is_empty());
2170    }
2171
2172    #[test]
2173    fn test_cross_file_check_html_link_with_md_source() {
2174        // Test that .html links are accepted when corresponding .md source exists
2175        // This supports mdBook and similar doc generators that compile .md to .html
2176        use crate::workspace_index::WorkspaceIndex;
2177
2178        let rule = MD057ExistingRelativeLinks::new();
2179
2180        // Create a workspace index with the .md source file
2181        let mut workspace_index = WorkspaceIndex::new();
2182        workspace_index.insert_file(PathBuf::from("docs/guide.md"), FileIndex::new());
2183
2184        // Create file index with an .html link (from another rule like MD051)
2185        let mut file_index = FileIndex::new();
2186        file_index.add_cross_file_link(CrossFileLinkIndex {
2187            target_path: "guide.html".to_string(),
2188            fragment: "section".to_string(),
2189            line: 10,
2190            column: 5,
2191        });
2192
2193        // Run cross-file check from docs/index.md
2194        let warnings = rule
2195            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
2196            .unwrap();
2197
2198        // Should have no warnings - .md source exists for the .html link
2199        assert!(
2200            warnings.is_empty(),
2201            "Expected no warnings for .html link with .md source, got: {warnings:?}"
2202        );
2203    }
2204
2205    #[test]
2206    fn test_cross_file_check_html_link_without_source() {
2207        // Test that .html links without corresponding .md source ARE flagged
2208        use crate::workspace_index::WorkspaceIndex;
2209
2210        let rule = MD057ExistingRelativeLinks::new();
2211
2212        // Create an empty workspace index
2213        let workspace_index = WorkspaceIndex::new();
2214
2215        // Create file index with an .html link to a non-existent file
2216        let mut file_index = FileIndex::new();
2217        file_index.add_cross_file_link(CrossFileLinkIndex {
2218            target_path: "missing.html".to_string(),
2219            fragment: "".to_string(),
2220            line: 10,
2221            column: 5,
2222        });
2223
2224        // Run cross-file check from docs/index.md
2225        let warnings = rule
2226            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
2227            .unwrap();
2228
2229        // Should have one warning - no .md source exists
2230        assert_eq!(warnings.len(), 1, "Expected 1 warning for .html link without source");
2231        assert!(warnings[0].message.contains("missing.html"));
2232    }
2233
2234    #[test]
2235    fn test_normalize_path_function() {
2236        // Test simple cases
2237        assert_eq!(
2238            normalize_path(Path::new("docs/guide.md")),
2239            PathBuf::from("docs/guide.md")
2240        );
2241
2242        // Test current directory removal
2243        assert_eq!(
2244            normalize_path(Path::new("./docs/guide.md")),
2245            PathBuf::from("docs/guide.md")
2246        );
2247
2248        // Test parent directory resolution
2249        assert_eq!(
2250            normalize_path(Path::new("docs/sub/../guide.md")),
2251            PathBuf::from("docs/guide.md")
2252        );
2253
2254        // Test multiple parent directories
2255        assert_eq!(normalize_path(Path::new("a/b/c/../../d.md")), PathBuf::from("a/d.md"));
2256    }
2257
2258    #[test]
2259    fn test_html_link_with_md_source() {
2260        // Links to .html files should pass if corresponding .md source exists
2261        let temp_dir = tempdir().unwrap();
2262        let base_path = temp_dir.path();
2263
2264        // Create guide.md (source file)
2265        let md_file = base_path.join("guide.md");
2266        File::create(&md_file).unwrap().write_all(b"# Guide").unwrap();
2267
2268        let content = r#"
2269[Read the guide](guide.html)
2270[Also here](getting-started.html)
2271"#;
2272
2273        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2274        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2275        let result = rule.check(&ctx).unwrap();
2276
2277        // guide.html passes (guide.md exists), getting-started.html fails
2278        assert_eq!(
2279            result.len(),
2280            1,
2281            "Should only warn about missing source. Got: {result:?}"
2282        );
2283        assert!(result[0].message.contains("getting-started.html"));
2284    }
2285
2286    #[test]
2287    fn test_htm_link_with_md_source() {
2288        // .htm extension should also check for markdown source
2289        let temp_dir = tempdir().unwrap();
2290        let base_path = temp_dir.path();
2291
2292        let md_file = base_path.join("page.md");
2293        File::create(&md_file).unwrap().write_all(b"# Page").unwrap();
2294
2295        let content = "[Page](page.htm)";
2296
2297        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2298        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2299        let result = rule.check(&ctx).unwrap();
2300
2301        assert!(
2302            result.is_empty(),
2303            "Should not warn when .md source exists for .htm link"
2304        );
2305    }
2306
2307    #[test]
2308    fn test_html_link_finds_various_markdown_extensions() {
2309        // Should find .mdx, .markdown, etc. as source files
2310        let temp_dir = tempdir().unwrap();
2311        let base_path = temp_dir.path();
2312
2313        File::create(base_path.join("doc.md")).unwrap();
2314        File::create(base_path.join("tutorial.mdx")).unwrap();
2315        File::create(base_path.join("guide.markdown")).unwrap();
2316
2317        let content = r#"
2318[Doc](doc.html)
2319[Tutorial](tutorial.html)
2320[Guide](guide.html)
2321"#;
2322
2323        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2324        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2325        let result = rule.check(&ctx).unwrap();
2326
2327        assert!(
2328            result.is_empty(),
2329            "Should find all markdown variants as source files. Got: {result:?}"
2330        );
2331    }
2332
2333    #[test]
2334    fn test_html_link_in_subdirectory() {
2335        // Should find markdown source in subdirectories
2336        let temp_dir = tempdir().unwrap();
2337        let base_path = temp_dir.path();
2338
2339        let docs_dir = base_path.join("docs");
2340        std::fs::create_dir(&docs_dir).unwrap();
2341        File::create(docs_dir.join("guide.md"))
2342            .unwrap()
2343            .write_all(b"# Guide")
2344            .unwrap();
2345
2346        let content = "[Guide](docs/guide.html)";
2347
2348        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2349        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2350        let result = rule.check(&ctx).unwrap();
2351
2352        assert!(result.is_empty(), "Should find markdown source in subdirectory");
2353    }
2354
2355    #[test]
2356    fn test_absolute_path_skipped_in_check() {
2357        // Test that absolute paths are skipped during link validation
2358        // This fixes the bug where /pkg/runtime was being flagged
2359        let temp_dir = tempdir().unwrap();
2360        let base_path = temp_dir.path();
2361
2362        let content = r#"
2363# Test Document
2364
2365[Go Runtime](/pkg/runtime)
2366[Go Runtime with Fragment](/pkg/runtime#section)
2367[API Docs](/api/v1/users)
2368[Blog Post](/blog/2024/release.html)
2369[React Hook](/react/hooks/use-state.html)
2370"#;
2371
2372        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2373        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2374        let result = rule.check(&ctx).unwrap();
2375
2376        // Should have NO warnings - all absolute paths should be skipped
2377        assert!(
2378            result.is_empty(),
2379            "Absolute paths should be skipped. Got warnings: {result:?}"
2380        );
2381    }
2382
2383    #[test]
2384    fn test_absolute_path_skipped_in_cross_file_check() {
2385        // Test that absolute paths are skipped in cross_file_check()
2386        use crate::workspace_index::WorkspaceIndex;
2387
2388        let rule = MD057ExistingRelativeLinks::new();
2389
2390        // Create an empty workspace index (no files exist)
2391        let workspace_index = WorkspaceIndex::new();
2392
2393        // Create file index with absolute path links (should be skipped)
2394        let mut file_index = FileIndex::new();
2395        file_index.add_cross_file_link(CrossFileLinkIndex {
2396            target_path: "/pkg/runtime.md".to_string(),
2397            fragment: "".to_string(),
2398            line: 5,
2399            column: 1,
2400        });
2401        file_index.add_cross_file_link(CrossFileLinkIndex {
2402            target_path: "/api/v1/users.md".to_string(),
2403            fragment: "section".to_string(),
2404            line: 10,
2405            column: 1,
2406        });
2407
2408        // Run cross-file check
2409        let warnings = rule
2410            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
2411            .unwrap();
2412
2413        // Should have NO warnings - absolute paths should be skipped
2414        assert!(
2415            warnings.is_empty(),
2416            "Absolute paths should be skipped in cross_file_check. Got warnings: {warnings:?}"
2417        );
2418    }
2419
2420    #[test]
2421    fn test_protocol_relative_url_not_skipped() {
2422        // Test that protocol-relative URLs (//example.com) are NOT skipped as absolute paths
2423        // They should still be caught by is_external_url() though
2424        let temp_dir = tempdir().unwrap();
2425        let base_path = temp_dir.path();
2426
2427        let content = r#"
2428# Test Document
2429
2430[External](//example.com/page)
2431[Another](//cdn.example.com/asset.js)
2432"#;
2433
2434        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2435        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2436        let result = rule.check(&ctx).unwrap();
2437
2438        // Should have NO warnings - protocol-relative URLs are external and should be skipped
2439        assert!(
2440            result.is_empty(),
2441            "Protocol-relative URLs should be skipped. Got warnings: {result:?}"
2442        );
2443    }
2444
2445    #[test]
2446    fn test_email_addresses_skipped() {
2447        // Test that email addresses without mailto: are skipped
2448        // These are clearly not file links (the @ symbol is definitive)
2449        let temp_dir = tempdir().unwrap();
2450        let base_path = temp_dir.path();
2451
2452        let content = r#"
2453# Test Document
2454
2455[Contact](user@example.com)
2456[Steering](steering@kubernetes.io)
2457[Support](john.doe+filter@company.co.uk)
2458[User](user_name@sub.domain.com)
2459"#;
2460
2461        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2462        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2463        let result = rule.check(&ctx).unwrap();
2464
2465        // Should have NO warnings - email addresses are clearly not file links and should be skipped
2466        assert!(
2467            result.is_empty(),
2468            "Email addresses should be skipped. Got warnings: {result:?}"
2469        );
2470    }
2471
2472    #[test]
2473    fn test_email_addresses_vs_file_paths() {
2474        // Test that email addresses (anything with @) are skipped
2475        // Note: File paths with @ are extremely rare, so we treat anything with @ as an email
2476        let temp_dir = tempdir().unwrap();
2477        let base_path = temp_dir.path();
2478
2479        let content = r#"
2480# Test Document
2481
2482[Email](user@example.com)  <!-- Should be skipped (email) -->
2483[Email2](steering@kubernetes.io)  <!-- Should be skipped (email) -->
2484[Email3](user@file.md)  <!-- Should be skipped (has @, treated as email) -->
2485"#;
2486
2487        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2488        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2489        let result = rule.check(&ctx).unwrap();
2490
2491        // All should be skipped - anything with @ is treated as an email
2492        assert!(
2493            result.is_empty(),
2494            "All email addresses should be skipped. Got: {result:?}"
2495        );
2496    }
2497
2498    #[test]
2499    fn test_diagnostic_position_accuracy() {
2500        // Test that diagnostics point to the URL, not the link text
2501        let temp_dir = tempdir().unwrap();
2502        let base_path = temp_dir.path();
2503
2504        // Position markers:     0         1         2         3
2505        //                       0123456789012345678901234567890123456789
2506        let content = "prefix [text](missing.md) suffix";
2507        //             The URL "missing.md" starts at 0-indexed position 14
2508        //             which is 1-indexed column 15, and ends at 0-indexed 24 (1-indexed column 25)
2509
2510        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2511        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2512        let result = rule.check(&ctx).unwrap();
2513
2514        assert_eq!(result.len(), 1, "Should have exactly one warning");
2515        assert_eq!(result[0].line, 1, "Should be on line 1");
2516        assert_eq!(result[0].column, 15, "Should point to start of URL 'missing.md'");
2517        assert_eq!(result[0].end_column, 25, "Should point past end of URL 'missing.md'");
2518    }
2519
2520    #[test]
2521    fn test_diagnostic_position_angle_brackets() {
2522        // Test position accuracy with angle bracket links
2523        let temp_dir = tempdir().unwrap();
2524        let base_path = temp_dir.path();
2525
2526        // Position markers:     0         1         2
2527        //                       012345678901234567890
2528        let content = "[link](<missing.md>)";
2529        //             The URL "missing.md" starts at 0-indexed position 8 (1-indexed column 9)
2530
2531        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2532        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2533        let result = rule.check(&ctx).unwrap();
2534
2535        assert_eq!(result.len(), 1, "Should have exactly one warning");
2536        assert_eq!(result[0].line, 1, "Should be on line 1");
2537        assert_eq!(result[0].column, 9, "Should point to start of URL in angle brackets");
2538    }
2539
2540    #[test]
2541    fn test_diagnostic_position_multiline() {
2542        // Test that line numbers are correct for links on different lines
2543        let temp_dir = tempdir().unwrap();
2544        let base_path = temp_dir.path();
2545
2546        let content = r#"# Title
2547Some text on line 2
2548[link on line 3](missing1.md)
2549More text
2550[link on line 5](missing2.md)"#;
2551
2552        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2553        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2554        let result = rule.check(&ctx).unwrap();
2555
2556        assert_eq!(result.len(), 2, "Should have two warnings");
2557
2558        // First warning should be on line 3
2559        assert_eq!(result[0].line, 3, "First warning should be on line 3");
2560        assert!(result[0].message.contains("missing1.md"));
2561
2562        // Second warning should be on line 5
2563        assert_eq!(result[1].line, 5, "Second warning should be on line 5");
2564        assert!(result[1].message.contains("missing2.md"));
2565    }
2566
2567    #[test]
2568    fn test_diagnostic_position_with_spaces() {
2569        // Test position with URLs that have spaces in parentheses
2570        let temp_dir = tempdir().unwrap();
2571        let base_path = temp_dir.path();
2572
2573        let content = "[link]( missing.md )";
2574        //             0123456789012345678901
2575        //             0-indexed position 8 is 'm' in 'missing.md' (after space and paren)
2576        //             which is 1-indexed column 9
2577
2578        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2579        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2580        let result = rule.check(&ctx).unwrap();
2581
2582        assert_eq!(result.len(), 1, "Should have exactly one warning");
2583        // The regex captures the URL without leading/trailing spaces
2584        assert_eq!(result[0].column, 9, "Should point to URL after stripping spaces");
2585    }
2586
2587    #[test]
2588    fn test_diagnostic_position_image() {
2589        // Test that image diagnostics also have correct positions
2590        let temp_dir = tempdir().unwrap();
2591        let base_path = temp_dir.path();
2592
2593        let content = "![alt text](missing.jpg)";
2594
2595        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2596        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2597        let result = rule.check(&ctx).unwrap();
2598
2599        assert_eq!(result.len(), 1, "Should have exactly one warning for image");
2600        assert_eq!(result[0].line, 1);
2601        // Images use start_col from the parser, which should point to the URL
2602        assert!(result[0].column > 0, "Should have valid column position");
2603        assert!(result[0].message.contains("missing.jpg"));
2604    }
2605
2606    #[test]
2607    fn test_wikilinks_skipped() {
2608        // Wikilinks should not trigger MD057 warnings
2609        // They use a different linking system (e.g., Obsidian, wiki software)
2610        let temp_dir = tempdir().unwrap();
2611        let base_path = temp_dir.path();
2612
2613        let content = r#"# Test Document
2614
2615[[Microsoft#Windows OS]]
2616[[SomePage]]
2617[[Page With Spaces]]
2618[[path/to/page#section]]
2619[[page|Display Text]]
2620
2621This is a [real missing link](missing.md) that should be flagged.
2622"#;
2623
2624        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2625        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2626        let result = rule.check(&ctx).unwrap();
2627
2628        // Should only warn about the regular markdown link, not wikilinks
2629        assert_eq!(
2630            result.len(),
2631            1,
2632            "Should only warn about missing.md, not wikilinks. Got: {result:?}"
2633        );
2634        assert!(
2635            result[0].message.contains("missing.md"),
2636            "Warning should be for missing.md, not wikilinks"
2637        );
2638    }
2639
2640    #[test]
2641    fn test_wikilinks_not_added_to_index() {
2642        // Wikilinks should not be added to the cross-file link index
2643        let temp_dir = tempdir().unwrap();
2644        let base_path = temp_dir.path();
2645
2646        let content = r#"# Test Document
2647
2648[[Microsoft#Windows OS]]
2649[[SomePage#section]]
2650[Regular Link](other.md)
2651"#;
2652
2653        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2654        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2655
2656        let mut file_index = FileIndex::new();
2657        rule.contribute_to_index(&ctx, &mut file_index);
2658
2659        // Should only have the regular markdown link (if it's a markdown file)
2660        // Wikilinks should not be added
2661        let cross_file_links = &file_index.cross_file_links;
2662        assert_eq!(
2663            cross_file_links.len(),
2664            1,
2665            "Only regular markdown links should be indexed, not wikilinks. Got: {cross_file_links:?}"
2666        );
2667        assert_eq!(file_index.cross_file_links[0].target_path, "other.md");
2668    }
2669
2670    #[test]
2671    fn test_reference_definition_missing_file() {
2672        // Reference definitions [ref]: ./path.md should be checked
2673        let temp_dir = tempdir().unwrap();
2674        let base_path = temp_dir.path();
2675
2676        let content = r#"# Test Document
2677
2678[test]: ./missing.md
2679[example]: ./nonexistent.html
2680
2681Use [test] and [example] here.
2682"#;
2683
2684        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2685        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2686        let result = rule.check(&ctx).unwrap();
2687
2688        // Should have warnings for both reference definitions
2689        assert_eq!(
2690            result.len(),
2691            2,
2692            "Should have warnings for missing reference definition targets. Got: {result:?}"
2693        );
2694        assert!(
2695            result.iter().any(|w| w.message.contains("missing.md")),
2696            "Should warn about missing.md"
2697        );
2698        assert!(
2699            result.iter().any(|w| w.message.contains("nonexistent.html")),
2700            "Should warn about nonexistent.html"
2701        );
2702    }
2703
2704    #[test]
2705    fn test_reference_definition_existing_file() {
2706        // Reference definitions to existing files should NOT trigger warnings
2707        let temp_dir = tempdir().unwrap();
2708        let base_path = temp_dir.path();
2709
2710        // Create an existing file
2711        let exists_path = base_path.join("exists.md");
2712        File::create(&exists_path)
2713            .unwrap()
2714            .write_all(b"# Existing file")
2715            .unwrap();
2716
2717        let content = r#"# Test Document
2718
2719[test]: ./exists.md
2720
2721Use [test] here.
2722"#;
2723
2724        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2725        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2726        let result = rule.check(&ctx).unwrap();
2727
2728        // Should have NO warnings since the file exists
2729        assert!(
2730            result.is_empty(),
2731            "Should not warn about existing file. Got: {result:?}"
2732        );
2733    }
2734
2735    #[test]
2736    fn test_reference_definition_external_url_skipped() {
2737        // Reference definitions with external URLs should be skipped
2738        let temp_dir = tempdir().unwrap();
2739        let base_path = temp_dir.path();
2740
2741        let content = r#"# Test Document
2742
2743[google]: https://google.com
2744[example]: http://example.org
2745[mail]: mailto:test@example.com
2746[ftp]: ftp://files.example.com
2747[local]: ./missing.md
2748
2749Use [google], [example], [mail], [ftp], [local] here.
2750"#;
2751
2752        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2753        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2754        let result = rule.check(&ctx).unwrap();
2755
2756        // Should only warn about the local missing file, not external URLs
2757        assert_eq!(
2758            result.len(),
2759            1,
2760            "Should only warn about local missing file. Got: {result:?}"
2761        );
2762        assert!(
2763            result[0].message.contains("missing.md"),
2764            "Warning should be for missing.md"
2765        );
2766    }
2767
2768    #[test]
2769    fn test_reference_definition_fragment_only_skipped() {
2770        // Reference definitions with fragment-only URLs should be skipped
2771        let temp_dir = tempdir().unwrap();
2772        let base_path = temp_dir.path();
2773
2774        let content = r#"# Test Document
2775
2776[section]: #my-section
2777
2778Use [section] here.
2779"#;
2780
2781        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2782        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2783        let result = rule.check(&ctx).unwrap();
2784
2785        // Should have NO warnings for fragment-only links
2786        assert!(
2787            result.is_empty(),
2788            "Should not warn about fragment-only reference. Got: {result:?}"
2789        );
2790    }
2791
2792    #[test]
2793    fn test_reference_definition_column_position() {
2794        // Test that column position points to the URL in the reference definition
2795        let temp_dir = tempdir().unwrap();
2796        let base_path = temp_dir.path();
2797
2798        // Position markers:     0         1         2
2799        //                       0123456789012345678901
2800        let content = "[ref]: ./missing.md";
2801        //             The URL "./missing.md" starts at 0-indexed position 7
2802        //             which is 1-indexed column 8
2803
2804        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2805        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2806        let result = rule.check(&ctx).unwrap();
2807
2808        assert_eq!(result.len(), 1, "Should have exactly one warning");
2809        assert_eq!(result[0].line, 1, "Should be on line 1");
2810        assert_eq!(result[0].column, 8, "Should point to start of URL './missing.md'");
2811    }
2812
2813    #[test]
2814    fn test_reference_definition_html_with_md_source() {
2815        // Reference definitions to .html files should pass if corresponding .md source exists
2816        let temp_dir = tempdir().unwrap();
2817        let base_path = temp_dir.path();
2818
2819        // Create guide.md (source file)
2820        let md_file = base_path.join("guide.md");
2821        File::create(&md_file).unwrap().write_all(b"# Guide").unwrap();
2822
2823        let content = r#"# Test Document
2824
2825[guide]: ./guide.html
2826[missing]: ./missing.html
2827
2828Use [guide] and [missing] here.
2829"#;
2830
2831        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2832        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2833        let result = rule.check(&ctx).unwrap();
2834
2835        // guide.html passes (guide.md exists), missing.html fails
2836        assert_eq!(
2837            result.len(),
2838            1,
2839            "Should only warn about missing source. Got: {result:?}"
2840        );
2841        assert!(result[0].message.contains("missing.html"));
2842    }
2843
2844    #[test]
2845    fn test_reference_definition_url_encoded() {
2846        // Reference definitions with URL-encoded paths should be decoded before checking
2847        let temp_dir = tempdir().unwrap();
2848        let base_path = temp_dir.path();
2849
2850        // Create a file with spaces in the name
2851        let file_with_spaces = base_path.join("file with spaces.md");
2852        File::create(&file_with_spaces).unwrap().write_all(b"# Spaces").unwrap();
2853
2854        let content = r#"# Test Document
2855
2856[spaces]: ./file%20with%20spaces.md
2857[missing]: ./missing%20file.md
2858
2859Use [spaces] and [missing] here.
2860"#;
2861
2862        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2863        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2864        let result = rule.check(&ctx).unwrap();
2865
2866        // Should only warn about the missing file
2867        assert_eq!(
2868            result.len(),
2869            1,
2870            "Should only warn about missing URL-encoded file. Got: {result:?}"
2871        );
2872        assert!(result[0].message.contains("missing%20file.md"));
2873    }
2874
2875    #[test]
2876    fn test_inline_and_reference_both_checked() {
2877        // Both inline links and reference definitions should be checked
2878        let temp_dir = tempdir().unwrap();
2879        let base_path = temp_dir.path();
2880
2881        let content = r#"# Test Document
2882
2883[inline link](./inline-missing.md)
2884[ref]: ./ref-missing.md
2885
2886Use [ref] here.
2887"#;
2888
2889        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2890        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2891        let result = rule.check(&ctx).unwrap();
2892
2893        // Should warn about both the inline link and the reference definition
2894        assert_eq!(
2895            result.len(),
2896            2,
2897            "Should warn about both inline and reference links. Got: {result:?}"
2898        );
2899        assert!(
2900            result.iter().any(|w| w.message.contains("inline-missing.md")),
2901            "Should warn about inline-missing.md"
2902        );
2903        assert!(
2904            result.iter().any(|w| w.message.contains("ref-missing.md")),
2905            "Should warn about ref-missing.md"
2906        );
2907    }
2908
2909    #[test]
2910    fn test_footnote_definitions_not_flagged() {
2911        // Regression test for issue #286: footnote definitions should not be
2912        // treated as reference definitions and flagged as broken links
2913        let rule = MD057ExistingRelativeLinks::default();
2914
2915        let content = r#"# Title
2916
2917A footnote[^1].
2918
2919[^1]: [link](https://www.google.com).
2920"#;
2921
2922        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2923        let result = rule.check(&ctx).unwrap();
2924
2925        assert!(
2926            result.is_empty(),
2927            "Footnote definitions should not trigger MD057 warnings. Got: {result:?}"
2928        );
2929    }
2930
2931    #[test]
2932    fn test_footnote_with_relative_link_inside() {
2933        // Footnotes containing relative links should not be checked
2934        // (the footnote content is not a URL, it's content that may contain links)
2935        let rule = MD057ExistingRelativeLinks::default();
2936
2937        let content = r#"# Title
2938
2939See the footnote[^1].
2940
2941[^1]: Check out [this file](./existing.md) for more info.
2942[^2]: Also see [missing](./does-not-exist.md).
2943"#;
2944
2945        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2946        let result = rule.check(&ctx).unwrap();
2947
2948        // The inline links INSIDE footnotes should be checked (./existing.md, ./does-not-exist.md)
2949        // but the footnote definition itself should not be treated as a reference definition
2950        // Note: This test verifies that [^1]: and [^2]: are not parsed as ref defs with
2951        // URLs like "[this file](./existing.md)" or "[missing](./does-not-exist.md)"
2952        for warning in &result {
2953            assert!(
2954                !warning.message.contains("[this file]"),
2955                "Footnote content should not be treated as URL: {warning:?}"
2956            );
2957            assert!(
2958                !warning.message.contains("[missing]"),
2959                "Footnote content should not be treated as URL: {warning:?}"
2960            );
2961        }
2962    }
2963
2964    #[test]
2965    fn test_mixed_footnotes_and_reference_definitions() {
2966        // Ensure regular reference definitions are still checked while footnotes are skipped
2967        let temp_dir = tempdir().unwrap();
2968        let base_path = temp_dir.path();
2969
2970        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
2971
2972        let content = r#"# Title
2973
2974A footnote[^1] and a [ref link][myref].
2975
2976[^1]: This is a footnote with [link](https://example.com).
2977
2978[myref]: ./missing-file.md "This should be checked"
2979"#;
2980
2981        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2982        let result = rule.check(&ctx).unwrap();
2983
2984        // Should only warn about the regular reference definition, not the footnote
2985        assert_eq!(
2986            result.len(),
2987            1,
2988            "Should only warn about the regular reference definition. Got: {result:?}"
2989        );
2990        assert!(
2991            result[0].message.contains("missing-file.md"),
2992            "Should warn about missing-file.md in reference definition"
2993        );
2994    }
2995
2996    #[test]
2997    fn test_absolute_links_ignore_by_default() {
2998        // By default, absolute links are ignored (not validated)
2999        let temp_dir = tempdir().unwrap();
3000        let base_path = temp_dir.path();
3001
3002        let rule = MD057ExistingRelativeLinks::new().with_path(base_path);
3003
3004        let content = r#"# Links
3005
3006[API docs](/api/v1/users)
3007[Blog post](/blog/2024/release.html)
3008![Logo](/assets/logo.png)
3009
3010[ref]: /docs/reference.md
3011"#;
3012
3013        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
3014        let result = rule.check(&ctx).unwrap();
3015
3016        // No warnings - absolute links are ignored by default
3017        assert!(
3018            result.is_empty(),
3019            "Absolute links should be ignored by default. Got: {result:?}"
3020        );
3021    }
3022
3023    #[test]
3024    fn test_absolute_links_warn_config() {
3025        // When configured to warn, absolute links should generate warnings
3026        let temp_dir = tempdir().unwrap();
3027        let base_path = temp_dir.path();
3028
3029        let config = MD057Config {
3030            absolute_links: AbsoluteLinksOption::Warn,
3031            ..Default::default()
3032        };
3033        let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(base_path);
3034
3035        let content = r#"# Links
3036
3037[API docs](/api/v1/users)
3038[Blog post](/blog/2024/release.html)
3039"#;
3040
3041        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
3042        let result = rule.check(&ctx).unwrap();
3043
3044        // Should have 2 warnings for the 2 absolute links
3045        assert_eq!(
3046            result.len(),
3047            2,
3048            "Should warn about both absolute links. Got: {result:?}"
3049        );
3050        assert!(
3051            result[0].message.contains("cannot be validated locally"),
3052            "Warning should explain why: {}",
3053            result[0].message
3054        );
3055        assert!(
3056            result[0].message.contains("/api/v1/users"),
3057            "Warning should include the link path"
3058        );
3059    }
3060
3061    #[test]
3062    fn test_absolute_links_warn_images() {
3063        // Images with absolute paths should also warn when configured
3064        let temp_dir = tempdir().unwrap();
3065        let base_path = temp_dir.path();
3066
3067        let config = MD057Config {
3068            absolute_links: AbsoluteLinksOption::Warn,
3069            ..Default::default()
3070        };
3071        let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(base_path);
3072
3073        let content = r#"# Images
3074
3075![Logo](/assets/logo.png)
3076"#;
3077
3078        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
3079        let result = rule.check(&ctx).unwrap();
3080
3081        assert_eq!(
3082            result.len(),
3083            1,
3084            "Should warn about absolute image path. Got: {result:?}"
3085        );
3086        assert!(
3087            result[0].message.contains("/assets/logo.png"),
3088            "Warning should include the image path"
3089        );
3090    }
3091
3092    #[test]
3093    fn test_absolute_links_warn_reference_definitions() {
3094        // Reference definitions with absolute paths should also warn when configured
3095        let temp_dir = tempdir().unwrap();
3096        let base_path = temp_dir.path();
3097
3098        let config = MD057Config {
3099            absolute_links: AbsoluteLinksOption::Warn,
3100            ..Default::default()
3101        };
3102        let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(base_path);
3103
3104        let content = r#"# Reference
3105
3106See the [docs][ref].
3107
3108[ref]: /docs/reference.md
3109"#;
3110
3111        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
3112        let result = rule.check(&ctx).unwrap();
3113
3114        assert_eq!(
3115            result.len(),
3116            1,
3117            "Should warn about absolute reference definition. Got: {result:?}"
3118        );
3119        assert!(
3120            result[0].message.contains("/docs/reference.md"),
3121            "Warning should include the reference path"
3122        );
3123    }
3124
3125    #[test]
3126    fn test_search_paths_inline_link() {
3127        let temp_dir = tempdir().unwrap();
3128        let base_path = temp_dir.path();
3129
3130        // Create an "assets" directory with an image
3131        let assets_dir = base_path.join("assets");
3132        std::fs::create_dir_all(&assets_dir).unwrap();
3133        std::fs::write(assets_dir.join("photo.png"), "fake image").unwrap();
3134
3135        let config = MD057Config {
3136            search_paths: vec![assets_dir.to_string_lossy().into_owned()],
3137            ..Default::default()
3138        };
3139        let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(base_path);
3140
3141        let content = "# Test\n\n[Photo](photo.png)\n";
3142        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
3143        let result = rule.check(&ctx).unwrap();
3144
3145        assert!(
3146            result.is_empty(),
3147            "Should find photo.png via search-paths. Got: {result:?}"
3148        );
3149    }
3150
3151    #[test]
3152    fn test_search_paths_image() {
3153        let temp_dir = tempdir().unwrap();
3154        let base_path = temp_dir.path();
3155
3156        let assets_dir = base_path.join("attachments");
3157        std::fs::create_dir_all(&assets_dir).unwrap();
3158        std::fs::write(assets_dir.join("diagram.svg"), "<svg/>").unwrap();
3159
3160        let config = MD057Config {
3161            search_paths: vec![assets_dir.to_string_lossy().into_owned()],
3162            ..Default::default()
3163        };
3164        let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(base_path);
3165
3166        let content = "# Test\n\n![Diagram](diagram.svg)\n";
3167        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
3168        let result = rule.check(&ctx).unwrap();
3169
3170        assert!(
3171            result.is_empty(),
3172            "Should find diagram.svg via search-paths. Got: {result:?}"
3173        );
3174    }
3175
3176    #[test]
3177    fn test_search_paths_reference_definition() {
3178        let temp_dir = tempdir().unwrap();
3179        let base_path = temp_dir.path();
3180
3181        let assets_dir = base_path.join("images");
3182        std::fs::create_dir_all(&assets_dir).unwrap();
3183        std::fs::write(assets_dir.join("logo.png"), "fake").unwrap();
3184
3185        let config = MD057Config {
3186            search_paths: vec![assets_dir.to_string_lossy().into_owned()],
3187            ..Default::default()
3188        };
3189        let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(base_path);
3190
3191        let content = "# Test\n\nSee [logo][ref].\n\n[ref]: logo.png\n";
3192        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
3193        let result = rule.check(&ctx).unwrap();
3194
3195        assert!(
3196            result.is_empty(),
3197            "Should find logo.png via search-paths in reference definition. Got: {result:?}"
3198        );
3199    }
3200
3201    #[test]
3202    fn test_search_paths_still_warns_when_truly_missing() {
3203        let temp_dir = tempdir().unwrap();
3204        let base_path = temp_dir.path();
3205
3206        let assets_dir = base_path.join("assets");
3207        std::fs::create_dir_all(&assets_dir).unwrap();
3208
3209        let config = MD057Config {
3210            search_paths: vec![assets_dir.to_string_lossy().into_owned()],
3211            ..Default::default()
3212        };
3213        let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(base_path);
3214
3215        let content = "# Test\n\n![Missing](nonexistent.png)\n";
3216        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
3217        let result = rule.check(&ctx).unwrap();
3218
3219        assert_eq!(
3220            result.len(),
3221            1,
3222            "Should still warn when file doesn't exist in any search path. Got: {result:?}"
3223        );
3224    }
3225
3226    #[test]
3227    fn test_search_paths_nonexistent_directory() {
3228        let temp_dir = tempdir().unwrap();
3229        let base_path = temp_dir.path();
3230
3231        let config = MD057Config {
3232            search_paths: vec!["/nonexistent/path/that/does/not/exist".to_string()],
3233            ..Default::default()
3234        };
3235        let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(base_path);
3236
3237        let content = "# Test\n\n![Missing](photo.png)\n";
3238        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
3239        let result = rule.check(&ctx).unwrap();
3240
3241        assert_eq!(
3242            result.len(),
3243            1,
3244            "Nonexistent search path should not cause errors, just not find the file. Got: {result:?}"
3245        );
3246    }
3247
3248    #[test]
3249    fn test_obsidian_attachment_folder_named() {
3250        let temp_dir = tempdir().unwrap();
3251        let vault = temp_dir.path().join("vault");
3252        std::fs::create_dir_all(vault.join(".obsidian")).unwrap();
3253        std::fs::create_dir_all(vault.join("Attachments")).unwrap();
3254        std::fs::create_dir_all(vault.join("notes")).unwrap();
3255
3256        std::fs::write(
3257            vault.join(".obsidian/app.json"),
3258            r#"{"attachmentFolderPath": "Attachments"}"#,
3259        )
3260        .unwrap();
3261        std::fs::write(vault.join("Attachments/photo.png"), "fake").unwrap();
3262
3263        let notes_dir = vault.join("notes");
3264        let source_file = notes_dir.join("test.md");
3265        std::fs::write(&source_file, "# Test\n\n![Photo](photo.png)\n").unwrap();
3266
3267        let rule = MD057ExistingRelativeLinks::from_config_struct(MD057Config::default()).with_path(&notes_dir);
3268
3269        let content = "# Test\n\n![Photo](photo.png)\n";
3270        let ctx =
3271            crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, Some(source_file));
3272        let result = rule.check(&ctx).unwrap();
3273
3274        assert!(
3275            result.is_empty(),
3276            "Obsidian attachment folder should resolve photo.png. Got: {result:?}"
3277        );
3278    }
3279
3280    #[test]
3281    fn test_obsidian_attachment_same_folder_as_file() {
3282        let temp_dir = tempdir().unwrap();
3283        let vault = temp_dir.path().join("vault-rf");
3284        std::fs::create_dir_all(vault.join(".obsidian")).unwrap();
3285        std::fs::create_dir_all(vault.join("notes")).unwrap();
3286
3287        std::fs::write(vault.join(".obsidian/app.json"), r#"{"attachmentFolderPath": "./"}"#).unwrap();
3288
3289        // Image in the same directory as the file — default behavior, no extra search needed
3290        let notes_dir = vault.join("notes");
3291        let source_file = notes_dir.join("test.md");
3292        std::fs::write(&source_file, "placeholder").unwrap();
3293        std::fs::write(notes_dir.join("photo.png"), "fake").unwrap();
3294
3295        let rule = MD057ExistingRelativeLinks::from_config_struct(MD057Config::default()).with_path(&notes_dir);
3296
3297        let content = "# Test\n\n![Photo](photo.png)\n";
3298        let ctx =
3299            crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, Some(source_file));
3300        let result = rule.check(&ctx).unwrap();
3301
3302        assert!(
3303            result.is_empty(),
3304            "'./' attachment mode resolves to same folder — should work by default. Got: {result:?}"
3305        );
3306    }
3307
3308    #[test]
3309    fn test_obsidian_not_triggered_without_obsidian_flavor() {
3310        let temp_dir = tempdir().unwrap();
3311        let vault = temp_dir.path().join("vault-nf");
3312        std::fs::create_dir_all(vault.join(".obsidian")).unwrap();
3313        std::fs::create_dir_all(vault.join("Attachments")).unwrap();
3314        std::fs::create_dir_all(vault.join("notes")).unwrap();
3315
3316        std::fs::write(
3317            vault.join(".obsidian/app.json"),
3318            r#"{"attachmentFolderPath": "Attachments"}"#,
3319        )
3320        .unwrap();
3321        std::fs::write(vault.join("Attachments/photo.png"), "fake").unwrap();
3322
3323        let notes_dir = vault.join("notes");
3324        let source_file = notes_dir.join("test.md");
3325        std::fs::write(&source_file, "placeholder").unwrap();
3326
3327        let rule = MD057ExistingRelativeLinks::from_config_struct(MD057Config::default()).with_path(&notes_dir);
3328
3329        let content = "# Test\n\n![Photo](photo.png)\n";
3330        // Standard flavor — NOT Obsidian
3331        let ctx =
3332            crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, Some(source_file));
3333        let result = rule.check(&ctx).unwrap();
3334
3335        assert_eq!(
3336            result.len(),
3337            1,
3338            "Without Obsidian flavor, attachment folder should not be auto-detected. Got: {result:?}"
3339        );
3340    }
3341
3342    #[test]
3343    fn test_search_paths_combined_with_obsidian() {
3344        let temp_dir = tempdir().unwrap();
3345        let vault = temp_dir.path().join("vault-combo");
3346        std::fs::create_dir_all(vault.join(".obsidian")).unwrap();
3347        std::fs::create_dir_all(vault.join("Attachments")).unwrap();
3348        std::fs::create_dir_all(vault.join("extra-assets")).unwrap();
3349        std::fs::create_dir_all(vault.join("notes")).unwrap();
3350
3351        std::fs::write(
3352            vault.join(".obsidian/app.json"),
3353            r#"{"attachmentFolderPath": "Attachments"}"#,
3354        )
3355        .unwrap();
3356        std::fs::write(vault.join("Attachments/photo.png"), "fake").unwrap();
3357        std::fs::write(vault.join("extra-assets/diagram.svg"), "fake").unwrap();
3358
3359        let notes_dir = vault.join("notes");
3360        let source_file = notes_dir.join("test.md");
3361        std::fs::write(&source_file, "placeholder").unwrap();
3362
3363        let extra_assets_dir = vault.join("extra-assets");
3364        let config = MD057Config {
3365            search_paths: vec![extra_assets_dir.to_string_lossy().into_owned()],
3366            ..Default::default()
3367        };
3368        let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(&notes_dir);
3369
3370        // Both links should resolve: photo.png via Obsidian, diagram.svg via search-paths
3371        let content = "# Test\n\n![Photo](photo.png)\n\n![Diagram](diagram.svg)\n";
3372        let ctx =
3373            crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, Some(source_file));
3374        let result = rule.check(&ctx).unwrap();
3375
3376        assert!(
3377            result.is_empty(),
3378            "Both Obsidian attachment and search-paths should resolve. Got: {result:?}"
3379        );
3380    }
3381
3382    #[test]
3383    fn test_obsidian_attachment_subfolder_under_file() {
3384        let temp_dir = tempdir().unwrap();
3385        let vault = temp_dir.path().join("vault-sub");
3386        std::fs::create_dir_all(vault.join(".obsidian")).unwrap();
3387        std::fs::create_dir_all(vault.join("notes/assets")).unwrap();
3388
3389        std::fs::write(
3390            vault.join(".obsidian/app.json"),
3391            r#"{"attachmentFolderPath": "./assets"}"#,
3392        )
3393        .unwrap();
3394        std::fs::write(vault.join("notes/assets/photo.png"), "fake").unwrap();
3395
3396        let notes_dir = vault.join("notes");
3397        let source_file = notes_dir.join("test.md");
3398        std::fs::write(&source_file, "placeholder").unwrap();
3399
3400        let rule = MD057ExistingRelativeLinks::from_config_struct(MD057Config::default()).with_path(&notes_dir);
3401
3402        let content = "# Test\n\n![Photo](photo.png)\n";
3403        let ctx =
3404            crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, Some(source_file));
3405        let result = rule.check(&ctx).unwrap();
3406
3407        assert!(
3408            result.is_empty(),
3409            "Obsidian './assets' mode should find photo.png in <file-dir>/assets/. Got: {result:?}"
3410        );
3411    }
3412
3413    #[test]
3414    fn test_obsidian_attachment_vault_root() {
3415        let temp_dir = tempdir().unwrap();
3416        let vault = temp_dir.path().join("vault-root");
3417        std::fs::create_dir_all(vault.join(".obsidian")).unwrap();
3418        std::fs::create_dir_all(vault.join("notes")).unwrap();
3419
3420        // Empty string = vault root
3421        std::fs::write(vault.join(".obsidian/app.json"), r#"{"attachmentFolderPath": ""}"#).unwrap();
3422        std::fs::write(vault.join("photo.png"), "fake").unwrap();
3423
3424        let notes_dir = vault.join("notes");
3425        let source_file = notes_dir.join("test.md");
3426        std::fs::write(&source_file, "placeholder").unwrap();
3427
3428        let rule = MD057ExistingRelativeLinks::from_config_struct(MD057Config::default()).with_path(&notes_dir);
3429
3430        let content = "# Test\n\n![Photo](photo.png)\n";
3431        let ctx =
3432            crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, Some(source_file));
3433        let result = rule.check(&ctx).unwrap();
3434
3435        assert!(
3436            result.is_empty(),
3437            "Obsidian vault-root mode should find photo.png at vault root. Got: {result:?}"
3438        );
3439    }
3440
3441    #[test]
3442    fn test_search_paths_multiple_directories() {
3443        let temp_dir = tempdir().unwrap();
3444        let base_path = temp_dir.path();
3445
3446        let dir_a = base_path.join("dir-a");
3447        let dir_b = base_path.join("dir-b");
3448        std::fs::create_dir_all(&dir_a).unwrap();
3449        std::fs::create_dir_all(&dir_b).unwrap();
3450        std::fs::write(dir_a.join("alpha.png"), "fake").unwrap();
3451        std::fs::write(dir_b.join("beta.png"), "fake").unwrap();
3452
3453        let config = MD057Config {
3454            search_paths: vec![
3455                dir_a.to_string_lossy().into_owned(),
3456                dir_b.to_string_lossy().into_owned(),
3457            ],
3458            ..Default::default()
3459        };
3460        let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(base_path);
3461
3462        let content = "# Test\n\n![A](alpha.png)\n\n![B](beta.png)\n";
3463        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
3464        let result = rule.check(&ctx).unwrap();
3465
3466        assert!(
3467            result.is_empty(),
3468            "Should find files across multiple search paths. Got: {result:?}"
3469        );
3470    }
3471
3472    #[test]
3473    fn test_cross_file_check_with_search_paths() {
3474        use crate::workspace_index::{CrossFileLinkIndex, FileIndex, WorkspaceIndex};
3475
3476        let temp_dir = tempdir().unwrap();
3477        let base_path = temp_dir.path();
3478
3479        // Create docs directory with a markdown target in a search path
3480        let docs_dir = base_path.join("docs");
3481        std::fs::create_dir_all(&docs_dir).unwrap();
3482        std::fs::write(docs_dir.join("guide.md"), "# Guide\n").unwrap();
3483
3484        let config = MD057Config {
3485            search_paths: vec![docs_dir.to_string_lossy().into_owned()],
3486            ..Default::default()
3487        };
3488        let rule = MD057ExistingRelativeLinks::from_config_struct(config).with_path(base_path);
3489
3490        let file_path = base_path.join("README.md");
3491        std::fs::write(&file_path, "# Readme\n").unwrap();
3492
3493        let mut file_index = FileIndex::default();
3494        file_index.cross_file_links.push(CrossFileLinkIndex {
3495            target_path: "guide.md".to_string(),
3496            fragment: String::new(),
3497            line: 3,
3498            column: 1,
3499        });
3500
3501        let workspace_index = WorkspaceIndex::new();
3502
3503        let result = rule
3504            .cross_file_check(&file_path, &file_index, &workspace_index)
3505            .unwrap();
3506
3507        assert!(
3508            result.is_empty(),
3509            "cross_file_check should find guide.md via search-paths. Got: {result:?}"
3510        );
3511    }
3512
3513    #[test]
3514    fn test_cross_file_check_with_obsidian_flavor() {
3515        use crate::workspace_index::{CrossFileLinkIndex, FileIndex, WorkspaceIndex};
3516
3517        let temp_dir = tempdir().unwrap();
3518        let vault = temp_dir.path().join("vault-xf");
3519        std::fs::create_dir_all(vault.join(".obsidian")).unwrap();
3520        std::fs::create_dir_all(vault.join("Attachments")).unwrap();
3521        std::fs::create_dir_all(vault.join("notes")).unwrap();
3522
3523        std::fs::write(
3524            vault.join(".obsidian/app.json"),
3525            r#"{"attachmentFolderPath": "Attachments"}"#,
3526        )
3527        .unwrap();
3528        std::fs::write(vault.join("Attachments/ref.md"), "# Reference\n").unwrap();
3529
3530        let notes_dir = vault.join("notes");
3531        let file_path = notes_dir.join("test.md");
3532        std::fs::write(&file_path, "placeholder").unwrap();
3533
3534        let rule = MD057ExistingRelativeLinks::from_config_struct(MD057Config::default())
3535            .with_path(&notes_dir)
3536            .with_flavor(crate::config::MarkdownFlavor::Obsidian);
3537
3538        let mut file_index = FileIndex::default();
3539        file_index.cross_file_links.push(CrossFileLinkIndex {
3540            target_path: "ref.md".to_string(),
3541            fragment: String::new(),
3542            line: 3,
3543            column: 1,
3544        });
3545
3546        let workspace_index = WorkspaceIndex::new();
3547
3548        let result = rule
3549            .cross_file_check(&file_path, &file_index, &workspace_index)
3550            .unwrap();
3551
3552        assert!(
3553            result.is_empty(),
3554            "cross_file_check should find ref.md via Obsidian attachment folder. Got: {result:?}"
3555        );
3556    }
3557
3558    #[test]
3559    fn test_cross_file_check_clears_stale_cache() {
3560        // Verify that cross_file_check() resets the file existence cache so stale
3561        // entries from a previous lint cycle do not affect results.
3562        use crate::workspace_index::WorkspaceIndex;
3563
3564        let rule = MD057ExistingRelativeLinks::new();
3565
3566        // Seed the cache with a stale entry: pretend "docs/phantom.md" exists on disk.
3567        // In reality, neither the filesystem nor the workspace index has this file.
3568        {
3569            let mut cache = FILE_EXISTENCE_CACHE.lock().unwrap();
3570            cache.insert(PathBuf::from("docs/phantom.md"), true);
3571        }
3572
3573        let workspace_index = WorkspaceIndex::new();
3574
3575        let mut file_index = FileIndex::new();
3576        file_index.add_cross_file_link(CrossFileLinkIndex {
3577            target_path: "phantom.md".to_string(),
3578            fragment: "".to_string(),
3579            line: 1,
3580            column: 1,
3581        });
3582
3583        let warnings = rule
3584            .cross_file_check(Path::new("docs/index.md"), &file_index, &workspace_index)
3585            .unwrap();
3586
3587        // With cache reset, cross_file_check must detect that phantom.md does not exist
3588        assert_eq!(
3589            warnings.len(),
3590            1,
3591            "cross_file_check should report missing file after clearing stale cache. Got: {warnings:?}"
3592        );
3593        assert!(warnings[0].message.contains("phantom.md"));
3594    }
3595
3596    #[test]
3597    fn test_cross_file_check_does_not_carry_over_cache_between_runs() {
3598        // Two consecutive cross_file_check() calls should each start with a fresh cache.
3599        use crate::workspace_index::WorkspaceIndex;
3600
3601        let rule = MD057ExistingRelativeLinks::new();
3602        let workspace_index = WorkspaceIndex::new();
3603
3604        // First run: link to a file that doesn't exist
3605        let mut file_index_1 = FileIndex::new();
3606        file_index_1.add_cross_file_link(CrossFileLinkIndex {
3607            target_path: "nonexistent.md".to_string(),
3608            fragment: "".to_string(),
3609            line: 1,
3610            column: 1,
3611        });
3612
3613        let warnings_1 = rule
3614            .cross_file_check(Path::new("docs/a.md"), &file_index_1, &workspace_index)
3615            .unwrap();
3616        assert_eq!(warnings_1.len(), 1, "First run should detect missing file");
3617
3618        // Between runs, inject a stale "exists = true" entry for the same resolved path
3619        {
3620            let mut cache = FILE_EXISTENCE_CACHE.lock().unwrap();
3621            cache.insert(PathBuf::from("docs/nonexistent.md"), true);
3622        }
3623
3624        // Second run: same link, but now cache says file exists (stale data)
3625        let warnings_2 = rule
3626            .cross_file_check(Path::new("docs/a.md"), &file_index_1, &workspace_index)
3627            .unwrap();
3628
3629        // The second run must also detect the missing file because the cache should be reset
3630        assert_eq!(
3631            warnings_2.len(),
3632            1,
3633            "Second run should still detect missing file after cache reset. Got: {warnings_2:?}"
3634        );
3635    }
3636}