rumdl_lib/rules/
md051_link_fragments.rs

1use crate::rule::{CrossFileScope, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::anchor_styles::AnchorStyle;
3use crate::workspace_index::{CrossFileLinkIndex, FileIndex, HeadingIndex};
4use pulldown_cmark::LinkType;
5use regex::Regex;
6use std::collections::{HashMap, HashSet};
7use std::path::{Component, Path, PathBuf};
8use std::sync::LazyLock;
// HTML tags with id or name attributes (supports any HTML element, not just <a>).
// Capture group 1 is the attribute value without its quotes; whitespace around `=`
// is accepted. A single `captures` call yields only the first id/name attribute in
// the text it is given.
// NOTE(review): `\b` also matches right after a `-`, so `data-id="x"` is captured
// as an anchor too — confirm whether that is intended.
static HTML_ANCHOR_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\b(?:id|name)\s*=\s*["']([^"']+)["']"#).unwrap());
13
// Attribute anchor pattern for kramdown/MkDocs { #id } syntax.
// Matches {#id} or { #id } with optional spaces after the opening brace.
// Anything following the id up to the closing brace is accepted, which covers
// classes and attributes: { #id .class key=value }.
// Capture group 1 is the id (must start with an ASCII letter). Several `{ #id }`
// blocks on one line are found by iterating over all matches; within a single
// brace pair only the first `#id` is captured.
static ATTR_ANCHOR_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\{\s*#([a-zA-Z][a-zA-Z0-9_-]*)[^}]*\}"#).unwrap());
19
/// Normalize a path lexically by resolving `.` and `..` components.
///
/// No filesystem access is performed. `.` components are dropped and each `..`
/// cancels the preceding normal component. A `..` that has nothing to cancel is
/// handled as follows:
/// - at the start of a relative path (or after another preserved `..`) it is
///   kept, so `../guide.md` stays `../guide.md` instead of collapsing to
///   `guide.md` and accidentally pointing at a different file;
/// - directly after a root or prefix component it is dropped, because the root
///   has no parent.
fn normalize_path(path: &Path) -> PathBuf {
    let mut result = PathBuf::new();
    for component in path.components() {
        match component {
            Component::CurDir => {} // Skip .
            Component::ParentDir => match result.components().next_back() {
                // A regular directory name precedes: `..` cancels it.
                Some(Component::Normal(_)) => {
                    result.pop();
                }
                // Already at the root: there is nothing above it.
                Some(Component::RootDir | Component::Prefix(_)) => {}
                // Nothing left to cancel in a relative path: keep the `..`.
                Some(Component::ParentDir | Component::CurDir) | None => {
                    result.push(component.as_os_str());
                }
            },
            c => result.push(c.as_os_str()),
        }
    }
    result
}
34
/// Rule MD051: Link fragments
///
/// See [docs/md051.md](../../docs/md051.md) for full documentation, configuration, and examples.
///
/// This rule validates that link anchors (the part after #) refer to an existing anchor.
/// Same-document links (like `#heading`) are checked against the current document in `check`;
/// links to other files (like `other.md#heading`) are recorded in the file index and checked
/// against the workspace index in `cross_file_check`. External URLs are never checked.
#[derive(Clone)]
pub struct MD051LinkFragments {
    /// Anchor style used to generate heading fragments for validation
    anchor_style: AnchorStyle,
}
46
47impl Default for MD051LinkFragments {
48    fn default() -> Self {
49        Self::new()
50    }
51}
52
53impl MD051LinkFragments {
54    pub fn new() -> Self {
55        Self {
56            anchor_style: AnchorStyle::GitHub,
57        }
58    }
59
60    /// Create with specific anchor style
61    pub fn with_anchor_style(style: AnchorStyle) -> Self {
62        Self { anchor_style: style }
63    }
64
65    /// Extract all valid heading anchors from the document
66    /// Returns (markdown_anchors, html_anchors) where markdown_anchors are lowercased
67    /// for case-insensitive matching, and html_anchors are case-sensitive
68    fn extract_headings_from_context(
69        &self,
70        ctx: &crate::lint_context::LintContext,
71    ) -> (HashSet<String>, HashSet<String>) {
72        let mut markdown_headings = HashSet::with_capacity(32);
73        let mut html_anchors = HashSet::with_capacity(16);
74        let mut fragment_counts = std::collections::HashMap::new();
75
76        for line_info in &ctx.lines {
77            if line_info.in_front_matter {
78                continue;
79            }
80
81            // Skip code blocks for anchor extraction
82            if line_info.in_code_block {
83                continue;
84            }
85
86            let content = line_info.content(ctx.content);
87            let bytes = content.as_bytes();
88
89            // Extract HTML anchor tags with id/name attributes
90            if bytes.contains(&b'<') && (content.contains("id=") || content.contains("name=")) {
91                // HTML spec: only the first id attribute per element is valid
92                // Process element by element to handle multiple id attributes correctly
93                let mut pos = 0;
94                while pos < content.len() {
95                    if let Some(start) = content[pos..].find('<') {
96                        let tag_start = pos + start;
97                        if let Some(end) = content[tag_start..].find('>') {
98                            let tag_end = tag_start + end + 1;
99                            let tag = &content[tag_start..tag_end];
100
101                            // Extract first id or name attribute from this tag
102                            if let Some(caps) = HTML_ANCHOR_PATTERN.find(tag) {
103                                let matched_text = caps.as_str();
104                                if let Some(caps) = HTML_ANCHOR_PATTERN.captures(matched_text)
105                                    && let Some(id_match) = caps.get(1)
106                                {
107                                    let id = id_match.as_str();
108                                    if !id.is_empty() {
109                                        html_anchors.insert(id.to_string());
110                                    }
111                                }
112                            }
113                            pos = tag_end;
114                        } else {
115                            break;
116                        }
117                    } else {
118                        break;
119                    }
120                }
121            }
122
123            // Extract attribute anchors { #id } from non-heading lines
124            // Headings already have custom_id extracted below
125            if line_info.heading.is_none() && content.contains('{') && content.contains('#') {
126                for caps in ATTR_ANCHOR_PATTERN.captures_iter(content) {
127                    if let Some(id_match) = caps.get(1) {
128                        // Add to markdown_headings (lowercased for case-insensitive matching)
129                        markdown_headings.insert(id_match.as_str().to_lowercase());
130                    }
131                }
132            }
133
134            // Extract markdown heading anchors
135            if let Some(heading) = &line_info.heading {
136                // Custom ID from {#custom-id} syntax
137                if let Some(custom_id) = &heading.custom_id {
138                    markdown_headings.insert(custom_id.to_lowercase());
139                }
140
141                // Generate fragment directly from heading text
142                // Note: HTML stripping was removed because it interfered with arrow patterns
143                // like <-> and placeholders like <FILE>. The anchor styles handle these correctly.
144                let fragment = self.anchor_style.generate_fragment(&heading.text);
145
146                if !fragment.is_empty() {
147                    // Handle duplicate headings by appending -1, -2, etc.
148                    let final_fragment = if let Some(count) = fragment_counts.get_mut(&fragment) {
149                        let suffix = *count;
150                        *count += 1;
151                        format!("{fragment}-{suffix}")
152                    } else {
153                        fragment_counts.insert(fragment.clone(), 1);
154                        fragment
155                    };
156                    markdown_headings.insert(final_fragment);
157                }
158            }
159        }
160
161        (markdown_headings, html_anchors)
162    }
163
164    /// Fast check if URL is external (doesn't need to be validated)
165    #[inline]
166    fn is_external_url_fast(url: &str) -> bool {
167        // Quick prefix checks for common protocols
168        url.starts_with("http://")
169            || url.starts_with("https://")
170            || url.starts_with("ftp://")
171            || url.starts_with("mailto:")
172            || url.starts_with("tel:")
173            || url.starts_with("//")
174    }
175
176    /// Resolve a path by trying markdown extensions if it has no extension
177    ///
178    /// For extension-less paths (e.g., `page`), returns a list of paths to try:
179    /// 1. The original path (in case it's already in the index)
180    /// 2. The path with each markdown extension (e.g., `page.md`, `page.markdown`, etc.)
181    ///
182    /// For paths with extensions, returns just the original path.
183    #[inline]
184    fn resolve_path_with_extensions(path: &Path, extensions: &[&str]) -> Vec<PathBuf> {
185        if path.extension().is_none() {
186            // Extension-less path - try with markdown extensions
187            let mut paths = Vec::with_capacity(extensions.len() + 1);
188            // First try the exact path (in case it's already in the index)
189            paths.push(path.to_path_buf());
190            // Then try with each markdown extension
191            for ext in extensions {
192                let path_with_ext = path.with_extension(&ext[1..]); // Remove leading dot
193                paths.push(path_with_ext);
194            }
195            paths
196        } else {
197            // Path has extension - use as-is
198            vec![path.to_path_buf()]
199        }
200    }
201
202    /// Check if a path part (without fragment) is an extension-less path
203    ///
204    /// Extension-less paths are potential cross-file links that need resolution
205    /// with markdown extensions (e.g., `page#section` -> `page.md#section`).
206    ///
207    /// We recognize them as extension-less if:
208    /// 1. Path has no extension (no dot)
209    /// 2. Path is not empty
210    /// 3. Path doesn't look like a query parameter or special syntax
211    /// 4. Path contains at least one alphanumeric character (valid filename)
212    /// 5. Path contains only valid path characters (alphanumeric, slashes, hyphens, underscores)
213    ///
214    /// Optimized: single pass through characters to check both conditions.
215    #[inline]
216    fn is_extensionless_path(path_part: &str) -> bool {
217        // Quick rejections for common non-extension-less cases
218        if path_part.is_empty()
219            || path_part.contains('.')
220            || path_part.contains('?')
221            || path_part.contains('&')
222            || path_part.contains('=')
223        {
224            return false;
225        }
226
227        // Single pass: check for alphanumeric and validate all characters
228        let mut has_alphanumeric = false;
229        for c in path_part.chars() {
230            if c.is_alphanumeric() {
231                has_alphanumeric = true;
232            } else if !matches!(c, '/' | '\\' | '-' | '_') {
233                // Invalid character found - early exit
234                return false;
235            }
236        }
237
238        // Must have at least one alphanumeric character to be a valid filename
239        has_alphanumeric
240    }
241
242    /// Check if URL is a cross-file link (contains a file path before #)
243    #[inline]
244    fn is_cross_file_link(url: &str) -> bool {
245        if let Some(fragment_pos) = url.find('#') {
246            let path_part = &url[..fragment_pos];
247
248            // If there's no path part, it's just a fragment (#heading)
249            if path_part.is_empty() {
250                return false;
251            }
252
253            // Check for Liquid syntax used by Jekyll and other static site generators
254            // Liquid tags: {% ... %} for control flow and includes
255            // Liquid variables: {{ ... }} for outputting values
256            // These are template directives that reference external content and should be skipped
257            // We check for proper bracket order to avoid false positives
258            if let Some(tag_start) = path_part.find("{%")
259                && path_part[tag_start + 2..].contains("%}")
260            {
261                return true;
262            }
263            if let Some(var_start) = path_part.find("{{")
264                && path_part[var_start + 2..].contains("}}")
265            {
266                return true;
267            }
268
269            // Check if it's an absolute path (starts with /)
270            // These are links to other pages on the same site
271            if path_part.starts_with('/') {
272                return true;
273            }
274
275            // Check if it looks like a file path:
276            // - Contains a file extension (dot followed by letters)
277            // - Contains path separators
278            // - Contains relative path indicators
279            // - OR is an extension-less path with a fragment (GitHub-style: page#section)
280            let has_extension = path_part.contains('.')
281                && (
282                    // Has file extension pattern (handle query parameters by splitting on them first)
283                    {
284                    let clean_path = path_part.split('?').next().unwrap_or(path_part);
285                    // Handle files starting with dot
286                    if let Some(after_dot) = clean_path.strip_prefix('.') {
287                        let dots_count = clean_path.matches('.').count();
288                        if dots_count == 1 {
289                            // Could be ".ext" (file extension) or ".hidden" (hidden file)
290                            // Treat short alphanumeric suffixes as file extensions
291                            !after_dot.is_empty() && after_dot.len() <= 10 &&
292                            after_dot.chars().all(|c| c.is_ascii_alphanumeric())
293                        } else {
294                            // Hidden file with extension like ".hidden.txt"
295                            clean_path.split('.').next_back().is_some_and(|ext| {
296                                !ext.is_empty() && ext.len() <= 10 && ext.chars().all(|c| c.is_ascii_alphanumeric())
297                            })
298                        }
299                    } else {
300                        // Regular file path
301                        clean_path.split('.').next_back().is_some_and(|ext| {
302                            !ext.is_empty() && ext.len() <= 10 && ext.chars().all(|c| c.is_ascii_alphanumeric())
303                        })
304                    }
305                } ||
306                // Or contains path separators
307                path_part.contains('/') || path_part.contains('\\') ||
308                // Or starts with relative path indicators
309                path_part.starts_with("./") || path_part.starts_with("../")
310                );
311
312            // Extension-less paths with fragments are potential cross-file links
313            // This supports GitHub-style links like [link](page#section) that resolve to page.md#section
314            let is_extensionless = Self::is_extensionless_path(path_part);
315
316            has_extension || is_extensionless
317        } else {
318            false
319        }
320    }
321}
322
323impl Rule for MD051LinkFragments {
    /// Rule identifier; also used as the `rule_name` of emitted warnings and as
    /// the key looked up in the configuration.
    fn name(&self) -> &'static str {
        "MD051"
    }
327
    /// One-line human-readable summary of the rule.
    fn description(&self) -> &'static str {
        "Link fragments should reference valid headings"
    }
331
332    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
333        // Skip if no link fragments present
334        if !ctx.likely_has_links_or_images() {
335            return true;
336        }
337        // Check for # character (fragments)
338        !ctx.has_char('#')
339    }
340
341    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
342        let mut warnings = Vec::new();
343
344        if ctx.content.is_empty() || ctx.links.is_empty() || self.should_skip(ctx) {
345            return Ok(warnings);
346        }
347
348        let (markdown_headings, html_anchors) = self.extract_headings_from_context(ctx);
349
350        for link in &ctx.links {
351            if link.is_reference {
352                continue;
353            }
354
355            // Skip wiki-links - they reference other files and may have their own fragment validation
356            if matches!(link.link_type, LinkType::WikiLink { .. }) {
357                continue;
358            }
359
360            // Skip links inside Jinja templates
361            if ctx.is_in_jinja_range(link.byte_offset) {
362                continue;
363            }
364
365            let url = &link.url;
366
367            // Skip links without fragments or external URLs
368            if !url.contains('#') || Self::is_external_url_fast(url) {
369                continue;
370            }
371
372            // Skip mdbook template placeholders ({{#VARIABLE}})
373            // mdbook uses {{#VARIABLE}} syntax where # is part of the template, not a fragment
374            if url.contains("{{#") && url.contains("}}") {
375                continue;
376            }
377
378            // Skip Quarto/RMarkdown cross-references (@fig-, @tbl-, @sec-, @eq-, etc.)
379            // These are special cross-reference syntax, not HTML anchors
380            // Format: @prefix-identifier or just @identifier
381            if url.starts_with('@') {
382                continue;
383            }
384
385            // Cross-file links are valid if the file exists (not checked here)
386            if Self::is_cross_file_link(url) {
387                continue;
388            }
389
390            let Some(fragment_pos) = url.find('#') else {
391                continue;
392            };
393
394            let fragment = &url[fragment_pos + 1..];
395
396            // Skip Liquid template variables and filters
397            if (url.contains("{{") && fragment.contains('|')) || fragment.ends_with("}}") || fragment.ends_with("%}") {
398                continue;
399            }
400
401            if fragment.is_empty() {
402                continue;
403            }
404
405            // Validate fragment against document headings
406            // HTML anchors are case-sensitive, markdown anchors are case-insensitive
407            let found = if html_anchors.contains(fragment) {
408                true
409            } else {
410                let fragment_lower = fragment.to_lowercase();
411                markdown_headings.contains(&fragment_lower)
412            };
413
414            if !found {
415                warnings.push(LintWarning {
416                    rule_name: Some(self.name().to_string()),
417                    message: format!("Link anchor '#{fragment}' does not exist in document headings"),
418                    line: link.line,
419                    column: link.start_col + 1,
420                    end_line: link.line,
421                    end_column: link.end_col + 1,
422                    severity: Severity::Error,
423                    fix: None,
424                });
425            }
426        }
427
428        Ok(warnings)
429    }
430
    /// Return the document unchanged.
    ///
    /// MD051 does not provide auto-fix: correcting a link fragment requires human
    /// judgment to avoid incorrect fixes.
    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
        // MD051 does not provide auto-fix
        // Link fragment corrections require human judgment to avoid incorrect fixes
        Ok(ctx.content.to_string())
    }
436
    /// Expose the rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
440
441    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
442    where
443        Self: Sized,
444    {
445        // Config keys are normalized to kebab-case by the config system
446        let anchor_style = if let Some(rule_config) = config.rules.get("MD051") {
447            if let Some(style_str) = rule_config.values.get("anchor-style").and_then(|v| v.as_str()) {
448                match style_str.to_lowercase().as_str() {
449                    "kramdown" => AnchorStyle::Kramdown,
450                    "kramdown-gfm" => AnchorStyle::KramdownGfm,
451                    "jekyll" => AnchorStyle::KramdownGfm, // Backward compatibility alias
452                    _ => AnchorStyle::GitHub,
453                }
454            } else {
455                AnchorStyle::GitHub
456            }
457        } else {
458            AnchorStyle::GitHub
459        };
460
461        Box::new(MD051LinkFragments::with_anchor_style(anchor_style))
462    }
463
    /// This rule belongs to the link category.
    fn category(&self) -> RuleCategory {
        RuleCategory::Link
    }
467
    /// Declares workspace scope for cross-file analysis (see `contribute_to_index`
    /// and `cross_file_check` on this rule).
    fn cross_file_scope(&self) -> CrossFileScope {
        CrossFileScope::Workspace
    }
471
472    fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, file_index: &mut FileIndex) {
473        let mut fragment_counts = HashMap::new();
474
475        // Extract headings, HTML anchors, and attribute anchors (for other files to reference)
476        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
477            if line_info.in_front_matter {
478                continue;
479            }
480
481            // Skip code blocks for anchor extraction
482            if line_info.in_code_block {
483                continue;
484            }
485
486            let content = line_info.content(ctx.content);
487
488            // Extract HTML anchors (id or name attributes on any element)
489            if content.contains('<') && (content.contains("id=") || content.contains("name=")) {
490                let mut pos = 0;
491                while pos < content.len() {
492                    if let Some(start) = content[pos..].find('<') {
493                        let tag_start = pos + start;
494                        if let Some(end) = content[tag_start..].find('>') {
495                            let tag_end = tag_start + end + 1;
496                            let tag = &content[tag_start..tag_end];
497
498                            if let Some(caps) = HTML_ANCHOR_PATTERN.captures(tag)
499                                && let Some(id_match) = caps.get(1)
500                            {
501                                file_index.add_html_anchor(id_match.as_str().to_string());
502                            }
503                            pos = tag_end;
504                        } else {
505                            break;
506                        }
507                    } else {
508                        break;
509                    }
510                }
511            }
512
513            // Extract attribute anchors { #id } on non-heading lines
514            // Headings already have custom_id extracted via heading.custom_id
515            if line_info.heading.is_none() && content.contains("{") && content.contains("#") {
516                for caps in ATTR_ANCHOR_PATTERN.captures_iter(content) {
517                    if let Some(id_match) = caps.get(1) {
518                        file_index.add_attribute_anchor(id_match.as_str().to_string());
519                    }
520                }
521            }
522
523            // Extract heading anchors
524            if let Some(heading) = &line_info.heading {
525                let fragment = self.anchor_style.generate_fragment(&heading.text);
526
527                if !fragment.is_empty() {
528                    // Handle duplicate headings
529                    let final_fragment = if let Some(count) = fragment_counts.get_mut(&fragment) {
530                        let suffix = *count;
531                        *count += 1;
532                        format!("{fragment}-{suffix}")
533                    } else {
534                        fragment_counts.insert(fragment.clone(), 1);
535                        fragment
536                    };
537
538                    file_index.add_heading(HeadingIndex {
539                        text: heading.text.clone(),
540                        auto_anchor: final_fragment,
541                        custom_anchor: heading.custom_id.clone(),
542                        line: line_idx + 1, // 1-indexed
543                    });
544                }
545            }
546        }
547
548        // Extract cross-file links (for validation against other files)
549        for link in &ctx.links {
550            if link.is_reference {
551                continue;
552            }
553
554            let url = &link.url;
555
556            // Skip external URLs
557            if Self::is_external_url_fast(url) {
558                continue;
559            }
560
561            // Only process cross-file links with fragments
562            if Self::is_cross_file_link(url)
563                && let Some(fragment_pos) = url.find('#')
564            {
565                let path_part = &url[..fragment_pos];
566                let fragment = &url[fragment_pos + 1..];
567
568                // Skip empty fragments or template syntax
569                if fragment.is_empty() || fragment.contains("{{") || fragment.contains("{%") {
570                    continue;
571                }
572
573                file_index.add_cross_file_link(CrossFileLinkIndex {
574                    target_path: path_part.to_string(),
575                    fragment: fragment.to_string(),
576                    line: link.line,
577                    column: link.start_col + 1,
578                });
579            }
580        }
581    }
582
583    fn cross_file_check(
584        &self,
585        file_path: &Path,
586        file_index: &FileIndex,
587        workspace_index: &crate::workspace_index::WorkspaceIndex,
588    ) -> LintResult {
589        let mut warnings = Vec::new();
590
591        // Supported markdown file extensions (with leading dot, matching MD057)
592        const MARKDOWN_EXTENSIONS: &[&str] = &[
593            ".md",
594            ".markdown",
595            ".mdx",
596            ".mkd",
597            ".mkdn",
598            ".mdown",
599            ".mdwn",
600            ".qmd",
601            ".rmd",
602        ];
603
604        // Check each cross-file link in this file
605        for cross_link in &file_index.cross_file_links {
606            // Skip cross-file links without fragments - nothing to validate
607            if cross_link.fragment.is_empty() {
608                continue;
609            }
610
611            // Resolve the target file path relative to the current file
612            let base_target_path = if let Some(parent) = file_path.parent() {
613                parent.join(&cross_link.target_path)
614            } else {
615                Path::new(&cross_link.target_path).to_path_buf()
616            };
617
618            // Normalize the path (remove . and ..)
619            let base_target_path = normalize_path(&base_target_path);
620
621            // For extension-less paths, try resolving with markdown extensions
622            // This handles GitHub-style links like [link](page#section) -> page.md#section
623            let target_paths_to_try = Self::resolve_path_with_extensions(&base_target_path, MARKDOWN_EXTENSIONS);
624
625            // Try to find the target file in the workspace index
626            let mut target_file_index = None;
627
628            for target_path in &target_paths_to_try {
629                if let Some(index) = workspace_index.get_file(target_path) {
630                    target_file_index = Some(index);
631                    break;
632                }
633            }
634
635            if let Some(target_file_index) = target_file_index {
636                // Check if the fragment matches any heading in the target file (O(1) lookup)
637                if !target_file_index.has_anchor(&cross_link.fragment) {
638                    warnings.push(LintWarning {
639                        rule_name: Some(self.name().to_string()),
640                        line: cross_link.line,
641                        column: cross_link.column,
642                        end_line: cross_link.line,
643                        end_column: cross_link.column + cross_link.target_path.len() + 1 + cross_link.fragment.len(),
644                        message: format!(
645                            "Link fragment '{}' not found in '{}'",
646                            cross_link.fragment, cross_link.target_path
647                        ),
648                        severity: Severity::Error,
649                        fix: None,
650                    });
651                }
652            }
653            // If target file not in index, skip (could be external file or not in workspace)
654        }
655
656        Ok(warnings)
657    }
658
    /// Default configuration section for this rule.
    ///
    /// Parses the embedded TOML snippet (which sets `anchor-style = "github"`) and
    /// returns it keyed by `"MD051"`. Returns `None` if the snippet fails to parse.
    fn default_config_section(&self) -> Option<(String, toml::Value)> {
        let value: toml::Value = toml::from_str(
            r#"
# Anchor generation style to match your target platform
# Options: "github" (default), "kramdown-gfm", "kramdown"
# Note: "jekyll" is accepted as an alias for "kramdown-gfm" (backward compatibility)
anchor-style = "github"
"#,
        )
        .ok()?;
        Some(("MD051".to_string(), value))
    }
671}
672
673#[cfg(test)]
674mod tests {
675    use super::*;
676    use crate::lint_context::LintContext;
677
    // Quarto documents: `[@fig-…]`-style cross-references must not produce warnings,
    // while ordinary `#fragment` links are still validated (valid one passes,
    // missing one is reported exactly once).
    #[test]
    fn test_quarto_cross_references() {
        let rule = MD051LinkFragments::new();

        // Test that Quarto cross-references are skipped
        let content = r#"# Test Document

## Figures

See [@fig-plot] for the visualization.

More details in [@tbl-results] and [@sec-methods].

The equation [@eq-regression] shows the relationship.

Reference to [@lst-code] for implementation."#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Quarto, None);
        let result = rule.check(&ctx).unwrap();
        assert!(
            result.is_empty(),
            "Quarto cross-references (@fig-, @tbl-, @sec-, @eq-) should not trigger MD051 warnings. Got {} warnings",
            result.len()
        );

        // Test that normal anchors still work
        let content_with_anchor = r#"# Test

See [link](#test) for details."#;
        let ctx_anchor = LintContext::new(content_with_anchor, crate::config::MarkdownFlavor::Quarto, None);
        let result_anchor = rule.check(&ctx_anchor).unwrap();
        assert!(result_anchor.is_empty(), "Valid anchor should not trigger warning");

        // Test that invalid anchors are still flagged
        let content_invalid = r#"# Test

See [link](#nonexistent) for details."#;
        let ctx_invalid = LintContext::new(content_invalid, crate::config::MarkdownFlavor::Quarto, None);
        let result_invalid = rule.check(&ctx_invalid).unwrap();
        assert_eq!(result_invalid.len(), 1, "Invalid anchor should still trigger warning");
    }
718
    // Cross-file validation tests

    // The rule must declare workspace scope.
    #[test]
    fn test_cross_file_scope() {
        let rule = MD051LinkFragments::new();
        assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
    }
725
    // Headings are indexed in document order with their text, generated anchor
    // and, when present, the `{ #custom }` ID (which is not part of the text).
    #[test]
    fn test_contribute_to_index_extracts_headings() {
        let rule = MD051LinkFragments::new();
        let content = "# First Heading\n\n# Second { #custom }\n\n## Third";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

        let mut file_index = FileIndex::new();
        rule.contribute_to_index(&ctx, &mut file_index);

        assert_eq!(file_index.headings.len(), 3);
        assert_eq!(file_index.headings[0].text, "First Heading");
        assert_eq!(file_index.headings[0].auto_anchor, "first-heading");
        assert!(file_index.headings[0].custom_anchor.is_none());

        assert_eq!(file_index.headings[1].text, "Second");
        assert_eq!(file_index.headings[1].custom_anchor, Some("custom".to_string()));

        assert_eq!(file_index.headings[2].text, "Third");
    }
745
    // Links to other files are recorded with the path and fragment split at `#`;
    // the path is stored as written (relative, not yet resolved).
    #[test]
    fn test_contribute_to_index_extracts_cross_file_links() {
        let rule = MD051LinkFragments::new();
        let content = "See [docs](other.md#installation) and [more](../guide.md#getting-started)";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

        let mut file_index = FileIndex::new();
        rule.contribute_to_index(&ctx, &mut file_index);

        assert_eq!(file_index.cross_file_links.len(), 2);
        assert_eq!(file_index.cross_file_links[0].target_path, "other.md");
        assert_eq!(file_index.cross_file_links[0].fragment, "installation");
        assert_eq!(file_index.cross_file_links[1].target_path, "../guide.md");
        assert_eq!(file_index.cross_file_links[1].fragment, "getting-started");
    }
761
762    #[test]
763    fn test_cross_file_check_valid_fragment() {
764        use crate::workspace_index::WorkspaceIndex;
765
766        let rule = MD051LinkFragments::new();
767
768        // Build workspace index with target file
769        let mut workspace_index = WorkspaceIndex::new();
770        let mut target_file_index = FileIndex::new();
771        target_file_index.add_heading(HeadingIndex {
772            text: "Installation Guide".to_string(),
773            auto_anchor: "installation-guide".to_string(),
774            custom_anchor: None,
775            line: 1,
776        });
777        workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
778
779        // Create a FileIndex for the file being checked
780        let mut current_file_index = FileIndex::new();
781        current_file_index.add_cross_file_link(CrossFileLinkIndex {
782            target_path: "install.md".to_string(),
783            fragment: "installation-guide".to_string(),
784            line: 3,
785            column: 5,
786        });
787
788        let warnings = rule
789            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
790            .unwrap();
791
792        // Should find no warnings since fragment exists
793        assert!(warnings.is_empty());
794    }
795
796    #[test]
797    fn test_cross_file_check_invalid_fragment() {
798        use crate::workspace_index::WorkspaceIndex;
799
800        let rule = MD051LinkFragments::new();
801
802        // Build workspace index with target file
803        let mut workspace_index = WorkspaceIndex::new();
804        let mut target_file_index = FileIndex::new();
805        target_file_index.add_heading(HeadingIndex {
806            text: "Installation Guide".to_string(),
807            auto_anchor: "installation-guide".to_string(),
808            custom_anchor: None,
809            line: 1,
810        });
811        workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
812
813        // Create a FileIndex with a cross-file link pointing to non-existent fragment
814        let mut current_file_index = FileIndex::new();
815        current_file_index.add_cross_file_link(CrossFileLinkIndex {
816            target_path: "install.md".to_string(),
817            fragment: "nonexistent".to_string(),
818            line: 3,
819            column: 5,
820        });
821
822        let warnings = rule
823            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
824            .unwrap();
825
826        // Should find one warning since fragment doesn't exist
827        assert_eq!(warnings.len(), 1);
828        assert!(warnings[0].message.contains("nonexistent"));
829        assert!(warnings[0].message.contains("install.md"));
830    }
831
832    #[test]
833    fn test_cross_file_check_custom_anchor_match() {
834        use crate::workspace_index::WorkspaceIndex;
835
836        let rule = MD051LinkFragments::new();
837
838        // Build workspace index with target file that has custom anchor
839        let mut workspace_index = WorkspaceIndex::new();
840        let mut target_file_index = FileIndex::new();
841        target_file_index.add_heading(HeadingIndex {
842            text: "Installation Guide".to_string(),
843            auto_anchor: "installation-guide".to_string(),
844            custom_anchor: Some("install".to_string()),
845            line: 1,
846        });
847        workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
848
849        // Link uses custom anchor
850        let mut current_file_index = FileIndex::new();
851        current_file_index.add_cross_file_link(CrossFileLinkIndex {
852            target_path: "install.md".to_string(),
853            fragment: "install".to_string(),
854            line: 3,
855            column: 5,
856        });
857
858        let warnings = rule
859            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
860            .unwrap();
861
862        // Should find no warnings since custom anchor matches
863        assert!(warnings.is_empty());
864    }
865
866    #[test]
867    fn test_cross_file_check_target_not_in_workspace() {
868        use crate::workspace_index::WorkspaceIndex;
869
870        let rule = MD051LinkFragments::new();
871
872        // Empty workspace index
873        let workspace_index = WorkspaceIndex::new();
874
875        // Link to file not in workspace
876        let mut current_file_index = FileIndex::new();
877        current_file_index.add_cross_file_link(CrossFileLinkIndex {
878            target_path: "external.md".to_string(),
879            fragment: "heading".to_string(),
880            line: 3,
881            column: 5,
882        });
883
884        let warnings = rule
885            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
886            .unwrap();
887
888        // Should not warn about files not in workspace
889        assert!(warnings.is_empty());
890    }
891}