rumdl_lib/rules/
md051_link_fragments.rs

1use crate::rule::{CrossFileScope, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::anchor_styles::AnchorStyle;
3use crate::workspace_index::{CrossFileLinkIndex, FileIndex, HeadingIndex};
4use pulldown_cmark::LinkType;
5use regex::Regex;
6use std::collections::{HashMap, HashSet};
7use std::path::{Component, Path, PathBuf};
8use std::sync::LazyLock;
9// HTML tags with id or name attributes (supports any HTML element, not just <a>)
10// This pattern only captures the first id/name attribute in a tag
11static HTML_ANCHOR_PATTERN: LazyLock<Regex> =
12    LazyLock::new(|| Regex::new(r#"\b(?:id|name)\s*=\s*["']([^"']+)["']"#).unwrap());
13
14// Attribute anchor pattern for kramdown/MkDocs { #id } syntax
15// Matches {#id} or { #id } with optional spaces, supports multiple anchors
16// Also supports classes and attributes: { #id .class key=value }
17static ATTR_ANCHOR_PATTERN: LazyLock<Regex> =
18    LazyLock::new(|| Regex::new(r#"\{\s*#([a-zA-Z][a-zA-Z0-9_-]*)[^}]*\}"#).unwrap());
19
/// Lexically normalize a path by resolving `.` and `..` components.
///
/// The file system is never consulted (symlinks are not followed).
///
/// - `.` components are dropped.
/// - `..` removes the preceding normal component when there is one.
/// - `..` directly after a root or prefix is dropped (there is nothing above the root).
/// - `..` that would climb above the start of a relative path is kept, so
///   `../guide.md` stays `../guide.md` instead of silently turning into `guide.md`
///   and resolving to an unrelated file.
fn normalize_path(path: &Path) -> PathBuf {
    let mut result = PathBuf::new();
    for component in path.components() {
        match component {
            Component::CurDir => {} // Skip .
            Component::ParentDir => {
                let last = result.components().next_back();
                let can_pop = matches!(last, Some(Component::Normal(_)));
                let is_anchored = matches!(last, Some(Component::RootDir | Component::Prefix(_)));
                if can_pop {
                    result.pop(); // Go up one level for ..
                } else if !is_anchored {
                    // Empty so far, or already climbing: keep the `..` so the path
                    // still points outside its starting directory.
                    result.push(Component::ParentDir.as_os_str());
                }
            }
            c => result.push(c.as_os_str()),
        }
    }
    result
}
34
35/// Rule MD051: Link fragments
36///
37/// See [docs/md051.md](../../docs/md051.md) for full documentation, configuration, and examples.
38///
39/// This rule validates that link anchors (the part after #) exist in the current document.
40/// Only applies to internal document links (like #heading), not to external URLs or cross-file links.
41#[derive(Clone)]
42pub struct MD051LinkFragments {
43    /// Anchor style to use for validation
44    anchor_style: AnchorStyle,
45}
46
47impl Default for MD051LinkFragments {
48    fn default() -> Self {
49        Self::new()
50    }
51}
52
53impl MD051LinkFragments {
54    pub fn new() -> Self {
55        Self {
56            anchor_style: AnchorStyle::GitHub,
57        }
58    }
59
60    /// Create with specific anchor style
61    pub fn with_anchor_style(style: AnchorStyle) -> Self {
62        Self { anchor_style: style }
63    }
64
65    /// Extract all valid heading anchors from the document
66    /// Returns (markdown_anchors, html_anchors) where markdown_anchors are lowercased
67    /// for case-insensitive matching, and html_anchors are case-sensitive
68    fn extract_headings_from_context(
69        &self,
70        ctx: &crate::lint_context::LintContext,
71    ) -> (HashSet<String>, HashSet<String>) {
72        let mut markdown_headings = HashSet::with_capacity(32);
73        let mut html_anchors = HashSet::with_capacity(16);
74        let mut fragment_counts = std::collections::HashMap::new();
75
76        for line_info in &ctx.lines {
77            if line_info.in_front_matter {
78                continue;
79            }
80
81            // Skip code blocks for anchor extraction
82            if line_info.in_code_block {
83                continue;
84            }
85
86            let content = line_info.content(ctx.content);
87            let bytes = content.as_bytes();
88
89            // Extract HTML anchor tags with id/name attributes
90            if bytes.contains(&b'<') && (content.contains("id=") || content.contains("name=")) {
91                // HTML spec: only the first id attribute per element is valid
92                // Process element by element to handle multiple id attributes correctly
93                let mut pos = 0;
94                while pos < content.len() {
95                    if let Some(start) = content[pos..].find('<') {
96                        let tag_start = pos + start;
97                        if let Some(end) = content[tag_start..].find('>') {
98                            let tag_end = tag_start + end + 1;
99                            let tag = &content[tag_start..tag_end];
100
101                            // Extract first id or name attribute from this tag
102                            if let Some(caps) = HTML_ANCHOR_PATTERN.find(tag) {
103                                let matched_text = caps.as_str();
104                                if let Some(caps) = HTML_ANCHOR_PATTERN.captures(matched_text)
105                                    && let Some(id_match) = caps.get(1)
106                                {
107                                    let id = id_match.as_str();
108                                    if !id.is_empty() {
109                                        html_anchors.insert(id.to_string());
110                                    }
111                                }
112                            }
113                            pos = tag_end;
114                        } else {
115                            break;
116                        }
117                    } else {
118                        break;
119                    }
120                }
121            }
122
123            // Extract attribute anchors { #id } from non-heading lines
124            // Headings already have custom_id extracted below
125            if line_info.heading.is_none() && content.contains('{') && content.contains('#') {
126                for caps in ATTR_ANCHOR_PATTERN.captures_iter(content) {
127                    if let Some(id_match) = caps.get(1) {
128                        // Add to markdown_headings (lowercased for case-insensitive matching)
129                        markdown_headings.insert(id_match.as_str().to_lowercase());
130                    }
131                }
132            }
133
134            // Extract markdown heading anchors
135            if let Some(heading) = &line_info.heading {
136                // Custom ID from {#custom-id} syntax
137                if let Some(custom_id) = &heading.custom_id {
138                    markdown_headings.insert(custom_id.to_lowercase());
139                }
140
141                // Generate anchor from heading text
142                // The anchor generation algorithm handles markdown formatting and HTML tags correctly
143                let fragment = self.anchor_style.generate_fragment(&heading.text);
144
145                if !fragment.is_empty() {
146                    // Handle duplicate headings by appending -1, -2, etc.
147                    let final_fragment = if let Some(count) = fragment_counts.get_mut(&fragment) {
148                        let suffix = *count;
149                        *count += 1;
150                        format!("{fragment}-{suffix}")
151                    } else {
152                        fragment_counts.insert(fragment.clone(), 1);
153                        fragment
154                    };
155                    markdown_headings.insert(final_fragment);
156                }
157            }
158        }
159
160        (markdown_headings, html_anchors)
161    }
162
163    /// Fast check if URL is external (doesn't need to be validated)
164    #[inline]
165    fn is_external_url_fast(url: &str) -> bool {
166        // Quick prefix checks for common protocols
167        url.starts_with("http://")
168            || url.starts_with("https://")
169            || url.starts_with("ftp://")
170            || url.starts_with("mailto:")
171            || url.starts_with("tel:")
172            || url.starts_with("//")
173    }
174
175    /// Check if URL is a cross-file link (contains a file path before #)
176    #[inline]
177    fn is_cross_file_link(url: &str) -> bool {
178        if let Some(fragment_pos) = url.find('#') {
179            let path_part = &url[..fragment_pos];
180
181            // If there's no path part, it's just a fragment (#heading)
182            if path_part.is_empty() {
183                return false;
184            }
185
186            // Check for Liquid syntax used by Jekyll and other static site generators
187            // Liquid tags: {% ... %} for control flow and includes
188            // Liquid variables: {{ ... }} for outputting values
189            // These are template directives that reference external content and should be skipped
190            // We check for proper bracket order to avoid false positives
191            if let Some(tag_start) = path_part.find("{%")
192                && path_part[tag_start + 2..].contains("%}")
193            {
194                return true;
195            }
196            if let Some(var_start) = path_part.find("{{")
197                && path_part[var_start + 2..].contains("}}")
198            {
199                return true;
200            }
201
202            // Check if it's an absolute path (starts with /)
203            // These are links to other pages on the same site
204            if path_part.starts_with('/') {
205                return true;
206            }
207
208            // Check if it looks like a file path:
209            // - Contains a file extension (dot followed by letters)
210            // - Contains path separators
211            // - Contains relative path indicators
212            path_part.contains('.')
213                && (
214                    // Has file extension pattern (handle query parameters by splitting on them first)
215                    {
216                    let clean_path = path_part.split('?').next().unwrap_or(path_part);
217                    // Handle files starting with dot
218                    if let Some(after_dot) = clean_path.strip_prefix('.') {
219                        let dots_count = clean_path.matches('.').count();
220                        if dots_count == 1 {
221                            // Could be ".ext" (file extension) or ".hidden" (hidden file)
222                            // Treat short alphanumeric suffixes as file extensions
223                            !after_dot.is_empty() && after_dot.len() <= 10 &&
224                            after_dot.chars().all(|c| c.is_ascii_alphanumeric())
225                        } else {
226                            // Hidden file with extension like ".hidden.txt"
227                            clean_path.split('.').next_back().is_some_and(|ext| {
228                                !ext.is_empty() && ext.len() <= 10 && ext.chars().all(|c| c.is_ascii_alphanumeric())
229                            })
230                        }
231                    } else {
232                        // Regular file path
233                        clean_path.split('.').next_back().is_some_and(|ext| {
234                            !ext.is_empty() && ext.len() <= 10 && ext.chars().all(|c| c.is_ascii_alphanumeric())
235                        })
236                    }
237                } ||
238                // Or contains path separators
239                path_part.contains('/') || path_part.contains('\\') ||
240                // Or starts with relative path indicators
241                path_part.starts_with("./") || path_part.starts_with("../")
242                )
243        } else {
244            false
245        }
246    }
247}
248
249impl Rule for MD051LinkFragments {
250    fn name(&self) -> &'static str {
251        "MD051"
252    }
253
254    fn description(&self) -> &'static str {
255        "Link fragments should reference valid headings"
256    }
257
258    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
259        // Skip if no link fragments present
260        if !ctx.likely_has_links_or_images() {
261            return true;
262        }
263        // Check for # character (fragments)
264        !ctx.has_char('#')
265    }
266
267    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
268        let mut warnings = Vec::new();
269
270        if ctx.content.is_empty() || ctx.links.is_empty() || self.should_skip(ctx) {
271            return Ok(warnings);
272        }
273
274        let (markdown_headings, html_anchors) = self.extract_headings_from_context(ctx);
275
276        for link in &ctx.links {
277            if link.is_reference {
278                continue;
279            }
280
281            // Skip wiki-links - they reference other files and may have their own fragment validation
282            if matches!(link.link_type, LinkType::WikiLink { .. }) {
283                continue;
284            }
285
286            // Skip links inside Jinja templates
287            if ctx.is_in_jinja_range(link.byte_offset) {
288                continue;
289            }
290
291            let url = &link.url;
292
293            // Skip links without fragments or external URLs
294            if !url.contains('#') || Self::is_external_url_fast(url) {
295                continue;
296            }
297
298            // Skip mdbook template placeholders ({{#VARIABLE}})
299            // mdbook uses {{#VARIABLE}} syntax where # is part of the template, not a fragment
300            if url.contains("{{#") && url.contains("}}") {
301                continue;
302            }
303
304            // Skip Quarto/RMarkdown cross-references (@fig-, @tbl-, @sec-, @eq-, etc.)
305            // These are special cross-reference syntax, not HTML anchors
306            // Format: @prefix-identifier or just @identifier
307            if url.starts_with('@') {
308                continue;
309            }
310
311            // Cross-file links are valid if the file exists (not checked here)
312            if Self::is_cross_file_link(url) {
313                continue;
314            }
315
316            let Some(fragment_pos) = url.find('#') else {
317                continue;
318            };
319
320            let fragment = &url[fragment_pos + 1..];
321
322            // Skip Liquid template variables and filters
323            if (url.contains("{{") && fragment.contains('|')) || fragment.ends_with("}}") || fragment.ends_with("%}") {
324                continue;
325            }
326
327            if fragment.is_empty() {
328                continue;
329            }
330
331            // Validate fragment against document headings
332            // HTML anchors are case-sensitive, markdown anchors are case-insensitive
333            let found = if html_anchors.contains(fragment) {
334                true
335            } else {
336                let fragment_lower = fragment.to_lowercase();
337                markdown_headings.contains(&fragment_lower)
338            };
339
340            if !found {
341                warnings.push(LintWarning {
342                    rule_name: Some(self.name().to_string()),
343                    message: format!("Link anchor '#{fragment}' does not exist in document headings"),
344                    line: link.line,
345                    column: link.start_col + 1,
346                    end_line: link.line,
347                    end_column: link.end_col + 1,
348                    severity: Severity::Warning,
349                    fix: None,
350                });
351            }
352        }
353
354        Ok(warnings)
355    }
356
357    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
358        // MD051 does not provide auto-fix
359        // Link fragment corrections require human judgment to avoid incorrect fixes
360        Ok(ctx.content.to_string())
361    }
362
363    fn as_any(&self) -> &dyn std::any::Any {
364        self
365    }
366
367    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
368    where
369        Self: Sized,
370    {
371        // Config keys are normalized to kebab-case by the config system
372        let anchor_style = if let Some(rule_config) = config.rules.get("MD051") {
373            if let Some(style_str) = rule_config.values.get("anchor-style").and_then(|v| v.as_str()) {
374                match style_str.to_lowercase().as_str() {
375                    "kramdown" => AnchorStyle::Kramdown,
376                    "kramdown-gfm" => AnchorStyle::KramdownGfm,
377                    "jekyll" => AnchorStyle::KramdownGfm, // Backward compatibility alias
378                    _ => AnchorStyle::GitHub,
379                }
380            } else {
381                AnchorStyle::GitHub
382            }
383        } else {
384            AnchorStyle::GitHub
385        };
386
387        Box::new(MD051LinkFragments::with_anchor_style(anchor_style))
388    }
389
390    fn category(&self) -> RuleCategory {
391        RuleCategory::Link
392    }
393
394    fn cross_file_scope(&self) -> CrossFileScope {
395        CrossFileScope::Workspace
396    }
397
398    fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, file_index: &mut FileIndex) {
399        let mut fragment_counts = HashMap::new();
400
401        // Extract headings, HTML anchors, and attribute anchors (for other files to reference)
402        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
403            if line_info.in_front_matter {
404                continue;
405            }
406
407            // Skip code blocks for anchor extraction
408            if line_info.in_code_block {
409                continue;
410            }
411
412            let content = line_info.content(ctx.content);
413
414            // Extract HTML anchors (id or name attributes on any element)
415            if content.contains('<') && (content.contains("id=") || content.contains("name=")) {
416                let mut pos = 0;
417                while pos < content.len() {
418                    if let Some(start) = content[pos..].find('<') {
419                        let tag_start = pos + start;
420                        if let Some(end) = content[tag_start..].find('>') {
421                            let tag_end = tag_start + end + 1;
422                            let tag = &content[tag_start..tag_end];
423
424                            if let Some(caps) = HTML_ANCHOR_PATTERN.captures(tag)
425                                && let Some(id_match) = caps.get(1)
426                            {
427                                file_index.add_html_anchor(id_match.as_str().to_string());
428                            }
429                            pos = tag_end;
430                        } else {
431                            break;
432                        }
433                    } else {
434                        break;
435                    }
436                }
437            }
438
439            // Extract attribute anchors { #id } on non-heading lines
440            // Headings already have custom_id extracted via heading.custom_id
441            if line_info.heading.is_none() && content.contains("{") && content.contains("#") {
442                for caps in ATTR_ANCHOR_PATTERN.captures_iter(content) {
443                    if let Some(id_match) = caps.get(1) {
444                        file_index.add_attribute_anchor(id_match.as_str().to_string());
445                    }
446                }
447            }
448
449            // Extract heading anchors
450            if let Some(heading) = &line_info.heading {
451                let fragment = self.anchor_style.generate_fragment(&heading.text);
452
453                if !fragment.is_empty() {
454                    // Handle duplicate headings
455                    let final_fragment = if let Some(count) = fragment_counts.get_mut(&fragment) {
456                        let suffix = *count;
457                        *count += 1;
458                        format!("{fragment}-{suffix}")
459                    } else {
460                        fragment_counts.insert(fragment.clone(), 1);
461                        fragment
462                    };
463
464                    file_index.add_heading(HeadingIndex {
465                        text: heading.text.clone(),
466                        auto_anchor: final_fragment,
467                        custom_anchor: heading.custom_id.clone(),
468                        line: line_idx + 1, // 1-indexed
469                    });
470                }
471            }
472        }
473
474        // Extract cross-file links (for validation against other files)
475        for link in &ctx.links {
476            if link.is_reference {
477                continue;
478            }
479
480            let url = &link.url;
481
482            // Skip external URLs
483            if Self::is_external_url_fast(url) {
484                continue;
485            }
486
487            // Only process cross-file links with fragments
488            if Self::is_cross_file_link(url)
489                && let Some(fragment_pos) = url.find('#')
490            {
491                let path_part = &url[..fragment_pos];
492                let fragment = &url[fragment_pos + 1..];
493
494                // Skip empty fragments or template syntax
495                if fragment.is_empty() || fragment.contains("{{") || fragment.contains("{%") {
496                    continue;
497                }
498
499                file_index.add_cross_file_link(CrossFileLinkIndex {
500                    target_path: path_part.to_string(),
501                    fragment: fragment.to_string(),
502                    line: link.line,
503                    column: link.start_col + 1,
504                });
505            }
506        }
507    }
508
509    fn cross_file_check(
510        &self,
511        file_path: &Path,
512        file_index: &FileIndex,
513        workspace_index: &crate::workspace_index::WorkspaceIndex,
514    ) -> LintResult {
515        let mut warnings = Vec::new();
516
517        // Check each cross-file link in this file
518        for cross_link in &file_index.cross_file_links {
519            // Skip cross-file links without fragments - nothing to validate
520            if cross_link.fragment.is_empty() {
521                continue;
522            }
523
524            // Resolve the target file path relative to the current file
525            let target_path = if let Some(parent) = file_path.parent() {
526                parent.join(&cross_link.target_path)
527            } else {
528                Path::new(&cross_link.target_path).to_path_buf()
529            };
530
531            // Normalize the path (remove . and ..)
532            let target_path = normalize_path(&target_path);
533
534            // Look up the target file in the workspace index
535            if let Some(target_file_index) = workspace_index.get_file(&target_path) {
536                // Check if the fragment matches any heading in the target file (O(1) lookup)
537                if !target_file_index.has_anchor(&cross_link.fragment) {
538                    warnings.push(LintWarning {
539                        rule_name: Some(self.name().to_string()),
540                        line: cross_link.line,
541                        column: cross_link.column,
542                        end_line: cross_link.line,
543                        end_column: cross_link.column + cross_link.target_path.len() + 1 + cross_link.fragment.len(),
544                        message: format!(
545                            "Link fragment '{}' not found in '{}'",
546                            cross_link.fragment, cross_link.target_path
547                        ),
548                        severity: Severity::Warning,
549                        fix: None,
550                    });
551                }
552            }
553            // If target file not in index, skip (could be external file or not in workspace)
554        }
555
556        Ok(warnings)
557    }
558
559    fn default_config_section(&self) -> Option<(String, toml::Value)> {
560        let value: toml::Value = toml::from_str(
561            r#"
562# Anchor generation style to match your target platform
563# Options: "github" (default), "kramdown-gfm", "kramdown"
564# Note: "jekyll" is accepted as an alias for "kramdown-gfm" (backward compatibility)
565anchor-style = "github"
566"#,
567        )
568        .ok()?;
569        Some(("MD051".to_string(), value))
570    }
571}
572
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;
    use crate::workspace_index::WorkspaceIndex;

    /// Workspace containing only `docs/install.md`, which has a single heading
    /// "Installation Guide" (auto anchor `installation-guide`) and the given custom anchor.
    fn workspace_with_install_doc(custom_anchor: Option<&str>) -> WorkspaceIndex {
        let mut target_file_index = FileIndex::new();
        target_file_index.add_heading(HeadingIndex {
            text: "Installation Guide".to_string(),
            auto_anchor: "installation-guide".to_string(),
            custom_anchor: custom_anchor.map(str::to_string),
            line: 1,
        });

        let mut workspace_index = WorkspaceIndex::new();
        workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
        workspace_index
    }

    /// Runs the cross-file check for `docs/readme.md` as if it contained exactly one link
    /// `target_path#fragment` at line 3, column 5.
    fn check_single_link(workspace_index: &WorkspaceIndex, target_path: &str, fragment: &str) -> Vec<LintWarning> {
        let mut current_file_index = FileIndex::new();
        current_file_index.add_cross_file_link(CrossFileLinkIndex {
            target_path: target_path.to_string(),
            fragment: fragment.to_string(),
            line: 3,
            column: 5,
        });

        MD051LinkFragments::new()
            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, workspace_index)
            .unwrap()
    }

    #[test]
    fn test_quarto_cross_references() {
        let rule = MD051LinkFragments::new();
        let lint = |content: &str| {
            let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Quarto, None);
            rule.check(&ctx).unwrap()
        };

        // Quarto cross-references are skipped
        let content = r#"# Test Document

## Figures

See [@fig-plot] for the visualization.

More details in [@tbl-results] and [@sec-methods].

The equation [@eq-regression] shows the relationship.

Reference to [@lst-code] for implementation."#;
        let result = lint(content);
        assert!(
            result.is_empty(),
            "Quarto cross-references (@fig-, @tbl-, @sec-, @eq-) should not trigger MD051 warnings. Got {} warnings",
            result.len()
        );

        // Normal anchors still work
        let content_with_anchor = r#"# Test

See [link](#test) for details."#;
        let result_anchor = lint(content_with_anchor);
        assert!(result_anchor.is_empty(), "Valid anchor should not trigger warning");

        // Invalid anchors are still flagged
        let content_invalid = r#"# Test

See [link](#nonexistent) for details."#;
        let result_invalid = lint(content_invalid);
        assert_eq!(result_invalid.len(), 1, "Invalid anchor should still trigger warning");
    }

    // Cross-file validation tests
    #[test]
    fn test_cross_file_scope() {
        let scope = MD051LinkFragments::new().cross_file_scope();
        assert_eq!(scope, CrossFileScope::Workspace);
    }

    #[test]
    fn test_contribute_to_index_extracts_headings() {
        let content = "# First Heading\n\n# Second { #custom }\n\n## Third";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

        let mut file_index = FileIndex::new();
        MD051LinkFragments::new().contribute_to_index(&ctx, &mut file_index);

        let headings = &file_index.headings;
        assert_eq!(headings.len(), 3);

        assert_eq!(headings[0].text, "First Heading");
        assert_eq!(headings[0].auto_anchor, "first-heading");
        assert!(headings[0].custom_anchor.is_none());

        assert_eq!(headings[1].text, "Second");
        assert_eq!(headings[1].custom_anchor, Some("custom".to_string()));

        assert_eq!(headings[2].text, "Third");
    }

    #[test]
    fn test_contribute_to_index_extracts_cross_file_links() {
        let content = "See [docs](other.md#installation) and [more](../guide.md#getting-started)";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

        let mut file_index = FileIndex::new();
        MD051LinkFragments::new().contribute_to_index(&ctx, &mut file_index);

        let links = &file_index.cross_file_links;
        assert_eq!(links.len(), 2);
        assert_eq!(links[0].target_path, "other.md");
        assert_eq!(links[0].fragment, "installation");
        assert_eq!(links[1].target_path, "../guide.md");
        assert_eq!(links[1].fragment, "getting-started");
    }

    #[test]
    fn test_cross_file_check_valid_fragment() {
        let workspace_index = workspace_with_install_doc(None);

        let warnings = check_single_link(&workspace_index, "install.md", "installation-guide");

        // Should find no warnings since fragment exists
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_cross_file_check_invalid_fragment() {
        let workspace_index = workspace_with_install_doc(None);

        // Link points to a fragment the target file does not have
        let warnings = check_single_link(&workspace_index, "install.md", "nonexistent");

        // Should find one warning since fragment doesn't exist
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("nonexistent"));
        assert!(warnings[0].message.contains("install.md"));
    }

    #[test]
    fn test_cross_file_check_custom_anchor_match() {
        // Target heading carries a custom anchor, and the link uses it
        let workspace_index = workspace_with_install_doc(Some("install"));

        let warnings = check_single_link(&workspace_index, "install.md", "install");

        // Should find no warnings since custom anchor matches
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_cross_file_check_target_not_in_workspace() {
        // Empty workspace index: the link target is unknown
        let workspace_index = WorkspaceIndex::new();

        let warnings = check_single_link(&workspace_index, "external.md", "heading");

        // Should not warn about files not in workspace
        assert!(warnings.is_empty());
    }
}