rumdl_lib/rules/
md051_link_fragments.rs

1use crate::rule::{CrossFileScope, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::anchor_styles::AnchorStyle;
3use crate::workspace_index::{CrossFileLinkIndex, FileIndex, HeadingIndex};
4use pulldown_cmark::LinkType;
5use regex::Regex;
6use std::collections::{HashMap, HashSet};
7use std::path::{Component, Path, PathBuf};
8use std::sync::LazyLock;
9// HTML tags with id or name attributes (supports any HTML element, not just <a>)
10// This pattern only captures the first id/name attribute in a tag
11static HTML_ANCHOR_PATTERN: LazyLock<Regex> =
12    LazyLock::new(|| Regex::new(r#"\b(?:id|name)\s*=\s*["']([^"']+)["']"#).unwrap());
13
// Attribute anchor pattern for kramdown/MkDocs { #id } syntax
// Matches {#id} or { #id } with optional whitespace before the `#`. The id must start with
// an ASCII letter and may continue with ASCII letters, digits, `_` or `-`; it is exposed as
// capture group 1.
// Everything between the id and the closing brace is accepted but not captured, which
// allows classes and attributes: { #id .class key=value }
// The `#id` has to be the first item inside the braces. Iterate with `captures_iter` to
// collect several anchors from a single line.
static ATTR_ANCHOR_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\{\s*#([a-zA-Z][a-zA-Z0-9_-]*)[^}]*\}"#).unwrap());
19
/// Normalize a path lexically by resolving `.` and `..` components.
///
/// - `.` components are dropped.
/// - `..` removes the preceding normal component (`a/b/../c` becomes `a/c`).
/// - `..` directly below a root or prefix is a no-op (`/../a` becomes `/a`).
/// - `..` with nothing left to remove is kept, so a path that escapes its starting
///   directory stays distinguishable (`../guide.md` stays `../guide.md` instead of
///   collapsing to `guide.md`, which would name a different file).
///
/// The file system is never consulted, so symlinks are not resolved.
fn normalize_path(path: &Path) -> PathBuf {
    let mut result = PathBuf::new();
    for component in path.components() {
        match component {
            Component::CurDir => {} // Skip .
            Component::ParentDir => match result.components().next_back() {
                // A real directory precedes the `..`: step out of it.
                Some(Component::Normal(_)) => {
                    result.pop();
                }
                // Already at the root: there is nothing above it.
                Some(Component::RootDir | Component::Prefix(_)) => {}
                // Empty so far, or only `..` components: keep climbing.
                _ => result.push(".."),
            },
            c => result.push(c.as_os_str()),
        }
    }
    result
}
34
/// Rule MD051: Link fragments
///
/// See [docs/md051.md](../../docs/md051.md) for full documentation, configuration, and examples.
///
/// This rule validates that link anchors (the part after #) point at an anchor that exists:
/// - `check` validates same-document links (like `#heading`) against the document's
///   headings, `{ #id }` attribute anchors, and HTML `id`/`name` attributes.
/// - `contribute_to_index` and `cross_file_check` validate cross-file links (like
///   `other.md#heading`) against files that are present in the workspace index.
///
/// External URLs are never validated.
#[derive(Clone)]
pub struct MD051LinkFragments {
    /// Anchor style used to generate fragments from heading text
    anchor_style: AnchorStyle,
}
46
47impl Default for MD051LinkFragments {
48    fn default() -> Self {
49        Self::new()
50    }
51}
52
53impl MD051LinkFragments {
54    pub fn new() -> Self {
55        Self {
56            anchor_style: AnchorStyle::GitHub,
57        }
58    }
59
60    /// Create with specific anchor style
61    pub fn with_anchor_style(style: AnchorStyle) -> Self {
62        Self { anchor_style: style }
63    }
64
65    /// Extract all valid heading anchors from the document
66    /// Returns (markdown_anchors, html_anchors) where markdown_anchors are lowercased
67    /// for case-insensitive matching, and html_anchors are case-sensitive
68    fn extract_headings_from_context(
69        &self,
70        ctx: &crate::lint_context::LintContext,
71    ) -> (HashSet<String>, HashSet<String>) {
72        let mut markdown_headings = HashSet::with_capacity(32);
73        let mut html_anchors = HashSet::with_capacity(16);
74        let mut fragment_counts = std::collections::HashMap::new();
75
76        for line_info in &ctx.lines {
77            if line_info.in_front_matter {
78                continue;
79            }
80
81            // Skip code blocks for anchor extraction
82            if line_info.in_code_block {
83                continue;
84            }
85
86            let content = line_info.content(ctx.content);
87            let bytes = content.as_bytes();
88
89            // Extract HTML anchor tags with id/name attributes
90            if bytes.contains(&b'<') && (content.contains("id=") || content.contains("name=")) {
91                // HTML spec: only the first id attribute per element is valid
92                // Process element by element to handle multiple id attributes correctly
93                let mut pos = 0;
94                while pos < content.len() {
95                    if let Some(start) = content[pos..].find('<') {
96                        let tag_start = pos + start;
97                        if let Some(end) = content[tag_start..].find('>') {
98                            let tag_end = tag_start + end + 1;
99                            let tag = &content[tag_start..tag_end];
100
101                            // Extract first id or name attribute from this tag
102                            if let Some(caps) = HTML_ANCHOR_PATTERN.find(tag) {
103                                let matched_text = caps.as_str();
104                                if let Some(caps) = HTML_ANCHOR_PATTERN.captures(matched_text)
105                                    && let Some(id_match) = caps.get(1)
106                                {
107                                    let id = id_match.as_str();
108                                    if !id.is_empty() {
109                                        html_anchors.insert(id.to_string());
110                                    }
111                                }
112                            }
113                            pos = tag_end;
114                        } else {
115                            break;
116                        }
117                    } else {
118                        break;
119                    }
120                }
121            }
122
123            // Extract attribute anchors { #id } from non-heading lines
124            // Headings already have custom_id extracted below
125            if line_info.heading.is_none() && content.contains('{') && content.contains('#') {
126                for caps in ATTR_ANCHOR_PATTERN.captures_iter(content) {
127                    if let Some(id_match) = caps.get(1) {
128                        // Add to markdown_headings (lowercased for case-insensitive matching)
129                        markdown_headings.insert(id_match.as_str().to_lowercase());
130                    }
131                }
132            }
133
134            // Extract markdown heading anchors
135            if let Some(heading) = &line_info.heading {
136                // Custom ID from {#custom-id} syntax
137                if let Some(custom_id) = &heading.custom_id {
138                    markdown_headings.insert(custom_id.to_lowercase());
139                }
140
141                // Generate fragment directly from heading text
142                // Note: HTML stripping was removed because it interfered with arrow patterns
143                // like <-> and placeholders like <FILE>. The anchor styles handle these correctly.
144                let fragment = self.anchor_style.generate_fragment(&heading.text);
145
146                if !fragment.is_empty() {
147                    // Handle duplicate headings by appending -1, -2, etc.
148                    let final_fragment = if let Some(count) = fragment_counts.get_mut(&fragment) {
149                        let suffix = *count;
150                        *count += 1;
151                        format!("{fragment}-{suffix}")
152                    } else {
153                        fragment_counts.insert(fragment.clone(), 1);
154                        fragment
155                    };
156                    markdown_headings.insert(final_fragment);
157                }
158            }
159        }
160
161        (markdown_headings, html_anchors)
162    }
163
164    /// Fast check if URL is external (doesn't need to be validated)
165    #[inline]
166    fn is_external_url_fast(url: &str) -> bool {
167        // Quick prefix checks for common protocols
168        url.starts_with("http://")
169            || url.starts_with("https://")
170            || url.starts_with("ftp://")
171            || url.starts_with("mailto:")
172            || url.starts_with("tel:")
173            || url.starts_with("//")
174    }
175
176    /// Resolve a path by trying markdown extensions if it has no extension
177    ///
178    /// For extension-less paths (e.g., `page`), returns a list of paths to try:
179    /// 1. The original path (in case it's already in the index)
180    /// 2. The path with each markdown extension (e.g., `page.md`, `page.markdown`, etc.)
181    ///
182    /// For paths with extensions, returns just the original path.
183    #[inline]
184    fn resolve_path_with_extensions(path: &Path, extensions: &[&str]) -> Vec<PathBuf> {
185        if path.extension().is_none() {
186            // Extension-less path - try with markdown extensions
187            let mut paths = Vec::with_capacity(extensions.len() + 1);
188            // First try the exact path (in case it's already in the index)
189            paths.push(path.to_path_buf());
190            // Then try with each markdown extension
191            for ext in extensions {
192                let path_with_ext = path.with_extension(&ext[1..]); // Remove leading dot
193                paths.push(path_with_ext);
194            }
195            paths
196        } else {
197            // Path has extension - use as-is
198            vec![path.to_path_buf()]
199        }
200    }
201
202    /// Check if a path part (without fragment) is an extension-less path
203    ///
204    /// Extension-less paths are potential cross-file links that need resolution
205    /// with markdown extensions (e.g., `page#section` -> `page.md#section`).
206    ///
207    /// We recognize them as extension-less if:
208    /// 1. Path has no extension (no dot)
209    /// 2. Path is not empty
210    /// 3. Path doesn't look like a query parameter or special syntax
211    /// 4. Path contains at least one alphanumeric character (valid filename)
212    /// 5. Path contains only valid path characters (alphanumeric, slashes, hyphens, underscores)
213    ///
214    /// Optimized: single pass through characters to check both conditions.
215    #[inline]
216    fn is_extensionless_path(path_part: &str) -> bool {
217        // Quick rejections for common non-extension-less cases
218        if path_part.is_empty()
219            || path_part.contains('.')
220            || path_part.contains('?')
221            || path_part.contains('&')
222            || path_part.contains('=')
223        {
224            return false;
225        }
226
227        // Single pass: check for alphanumeric and validate all characters
228        let mut has_alphanumeric = false;
229        for c in path_part.chars() {
230            if c.is_alphanumeric() {
231                has_alphanumeric = true;
232            } else if !matches!(c, '/' | '\\' | '-' | '_') {
233                // Invalid character found - early exit
234                return false;
235            }
236        }
237
238        // Must have at least one alphanumeric character to be a valid filename
239        has_alphanumeric
240    }
241
242    /// Check if URL is a cross-file link (contains a file path before #)
243    #[inline]
244    fn is_cross_file_link(url: &str) -> bool {
245        if let Some(fragment_pos) = url.find('#') {
246            let path_part = &url[..fragment_pos];
247
248            // If there's no path part, it's just a fragment (#heading)
249            if path_part.is_empty() {
250                return false;
251            }
252
253            // Check for Liquid syntax used by Jekyll and other static site generators
254            // Liquid tags: {% ... %} for control flow and includes
255            // Liquid variables: {{ ... }} for outputting values
256            // These are template directives that reference external content and should be skipped
257            // We check for proper bracket order to avoid false positives
258            if let Some(tag_start) = path_part.find("{%")
259                && path_part[tag_start + 2..].contains("%}")
260            {
261                return true;
262            }
263            if let Some(var_start) = path_part.find("{{")
264                && path_part[var_start + 2..].contains("}}")
265            {
266                return true;
267            }
268
269            // Check if it's an absolute path (starts with /)
270            // These are links to other pages on the same site
271            if path_part.starts_with('/') {
272                return true;
273            }
274
275            // Check if it looks like a file path:
276            // - Contains a file extension (dot followed by letters)
277            // - Contains path separators
278            // - Contains relative path indicators
279            // - OR is an extension-less path with a fragment (GitHub-style: page#section)
280            let has_extension = path_part.contains('.')
281                && (
282                    // Has file extension pattern (handle query parameters by splitting on them first)
283                    {
284                    let clean_path = path_part.split('?').next().unwrap_or(path_part);
285                    // Handle files starting with dot
286                    if let Some(after_dot) = clean_path.strip_prefix('.') {
287                        let dots_count = clean_path.matches('.').count();
288                        if dots_count == 1 {
289                            // Could be ".ext" (file extension) or ".hidden" (hidden file)
290                            // Treat short alphanumeric suffixes as file extensions
291                            !after_dot.is_empty() && after_dot.len() <= 10 &&
292                            after_dot.chars().all(|c| c.is_ascii_alphanumeric())
293                        } else {
294                            // Hidden file with extension like ".hidden.txt"
295                            clean_path.split('.').next_back().is_some_and(|ext| {
296                                !ext.is_empty() && ext.len() <= 10 && ext.chars().all(|c| c.is_ascii_alphanumeric())
297                            })
298                        }
299                    } else {
300                        // Regular file path
301                        clean_path.split('.').next_back().is_some_and(|ext| {
302                            !ext.is_empty() && ext.len() <= 10 && ext.chars().all(|c| c.is_ascii_alphanumeric())
303                        })
304                    }
305                } ||
306                // Or contains path separators
307                path_part.contains('/') || path_part.contains('\\') ||
308                // Or starts with relative path indicators
309                path_part.starts_with("./") || path_part.starts_with("../")
310                );
311
312            // Extension-less paths with fragments are potential cross-file links
313            // This supports GitHub-style links like [link](page#section) that resolve to page.md#section
314            let is_extensionless = Self::is_extensionless_path(path_part);
315
316            has_extension || is_extensionless
317        } else {
318            false
319        }
320    }
321}
322
323impl Rule for MD051LinkFragments {
324    fn name(&self) -> &'static str {
325        "MD051"
326    }
327
328    fn description(&self) -> &'static str {
329        "Link fragments should reference valid headings"
330    }
331
332    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
333        // Skip if no link fragments present
334        if !ctx.likely_has_links_or_images() {
335            return true;
336        }
337        // Check for # character (fragments)
338        !ctx.has_char('#')
339    }
340
341    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
342        let mut warnings = Vec::new();
343
344        if ctx.content.is_empty() || ctx.links.is_empty() || self.should_skip(ctx) {
345            return Ok(warnings);
346        }
347
348        let (markdown_headings, html_anchors) = self.extract_headings_from_context(ctx);
349
350        for link in &ctx.links {
351            if link.is_reference {
352                continue;
353            }
354
355            // Skip wiki-links - they reference other files and may have their own fragment validation
356            if matches!(link.link_type, LinkType::WikiLink { .. }) {
357                continue;
358            }
359
360            // Skip links inside Jinja templates
361            if ctx.is_in_jinja_range(link.byte_offset) {
362                continue;
363            }
364
365            // Skip Quarto/Pandoc citations ([@citation], @citation)
366            // Citations are bibliography references, not link fragments
367            if ctx.flavor == crate::config::MarkdownFlavor::Quarto && ctx.is_in_citation(link.byte_offset) {
368                continue;
369            }
370
371            // Skip links inside shortcodes ({{< ... >}} or {{% ... %}})
372            // Shortcodes may contain template syntax that looks like fragment links
373            if ctx.is_in_shortcode(link.byte_offset) {
374                continue;
375            }
376
377            let url = &link.url;
378
379            // Skip links without fragments or external URLs
380            if !url.contains('#') || Self::is_external_url_fast(url) {
381                continue;
382            }
383
384            // Skip mdbook template placeholders ({{#VARIABLE}})
385            // mdbook uses {{#VARIABLE}} syntax where # is part of the template, not a fragment
386            if url.contains("{{#") && url.contains("}}") {
387                continue;
388            }
389
390            // Skip Quarto/RMarkdown cross-references (@fig-, @tbl-, @sec-, @eq-, etc.)
391            // These are special cross-reference syntax, not HTML anchors
392            // Format: @prefix-identifier or just @identifier
393            if url.starts_with('@') {
394                continue;
395            }
396
397            // Cross-file links are valid if the file exists (not checked here)
398            if Self::is_cross_file_link(url) {
399                continue;
400            }
401
402            let Some(fragment_pos) = url.find('#') else {
403                continue;
404            };
405
406            let fragment = &url[fragment_pos + 1..];
407
408            // Skip Liquid template variables and filters
409            if (url.contains("{{") && fragment.contains('|')) || fragment.ends_with("}}") || fragment.ends_with("%}") {
410                continue;
411            }
412
413            if fragment.is_empty() {
414                continue;
415            }
416
417            // Validate fragment against document headings
418            // HTML anchors are case-sensitive, markdown anchors are case-insensitive
419            let found = if html_anchors.contains(fragment) {
420                true
421            } else {
422                let fragment_lower = fragment.to_lowercase();
423                markdown_headings.contains(&fragment_lower)
424            };
425
426            if !found {
427                warnings.push(LintWarning {
428                    rule_name: Some(self.name().to_string()),
429                    message: format!("Link anchor '#{fragment}' does not exist in document headings"),
430                    line: link.line,
431                    column: link.start_col + 1,
432                    end_line: link.line,
433                    end_column: link.end_col + 1,
434                    severity: Severity::Error,
435                    fix: None,
436                });
437            }
438        }
439
440        Ok(warnings)
441    }
442
    /// Returns the document content unchanged; this rule never rewrites anything.
    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
        // MD051 does not provide auto-fix
        // Link fragment corrections require human judgment to avoid incorrect fixes
        Ok(ctx.content.to_string())
    }
448
    /// Returns this rule as a `&dyn Any`.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
452
453    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
454    where
455        Self: Sized,
456    {
457        // Config keys are normalized to kebab-case by the config system
458        let anchor_style = if let Some(rule_config) = config.rules.get("MD051") {
459            if let Some(style_str) = rule_config.values.get("anchor-style").and_then(|v| v.as_str()) {
460                match style_str.to_lowercase().as_str() {
461                    "kramdown" => AnchorStyle::Kramdown,
462                    "kramdown-gfm" => AnchorStyle::KramdownGfm,
463                    "jekyll" => AnchorStyle::KramdownGfm, // Backward compatibility alias
464                    _ => AnchorStyle::GitHub,
465                }
466            } else {
467                AnchorStyle::GitHub
468            }
469        } else {
470            AnchorStyle::GitHub
471        };
472
473        Box::new(MD051LinkFragments::with_anchor_style(anchor_style))
474    }
475
    /// This rule belongs to the link category.
    fn category(&self) -> RuleCategory {
        RuleCategory::Link
    }
479
    /// Declares workspace scope for cross-file analysis (see `contribute_to_index` and
    /// `cross_file_check`).
    fn cross_file_scope(&self) -> CrossFileScope {
        CrossFileScope::Workspace
    }
483
484    fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, file_index: &mut FileIndex) {
485        let mut fragment_counts = HashMap::new();
486
487        // Extract headings, HTML anchors, and attribute anchors (for other files to reference)
488        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
489            if line_info.in_front_matter {
490                continue;
491            }
492
493            // Skip code blocks for anchor extraction
494            if line_info.in_code_block {
495                continue;
496            }
497
498            let content = line_info.content(ctx.content);
499
500            // Extract HTML anchors (id or name attributes on any element)
501            if content.contains('<') && (content.contains("id=") || content.contains("name=")) {
502                let mut pos = 0;
503                while pos < content.len() {
504                    if let Some(start) = content[pos..].find('<') {
505                        let tag_start = pos + start;
506                        if let Some(end) = content[tag_start..].find('>') {
507                            let tag_end = tag_start + end + 1;
508                            let tag = &content[tag_start..tag_end];
509
510                            if let Some(caps) = HTML_ANCHOR_PATTERN.captures(tag)
511                                && let Some(id_match) = caps.get(1)
512                            {
513                                file_index.add_html_anchor(id_match.as_str().to_string());
514                            }
515                            pos = tag_end;
516                        } else {
517                            break;
518                        }
519                    } else {
520                        break;
521                    }
522                }
523            }
524
525            // Extract attribute anchors { #id } on non-heading lines
526            // Headings already have custom_id extracted via heading.custom_id
527            if line_info.heading.is_none() && content.contains("{") && content.contains("#") {
528                for caps in ATTR_ANCHOR_PATTERN.captures_iter(content) {
529                    if let Some(id_match) = caps.get(1) {
530                        file_index.add_attribute_anchor(id_match.as_str().to_string());
531                    }
532                }
533            }
534
535            // Extract heading anchors
536            if let Some(heading) = &line_info.heading {
537                let fragment = self.anchor_style.generate_fragment(&heading.text);
538
539                if !fragment.is_empty() {
540                    // Handle duplicate headings
541                    let final_fragment = if let Some(count) = fragment_counts.get_mut(&fragment) {
542                        let suffix = *count;
543                        *count += 1;
544                        format!("{fragment}-{suffix}")
545                    } else {
546                        fragment_counts.insert(fragment.clone(), 1);
547                        fragment
548                    };
549
550                    file_index.add_heading(HeadingIndex {
551                        text: heading.text.clone(),
552                        auto_anchor: final_fragment,
553                        custom_anchor: heading.custom_id.clone(),
554                        line: line_idx + 1, // 1-indexed
555                    });
556                }
557            }
558        }
559
560        // Extract cross-file links (for validation against other files)
561        for link in &ctx.links {
562            if link.is_reference {
563                continue;
564            }
565
566            // Skip wiki-links - they use a different linking system and are not validated
567            // as relative file paths
568            if matches!(link.link_type, LinkType::WikiLink { .. }) {
569                continue;
570            }
571
572            let url = &link.url;
573
574            // Skip external URLs
575            if Self::is_external_url_fast(url) {
576                continue;
577            }
578
579            // Only process cross-file links with fragments
580            if Self::is_cross_file_link(url)
581                && let Some(fragment_pos) = url.find('#')
582            {
583                let path_part = &url[..fragment_pos];
584                let fragment = &url[fragment_pos + 1..];
585
586                // Skip empty fragments or template syntax
587                if fragment.is_empty() || fragment.contains("{{") || fragment.contains("{%") {
588                    continue;
589                }
590
591                file_index.add_cross_file_link(CrossFileLinkIndex {
592                    target_path: path_part.to_string(),
593                    fragment: fragment.to_string(),
594                    line: link.line,
595                    column: link.start_col + 1,
596                });
597            }
598        }
599    }
600
601    fn cross_file_check(
602        &self,
603        file_path: &Path,
604        file_index: &FileIndex,
605        workspace_index: &crate::workspace_index::WorkspaceIndex,
606    ) -> LintResult {
607        let mut warnings = Vec::new();
608
609        // Supported markdown file extensions (with leading dot, matching MD057)
610        const MARKDOWN_EXTENSIONS: &[&str] = &[
611            ".md",
612            ".markdown",
613            ".mdx",
614            ".mkd",
615            ".mkdn",
616            ".mdown",
617            ".mdwn",
618            ".qmd",
619            ".rmd",
620        ];
621
622        // Check each cross-file link in this file
623        for cross_link in &file_index.cross_file_links {
624            // Skip cross-file links without fragments - nothing to validate
625            if cross_link.fragment.is_empty() {
626                continue;
627            }
628
629            // Resolve the target file path relative to the current file
630            let base_target_path = if let Some(parent) = file_path.parent() {
631                parent.join(&cross_link.target_path)
632            } else {
633                Path::new(&cross_link.target_path).to_path_buf()
634            };
635
636            // Normalize the path (remove . and ..)
637            let base_target_path = normalize_path(&base_target_path);
638
639            // For extension-less paths, try resolving with markdown extensions
640            // This handles GitHub-style links like [link](page#section) -> page.md#section
641            let target_paths_to_try = Self::resolve_path_with_extensions(&base_target_path, MARKDOWN_EXTENSIONS);
642
643            // Try to find the target file in the workspace index
644            let mut target_file_index = None;
645
646            for target_path in &target_paths_to_try {
647                if let Some(index) = workspace_index.get_file(target_path) {
648                    target_file_index = Some(index);
649                    break;
650                }
651            }
652
653            if let Some(target_file_index) = target_file_index {
654                // Check if the fragment matches any heading in the target file (O(1) lookup)
655                if !target_file_index.has_anchor(&cross_link.fragment) {
656                    warnings.push(LintWarning {
657                        rule_name: Some(self.name().to_string()),
658                        line: cross_link.line,
659                        column: cross_link.column,
660                        end_line: cross_link.line,
661                        end_column: cross_link.column + cross_link.target_path.len() + 1 + cross_link.fragment.len(),
662                        message: format!(
663                            "Link fragment '{}' not found in '{}'",
664                            cross_link.fragment, cross_link.target_path
665                        ),
666                        severity: Severity::Error,
667                        fix: None,
668                    });
669                }
670            }
671            // If target file not in index, skip (could be external file or not in workspace)
672        }
673
674        Ok(warnings)
675    }
676
677    fn default_config_section(&self) -> Option<(String, toml::Value)> {
678        let value: toml::Value = toml::from_str(
679            r#"
680# Anchor generation style to match your target platform
681# Options: "github" (default), "kramdown-gfm", "kramdown"
682# Note: "jekyll" is accepted as an alias for "kramdown-gfm" (backward compatibility)
683anchor-style = "github"
684"#,
685        )
686        .ok()?;
687        Some(("MD051".to_string(), value))
688    }
689}
690
691#[cfg(test)]
692mod tests {
693    use super::*;
694    use crate::lint_context::LintContext;
695
696    #[test]
697    fn test_quarto_cross_references() {
698        let rule = MD051LinkFragments::new();
699
700        // Test that Quarto cross-references are skipped
701        let content = r#"# Test Document
702
703## Figures
704
705See [@fig-plot] for the visualization.
706
707More details in [@tbl-results] and [@sec-methods].
708
709The equation [@eq-regression] shows the relationship.
710
711Reference to [@lst-code] for implementation."#;
712        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Quarto, None);
713        let result = rule.check(&ctx).unwrap();
714        assert!(
715            result.is_empty(),
716            "Quarto cross-references (@fig-, @tbl-, @sec-, @eq-) should not trigger MD051 warnings. Got {} warnings",
717            result.len()
718        );
719
720        // Test that normal anchors still work
721        let content_with_anchor = r#"# Test
722
723See [link](#test) for details."#;
724        let ctx_anchor = LintContext::new(content_with_anchor, crate::config::MarkdownFlavor::Quarto, None);
725        let result_anchor = rule.check(&ctx_anchor).unwrap();
726        assert!(result_anchor.is_empty(), "Valid anchor should not trigger warning");
727
728        // Test that invalid anchors are still flagged
729        let content_invalid = r#"# Test
730
731See [link](#nonexistent) for details."#;
732        let ctx_invalid = LintContext::new(content_invalid, crate::config::MarkdownFlavor::Quarto, None);
733        let result_invalid = rule.check(&ctx_invalid).unwrap();
734        assert_eq!(result_invalid.len(), 1, "Invalid anchor should still trigger warning");
735    }
736
737    // Cross-file validation tests
738    #[test]
739    fn test_cross_file_scope() {
740        let rule = MD051LinkFragments::new();
741        assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
742    }
743
744    #[test]
745    fn test_contribute_to_index_extracts_headings() {
746        let rule = MD051LinkFragments::new();
747        let content = "# First Heading\n\n# Second { #custom }\n\n## Third";
748        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
749
750        let mut file_index = FileIndex::new();
751        rule.contribute_to_index(&ctx, &mut file_index);
752
753        assert_eq!(file_index.headings.len(), 3);
754        assert_eq!(file_index.headings[0].text, "First Heading");
755        assert_eq!(file_index.headings[0].auto_anchor, "first-heading");
756        assert!(file_index.headings[0].custom_anchor.is_none());
757
758        assert_eq!(file_index.headings[1].text, "Second");
759        assert_eq!(file_index.headings[1].custom_anchor, Some("custom".to_string()));
760
761        assert_eq!(file_index.headings[2].text, "Third");
762    }
763
764    #[test]
765    fn test_contribute_to_index_extracts_cross_file_links() {
766        let rule = MD051LinkFragments::new();
767        let content = "See [docs](other.md#installation) and [more](../guide.md#getting-started)";
768        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
769
770        let mut file_index = FileIndex::new();
771        rule.contribute_to_index(&ctx, &mut file_index);
772
773        assert_eq!(file_index.cross_file_links.len(), 2);
774        assert_eq!(file_index.cross_file_links[0].target_path, "other.md");
775        assert_eq!(file_index.cross_file_links[0].fragment, "installation");
776        assert_eq!(file_index.cross_file_links[1].target_path, "../guide.md");
777        assert_eq!(file_index.cross_file_links[1].fragment, "getting-started");
778    }
779
780    #[test]
781    fn test_cross_file_check_valid_fragment() {
782        use crate::workspace_index::WorkspaceIndex;
783
784        let rule = MD051LinkFragments::new();
785
786        // Build workspace index with target file
787        let mut workspace_index = WorkspaceIndex::new();
788        let mut target_file_index = FileIndex::new();
789        target_file_index.add_heading(HeadingIndex {
790            text: "Installation Guide".to_string(),
791            auto_anchor: "installation-guide".to_string(),
792            custom_anchor: None,
793            line: 1,
794        });
795        workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
796
797        // Create a FileIndex for the file being checked
798        let mut current_file_index = FileIndex::new();
799        current_file_index.add_cross_file_link(CrossFileLinkIndex {
800            target_path: "install.md".to_string(),
801            fragment: "installation-guide".to_string(),
802            line: 3,
803            column: 5,
804        });
805
806        let warnings = rule
807            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
808            .unwrap();
809
810        // Should find no warnings since fragment exists
811        assert!(warnings.is_empty());
812    }
813
814    #[test]
815    fn test_cross_file_check_invalid_fragment() {
816        use crate::workspace_index::WorkspaceIndex;
817
818        let rule = MD051LinkFragments::new();
819
820        // Build workspace index with target file
821        let mut workspace_index = WorkspaceIndex::new();
822        let mut target_file_index = FileIndex::new();
823        target_file_index.add_heading(HeadingIndex {
824            text: "Installation Guide".to_string(),
825            auto_anchor: "installation-guide".to_string(),
826            custom_anchor: None,
827            line: 1,
828        });
829        workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
830
831        // Create a FileIndex with a cross-file link pointing to non-existent fragment
832        let mut current_file_index = FileIndex::new();
833        current_file_index.add_cross_file_link(CrossFileLinkIndex {
834            target_path: "install.md".to_string(),
835            fragment: "nonexistent".to_string(),
836            line: 3,
837            column: 5,
838        });
839
840        let warnings = rule
841            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
842            .unwrap();
843
844        // Should find one warning since fragment doesn't exist
845        assert_eq!(warnings.len(), 1);
846        assert!(warnings[0].message.contains("nonexistent"));
847        assert!(warnings[0].message.contains("install.md"));
848    }
849
850    #[test]
851    fn test_cross_file_check_custom_anchor_match() {
852        use crate::workspace_index::WorkspaceIndex;
853
854        let rule = MD051LinkFragments::new();
855
856        // Build workspace index with target file that has custom anchor
857        let mut workspace_index = WorkspaceIndex::new();
858        let mut target_file_index = FileIndex::new();
859        target_file_index.add_heading(HeadingIndex {
860            text: "Installation Guide".to_string(),
861            auto_anchor: "installation-guide".to_string(),
862            custom_anchor: Some("install".to_string()),
863            line: 1,
864        });
865        workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
866
867        // Link uses custom anchor
868        let mut current_file_index = FileIndex::new();
869        current_file_index.add_cross_file_link(CrossFileLinkIndex {
870            target_path: "install.md".to_string(),
871            fragment: "install".to_string(),
872            line: 3,
873            column: 5,
874        });
875
876        let warnings = rule
877            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
878            .unwrap();
879
880        // Should find no warnings since custom anchor matches
881        assert!(warnings.is_empty());
882    }
883
884    #[test]
885    fn test_cross_file_check_target_not_in_workspace() {
886        use crate::workspace_index::WorkspaceIndex;
887
888        let rule = MD051LinkFragments::new();
889
890        // Empty workspace index
891        let workspace_index = WorkspaceIndex::new();
892
893        // Link to file not in workspace
894        let mut current_file_index = FileIndex::new();
895        current_file_index.add_cross_file_link(CrossFileLinkIndex {
896            target_path: "external.md".to_string(),
897            fragment: "heading".to_string(),
898            line: 3,
899            column: 5,
900        });
901
902        let warnings = rule
903            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
904            .unwrap();
905
906        // Should not warn about files not in workspace
907        assert!(warnings.is_empty());
908    }
909
910    #[test]
911    fn test_wikilinks_skipped_in_check() {
912        // Wikilinks should not trigger MD051 warnings for missing fragments
913        let rule = MD051LinkFragments::new();
914
915        let content = r#"# Test Document
916
917## Valid Heading
918
919[[Microsoft#Windows OS]]
920[[SomePage#section]]
921[[page|Display Text]]
922[[path/to/page#section]]
923"#;
924        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
925        let result = rule.check(&ctx).unwrap();
926
927        assert!(
928            result.is_empty(),
929            "Wikilinks should not trigger MD051 warnings. Got: {result:?}"
930        );
931    }
932
933    #[test]
934    fn test_wikilinks_not_added_to_cross_file_index() {
935        // Wikilinks should not be added to the cross-file link index
936        let rule = MD051LinkFragments::new();
937
938        let content = r#"# Test Document
939
940[[Microsoft#Windows OS]]
941[[SomePage#section]]
942[Regular Link](other.md#section)
943"#;
944        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
945
946        let mut file_index = FileIndex::new();
947        rule.contribute_to_index(&ctx, &mut file_index);
948
949        // Should only have one cross-file link (the regular markdown link)
950        // Wikilinks should not be added
951        let cross_file_links = &file_index.cross_file_links;
952        assert_eq!(
953            cross_file_links.len(),
954            1,
955            "Only regular markdown links should be indexed, not wikilinks. Got: {cross_file_links:?}"
956        );
957        assert_eq!(file_index.cross_file_links[0].target_path, "other.md");
958        assert_eq!(file_index.cross_file_links[0].fragment, "section");
959    }
960}