Skip to main content

rumdl_lib/rules/
md051_link_fragments.rs

1use crate::rule::{CrossFileScope, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::anchor_styles::AnchorStyle;
3use crate::workspace_index::{CrossFileLinkIndex, FileIndex, HeadingIndex};
4use pulldown_cmark::LinkType;
5use regex::Regex;
6use std::collections::{HashMap, HashSet};
7use std::path::{Component, Path, PathBuf};
8use std::sync::LazyLock;
9// HTML tags with id or name attributes (supports any HTML element, not just <a>)
10// This pattern only captures the first id/name attribute in a tag
11static HTML_ANCHOR_PATTERN: LazyLock<Regex> =
12    LazyLock::new(|| Regex::new(r#"\b(?:id|name)\s*=\s*["']([^"']+)["']"#).unwrap());
13
14// Attribute anchor pattern for kramdown/MkDocs { #id } syntax
15// Matches {#id} or { #id } with optional spaces, supports multiple anchors
16// Also supports classes and attributes: { #id .class key=value }
17static ATTR_ANCHOR_PATTERN: LazyLock<Regex> =
18    LazyLock::new(|| Regex::new(r#"\{\s*#([a-zA-Z][a-zA-Z0-9_-]*)[^}]*\}"#).unwrap());
19
/// Normalize a path lexically by resolving `.` and `..` components.
///
/// No filesystem access is performed. `.` components are dropped and `..` removes the
/// preceding normal component. A `..` that has nothing to remove is kept (so
/// `../other.md` stays `../other.md` instead of silently becoming `other.md`), except
/// directly under a root or prefix, where it is dropped because there is no parent.
fn normalize_path(path: &Path) -> PathBuf {
    let mut result = PathBuf::new();
    for component in path.components() {
        match component {
            Component::CurDir => {} // Skip .
            Component::ParentDir => {
                // Classify the current tail first so the borrow ends before mutating.
                let tail = result.components().next_back();
                let can_pop = matches!(tail, Some(Component::Normal(_)));
                let at_root = matches!(tail, Some(Component::RootDir | Component::Prefix(_)));
                if can_pop {
                    result.pop(); // Go up one level for ..
                } else if !at_root {
                    // Nothing left to cancel: the path escapes its starting directory,
                    // so the `..` must be preserved.
                    result.push("..");
                }
            }
            c => result.push(c.as_os_str()),
        }
    }
    result
}
34
35/// Rule MD051: Link fragments
36///
37/// See [docs/md051.md](../../docs/md051.md) for full documentation, configuration, and examples.
38///
39/// This rule validates that link anchors (the part after #) exist in the current document.
40/// Only applies to internal document links (like #heading), not to external URLs or cross-file links.
41#[derive(Clone)]
42pub struct MD051LinkFragments {
43    /// Anchor style to use for validation
44    anchor_style: AnchorStyle,
45}
46
47impl Default for MD051LinkFragments {
48    fn default() -> Self {
49        Self::new()
50    }
51}
52
53impl MD051LinkFragments {
54    pub fn new() -> Self {
55        Self {
56            anchor_style: AnchorStyle::GitHub,
57        }
58    }
59
60    /// Create with specific anchor style
61    pub fn with_anchor_style(style: AnchorStyle) -> Self {
62        Self { anchor_style: style }
63    }
64
65    /// Extract all valid heading anchors from the document
66    /// Returns (markdown_anchors, html_anchors) where markdown_anchors are lowercased
67    /// for case-insensitive matching, and html_anchors are case-sensitive
68    fn extract_headings_from_context(
69        &self,
70        ctx: &crate::lint_context::LintContext,
71    ) -> (HashSet<String>, HashSet<String>) {
72        let mut markdown_headings = HashSet::with_capacity(32);
73        let mut html_anchors = HashSet::with_capacity(16);
74        let mut fragment_counts = std::collections::HashMap::new();
75
76        for line_info in &ctx.lines {
77            if line_info.in_front_matter {
78                continue;
79            }
80
81            // Skip code blocks for anchor extraction
82            if line_info.in_code_block {
83                continue;
84            }
85
86            let content = line_info.content(ctx.content);
87            let bytes = content.as_bytes();
88
89            // Extract HTML anchor tags with id/name attributes
90            if bytes.contains(&b'<') && (content.contains("id=") || content.contains("name=")) {
91                // HTML spec: only the first id attribute per element is valid
92                // Process element by element to handle multiple id attributes correctly
93                let mut pos = 0;
94                while pos < content.len() {
95                    if let Some(start) = content[pos..].find('<') {
96                        let tag_start = pos + start;
97                        if let Some(end) = content[tag_start..].find('>') {
98                            let tag_end = tag_start + end + 1;
99                            let tag = &content[tag_start..tag_end];
100
101                            // Extract first id or name attribute from this tag
102                            if let Some(caps) = HTML_ANCHOR_PATTERN.find(tag) {
103                                let matched_text = caps.as_str();
104                                if let Some(caps) = HTML_ANCHOR_PATTERN.captures(matched_text)
105                                    && let Some(id_match) = caps.get(1)
106                                {
107                                    let id = id_match.as_str();
108                                    if !id.is_empty() {
109                                        html_anchors.insert(id.to_string());
110                                    }
111                                }
112                            }
113                            pos = tag_end;
114                        } else {
115                            break;
116                        }
117                    } else {
118                        break;
119                    }
120                }
121            }
122
123            // Extract attribute anchors { #id } from non-heading lines
124            // Headings already have custom_id extracted below
125            if line_info.heading.is_none() && content.contains('{') && content.contains('#') {
126                for caps in ATTR_ANCHOR_PATTERN.captures_iter(content) {
127                    if let Some(id_match) = caps.get(1) {
128                        // Add to markdown_headings (lowercased for case-insensitive matching)
129                        markdown_headings.insert(id_match.as_str().to_lowercase());
130                    }
131                }
132            }
133
134            // Extract markdown heading anchors
135            if let Some(heading) = &line_info.heading {
136                // Custom ID from {#custom-id} syntax
137                if let Some(custom_id) = &heading.custom_id {
138                    markdown_headings.insert(custom_id.to_lowercase());
139                }
140
141                // Generate fragment directly from heading text
142                // Note: HTML stripping was removed because it interfered with arrow patterns
143                // like <-> and placeholders like <FILE>. The anchor styles handle these correctly.
144                let fragment = self.anchor_style.generate_fragment(&heading.text);
145
146                if !fragment.is_empty() {
147                    // Handle duplicate headings by appending -1, -2, etc.
148                    let final_fragment = if let Some(count) = fragment_counts.get_mut(&fragment) {
149                        let suffix = *count;
150                        *count += 1;
151                        format!("{fragment}-{suffix}")
152                    } else {
153                        fragment_counts.insert(fragment.clone(), 1);
154                        fragment
155                    };
156                    markdown_headings.insert(final_fragment);
157                }
158            }
159        }
160
161        (markdown_headings, html_anchors)
162    }
163
164    /// Fast check if URL is external (doesn't need to be validated)
165    #[inline]
166    fn is_external_url_fast(url: &str) -> bool {
167        // Quick prefix checks for common protocols
168        url.starts_with("http://")
169            || url.starts_with("https://")
170            || url.starts_with("ftp://")
171            || url.starts_with("mailto:")
172            || url.starts_with("tel:")
173            || url.starts_with("//")
174    }
175
176    /// Resolve a path by trying markdown extensions if it has no extension
177    ///
178    /// For extension-less paths (e.g., `page`), returns a list of paths to try:
179    /// 1. The original path (in case it's already in the index)
180    /// 2. The path with each markdown extension (e.g., `page.md`, `page.markdown`, etc.)
181    ///
182    /// For paths with extensions, returns just the original path.
183    #[inline]
184    fn resolve_path_with_extensions(path: &Path, extensions: &[&str]) -> Vec<PathBuf> {
185        if path.extension().is_none() {
186            // Extension-less path - try with markdown extensions
187            let mut paths = Vec::with_capacity(extensions.len() + 1);
188            // First try the exact path (in case it's already in the index)
189            paths.push(path.to_path_buf());
190            // Then try with each markdown extension
191            for ext in extensions {
192                let path_with_ext = path.with_extension(&ext[1..]); // Remove leading dot
193                paths.push(path_with_ext);
194            }
195            paths
196        } else {
197            // Path has extension - use as-is
198            vec![path.to_path_buf()]
199        }
200    }
201
202    /// Check if a path part (without fragment) is an extension-less path
203    ///
204    /// Extension-less paths are potential cross-file links that need resolution
205    /// with markdown extensions (e.g., `page#section` -> `page.md#section`).
206    ///
207    /// We recognize them as extension-less if:
208    /// 1. Path has no extension (no dot)
209    /// 2. Path is not empty
210    /// 3. Path doesn't look like a query parameter or special syntax
211    /// 4. Path contains at least one alphanumeric character (valid filename)
212    /// 5. Path contains only valid path characters (alphanumeric, slashes, hyphens, underscores)
213    ///
214    /// Optimized: single pass through characters to check both conditions.
215    #[inline]
216    fn is_extensionless_path(path_part: &str) -> bool {
217        // Quick rejections for common non-extension-less cases
218        if path_part.is_empty()
219            || path_part.contains('.')
220            || path_part.contains('?')
221            || path_part.contains('&')
222            || path_part.contains('=')
223        {
224            return false;
225        }
226
227        // Single pass: check for alphanumeric and validate all characters
228        let mut has_alphanumeric = false;
229        for c in path_part.chars() {
230            if c.is_alphanumeric() {
231                has_alphanumeric = true;
232            } else if !matches!(c, '/' | '\\' | '-' | '_') {
233                // Invalid character found - early exit
234                return false;
235            }
236        }
237
238        // Must have at least one alphanumeric character to be a valid filename
239        has_alphanumeric
240    }
241
242    /// Check if URL is a cross-file link (contains a file path before #)
243    #[inline]
244    fn is_cross_file_link(url: &str) -> bool {
245        if let Some(fragment_pos) = url.find('#') {
246            let path_part = &url[..fragment_pos];
247
248            // If there's no path part, it's just a fragment (#heading)
249            if path_part.is_empty() {
250                return false;
251            }
252
253            // Check for Liquid syntax used by Jekyll and other static site generators
254            // Liquid tags: {% ... %} for control flow and includes
255            // Liquid variables: {{ ... }} for outputting values
256            // These are template directives that reference external content and should be skipped
257            // We check for proper bracket order to avoid false positives
258            if let Some(tag_start) = path_part.find("{%")
259                && path_part[tag_start + 2..].contains("%}")
260            {
261                return true;
262            }
263            if let Some(var_start) = path_part.find("{{")
264                && path_part[var_start + 2..].contains("}}")
265            {
266                return true;
267            }
268
269            // Check if it's an absolute path (starts with /)
270            // These are links to other pages on the same site
271            if path_part.starts_with('/') {
272                return true;
273            }
274
275            // Check if it looks like a file path:
276            // - Contains a file extension (dot followed by letters)
277            // - Contains path separators
278            // - Contains relative path indicators
279            // - OR is an extension-less path with a fragment (GitHub-style: page#section)
280            let has_extension = path_part.contains('.')
281                && (
282                    // Has file extension pattern (handle query parameters by splitting on them first)
283                    {
284                    let clean_path = path_part.split('?').next().unwrap_or(path_part);
285                    // Handle files starting with dot
286                    if let Some(after_dot) = clean_path.strip_prefix('.') {
287                        let dots_count = clean_path.matches('.').count();
288                        if dots_count == 1 {
289                            // Could be ".ext" (file extension) or ".hidden" (hidden file)
290                            // Treat short alphanumeric suffixes as file extensions
291                            !after_dot.is_empty() && after_dot.len() <= 10 &&
292                            after_dot.chars().all(|c| c.is_ascii_alphanumeric())
293                        } else {
294                            // Hidden file with extension like ".hidden.txt"
295                            clean_path.split('.').next_back().is_some_and(|ext| {
296                                !ext.is_empty() && ext.len() <= 10 && ext.chars().all(|c| c.is_ascii_alphanumeric())
297                            })
298                        }
299                    } else {
300                        // Regular file path
301                        clean_path.split('.').next_back().is_some_and(|ext| {
302                            !ext.is_empty() && ext.len() <= 10 && ext.chars().all(|c| c.is_ascii_alphanumeric())
303                        })
304                    }
305                } ||
306                // Or contains path separators
307                path_part.contains('/') || path_part.contains('\\') ||
308                // Or starts with relative path indicators
309                path_part.starts_with("./") || path_part.starts_with("../")
310                );
311
312            // Extension-less paths with fragments are potential cross-file links
313            // This supports GitHub-style links like [link](page#section) that resolve to page.md#section
314            let is_extensionless = Self::is_extensionless_path(path_part);
315
316            has_extension || is_extensionless
317        } else {
318            false
319        }
320    }
321}
322
323impl Rule for MD051LinkFragments {
324    fn name(&self) -> &'static str {
325        "MD051"
326    }
327
328    fn description(&self) -> &'static str {
329        "Link fragments should reference valid headings"
330    }
331
332    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
333        // Skip if no link fragments present
334        if !ctx.likely_has_links_or_images() {
335            return true;
336        }
337        // Check for # character (fragments)
338        !ctx.has_char('#')
339    }
340
341    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
342        let mut warnings = Vec::new();
343
344        if ctx.content.is_empty() || ctx.links.is_empty() || self.should_skip(ctx) {
345            return Ok(warnings);
346        }
347
348        let (markdown_headings, html_anchors) = self.extract_headings_from_context(ctx);
349
350        for link in &ctx.links {
351            if link.is_reference {
352                continue;
353            }
354
355            // Skip links inside PyMdown blocks (MkDocs flavor)
356            if ctx.line_info(link.line).is_some_and(|info| info.in_pymdown_block) {
357                continue;
358            }
359
360            // Skip wiki-links - they reference other files and may have their own fragment validation
361            if matches!(link.link_type, LinkType::WikiLink { .. }) {
362                continue;
363            }
364
365            // Skip links inside Jinja templates
366            if ctx.is_in_jinja_range(link.byte_offset) {
367                continue;
368            }
369
370            // Skip Quarto/Pandoc citations ([@citation], @citation)
371            // Citations are bibliography references, not link fragments
372            if ctx.flavor == crate::config::MarkdownFlavor::Quarto && ctx.is_in_citation(link.byte_offset) {
373                continue;
374            }
375
376            // Skip links inside shortcodes ({{< ... >}} or {{% ... %}})
377            // Shortcodes may contain template syntax that looks like fragment links
378            if ctx.is_in_shortcode(link.byte_offset) {
379                continue;
380            }
381
382            let url = &link.url;
383
384            // Skip links without fragments or external URLs
385            if !url.contains('#') || Self::is_external_url_fast(url) {
386                continue;
387            }
388
389            // Skip mdbook template placeholders ({{#VARIABLE}})
390            // mdbook uses {{#VARIABLE}} syntax where # is part of the template, not a fragment
391            if url.contains("{{#") && url.contains("}}") {
392                continue;
393            }
394
395            // Skip Quarto/RMarkdown cross-references (@fig-, @tbl-, @sec-, @eq-, etc.)
396            // These are special cross-reference syntax, not HTML anchors
397            // Format: @prefix-identifier or just @identifier
398            if url.starts_with('@') {
399                continue;
400            }
401
402            // Cross-file links are valid if the file exists (not checked here)
403            if Self::is_cross_file_link(url) {
404                continue;
405            }
406
407            let Some(fragment_pos) = url.find('#') else {
408                continue;
409            };
410
411            let fragment = &url[fragment_pos + 1..];
412
413            // Skip Liquid template variables and filters
414            if (url.contains("{{") && fragment.contains('|')) || fragment.ends_with("}}") || fragment.ends_with("%}") {
415                continue;
416            }
417
418            if fragment.is_empty() {
419                continue;
420            }
421
422            // Validate fragment against document headings
423            // HTML anchors are case-sensitive, markdown anchors are case-insensitive
424            let found = if html_anchors.contains(fragment) {
425                true
426            } else {
427                let fragment_lower = fragment.to_lowercase();
428                markdown_headings.contains(&fragment_lower)
429            };
430
431            if !found {
432                warnings.push(LintWarning {
433                    rule_name: Some(self.name().to_string()),
434                    message: format!("Link anchor '#{fragment}' does not exist in document headings"),
435                    line: link.line,
436                    column: link.start_col + 1,
437                    end_line: link.line,
438                    end_column: link.end_col + 1,
439                    severity: Severity::Error,
440                    fix: None,
441                });
442            }
443        }
444
445        Ok(warnings)
446    }
447
448    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
449        // MD051 does not provide auto-fix
450        // Link fragment corrections require human judgment to avoid incorrect fixes
451        Ok(ctx.content.to_string())
452    }
453
454    fn as_any(&self) -> &dyn std::any::Any {
455        self
456    }
457
458    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
459    where
460        Self: Sized,
461    {
462        // Config keys are normalized to kebab-case by the config system
463        let anchor_style = if let Some(rule_config) = config.rules.get("MD051") {
464            if let Some(style_str) = rule_config.values.get("anchor-style").and_then(|v| v.as_str()) {
465                match style_str.to_lowercase().as_str() {
466                    "kramdown" => AnchorStyle::Kramdown,
467                    "kramdown-gfm" => AnchorStyle::KramdownGfm,
468                    "jekyll" => AnchorStyle::KramdownGfm, // Backward compatibility alias
469                    _ => AnchorStyle::GitHub,
470                }
471            } else {
472                AnchorStyle::GitHub
473            }
474        } else {
475            AnchorStyle::GitHub
476        };
477
478        Box::new(MD051LinkFragments::with_anchor_style(anchor_style))
479    }
480
481    fn category(&self) -> RuleCategory {
482        RuleCategory::Link
483    }
484
485    fn cross_file_scope(&self) -> CrossFileScope {
486        CrossFileScope::Workspace
487    }
488
489    fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, file_index: &mut FileIndex) {
490        let mut fragment_counts = HashMap::new();
491
492        // Extract headings, HTML anchors, and attribute anchors (for other files to reference)
493        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
494            if line_info.in_front_matter {
495                continue;
496            }
497
498            // Skip code blocks for anchor extraction
499            if line_info.in_code_block {
500                continue;
501            }
502
503            let content = line_info.content(ctx.content);
504
505            // Extract HTML anchors (id or name attributes on any element)
506            if content.contains('<') && (content.contains("id=") || content.contains("name=")) {
507                let mut pos = 0;
508                while pos < content.len() {
509                    if let Some(start) = content[pos..].find('<') {
510                        let tag_start = pos + start;
511                        if let Some(end) = content[tag_start..].find('>') {
512                            let tag_end = tag_start + end + 1;
513                            let tag = &content[tag_start..tag_end];
514
515                            if let Some(caps) = HTML_ANCHOR_PATTERN.captures(tag)
516                                && let Some(id_match) = caps.get(1)
517                            {
518                                file_index.add_html_anchor(id_match.as_str().to_string());
519                            }
520                            pos = tag_end;
521                        } else {
522                            break;
523                        }
524                    } else {
525                        break;
526                    }
527                }
528            }
529
530            // Extract attribute anchors { #id } on non-heading lines
531            // Headings already have custom_id extracted via heading.custom_id
532            if line_info.heading.is_none() && content.contains("{") && content.contains("#") {
533                for caps in ATTR_ANCHOR_PATTERN.captures_iter(content) {
534                    if let Some(id_match) = caps.get(1) {
535                        file_index.add_attribute_anchor(id_match.as_str().to_string());
536                    }
537                }
538            }
539
540            // Extract heading anchors
541            if let Some(heading) = &line_info.heading {
542                let fragment = self.anchor_style.generate_fragment(&heading.text);
543
544                if !fragment.is_empty() {
545                    // Handle duplicate headings
546                    let final_fragment = if let Some(count) = fragment_counts.get_mut(&fragment) {
547                        let suffix = *count;
548                        *count += 1;
549                        format!("{fragment}-{suffix}")
550                    } else {
551                        fragment_counts.insert(fragment.clone(), 1);
552                        fragment
553                    };
554
555                    file_index.add_heading(HeadingIndex {
556                        text: heading.text.clone(),
557                        auto_anchor: final_fragment,
558                        custom_anchor: heading.custom_id.clone(),
559                        line: line_idx + 1, // 1-indexed
560                    });
561                }
562            }
563        }
564
565        // Extract cross-file links (for validation against other files)
566        for link in &ctx.links {
567            if link.is_reference {
568                continue;
569            }
570
571            // Skip links inside PyMdown blocks (MkDocs flavor)
572            if ctx.line_info(link.line).is_some_and(|info| info.in_pymdown_block) {
573                continue;
574            }
575
576            // Skip wiki-links - they use a different linking system and are not validated
577            // as relative file paths
578            if matches!(link.link_type, LinkType::WikiLink { .. }) {
579                continue;
580            }
581
582            let url = &link.url;
583
584            // Skip external URLs
585            if Self::is_external_url_fast(url) {
586                continue;
587            }
588
589            // Only process cross-file links with fragments
590            if Self::is_cross_file_link(url)
591                && let Some(fragment_pos) = url.find('#')
592            {
593                let path_part = &url[..fragment_pos];
594                let fragment = &url[fragment_pos + 1..];
595
596                // Skip empty fragments or template syntax
597                if fragment.is_empty() || fragment.contains("{{") || fragment.contains("{%") {
598                    continue;
599                }
600
601                file_index.add_cross_file_link(CrossFileLinkIndex {
602                    target_path: path_part.to_string(),
603                    fragment: fragment.to_string(),
604                    line: link.line,
605                    column: link.start_col + 1,
606                });
607            }
608        }
609    }
610
611    fn cross_file_check(
612        &self,
613        file_path: &Path,
614        file_index: &FileIndex,
615        workspace_index: &crate::workspace_index::WorkspaceIndex,
616    ) -> LintResult {
617        let mut warnings = Vec::new();
618
619        // Supported markdown file extensions (with leading dot, matching MD057)
620        const MARKDOWN_EXTENSIONS: &[&str] = &[
621            ".md",
622            ".markdown",
623            ".mdx",
624            ".mkd",
625            ".mkdn",
626            ".mdown",
627            ".mdwn",
628            ".qmd",
629            ".rmd",
630        ];
631
632        // Check each cross-file link in this file
633        for cross_link in &file_index.cross_file_links {
634            // Skip cross-file links without fragments - nothing to validate
635            if cross_link.fragment.is_empty() {
636                continue;
637            }
638
639            // Resolve the target file path relative to the current file
640            let base_target_path = if let Some(parent) = file_path.parent() {
641                parent.join(&cross_link.target_path)
642            } else {
643                Path::new(&cross_link.target_path).to_path_buf()
644            };
645
646            // Normalize the path (remove . and ..)
647            let base_target_path = normalize_path(&base_target_path);
648
649            // For extension-less paths, try resolving with markdown extensions
650            // This handles GitHub-style links like [link](page#section) -> page.md#section
651            let target_paths_to_try = Self::resolve_path_with_extensions(&base_target_path, MARKDOWN_EXTENSIONS);
652
653            // Try to find the target file in the workspace index
654            let mut target_file_index = None;
655
656            for target_path in &target_paths_to_try {
657                if let Some(index) = workspace_index.get_file(target_path) {
658                    target_file_index = Some(index);
659                    break;
660                }
661            }
662
663            if let Some(target_file_index) = target_file_index {
664                // Check if the fragment matches any heading in the target file (O(1) lookup)
665                if !target_file_index.has_anchor(&cross_link.fragment) {
666                    warnings.push(LintWarning {
667                        rule_name: Some(self.name().to_string()),
668                        line: cross_link.line,
669                        column: cross_link.column,
670                        end_line: cross_link.line,
671                        end_column: cross_link.column + cross_link.target_path.len() + 1 + cross_link.fragment.len(),
672                        message: format!(
673                            "Link fragment '{}' not found in '{}'",
674                            cross_link.fragment, cross_link.target_path
675                        ),
676                        severity: Severity::Error,
677                        fix: None,
678                    });
679                }
680            }
681            // If target file not in index, skip (could be external file or not in workspace)
682        }
683
684        Ok(warnings)
685    }
686
687    fn default_config_section(&self) -> Option<(String, toml::Value)> {
688        let value: toml::Value = toml::from_str(
689            r#"
690# Anchor generation style to match your target platform
691# Options: "github" (default), "kramdown-gfm", "kramdown"
692# Note: "jekyll" is accepted as an alias for "kramdown-gfm" (backward compatibility)
693anchor-style = "github"
694"#,
695        )
696        .ok()?;
697        Some(("MD051".to_string(), value))
698    }
699}
700
701#[cfg(test)]
702mod tests {
703    use super::*;
704    use crate::lint_context::LintContext;
705
706    #[test]
707    fn test_quarto_cross_references() {
708        let rule = MD051LinkFragments::new();
709
710        // Test that Quarto cross-references are skipped
711        let content = r#"# Test Document
712
713## Figures
714
715See [@fig-plot] for the visualization.
716
717More details in [@tbl-results] and [@sec-methods].
718
719The equation [@eq-regression] shows the relationship.
720
721Reference to [@lst-code] for implementation."#;
722        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Quarto, None);
723        let result = rule.check(&ctx).unwrap();
724        assert!(
725            result.is_empty(),
726            "Quarto cross-references (@fig-, @tbl-, @sec-, @eq-) should not trigger MD051 warnings. Got {} warnings",
727            result.len()
728        );
729
730        // Test that normal anchors still work
731        let content_with_anchor = r#"# Test
732
733See [link](#test) for details."#;
734        let ctx_anchor = LintContext::new(content_with_anchor, crate::config::MarkdownFlavor::Quarto, None);
735        let result_anchor = rule.check(&ctx_anchor).unwrap();
736        assert!(result_anchor.is_empty(), "Valid anchor should not trigger warning");
737
738        // Test that invalid anchors are still flagged
739        let content_invalid = r#"# Test
740
741See [link](#nonexistent) for details."#;
742        let ctx_invalid = LintContext::new(content_invalid, crate::config::MarkdownFlavor::Quarto, None);
743        let result_invalid = rule.check(&ctx_invalid).unwrap();
744        assert_eq!(result_invalid.len(), 1, "Invalid anchor should still trigger warning");
745    }
746
747    // Cross-file validation tests
748    #[test]
749    fn test_cross_file_scope() {
750        let rule = MD051LinkFragments::new();
751        assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
752    }
753
754    #[test]
755    fn test_contribute_to_index_extracts_headings() {
756        let rule = MD051LinkFragments::new();
757        let content = "# First Heading\n\n# Second { #custom }\n\n## Third";
758        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
759
760        let mut file_index = FileIndex::new();
761        rule.contribute_to_index(&ctx, &mut file_index);
762
763        assert_eq!(file_index.headings.len(), 3);
764        assert_eq!(file_index.headings[0].text, "First Heading");
765        assert_eq!(file_index.headings[0].auto_anchor, "first-heading");
766        assert!(file_index.headings[0].custom_anchor.is_none());
767
768        assert_eq!(file_index.headings[1].text, "Second");
769        assert_eq!(file_index.headings[1].custom_anchor, Some("custom".to_string()));
770
771        assert_eq!(file_index.headings[2].text, "Third");
772    }
773
774    #[test]
775    fn test_contribute_to_index_extracts_cross_file_links() {
776        let rule = MD051LinkFragments::new();
777        let content = "See [docs](other.md#installation) and [more](../guide.md#getting-started)";
778        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
779
780        let mut file_index = FileIndex::new();
781        rule.contribute_to_index(&ctx, &mut file_index);
782
783        assert_eq!(file_index.cross_file_links.len(), 2);
784        assert_eq!(file_index.cross_file_links[0].target_path, "other.md");
785        assert_eq!(file_index.cross_file_links[0].fragment, "installation");
786        assert_eq!(file_index.cross_file_links[1].target_path, "../guide.md");
787        assert_eq!(file_index.cross_file_links[1].fragment, "getting-started");
788    }
789
790    #[test]
791    fn test_cross_file_check_valid_fragment() {
792        use crate::workspace_index::WorkspaceIndex;
793
794        let rule = MD051LinkFragments::new();
795
796        // Build workspace index with target file
797        let mut workspace_index = WorkspaceIndex::new();
798        let mut target_file_index = FileIndex::new();
799        target_file_index.add_heading(HeadingIndex {
800            text: "Installation Guide".to_string(),
801            auto_anchor: "installation-guide".to_string(),
802            custom_anchor: None,
803            line: 1,
804        });
805        workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
806
807        // Create a FileIndex for the file being checked
808        let mut current_file_index = FileIndex::new();
809        current_file_index.add_cross_file_link(CrossFileLinkIndex {
810            target_path: "install.md".to_string(),
811            fragment: "installation-guide".to_string(),
812            line: 3,
813            column: 5,
814        });
815
816        let warnings = rule
817            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
818            .unwrap();
819
820        // Should find no warnings since fragment exists
821        assert!(warnings.is_empty());
822    }
823
824    #[test]
825    fn test_cross_file_check_invalid_fragment() {
826        use crate::workspace_index::WorkspaceIndex;
827
828        let rule = MD051LinkFragments::new();
829
830        // Build workspace index with target file
831        let mut workspace_index = WorkspaceIndex::new();
832        let mut target_file_index = FileIndex::new();
833        target_file_index.add_heading(HeadingIndex {
834            text: "Installation Guide".to_string(),
835            auto_anchor: "installation-guide".to_string(),
836            custom_anchor: None,
837            line: 1,
838        });
839        workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
840
841        // Create a FileIndex with a cross-file link pointing to non-existent fragment
842        let mut current_file_index = FileIndex::new();
843        current_file_index.add_cross_file_link(CrossFileLinkIndex {
844            target_path: "install.md".to_string(),
845            fragment: "nonexistent".to_string(),
846            line: 3,
847            column: 5,
848        });
849
850        let warnings = rule
851            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
852            .unwrap();
853
854        // Should find one warning since fragment doesn't exist
855        assert_eq!(warnings.len(), 1);
856        assert!(warnings[0].message.contains("nonexistent"));
857        assert!(warnings[0].message.contains("install.md"));
858    }
859
860    #[test]
861    fn test_cross_file_check_custom_anchor_match() {
862        use crate::workspace_index::WorkspaceIndex;
863
864        let rule = MD051LinkFragments::new();
865
866        // Build workspace index with target file that has custom anchor
867        let mut workspace_index = WorkspaceIndex::new();
868        let mut target_file_index = FileIndex::new();
869        target_file_index.add_heading(HeadingIndex {
870            text: "Installation Guide".to_string(),
871            auto_anchor: "installation-guide".to_string(),
872            custom_anchor: Some("install".to_string()),
873            line: 1,
874        });
875        workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
876
877        // Link uses custom anchor
878        let mut current_file_index = FileIndex::new();
879        current_file_index.add_cross_file_link(CrossFileLinkIndex {
880            target_path: "install.md".to_string(),
881            fragment: "install".to_string(),
882            line: 3,
883            column: 5,
884        });
885
886        let warnings = rule
887            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
888            .unwrap();
889
890        // Should find no warnings since custom anchor matches
891        assert!(warnings.is_empty());
892    }
893
894    #[test]
895    fn test_cross_file_check_target_not_in_workspace() {
896        use crate::workspace_index::WorkspaceIndex;
897
898        let rule = MD051LinkFragments::new();
899
900        // Empty workspace index
901        let workspace_index = WorkspaceIndex::new();
902
903        // Link to file not in workspace
904        let mut current_file_index = FileIndex::new();
905        current_file_index.add_cross_file_link(CrossFileLinkIndex {
906            target_path: "external.md".to_string(),
907            fragment: "heading".to_string(),
908            line: 3,
909            column: 5,
910        });
911
912        let warnings = rule
913            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
914            .unwrap();
915
916        // Should not warn about files not in workspace
917        assert!(warnings.is_empty());
918    }
919
920    #[test]
921    fn test_wikilinks_skipped_in_check() {
922        // Wikilinks should not trigger MD051 warnings for missing fragments
923        let rule = MD051LinkFragments::new();
924
925        let content = r#"# Test Document
926
927## Valid Heading
928
929[[Microsoft#Windows OS]]
930[[SomePage#section]]
931[[page|Display Text]]
932[[path/to/page#section]]
933"#;
934        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
935        let result = rule.check(&ctx).unwrap();
936
937        assert!(
938            result.is_empty(),
939            "Wikilinks should not trigger MD051 warnings. Got: {result:?}"
940        );
941    }
942
943    #[test]
944    fn test_wikilinks_not_added_to_cross_file_index() {
945        // Wikilinks should not be added to the cross-file link index
946        let rule = MD051LinkFragments::new();
947
948        let content = r#"# Test Document
949
950[[Microsoft#Windows OS]]
951[[SomePage#section]]
952[Regular Link](other.md#section)
953"#;
954        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
955
956        let mut file_index = FileIndex::new();
957        rule.contribute_to_index(&ctx, &mut file_index);
958
959        // Should only have one cross-file link (the regular markdown link)
960        // Wikilinks should not be added
961        let cross_file_links = &file_index.cross_file_links;
962        assert_eq!(
963            cross_file_links.len(),
964            1,
965            "Only regular markdown links should be indexed, not wikilinks. Got: {cross_file_links:?}"
966        );
967        assert_eq!(file_index.cross_file_links[0].target_path, "other.md");
968        assert_eq!(file_index.cross_file_links[0].fragment, "section");
969    }
970}