rumdl_lib/rules/
md051_link_fragments.rs

1use crate::rule::{CrossFileScope, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::anchor_styles::AnchorStyle;
3use crate::workspace_index::{CrossFileLinkIndex, FileIndex, HeadingIndex};
4use pulldown_cmark::LinkType;
5use regex::Regex;
6use std::collections::{HashMap, HashSet};
7use std::path::{Component, Path, PathBuf};
8use std::sync::LazyLock;
// HTML tags with id or name attributes (supports any HTML element, not just <a>).
// Matches `id` or `name`, optional whitespace around `=`, and a non-empty value
// wrapped in single or double quotes; the value is capture group 1.
// Callers apply the pattern to one tag at a time and keep only the first match,
// so only the first id/name attribute of a tag is used.
// NOTE(review): `\b` also matches after a hyphen, so `data-id="x"` is picked up
// as an anchor as well - confirm whether that is intended.
static HTML_ANCHOR_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\b(?:id|name)\s*=\s*["']([^"']+)["']"#).unwrap());
13
// Attribute anchor pattern for kramdown/MkDocs { #id } syntax.
// Matches {#id} or { #id } (optional whitespace after the opening brace); the id
// is capture group 1 and must start with an ASCII letter, followed by ASCII
// letters, digits, `_` or `-`.
// Anything after the id up to the closing brace is accepted and ignored, which
// covers classes and attributes: { #id .class key=value }.
// The `#id` has to be the first item inside the braces; `{ .class #id }` does not match.
// Several brace groups on one line can be found by iterating over all matches.
static ATTR_ANCHOR_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\{\s*#([a-zA-Z][a-zA-Z0-9_-]*)[^}]*\}"#).unwrap());
19
/// Normalize a path lexically by resolving `.` and `..` components.
///
/// No file-system access is performed (symlinks are not followed).
///
/// - `.` components are dropped.
/// - `..` removes the preceding normal component when there is one.
/// - `..` directly after a root (or Windows prefix) is dropped, since there is
///   nothing above the root.
/// - `..` that would climb above the start of a relative path is kept, so
///   `../guide.md` stays `../guide.md` instead of collapsing to `guide.md`
///   (which would point at a different file).
fn normalize_path(path: &Path) -> PathBuf {
    let mut result = PathBuf::new();
    for component in path.components() {
        match component {
            Component::CurDir => {} // Skip .
            Component::ParentDir => match result.components().next_back() {
                // A real directory precedes us: step out of it.
                Some(Component::Normal(_)) => {
                    result.pop();
                }
                // Already at the root/prefix: cannot go any higher.
                Some(Component::RootDir | Component::Prefix(_)) => {}
                // Empty so far, or only leading `..` components: keep climbing.
                _ => result.push(Component::ParentDir.as_os_str()),
            },
            c => result.push(c.as_os_str()),
        }
    }
    result
}
34
/// Rule MD051: Link fragments
///
/// See [docs/md051.md](../../docs/md051.md) for full documentation, configuration, and examples.
///
/// This rule validates that link anchors (the part after #) point at an existing anchor.
/// Same-document links (like `#heading`) are validated in `check`; links to other files
/// (like `other.md#heading`) are recorded in `contribute_to_index` and validated in
/// `cross_file_check` against the workspace index. External URLs are never validated.
#[derive(Clone)]
pub struct MD051LinkFragments {
    /// Anchor style used to generate fragments from heading text
    anchor_style: AnchorStyle,
}
46
47impl Default for MD051LinkFragments {
48    fn default() -> Self {
49        Self::new()
50    }
51}
52
53impl MD051LinkFragments {
54    pub fn new() -> Self {
55        Self {
56            anchor_style: AnchorStyle::GitHub,
57        }
58    }
59
    /// Create the rule with a specific anchor style.
    ///
    /// The style is used to generate the fragment of each heading from its text.
    pub fn with_anchor_style(style: AnchorStyle) -> Self {
        Self { anchor_style: style }
    }
64
65    /// Extract all valid heading anchors from the document
66    /// Returns (markdown_anchors, html_anchors) where markdown_anchors are lowercased
67    /// for case-insensitive matching, and html_anchors are case-sensitive
68    fn extract_headings_from_context(
69        &self,
70        ctx: &crate::lint_context::LintContext,
71    ) -> (HashSet<String>, HashSet<String>) {
72        let mut markdown_headings = HashSet::with_capacity(32);
73        let mut html_anchors = HashSet::with_capacity(16);
74        let mut fragment_counts = std::collections::HashMap::new();
75
76        for line_info in &ctx.lines {
77            if line_info.in_front_matter {
78                continue;
79            }
80
81            // Skip code blocks for anchor extraction
82            if line_info.in_code_block {
83                continue;
84            }
85
86            let content = line_info.content(ctx.content);
87            let bytes = content.as_bytes();
88
89            // Extract HTML anchor tags with id/name attributes
90            if bytes.contains(&b'<') && (content.contains("id=") || content.contains("name=")) {
91                // HTML spec: only the first id attribute per element is valid
92                // Process element by element to handle multiple id attributes correctly
93                let mut pos = 0;
94                while pos < content.len() {
95                    if let Some(start) = content[pos..].find('<') {
96                        let tag_start = pos + start;
97                        if let Some(end) = content[tag_start..].find('>') {
98                            let tag_end = tag_start + end + 1;
99                            let tag = &content[tag_start..tag_end];
100
101                            // Extract first id or name attribute from this tag
102                            if let Some(caps) = HTML_ANCHOR_PATTERN.find(tag) {
103                                let matched_text = caps.as_str();
104                                if let Some(caps) = HTML_ANCHOR_PATTERN.captures(matched_text)
105                                    && let Some(id_match) = caps.get(1)
106                                {
107                                    let id = id_match.as_str();
108                                    if !id.is_empty() {
109                                        html_anchors.insert(id.to_string());
110                                    }
111                                }
112                            }
113                            pos = tag_end;
114                        } else {
115                            break;
116                        }
117                    } else {
118                        break;
119                    }
120                }
121            }
122
123            // Extract attribute anchors { #id } from non-heading lines
124            // Headings already have custom_id extracted below
125            if line_info.heading.is_none() && content.contains('{') && content.contains('#') {
126                for caps in ATTR_ANCHOR_PATTERN.captures_iter(content) {
127                    if let Some(id_match) = caps.get(1) {
128                        // Add to markdown_headings (lowercased for case-insensitive matching)
129                        markdown_headings.insert(id_match.as_str().to_lowercase());
130                    }
131                }
132            }
133
134            // Extract markdown heading anchors
135            if let Some(heading) = &line_info.heading {
136                // Custom ID from {#custom-id} syntax
137                if let Some(custom_id) = &heading.custom_id {
138                    markdown_headings.insert(custom_id.to_lowercase());
139                }
140
141                // Generate fragment directly from heading text
142                // Note: HTML stripping was removed because it interfered with arrow patterns
143                // like <-> and placeholders like <FILE>. The anchor styles handle these correctly.
144                let fragment = self.anchor_style.generate_fragment(&heading.text);
145
146                if !fragment.is_empty() {
147                    // Handle duplicate headings by appending -1, -2, etc.
148                    let final_fragment = if let Some(count) = fragment_counts.get_mut(&fragment) {
149                        let suffix = *count;
150                        *count += 1;
151                        format!("{fragment}-{suffix}")
152                    } else {
153                        fragment_counts.insert(fragment.clone(), 1);
154                        fragment
155                    };
156                    markdown_headings.insert(final_fragment);
157                }
158            }
159        }
160
161        (markdown_headings, html_anchors)
162    }
163
164    /// Fast check if URL is external (doesn't need to be validated)
165    #[inline]
166    fn is_external_url_fast(url: &str) -> bool {
167        // Quick prefix checks for common protocols
168        url.starts_with("http://")
169            || url.starts_with("https://")
170            || url.starts_with("ftp://")
171            || url.starts_with("mailto:")
172            || url.starts_with("tel:")
173            || url.starts_with("//")
174    }
175
176    /// Resolve a path by trying markdown extensions if it has no extension
177    ///
178    /// For extension-less paths (e.g., `page`), returns a list of paths to try:
179    /// 1. The original path (in case it's already in the index)
180    /// 2. The path with each markdown extension (e.g., `page.md`, `page.markdown`, etc.)
181    ///
182    /// For paths with extensions, returns just the original path.
183    #[inline]
184    fn resolve_path_with_extensions(path: &Path, extensions: &[&str]) -> Vec<PathBuf> {
185        if path.extension().is_none() {
186            // Extension-less path - try with markdown extensions
187            let mut paths = Vec::with_capacity(extensions.len() + 1);
188            // First try the exact path (in case it's already in the index)
189            paths.push(path.to_path_buf());
190            // Then try with each markdown extension
191            for ext in extensions {
192                let path_with_ext = path.with_extension(&ext[1..]); // Remove leading dot
193                paths.push(path_with_ext);
194            }
195            paths
196        } else {
197            // Path has extension - use as-is
198            vec![path.to_path_buf()]
199        }
200    }
201
202    /// Check if a path part (without fragment) is an extension-less path
203    ///
204    /// Extension-less paths are potential cross-file links that need resolution
205    /// with markdown extensions (e.g., `page#section` -> `page.md#section`).
206    ///
207    /// We recognize them as extension-less if:
208    /// 1. Path has no extension (no dot)
209    /// 2. Path is not empty
210    /// 3. Path doesn't look like a query parameter or special syntax
211    /// 4. Path contains at least one alphanumeric character (valid filename)
212    /// 5. Path contains only valid path characters (alphanumeric, slashes, hyphens, underscores)
213    ///
214    /// Optimized: single pass through characters to check both conditions.
215    #[inline]
216    fn is_extensionless_path(path_part: &str) -> bool {
217        // Quick rejections for common non-extension-less cases
218        if path_part.is_empty()
219            || path_part.contains('.')
220            || path_part.contains('?')
221            || path_part.contains('&')
222            || path_part.contains('=')
223        {
224            return false;
225        }
226
227        // Single pass: check for alphanumeric and validate all characters
228        let mut has_alphanumeric = false;
229        for c in path_part.chars() {
230            if c.is_alphanumeric() {
231                has_alphanumeric = true;
232            } else if !matches!(c, '/' | '\\' | '-' | '_') {
233                // Invalid character found - early exit
234                return false;
235            }
236        }
237
238        // Must have at least one alphanumeric character to be a valid filename
239        has_alphanumeric
240    }
241
242    /// Check if URL is a cross-file link (contains a file path before #)
243    #[inline]
244    fn is_cross_file_link(url: &str) -> bool {
245        if let Some(fragment_pos) = url.find('#') {
246            let path_part = &url[..fragment_pos];
247
248            // If there's no path part, it's just a fragment (#heading)
249            if path_part.is_empty() {
250                return false;
251            }
252
253            // Check for Liquid syntax used by Jekyll and other static site generators
254            // Liquid tags: {% ... %} for control flow and includes
255            // Liquid variables: {{ ... }} for outputting values
256            // These are template directives that reference external content and should be skipped
257            // We check for proper bracket order to avoid false positives
258            if let Some(tag_start) = path_part.find("{%")
259                && path_part[tag_start + 2..].contains("%}")
260            {
261                return true;
262            }
263            if let Some(var_start) = path_part.find("{{")
264                && path_part[var_start + 2..].contains("}}")
265            {
266                return true;
267            }
268
269            // Check if it's an absolute path (starts with /)
270            // These are links to other pages on the same site
271            if path_part.starts_with('/') {
272                return true;
273            }
274
275            // Check if it looks like a file path:
276            // - Contains a file extension (dot followed by letters)
277            // - Contains path separators
278            // - Contains relative path indicators
279            // - OR is an extension-less path with a fragment (GitHub-style: page#section)
280            let has_extension = path_part.contains('.')
281                && (
282                    // Has file extension pattern (handle query parameters by splitting on them first)
283                    {
284                    let clean_path = path_part.split('?').next().unwrap_or(path_part);
285                    // Handle files starting with dot
286                    if let Some(after_dot) = clean_path.strip_prefix('.') {
287                        let dots_count = clean_path.matches('.').count();
288                        if dots_count == 1 {
289                            // Could be ".ext" (file extension) or ".hidden" (hidden file)
290                            // Treat short alphanumeric suffixes as file extensions
291                            !after_dot.is_empty() && after_dot.len() <= 10 &&
292                            after_dot.chars().all(|c| c.is_ascii_alphanumeric())
293                        } else {
294                            // Hidden file with extension like ".hidden.txt"
295                            clean_path.split('.').next_back().is_some_and(|ext| {
296                                !ext.is_empty() && ext.len() <= 10 && ext.chars().all(|c| c.is_ascii_alphanumeric())
297                            })
298                        }
299                    } else {
300                        // Regular file path
301                        clean_path.split('.').next_back().is_some_and(|ext| {
302                            !ext.is_empty() && ext.len() <= 10 && ext.chars().all(|c| c.is_ascii_alphanumeric())
303                        })
304                    }
305                } ||
306                // Or contains path separators
307                path_part.contains('/') || path_part.contains('\\') ||
308                // Or starts with relative path indicators
309                path_part.starts_with("./") || path_part.starts_with("../")
310                );
311
312            // Extension-less paths with fragments are potential cross-file links
313            // This supports GitHub-style links like [link](page#section) that resolve to page.md#section
314            let is_extensionless = Self::is_extensionless_path(path_part);
315
316            has_extension || is_extensionless
317        } else {
318            false
319        }
320    }
321}
322
323impl Rule for MD051LinkFragments {
    /// Rule identifier; also used as the `rule_name` of the warnings this rule emits.
    fn name(&self) -> &'static str {
        "MD051"
    }
327
    /// One-line, human-readable summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Link fragments should reference valid headings"
    }
331
332    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
333        // Skip if no link fragments present
334        if !ctx.likely_has_links_or_images() {
335            return true;
336        }
337        // Check for # character (fragments)
338        !ctx.has_char('#')
339    }
340
341    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
342        let mut warnings = Vec::new();
343
344        if ctx.content.is_empty() || ctx.links.is_empty() || self.should_skip(ctx) {
345            return Ok(warnings);
346        }
347
348        let (markdown_headings, html_anchors) = self.extract_headings_from_context(ctx);
349
350        for link in &ctx.links {
351            if link.is_reference {
352                continue;
353            }
354
355            // Skip wiki-links - they reference other files and may have their own fragment validation
356            if matches!(link.link_type, LinkType::WikiLink { .. }) {
357                continue;
358            }
359
360            // Skip links inside Jinja templates
361            if ctx.is_in_jinja_range(link.byte_offset) {
362                continue;
363            }
364
365            let url = &link.url;
366
367            // Skip links without fragments or external URLs
368            if !url.contains('#') || Self::is_external_url_fast(url) {
369                continue;
370            }
371
372            // Skip mdbook template placeholders ({{#VARIABLE}})
373            // mdbook uses {{#VARIABLE}} syntax where # is part of the template, not a fragment
374            if url.contains("{{#") && url.contains("}}") {
375                continue;
376            }
377
378            // Skip Quarto/RMarkdown cross-references (@fig-, @tbl-, @sec-, @eq-, etc.)
379            // These are special cross-reference syntax, not HTML anchors
380            // Format: @prefix-identifier or just @identifier
381            if url.starts_with('@') {
382                continue;
383            }
384
385            // Cross-file links are valid if the file exists (not checked here)
386            if Self::is_cross_file_link(url) {
387                continue;
388            }
389
390            let Some(fragment_pos) = url.find('#') else {
391                continue;
392            };
393
394            let fragment = &url[fragment_pos + 1..];
395
396            // Skip Liquid template variables and filters
397            if (url.contains("{{") && fragment.contains('|')) || fragment.ends_with("}}") || fragment.ends_with("%}") {
398                continue;
399            }
400
401            if fragment.is_empty() {
402                continue;
403            }
404
405            // Validate fragment against document headings
406            // HTML anchors are case-sensitive, markdown anchors are case-insensitive
407            let found = if html_anchors.contains(fragment) {
408                true
409            } else {
410                let fragment_lower = fragment.to_lowercase();
411                markdown_headings.contains(&fragment_lower)
412            };
413
414            if !found {
415                warnings.push(LintWarning {
416                    rule_name: Some(self.name().to_string()),
417                    message: format!("Link anchor '#{fragment}' does not exist in document headings"),
418                    line: link.line,
419                    column: link.start_col + 1,
420                    end_line: link.line,
421                    end_column: link.end_col + 1,
422                    severity: Severity::Error,
423                    fix: None,
424                });
425            }
426        }
427
428        Ok(warnings)
429    }
430
    /// Returns the document content unchanged, because MD051 has no auto-fix.
    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
        // MD051 does not provide auto-fix
        // Link fragment corrections require human judgment to avoid incorrect fixes
        Ok(ctx.content.to_string())
    }
436
    /// Returns `self` as `&dyn Any` (presumably so callers can downcast to the
    /// concrete rule type).
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
440
441    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
442    where
443        Self: Sized,
444    {
445        // Config keys are normalized to kebab-case by the config system
446        let anchor_style = if let Some(rule_config) = config.rules.get("MD051") {
447            if let Some(style_str) = rule_config.values.get("anchor-style").and_then(|v| v.as_str()) {
448                match style_str.to_lowercase().as_str() {
449                    "kramdown" => AnchorStyle::Kramdown,
450                    "kramdown-gfm" => AnchorStyle::KramdownGfm,
451                    "jekyll" => AnchorStyle::KramdownGfm, // Backward compatibility alias
452                    _ => AnchorStyle::GitHub,
453                }
454            } else {
455                AnchorStyle::GitHub
456            }
457        } else {
458            AnchorStyle::GitHub
459        };
460
461        Box::new(MD051LinkFragments::with_anchor_style(anchor_style))
462    }
463
    /// This rule belongs to the `Link` rule category.
    fn category(&self) -> RuleCategory {
        RuleCategory::Link
    }
467
    /// Declares workspace scope: besides the per-file `check`, this rule
    /// implements `contribute_to_index` and `cross_file_check`.
    fn cross_file_scope(&self) -> CrossFileScope {
        CrossFileScope::Workspace
    }
471
472    fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, file_index: &mut FileIndex) {
473        let mut fragment_counts = HashMap::new();
474
475        // Extract headings, HTML anchors, and attribute anchors (for other files to reference)
476        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
477            if line_info.in_front_matter {
478                continue;
479            }
480
481            // Skip code blocks for anchor extraction
482            if line_info.in_code_block {
483                continue;
484            }
485
486            let content = line_info.content(ctx.content);
487
488            // Extract HTML anchors (id or name attributes on any element)
489            if content.contains('<') && (content.contains("id=") || content.contains("name=")) {
490                let mut pos = 0;
491                while pos < content.len() {
492                    if let Some(start) = content[pos..].find('<') {
493                        let tag_start = pos + start;
494                        if let Some(end) = content[tag_start..].find('>') {
495                            let tag_end = tag_start + end + 1;
496                            let tag = &content[tag_start..tag_end];
497
498                            if let Some(caps) = HTML_ANCHOR_PATTERN.captures(tag)
499                                && let Some(id_match) = caps.get(1)
500                            {
501                                file_index.add_html_anchor(id_match.as_str().to_string());
502                            }
503                            pos = tag_end;
504                        } else {
505                            break;
506                        }
507                    } else {
508                        break;
509                    }
510                }
511            }
512
513            // Extract attribute anchors { #id } on non-heading lines
514            // Headings already have custom_id extracted via heading.custom_id
515            if line_info.heading.is_none() && content.contains("{") && content.contains("#") {
516                for caps in ATTR_ANCHOR_PATTERN.captures_iter(content) {
517                    if let Some(id_match) = caps.get(1) {
518                        file_index.add_attribute_anchor(id_match.as_str().to_string());
519                    }
520                }
521            }
522
523            // Extract heading anchors
524            if let Some(heading) = &line_info.heading {
525                let fragment = self.anchor_style.generate_fragment(&heading.text);
526
527                if !fragment.is_empty() {
528                    // Handle duplicate headings
529                    let final_fragment = if let Some(count) = fragment_counts.get_mut(&fragment) {
530                        let suffix = *count;
531                        *count += 1;
532                        format!("{fragment}-{suffix}")
533                    } else {
534                        fragment_counts.insert(fragment.clone(), 1);
535                        fragment
536                    };
537
538                    file_index.add_heading(HeadingIndex {
539                        text: heading.text.clone(),
540                        auto_anchor: final_fragment,
541                        custom_anchor: heading.custom_id.clone(),
542                        line: line_idx + 1, // 1-indexed
543                    });
544                }
545            }
546        }
547
548        // Extract cross-file links (for validation against other files)
549        for link in &ctx.links {
550            if link.is_reference {
551                continue;
552            }
553
554            // Skip wiki-links - they use a different linking system and are not validated
555            // as relative file paths
556            if matches!(link.link_type, LinkType::WikiLink { .. }) {
557                continue;
558            }
559
560            let url = &link.url;
561
562            // Skip external URLs
563            if Self::is_external_url_fast(url) {
564                continue;
565            }
566
567            // Only process cross-file links with fragments
568            if Self::is_cross_file_link(url)
569                && let Some(fragment_pos) = url.find('#')
570            {
571                let path_part = &url[..fragment_pos];
572                let fragment = &url[fragment_pos + 1..];
573
574                // Skip empty fragments or template syntax
575                if fragment.is_empty() || fragment.contains("{{") || fragment.contains("{%") {
576                    continue;
577                }
578
579                file_index.add_cross_file_link(CrossFileLinkIndex {
580                    target_path: path_part.to_string(),
581                    fragment: fragment.to_string(),
582                    line: link.line,
583                    column: link.start_col + 1,
584                });
585            }
586        }
587    }
588
589    fn cross_file_check(
590        &self,
591        file_path: &Path,
592        file_index: &FileIndex,
593        workspace_index: &crate::workspace_index::WorkspaceIndex,
594    ) -> LintResult {
595        let mut warnings = Vec::new();
596
597        // Supported markdown file extensions (with leading dot, matching MD057)
598        const MARKDOWN_EXTENSIONS: &[&str] = &[
599            ".md",
600            ".markdown",
601            ".mdx",
602            ".mkd",
603            ".mkdn",
604            ".mdown",
605            ".mdwn",
606            ".qmd",
607            ".rmd",
608        ];
609
610        // Check each cross-file link in this file
611        for cross_link in &file_index.cross_file_links {
612            // Skip cross-file links without fragments - nothing to validate
613            if cross_link.fragment.is_empty() {
614                continue;
615            }
616
617            // Resolve the target file path relative to the current file
618            let base_target_path = if let Some(parent) = file_path.parent() {
619                parent.join(&cross_link.target_path)
620            } else {
621                Path::new(&cross_link.target_path).to_path_buf()
622            };
623
624            // Normalize the path (remove . and ..)
625            let base_target_path = normalize_path(&base_target_path);
626
627            // For extension-less paths, try resolving with markdown extensions
628            // This handles GitHub-style links like [link](page#section) -> page.md#section
629            let target_paths_to_try = Self::resolve_path_with_extensions(&base_target_path, MARKDOWN_EXTENSIONS);
630
631            // Try to find the target file in the workspace index
632            let mut target_file_index = None;
633
634            for target_path in &target_paths_to_try {
635                if let Some(index) = workspace_index.get_file(target_path) {
636                    target_file_index = Some(index);
637                    break;
638                }
639            }
640
641            if let Some(target_file_index) = target_file_index {
642                // Check if the fragment matches any heading in the target file (O(1) lookup)
643                if !target_file_index.has_anchor(&cross_link.fragment) {
644                    warnings.push(LintWarning {
645                        rule_name: Some(self.name().to_string()),
646                        line: cross_link.line,
647                        column: cross_link.column,
648                        end_line: cross_link.line,
649                        end_column: cross_link.column + cross_link.target_path.len() + 1 + cross_link.fragment.len(),
650                        message: format!(
651                            "Link fragment '{}' not found in '{}'",
652                            cross_link.fragment, cross_link.target_path
653                        ),
654                        severity: Severity::Error,
655                        fix: None,
656                    });
657                }
658            }
659            // If target file not in index, skip (could be external file or not in workspace)
660        }
661
662        Ok(warnings)
663    }
664
665    fn default_config_section(&self) -> Option<(String, toml::Value)> {
666        let value: toml::Value = toml::from_str(
667            r#"
668# Anchor generation style to match your target platform
669# Options: "github" (default), "kramdown-gfm", "kramdown"
670# Note: "jekyll" is accepted as an alias for "kramdown-gfm" (backward compatibility)
671anchor-style = "github"
672"#,
673        )
674        .ok()?;
675        Some(("MD051".to_string(), value))
676    }
677}
678
679#[cfg(test)]
680mod tests {
681    use super::*;
682    use crate::lint_context::LintContext;
683
684    #[test]
685    fn test_quarto_cross_references() {
686        let rule = MD051LinkFragments::new();
687
688        // Test that Quarto cross-references are skipped
689        let content = r#"# Test Document
690
691## Figures
692
693See [@fig-plot] for the visualization.
694
695More details in [@tbl-results] and [@sec-methods].
696
697The equation [@eq-regression] shows the relationship.
698
699Reference to [@lst-code] for implementation."#;
700        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Quarto, None);
701        let result = rule.check(&ctx).unwrap();
702        assert!(
703            result.is_empty(),
704            "Quarto cross-references (@fig-, @tbl-, @sec-, @eq-) should not trigger MD051 warnings. Got {} warnings",
705            result.len()
706        );
707
708        // Test that normal anchors still work
709        let content_with_anchor = r#"# Test
710
711See [link](#test) for details."#;
712        let ctx_anchor = LintContext::new(content_with_anchor, crate::config::MarkdownFlavor::Quarto, None);
713        let result_anchor = rule.check(&ctx_anchor).unwrap();
714        assert!(result_anchor.is_empty(), "Valid anchor should not trigger warning");
715
716        // Test that invalid anchors are still flagged
717        let content_invalid = r#"# Test
718
719See [link](#nonexistent) for details."#;
720        let ctx_invalid = LintContext::new(content_invalid, crate::config::MarkdownFlavor::Quarto, None);
721        let result_invalid = rule.check(&ctx_invalid).unwrap();
722        assert_eq!(result_invalid.len(), 1, "Invalid anchor should still trigger warning");
723    }
724
725    // Cross-file validation tests
726    #[test]
727    fn test_cross_file_scope() {
728        let rule = MD051LinkFragments::new();
729        assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
730    }
731
732    #[test]
733    fn test_contribute_to_index_extracts_headings() {
734        let rule = MD051LinkFragments::new();
735        let content = "# First Heading\n\n# Second { #custom }\n\n## Third";
736        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
737
738        let mut file_index = FileIndex::new();
739        rule.contribute_to_index(&ctx, &mut file_index);
740
741        assert_eq!(file_index.headings.len(), 3);
742        assert_eq!(file_index.headings[0].text, "First Heading");
743        assert_eq!(file_index.headings[0].auto_anchor, "first-heading");
744        assert!(file_index.headings[0].custom_anchor.is_none());
745
746        assert_eq!(file_index.headings[1].text, "Second");
747        assert_eq!(file_index.headings[1].custom_anchor, Some("custom".to_string()));
748
749        assert_eq!(file_index.headings[2].text, "Third");
750    }
751
752    #[test]
753    fn test_contribute_to_index_extracts_cross_file_links() {
754        let rule = MD051LinkFragments::new();
755        let content = "See [docs](other.md#installation) and [more](../guide.md#getting-started)";
756        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
757
758        let mut file_index = FileIndex::new();
759        rule.contribute_to_index(&ctx, &mut file_index);
760
761        assert_eq!(file_index.cross_file_links.len(), 2);
762        assert_eq!(file_index.cross_file_links[0].target_path, "other.md");
763        assert_eq!(file_index.cross_file_links[0].fragment, "installation");
764        assert_eq!(file_index.cross_file_links[1].target_path, "../guide.md");
765        assert_eq!(file_index.cross_file_links[1].fragment, "getting-started");
766    }
767
768    #[test]
769    fn test_cross_file_check_valid_fragment() {
770        use crate::workspace_index::WorkspaceIndex;
771
772        let rule = MD051LinkFragments::new();
773
774        // Build workspace index with target file
775        let mut workspace_index = WorkspaceIndex::new();
776        let mut target_file_index = FileIndex::new();
777        target_file_index.add_heading(HeadingIndex {
778            text: "Installation Guide".to_string(),
779            auto_anchor: "installation-guide".to_string(),
780            custom_anchor: None,
781            line: 1,
782        });
783        workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
784
785        // Create a FileIndex for the file being checked
786        let mut current_file_index = FileIndex::new();
787        current_file_index.add_cross_file_link(CrossFileLinkIndex {
788            target_path: "install.md".to_string(),
789            fragment: "installation-guide".to_string(),
790            line: 3,
791            column: 5,
792        });
793
794        let warnings = rule
795            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
796            .unwrap();
797
798        // Should find no warnings since fragment exists
799        assert!(warnings.is_empty());
800    }
801
802    #[test]
803    fn test_cross_file_check_invalid_fragment() {
804        use crate::workspace_index::WorkspaceIndex;
805
806        let rule = MD051LinkFragments::new();
807
808        // Build workspace index with target file
809        let mut workspace_index = WorkspaceIndex::new();
810        let mut target_file_index = FileIndex::new();
811        target_file_index.add_heading(HeadingIndex {
812            text: "Installation Guide".to_string(),
813            auto_anchor: "installation-guide".to_string(),
814            custom_anchor: None,
815            line: 1,
816        });
817        workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
818
819        // Create a FileIndex with a cross-file link pointing to non-existent fragment
820        let mut current_file_index = FileIndex::new();
821        current_file_index.add_cross_file_link(CrossFileLinkIndex {
822            target_path: "install.md".to_string(),
823            fragment: "nonexistent".to_string(),
824            line: 3,
825            column: 5,
826        });
827
828        let warnings = rule
829            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
830            .unwrap();
831
832        // Should find one warning since fragment doesn't exist
833        assert_eq!(warnings.len(), 1);
834        assert!(warnings[0].message.contains("nonexistent"));
835        assert!(warnings[0].message.contains("install.md"));
836    }
837
838    #[test]
839    fn test_cross_file_check_custom_anchor_match() {
840        use crate::workspace_index::WorkspaceIndex;
841
842        let rule = MD051LinkFragments::new();
843
844        // Build workspace index with target file that has custom anchor
845        let mut workspace_index = WorkspaceIndex::new();
846        let mut target_file_index = FileIndex::new();
847        target_file_index.add_heading(HeadingIndex {
848            text: "Installation Guide".to_string(),
849            auto_anchor: "installation-guide".to_string(),
850            custom_anchor: Some("install".to_string()),
851            line: 1,
852        });
853        workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
854
855        // Link uses custom anchor
856        let mut current_file_index = FileIndex::new();
857        current_file_index.add_cross_file_link(CrossFileLinkIndex {
858            target_path: "install.md".to_string(),
859            fragment: "install".to_string(),
860            line: 3,
861            column: 5,
862        });
863
864        let warnings = rule
865            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
866            .unwrap();
867
868        // Should find no warnings since custom anchor matches
869        assert!(warnings.is_empty());
870    }
871
872    #[test]
873    fn test_cross_file_check_target_not_in_workspace() {
874        use crate::workspace_index::WorkspaceIndex;
875
876        let rule = MD051LinkFragments::new();
877
878        // Empty workspace index
879        let workspace_index = WorkspaceIndex::new();
880
881        // Link to file not in workspace
882        let mut current_file_index = FileIndex::new();
883        current_file_index.add_cross_file_link(CrossFileLinkIndex {
884            target_path: "external.md".to_string(),
885            fragment: "heading".to_string(),
886            line: 3,
887            column: 5,
888        });
889
890        let warnings = rule
891            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
892            .unwrap();
893
894        // Should not warn about files not in workspace
895        assert!(warnings.is_empty());
896    }
897
898    #[test]
899    fn test_wikilinks_skipped_in_check() {
900        // Wikilinks should not trigger MD051 warnings for missing fragments
901        let rule = MD051LinkFragments::new();
902
903        let content = r#"# Test Document
904
905## Valid Heading
906
907[[Microsoft#Windows OS]]
908[[SomePage#section]]
909[[page|Display Text]]
910[[path/to/page#section]]
911"#;
912        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
913        let result = rule.check(&ctx).unwrap();
914
915        assert!(
916            result.is_empty(),
917            "Wikilinks should not trigger MD051 warnings. Got: {result:?}"
918        );
919    }
920
921    #[test]
922    fn test_wikilinks_not_added_to_cross_file_index() {
923        // Wikilinks should not be added to the cross-file link index
924        let rule = MD051LinkFragments::new();
925
926        let content = r#"# Test Document
927
928[[Microsoft#Windows OS]]
929[[SomePage#section]]
930[Regular Link](other.md#section)
931"#;
932        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
933
934        let mut file_index = FileIndex::new();
935        rule.contribute_to_index(&ctx, &mut file_index);
936
937        // Should only have one cross-file link (the regular markdown link)
938        // Wikilinks should not be added
939        let cross_file_links = &file_index.cross_file_links;
940        assert_eq!(
941            cross_file_links.len(),
942            1,
943            "Only regular markdown links should be indexed, not wikilinks. Got: {cross_file_links:?}"
944        );
945        assert_eq!(file_index.cross_file_links[0].target_path, "other.md");
946        assert_eq!(file_index.cross_file_links[0].fragment, "section");
947    }
948}