rumdl_lib/rules/
md051_link_fragments.rs

1use crate::rule::{CrossFileScope, FixCapability, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::rule_config_serde::RuleConfig;
3use crate::utils::anchor_styles::AnchorStyle;
4use crate::workspace_index::{CrossFileLinkIndex, FileIndex, HeadingIndex};
5use pulldown_cmark::LinkType;
6use regex::Regex;
7use serde::{Deserialize, Serialize};
8use std::collections::{HashMap, HashSet};
9use std::path::{Component, Path, PathBuf};
10use std::sync::LazyLock;
11
/// Configuration for MD051 (Link fragments).
///
/// Keys are (de)serialized in kebab-case; the snake_case spelling of each key
/// is also accepted as an alias when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "kebab-case")]
pub struct MD051Config {
    /// Anchor generation style to match the target platform.
    #[serde(default, alias = "anchor_style")]
    pub anchor_style: AnchorStyle,

    /// Match link fragments against headings case-insensitively.
    ///
    /// rumdl defaults to `true` (permissive matching), which deviates from
    /// markdownlint's default of `false`. Set this to `false` for strict
    /// markdownlint parity.
    #[serde(default = "default_ignore_case", alias = "ignore_case")]
    pub ignore_case: bool,

    /// Optional regex applied to the fragment text (without the leading `#`).
    /// Fragments that match are skipped — useful for runtime-generated anchors
    /// (e.g., footnote IDs) that aren't visible to the linter.
    ///
    /// A pattern that fails to compile is reported with a warning when the
    /// rule is constructed and then treated as if no pattern was set.
    #[serde(default, alias = "ignored_pattern")]
    pub ignored_pattern: Option<String>,
}
34
/// Serde default for `MD051Config::ignore_case`: permissive (case-insensitive) matching.
fn default_ignore_case() -> bool {
    true
}
38
39impl Default for MD051Config {
40    fn default() -> Self {
41        Self {
42            anchor_style: AnchorStyle::default(),
43            ignore_case: true,
44            ignored_pattern: None,
45        }
46    }
47}
48
// Associates this config type with the rule name `MD051`.
impl RuleConfig for MD051Config {
    const RULE_NAME: &'static str = "MD051";
}
// HTML tags with id or name attributes (supports any HTML element, not just <a>).
// Matches an `id` or `name` attribute with optional whitespace around `=` and a
// non-empty value in single or double quotes; capture group 1 is the value.
// The leftmost match is the first id/name attribute in the text it is applied to.
static HTML_ANCHOR_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\b(?:id|name)\s*=\s*["']([^"']+)["']"#).unwrap());
56
// Attribute anchor pattern for kramdown/MkDocs { #id } syntax.
// Matches {#id} or { #id } with optional spaces, supports multiple anchors.
// Also supports classes and attributes: { #id .class key=value }.
// Capture group 1 is the id: it starts with an ASCII letter, digit or `_`,
// followed by ASCII letters, digits, `_` or `-`. Everything else up to the
// closing brace is matched but not captured.
static ATTR_ANCHOR_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\{\s*#([a-zA-Z0-9_][a-zA-Z0-9_-]*)[^}]*\}"#).unwrap());
62
// Material for MkDocs setting anchor pattern: <!-- md:setting NAME -->
// Used in headings to generate anchors for configuration option references.
// Capture group 1 is NAME: the run of non-whitespace characters after `md:setting`.
static MD_SETTING_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"<!--\s*md:setting\s+([^\s]+)\s*-->").unwrap());
67
/// Normalize a path lexically by resolving `.` and `..` components.
///
/// No filesystem access is performed, so symlinks are not followed.
///
/// * `.` components are dropped.
/// * `..` removes the preceding normal component when there is one.
/// * `..` directly under a root (or drive prefix) is dropped, because there is
///   nothing above the root.
/// * `..` at the start of a relative path (or following another kept `..`) is
///   preserved, so a path that escapes its base does not silently turn into a
///   different path inside the base (`../a` stays `../a` instead of becoming `a`).
fn normalize_path(path: &Path) -> PathBuf {
    let mut result = PathBuf::new();
    for component in path.components() {
        match component {
            Component::CurDir => {} // Skip .
            Component::ParentDir => match result.components().next_back() {
                // A real directory precedes the `..`: step back out of it.
                Some(Component::Normal(_)) => {
                    result.pop();
                }
                // Already at the root: `..` cannot go any higher.
                Some(Component::RootDir | Component::Prefix(_)) => {}
                // Nothing left to pop on a relative path: keep the `..`.
                Some(Component::ParentDir | Component::CurDir) | None => {
                    result.push(Component::ParentDir.as_os_str());
                }
            },
            c => result.push(c.as_os_str()),
        }
    }
    result
}
82
/// Rule MD051: Link fragments
///
/// See [docs/md051.md](../../docs/md051.md) for full documentation, configuration, and examples.
///
/// This rule validates that link anchors (the part after #) point to existing headings.
/// Supports both same-document anchors and cross-file fragment links when linting a workspace.
#[derive(Clone)]
pub struct MD051LinkFragments {
    /// User-facing options: anchor style, case sensitivity and the raw
    /// `ignored_pattern` string.
    config: MD051Config,
    /// Pre-compiled `ignored_pattern` regex. `None` if the user did not set the
    /// option, or if the pattern failed to compile (a `log::warn!` is emitted
    /// once at construction time so the user can fix the config).
    ignored_pattern_regex: Option<Regex>,
}
97
/// Anchor sets extracted from a single document, with parallel lowercase and
/// case-preserving storage. The `*_exact` sets are empty unless
/// `ignore_case = false` so the default permissive path costs no extra
/// allocations.
struct AnchorSets {
    // Generated heading slugs plus lowercased custom IDs and `{ #id }` attribute anchors.
    markdown_headings: HashSet<String>,
    // Same anchors with custom/attribute IDs kept in their original case.
    markdown_headings_exact: HashSet<String>,
    // Lowercased `id`/`name` attribute values found in HTML tags.
    html_anchors: HashSet<String>,
    // HTML `id`/`name` attribute values in their original case.
    html_anchors_exact: HashSet<String>,
}
108
// `Default` delegates to `new()`, i.e. a rule built from the default config.
impl Default for MD051LinkFragments {
    fn default() -> Self {
        Self::new()
    }
}
114
115impl MD051LinkFragments {
116    pub fn new() -> Self {
117        Self::from_config_struct(MD051Config::default())
118    }
119
120    /// Create with specific anchor style (other options use defaults)
121    pub fn with_anchor_style(style: AnchorStyle) -> Self {
122        Self::from_config_struct(MD051Config {
123            anchor_style: style,
124            ..MD051Config::default()
125        })
126    }
127
128    /// Create from a fully-populated config struct.
129    ///
130    /// Compiles `ignored_pattern` once. An invalid regex is logged via
131    /// `log::warn!` and the rule falls back to "no filter" so linting still
132    /// works rather than silently swallowing every fragment.
133    pub fn from_config_struct(config: MD051Config) -> Self {
134        let ignored_pattern_regex = config
135            .ignored_pattern
136            .as_deref()
137            .and_then(|pattern| match Regex::new(pattern) {
138                Ok(re) => Some(re),
139                Err(err) => {
140                    log::warn!(
141                        "Invalid ignored_pattern regex for MD051 ('{pattern}'): {err}. Falling back to no filter."
142                    );
143                    None
144                }
145            });
146        Self {
147            config,
148            ignored_pattern_regex,
149        }
150    }
151
152    /// Parse ATX heading content from blockquote inner text.
153    /// Strips the leading `# ` marker, optional closing hash sequence, and extracts custom IDs.
154    /// Returns `(clean_text, custom_id)` or None if not a heading.
155    fn parse_blockquote_heading(bq_content: &str) -> Option<(String, Option<String>)> {
156        crate::utils::header_id_utils::parse_blockquote_atx_heading(bq_content)
157    }
158
159    /// Insert a heading fragment with deduplication.
160    /// When `use_underscore_dedup` is true (Python-Markdown/MkDocs), the primary suffix
161    /// uses `_N` and `-N` is registered as a fallback. Otherwise, only `-N` is used.
162    ///
163    /// Empty fragments (from CJK-only headings) are handled specially for Python-Markdown:
164    /// the first empty slug gets `_1`, the second `_2`, etc. (matching Python-Markdown's
165    /// `unique()` function which always enters the dedup loop for falsy IDs).
166    fn insert_deduplicated_fragment(
167        fragment: String,
168        fragment_counts: &mut HashMap<String, usize>,
169        markdown_headings: &mut HashSet<String>,
170        mut markdown_headings_exact: Option<&mut HashSet<String>>,
171        use_underscore_dedup: bool,
172    ) {
173        // Slugs from generate_fragment are already lowercase, so the exact set
174        // ends up identical to the lowercased set for slugs. The exact set is
175        // only meaningfully different for case-preserving custom IDs (handled
176        // by the caller). Skipping the parallel inserts when the caller passes
177        // None avoids unnecessary allocations on the default ignore_case=true path.
178        let mut also_insert_exact = |form: &str| {
179            if let Some(set) = markdown_headings_exact.as_deref_mut() {
180                set.insert(form.to_string());
181            }
182        };
183
184        if fragment.is_empty() {
185            if !use_underscore_dedup {
186                return;
187            }
188            // Python-Markdown: empty slug → _1, _2, _3, ...
189            let count = fragment_counts.entry(fragment).or_insert(0);
190            *count += 1;
191            let formed = format!("_{count}");
192            also_insert_exact(&formed);
193            markdown_headings.insert(formed);
194            return;
195        }
196        if let Some(count) = fragment_counts.get_mut(&fragment) {
197            let suffix = *count;
198            *count += 1;
199            if use_underscore_dedup {
200                // Python-Markdown primary: heading_1, heading_2
201                let underscore_form = format!("{fragment}_{suffix}");
202                also_insert_exact(&underscore_form);
203                markdown_headings.insert(underscore_form);
204                // Also accept GitHub-style for compatibility
205                let dash_form = format!("{fragment}-{suffix}");
206                also_insert_exact(&dash_form);
207                markdown_headings.insert(dash_form);
208            } else {
209                // GitHub-style: heading-1, heading-2
210                let form = format!("{fragment}-{suffix}");
211                also_insert_exact(&form);
212                markdown_headings.insert(form);
213            }
214        } else {
215            fragment_counts.insert(fragment.clone(), 1);
216            also_insert_exact(&fragment);
217            markdown_headings.insert(fragment);
218        }
219    }
220
221    /// Add a heading to the cross-file index with proper deduplication.
222    /// When `use_underscore_dedup` is true (Python-Markdown/MkDocs), the primary anchor
223    /// uses `_N` and `-N` is registered as a fallback alias.
224    ///
225    /// Empty fragments (from CJK-only headings) get `_1`, `_2`, etc. in Python-Markdown mode.
226    fn add_heading_to_index(
227        fragment: &str,
228        text: &str,
229        custom_anchor: Option<String>,
230        line: usize,
231        fragment_counts: &mut HashMap<String, usize>,
232        file_index: &mut FileIndex,
233        use_underscore_dedup: bool,
234    ) {
235        if fragment.is_empty() {
236            if !use_underscore_dedup {
237                return;
238            }
239            // Python-Markdown: empty slug → _1, _2, _3, ...
240            let count = fragment_counts.entry(fragment.to_string()).or_insert(0);
241            *count += 1;
242            file_index.add_heading(HeadingIndex {
243                text: text.to_string(),
244                auto_anchor: format!("_{count}"),
245                custom_anchor,
246                line,
247                is_setext: false,
248            });
249            return;
250        }
251        if let Some(count) = fragment_counts.get_mut(fragment) {
252            let suffix = *count;
253            *count += 1;
254            let (primary, alias) = if use_underscore_dedup {
255                // Python-Markdown primary: heading_1; GitHub fallback: heading-1
256                (format!("{fragment}_{suffix}"), Some(format!("{fragment}-{suffix}")))
257            } else {
258                // GitHub-style primary: heading-1
259                (format!("{fragment}-{suffix}"), None)
260            };
261            file_index.add_heading(HeadingIndex {
262                text: text.to_string(),
263                auto_anchor: primary,
264                custom_anchor,
265                line,
266                is_setext: false,
267            });
268            if let Some(alias_anchor) = alias {
269                let heading_idx = file_index.headings.len() - 1;
270                file_index.add_anchor_alias(&alias_anchor, heading_idx);
271            }
272        } else {
273            fragment_counts.insert(fragment.to_string(), 1);
274            file_index.add_heading(HeadingIndex {
275                text: text.to_string(),
276                auto_anchor: fragment.to_string(),
277                custom_anchor,
278                line,
279                is_setext: false,
280            });
281        }
282    }
283
284    /// Extract all valid heading anchors from the document.
285    ///
286    /// Returns parallel lowercase + case-preserving sets so the same-document
287    /// check can honor `ignore_case` consistently with cross-file lookups.
288    /// The `*_exact` sets are only populated when `ignore_case = false` to
289    /// avoid unnecessary allocations on the default permissive path.
290    fn extract_headings_from_context(&self, ctx: &crate::lint_context::LintContext) -> AnchorSets {
291        let track_exact = !self.config.ignore_case;
292        let mut markdown_headings = HashSet::with_capacity(32);
293        let mut markdown_headings_exact = if track_exact {
294            HashSet::with_capacity(32)
295        } else {
296            HashSet::new()
297        };
298        let mut html_anchors = HashSet::with_capacity(16);
299        let mut html_anchors_exact = if track_exact {
300            HashSet::with_capacity(16)
301        } else {
302            HashSet::new()
303        };
304        let mut fragment_counts = std::collections::HashMap::new();
305        let use_underscore_dedup = self.config.anchor_style == AnchorStyle::PythonMarkdown;
306
307        for line_info in &ctx.lines {
308            if line_info.in_front_matter {
309                continue;
310            }
311
312            // Skip code blocks for anchor extraction
313            if line_info.in_code_block {
314                continue;
315            }
316
317            let content = line_info.content(ctx.content);
318            let bytes = content.as_bytes();
319
320            // Extract HTML anchor tags with id/name attributes
321            if bytes.contains(&b'<') && (content.contains("id=") || content.contains("name=")) {
322                // HTML spec: only the first id attribute per element is valid
323                // Process element by element to handle multiple id attributes correctly
324                let mut pos = 0;
325                while pos < content.len() {
326                    if let Some(start) = content[pos..].find('<') {
327                        let tag_start = pos + start;
328                        if let Some(end) = content[tag_start..].find('>') {
329                            let tag_end = tag_start + end + 1;
330                            let tag = &content[tag_start..tag_end];
331
332                            // Extract first id or name attribute from this tag
333                            if let Some(caps) = HTML_ANCHOR_PATTERN.find(tag) {
334                                let matched_text = caps.as_str();
335                                if let Some(caps) = HTML_ANCHOR_PATTERN.captures(matched_text)
336                                    && let Some(id_match) = caps.get(1)
337                                {
338                                    let id = id_match.as_str();
339                                    if !id.is_empty() {
340                                        html_anchors.insert(id.to_lowercase());
341                                        if track_exact {
342                                            html_anchors_exact.insert(id.to_string());
343                                        }
344                                    }
345                                }
346                            }
347                            pos = tag_end;
348                        } else {
349                            break;
350                        }
351                    } else {
352                        break;
353                    }
354                }
355            }
356
357            // Extract attribute anchors { #id } from non-heading lines
358            // Headings already have custom_id extracted below
359            if line_info.heading.is_none() && content.contains('{') && content.contains('#') {
360                for caps in ATTR_ANCHOR_PATTERN.captures_iter(content) {
361                    if let Some(id_match) = caps.get(1) {
362                        let id = id_match.as_str();
363                        markdown_headings.insert(id.to_lowercase());
364                        if track_exact {
365                            markdown_headings_exact.insert(id.to_string());
366                        }
367                    }
368                }
369            }
370
371            // Extract heading anchors from blockquote content
372            // Blockquote headings (e.g., "> ## Heading") are not detected by the main heading parser
373            // because the regex operates on the full line, but they still generate valid anchors
374            if line_info.heading.is_none()
375                && let Some(bq) = &line_info.blockquote
376                && let Some((clean_text, custom_id)) = Self::parse_blockquote_heading(&bq.content)
377            {
378                if let Some(id) = custom_id {
379                    markdown_headings.insert(id.to_lowercase());
380                    if track_exact {
381                        markdown_headings_exact.insert(id);
382                    }
383                }
384                let fragment = self.config.anchor_style.generate_fragment(&clean_text);
385                Self::insert_deduplicated_fragment(
386                    fragment,
387                    &mut fragment_counts,
388                    &mut markdown_headings,
389                    track_exact.then_some(&mut markdown_headings_exact),
390                    use_underscore_dedup,
391                );
392            }
393
394            // Extract markdown heading anchors
395            if let Some(heading) = &line_info.heading {
396                // Custom ID from {#custom-id} syntax
397                if let Some(custom_id) = &heading.custom_id {
398                    markdown_headings.insert(custom_id.to_lowercase());
399                    if track_exact {
400                        markdown_headings_exact.insert(custom_id.clone());
401                    }
402                }
403
404                // Generate fragment directly from heading text
405                // Note: HTML stripping was removed because it interfered with arrow patterns
406                // like <-> and placeholders like <FILE>. The anchor styles handle these correctly.
407                let fragment = self.config.anchor_style.generate_fragment(&heading.text);
408
409                Self::insert_deduplicated_fragment(
410                    fragment,
411                    &mut fragment_counts,
412                    &mut markdown_headings,
413                    track_exact.then_some(&mut markdown_headings_exact),
414                    use_underscore_dedup,
415                );
416            }
417        }
418
419        AnchorSets {
420            markdown_headings,
421            markdown_headings_exact,
422            html_anchors,
423            html_anchors_exact,
424        }
425    }
426
427    /// Fast check if URL is external (doesn't need to be validated)
428    #[inline]
429    fn is_external_url_fast(url: &str) -> bool {
430        // Quick prefix checks for common protocols
431        url.starts_with("http://")
432            || url.starts_with("https://")
433            || url.starts_with("ftp://")
434            || url.starts_with("mailto:")
435            || url.starts_with("tel:")
436            || url.starts_with("//")
437    }
438
439    /// Resolve a path by trying markdown extensions if it has no extension
440    ///
441    /// For extension-less paths (e.g., `page`), returns a list of paths to try:
442    /// 1. The original path (in case it's already in the index)
443    /// 2. The path with each markdown extension (e.g., `page.md`, `page.markdown`, etc.)
444    ///
445    /// For paths with extensions, returns just the original path.
446    #[inline]
447    fn resolve_path_with_extensions(path: &Path, extensions: &[&str]) -> Vec<PathBuf> {
448        if path.extension().is_none() {
449            // Extension-less path - try with markdown extensions
450            let mut paths = Vec::with_capacity(extensions.len() + 1);
451            // First try the exact path (in case it's already in the index)
452            paths.push(path.to_path_buf());
453            // Then try with each markdown extension
454            for ext in extensions {
455                let path_with_ext = path.with_extension(&ext[1..]); // Remove leading dot
456                paths.push(path_with_ext);
457            }
458            paths
459        } else {
460            // Path has extension - use as-is
461            vec![path.to_path_buf()]
462        }
463    }
464
465    /// Check if a path part (without fragment) is an extension-less path
466    ///
467    /// Extension-less paths are potential cross-file links that need resolution
468    /// with markdown extensions (e.g., `page#section` -> `page.md#section`).
469    ///
470    /// We recognize them as extension-less if:
471    /// 1. Path has no extension (no dot)
472    /// 2. Path is not empty
473    /// 3. Path doesn't look like a query parameter or special syntax
474    /// 4. Path contains at least one alphanumeric character (valid filename)
475    /// 5. Path contains only valid path characters (alphanumeric, slashes, hyphens, underscores)
476    ///
477    /// Optimized: single pass through characters to check both conditions.
478    #[inline]
479    fn is_extensionless_path(path_part: &str) -> bool {
480        // Quick rejections for common non-extension-less cases
481        if path_part.is_empty()
482            || path_part.contains('.')
483            || path_part.contains('?')
484            || path_part.contains('&')
485            || path_part.contains('=')
486        {
487            return false;
488        }
489
490        // Single pass: check for alphanumeric and validate all characters
491        let mut has_alphanumeric = false;
492        for c in path_part.chars() {
493            if c.is_alphanumeric() {
494                has_alphanumeric = true;
495            } else if !matches!(c, '/' | '\\' | '-' | '_') {
496                // Invalid character found - early exit
497                return false;
498            }
499        }
500
501        // Must have at least one alphanumeric character to be a valid filename
502        has_alphanumeric
503    }
504
505    /// Check if URL is a cross-file link (contains a file path before #)
506    #[inline]
507    fn is_cross_file_link(url: &str) -> bool {
508        if let Some(fragment_pos) = url.find('#') {
509            let path_part = &url[..fragment_pos];
510
511            // If there's no path part, it's just a fragment (#heading)
512            if path_part.is_empty() {
513                return false;
514            }
515
516            // Check for Liquid syntax used by Jekyll and other static site generators
517            // Liquid tags: {% ... %} for control flow and includes
518            // Liquid variables: {{ ... }} for outputting values
519            // These are template directives that reference external content and should be skipped
520            // We check for proper bracket order to avoid false positives
521            if let Some(tag_start) = path_part.find("{%")
522                && path_part[tag_start + 2..].contains("%}")
523            {
524                return true;
525            }
526            if let Some(var_start) = path_part.find("{{")
527                && path_part[var_start + 2..].contains("}}")
528            {
529                return true;
530            }
531
532            // Check if it's an absolute path (starts with /)
533            // These are links to other pages on the same site
534            if path_part.starts_with('/') {
535                return true;
536            }
537
538            // Check if it looks like a file path:
539            // - Contains a file extension (dot followed by letters)
540            // - Contains path separators
541            // - Contains relative path indicators
542            // - OR is an extension-less path with a fragment (GitHub-style: page#section)
543            let has_extension = path_part.contains('.')
544                && (
545                    // Has file extension pattern (handle query parameters by splitting on them first)
546                    {
547                    let clean_path = path_part.split('?').next().unwrap_or(path_part);
548                    // Handle files starting with dot
549                    if let Some(after_dot) = clean_path.strip_prefix('.') {
550                        let dots_count = clean_path.matches('.').count();
551                        if dots_count == 1 {
552                            // Could be ".ext" (file extension) or ".hidden" (hidden file)
553                            // Treat short alphanumeric suffixes as file extensions
554                            !after_dot.is_empty() && after_dot.len() <= 10 &&
555                            after_dot.chars().all(|c| c.is_ascii_alphanumeric())
556                        } else {
557                            // Hidden file with extension like ".hidden.txt"
558                            clean_path.split('.').next_back().is_some_and(|ext| {
559                                !ext.is_empty() && ext.len() <= 10 && ext.chars().all(|c| c.is_ascii_alphanumeric())
560                            })
561                        }
562                    } else {
563                        // Regular file path
564                        clean_path.split('.').next_back().is_some_and(|ext| {
565                            !ext.is_empty() && ext.len() <= 10 && ext.chars().all(|c| c.is_ascii_alphanumeric())
566                        })
567                    }
568                } ||
569                // Or contains path separators
570                path_part.contains('/') || path_part.contains('\\') ||
571                // Or starts with relative path indicators
572                path_part.starts_with("./") || path_part.starts_with("../")
573                );
574
575            // Extension-less paths with fragments are potential cross-file links
576            // This supports GitHub-style links like [link](page#section) that resolve to page.md#section
577            let is_extensionless = Self::is_extensionless_path(path_part);
578
579            has_extension || is_extensionless
580        } else {
581            false
582        }
583    }
584}
585
586impl Rule for MD051LinkFragments {
587    fn name(&self) -> &'static str {
588        "MD051"
589    }
590
    /// One-line, human-readable summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Link fragments should reference valid headings"
    }
594
    /// MD051 reports problems only; it never rewrites the document (see `fix`).
    fn fix_capability(&self) -> FixCapability {
        FixCapability::Unfixable
    }
598
599    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
600        // Skip if no link fragments present
601        if !ctx.likely_has_links_or_images() {
602            return true;
603        }
604        // Check for # character (fragments)
605        !ctx.has_char('#')
606    }
607
608    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
609        let mut warnings = Vec::new();
610
611        if ctx.content.is_empty() || ctx.links.is_empty() || self.should_skip(ctx) {
612            return Ok(warnings);
613        }
614
615        let AnchorSets {
616            markdown_headings,
617            markdown_headings_exact,
618            html_anchors,
619            html_anchors_exact,
620        } = self.extract_headings_from_context(ctx);
621        let ignored_pattern = self.ignored_pattern_regex.as_ref();
622
623        for link in &ctx.links {
624            if link.is_reference {
625                continue;
626            }
627
628            // Skip links inside PyMdown blocks (MkDocs flavor)
629            if ctx.line_info(link.line).is_some_and(|info| info.in_pymdown_block) {
630                continue;
631            }
632
633            // Skip wiki-links - they reference other files and may have their own fragment validation
634            if matches!(link.link_type, LinkType::WikiLink { .. }) {
635                continue;
636            }
637
638            // Skip links inside Jinja templates
639            if ctx.is_in_jinja_range(link.byte_offset) {
640                continue;
641            }
642
643            // Skip Pandoc/Quarto citations ([@citation], @citation)
644            // Citations are bibliography references, not link fragments
645            if ctx.flavor.is_pandoc_compatible() && ctx.is_in_citation(link.byte_offset) {
646                continue;
647            }
648
649            // Skip links inside shortcodes ({{< ... >}} or {{% ... %}})
650            // Shortcodes may contain template syntax that looks like fragment links
651            if ctx.is_in_shortcode(link.byte_offset) {
652                continue;
653            }
654
655            let url = &link.url;
656
657            // Skip links without fragments or external URLs
658            if !url.contains('#') || Self::is_external_url_fast(url) {
659                continue;
660            }
661
662            // Skip mdbook template placeholders ({{#VARIABLE}})
663            // mdbook uses {{#VARIABLE}} syntax where # is part of the template, not a fragment
664            if url.contains("{{#") && url.contains("}}") {
665                continue;
666            }
667
668            // Resolve link fragments against Pandoc heading slugs. Pandoc/Quarto
669            // auto-generate slugs that diverge from GitHub style for headings that
670            // contain punctuation (e.g. `# 5. Five Things` becomes `5.-five-things`
671            // under Pandoc but `5-five-things` under GitHub). Treat such fragments
672            // as resolved when running under a Pandoc-compatible flavor.
673            if ctx.flavor.is_pandoc_compatible()
674                && let Some(frag) = url.strip_prefix('#')
675                && ctx.has_pandoc_slug(frag)
676            {
677                continue;
678            }
679
680            // Skip Quarto/RMarkdown cross-references (@fig-, @tbl-, @sec-, @eq-, etc.)
681            // These are special cross-reference syntax, not HTML anchors
682            // Format: @prefix-identifier or just @identifier
683            if url.starts_with('@') {
684                continue;
685            }
686
687            // Cross-file links are valid if the file exists (not checked here)
688            if Self::is_cross_file_link(url) {
689                continue;
690            }
691
692            let Some(fragment_pos) = url.find('#') else {
693                continue;
694            };
695
696            let fragment = &url[fragment_pos + 1..];
697
698            // Skip Liquid template variables and filters
699            if (url.contains("{{") && fragment.contains('|')) || fragment.ends_with("}}") || fragment.ends_with("%}") {
700                continue;
701            }
702
703            if fragment.is_empty() {
704                continue;
705            }
706
707            // Skip MkDocs runtime-generated anchors:
708            // - #fn:NAME, #fnref:NAME from the footnotes extension
709            // - #+key.path or #+key:value from Material for MkDocs option references
710            //   (e.g., #+type:abstract, #+toc.slugify, #+pymdownx.highlight.anchor_linenums)
711            if ctx.flavor == crate::config::MarkdownFlavor::MkDocs
712                && (fragment.starts_with("fn:")
713                    || fragment.starts_with("fnref:")
714                    || (fragment.starts_with('+') && (fragment.contains('.') || fragment.contains(':'))))
715            {
716                continue;
717            }
718
719            // Skip fragments matching the user-configured ignored_pattern
720            if ignored_pattern.is_some_and(|re| re.is_match(fragment)) {
721                continue;
722            }
723
724            // Validate fragment against document headings. Both HTML and
725            // markdown anchors honor the `ignore_case` option, mirroring
726            // markdownlint and the cross-file path.
727            let found = if self.config.ignore_case {
728                let lower = fragment.to_lowercase();
729                html_anchors.contains(&lower) || markdown_headings.contains(&lower)
730            } else {
731                html_anchors_exact.contains(fragment) || markdown_headings_exact.contains(fragment)
732            };
733
734            if !found {
735                warnings.push(LintWarning {
736                    rule_name: Some(self.name().to_string()),
737                    message: format!("Link anchor '#{fragment}' does not exist in document headings"),
738                    line: link.line,
739                    column: link.start_col + 1,
740                    end_line: link.line,
741                    end_column: link.end_col + 1,
742                    severity: Severity::Error,
743                    fix: None,
744                });
745            }
746        }
747
748        Ok(warnings)
749    }
750
751    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
752        // MD051 does not provide auto-fix
753        // Link fragment corrections require human judgment to avoid incorrect fixes
754        Ok(ctx.content.to_string())
755    }
756
757    fn as_any(&self) -> &dyn std::any::Any {
758        self
759    }
760
761    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
762    where
763        Self: Sized,
764    {
765        let mut rule_config = crate::rule_config_serde::load_rule_config::<MD051Config>(config);
766
767        // When no explicit anchor style is configured (the user didn't override the default),
768        // and a flavor is active, fall back to the flavor's native anchor generation.
769        let explicit_style_present = config
770            .rules
771            .get("MD051")
772            .is_some_and(|rc| rc.values.contains_key("anchor-style") || rc.values.contains_key("anchor_style"));
773        if !explicit_style_present {
774            rule_config.anchor_style = match config.global.flavor {
775                crate::config::MarkdownFlavor::MkDocs => AnchorStyle::PythonMarkdown,
776                crate::config::MarkdownFlavor::Kramdown => AnchorStyle::KramdownGfm,
777                _ => AnchorStyle::GitHub,
778            };
779        }
780
781        Box::new(MD051LinkFragments::from_config_struct(rule_config))
782    }
783
784    fn category(&self) -> RuleCategory {
785        RuleCategory::Link
786    }
787
788    fn cross_file_scope(&self) -> CrossFileScope {
789        CrossFileScope::Workspace
790    }
791
792    fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, file_index: &mut FileIndex) {
793        let mut fragment_counts = HashMap::new();
794        let use_underscore_dedup = self.config.anchor_style == AnchorStyle::PythonMarkdown;
795
796        // Extract headings, HTML anchors, and attribute anchors (for other files to reference)
797        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
798            if line_info.in_front_matter {
799                continue;
800            }
801
802            // Skip code blocks for anchor extraction
803            if line_info.in_code_block {
804                continue;
805            }
806
807            let content = line_info.content(ctx.content);
808
809            // Extract HTML anchors (id or name attributes on any element)
810            if content.contains('<') && (content.contains("id=") || content.contains("name=")) {
811                let mut pos = 0;
812                while pos < content.len() {
813                    if let Some(start) = content[pos..].find('<') {
814                        let tag_start = pos + start;
815                        if let Some(end) = content[tag_start..].find('>') {
816                            let tag_end = tag_start + end + 1;
817                            let tag = &content[tag_start..tag_end];
818
819                            if let Some(caps) = HTML_ANCHOR_PATTERN.captures(tag)
820                                && let Some(id_match) = caps.get(1)
821                            {
822                                file_index.add_html_anchor(id_match.as_str());
823                            }
824                            pos = tag_end;
825                        } else {
826                            break;
827                        }
828                    } else {
829                        break;
830                    }
831                }
832            }
833
834            // Extract attribute anchors { #id } on non-heading lines
835            // Headings already have custom_id extracted via heading.custom_id
836            if line_info.heading.is_none() && content.contains('{') && content.contains('#') {
837                for caps in ATTR_ANCHOR_PATTERN.captures_iter(content) {
838                    if let Some(id_match) = caps.get(1) {
839                        file_index.add_attribute_anchor(id_match.as_str());
840                    }
841                }
842            }
843
844            // Extract heading anchors from blockquote content
845            if line_info.heading.is_none()
846                && let Some(bq) = &line_info.blockquote
847                && let Some((clean_text, custom_id)) = Self::parse_blockquote_heading(&bq.content)
848            {
849                let fragment = self.config.anchor_style.generate_fragment(&clean_text);
850                Self::add_heading_to_index(
851                    &fragment,
852                    &clean_text,
853                    custom_id,
854                    line_idx + 1,
855                    &mut fragment_counts,
856                    file_index,
857                    use_underscore_dedup,
858                );
859            }
860
861            // Extract heading anchors
862            if let Some(heading) = &line_info.heading {
863                let fragment = self.config.anchor_style.generate_fragment(&heading.text);
864
865                Self::add_heading_to_index(
866                    &fragment,
867                    &heading.text,
868                    heading.custom_id.clone(),
869                    line_idx + 1,
870                    &mut fragment_counts,
871                    file_index,
872                    use_underscore_dedup,
873                );
874
875                // Extract Material for MkDocs setting anchors from headings.
876                // These are rendered as anchors at build time by Material's JS.
877                // Most references use #+key.path format (handled by the skip logic in check()),
878                // but this extraction enables cross-file validation for direct #key.path references.
879                if ctx.flavor == crate::config::MarkdownFlavor::MkDocs
880                    && let Some(caps) = MD_SETTING_PATTERN.captures(content)
881                    && let Some(name) = caps.get(1)
882                {
883                    file_index.add_html_anchor(name.as_str());
884                }
885            }
886        }
887
888        // Extract cross-file links (for validation against other files)
889        for link in &ctx.links {
890            if link.is_reference {
891                continue;
892            }
893
894            // Skip links inside PyMdown blocks (MkDocs flavor)
895            if ctx.line_info(link.line).is_some_and(|info| info.in_pymdown_block) {
896                continue;
897            }
898
899            // Skip wiki-links - they use a different linking system and are not validated
900            // as relative file paths
901            if matches!(link.link_type, LinkType::WikiLink { .. }) {
902                continue;
903            }
904
905            let url = &link.url;
906
907            // Skip external URLs
908            if Self::is_external_url_fast(url) {
909                continue;
910            }
911
912            // Only process cross-file links with fragments
913            if Self::is_cross_file_link(url)
914                && let Some(fragment_pos) = url.find('#')
915            {
916                let path_part = &url[..fragment_pos];
917                let fragment = &url[fragment_pos + 1..];
918
919                // Skip empty fragments or template syntax
920                if fragment.is_empty() || fragment.contains("{{") || fragment.contains("{%") {
921                    continue;
922                }
923
924                file_index.add_cross_file_link(CrossFileLinkIndex {
925                    target_path: path_part.to_string(),
926                    fragment: fragment.to_string(),
927                    line: link.line,
928                    column: link.start_col + 1,
929                });
930            }
931        }
932    }
933
934    fn cross_file_check(
935        &self,
936        file_path: &Path,
937        file_index: &FileIndex,
938        workspace_index: &crate::workspace_index::WorkspaceIndex,
939    ) -> LintResult {
940        let mut warnings = Vec::new();
941
942        // Supported markdown file extensions (with leading dot, matching MD057)
943        const MARKDOWN_EXTENSIONS: &[&str] = &[
944            ".md",
945            ".markdown",
946            ".mdx",
947            ".mkd",
948            ".mkdn",
949            ".mdown",
950            ".mdwn",
951            ".qmd",
952            ".rmd",
953        ];
954
955        let ignored_pattern = self.ignored_pattern_regex.as_ref();
956        let ignore_case = self.config.ignore_case;
957
958        // Check each cross-file link in this file
959        for cross_link in &file_index.cross_file_links {
960            // Skip cross-file links without fragments - nothing to validate
961            if cross_link.fragment.is_empty() {
962                continue;
963            }
964
965            // Honor `ignored-pattern`: skip fragments matching the configured regex.
966            if ignored_pattern.is_some_and(|re| re.is_match(&cross_link.fragment)) {
967                continue;
968            }
969
970            // Resolve the target file path relative to the current file
971            let base_target_path = if let Some(parent) = file_path.parent() {
972                parent.join(&cross_link.target_path)
973            } else {
974                Path::new(&cross_link.target_path).to_path_buf()
975            };
976
977            // Normalize the path (remove . and ..)
978            let base_target_path = normalize_path(&base_target_path);
979
980            // For extension-less paths, try resolving with markdown extensions
981            // This handles GitHub-style links like [link](page#section) -> page.md#section
982            let target_paths_to_try = Self::resolve_path_with_extensions(&base_target_path, MARKDOWN_EXTENSIONS);
983
984            // Try to find the target file in the workspace index
985            let mut target_file_index = None;
986
987            for target_path in &target_paths_to_try {
988                if let Some(index) = workspace_index.get_file(target_path) {
989                    target_file_index = Some(index);
990                    break;
991                }
992            }
993
994            if let Some(target_file_index) = target_file_index {
995                // Check if the fragment matches any heading in the target file (O(1) lookup)
996                if !target_file_index.has_anchor_with_case(&cross_link.fragment, ignore_case) {
997                    warnings.push(LintWarning {
998                        rule_name: Some(self.name().to_string()),
999                        line: cross_link.line,
1000                        column: cross_link.column,
1001                        end_line: cross_link.line,
1002                        end_column: cross_link.column + cross_link.target_path.len() + 1 + cross_link.fragment.len(),
1003                        message: format!(
1004                            "Link fragment '{}' not found in '{}'",
1005                            cross_link.fragment, cross_link.target_path
1006                        ),
1007                        severity: Severity::Error,
1008                        fix: None,
1009                    });
1010                }
1011            }
1012            // If target file not in index, skip (could be external file or not in workspace)
1013        }
1014
1015        Ok(warnings)
1016    }
1017
1018    fn default_config_section(&self) -> Option<(String, toml::Value)> {
1019        let table = crate::rule_config_serde::config_schema_table(&MD051Config::default())?;
1020        if table.is_empty() {
1021            None
1022        } else {
1023            Some((MD051Config::RULE_NAME.to_string(), toml::Value::Table(table)))
1024        }
1025    }
1026}
1027
1028#[cfg(test)]
1029mod tests {
1030    use super::*;
1031    use crate::lint_context::LintContext;
1032
1033    #[test]
1034    fn test_quarto_cross_references() {
1035        let rule = MD051LinkFragments::new();
1036
1037        // Test that Quarto cross-references are skipped
1038        let content = r#"# Test Document
1039
1040## Figures
1041
1042See [@fig-plot] for the visualization.
1043
1044More details in [@tbl-results] and [@sec-methods].
1045
1046The equation [@eq-regression] shows the relationship.
1047
1048Reference to [@lst-code] for implementation."#;
1049        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Quarto, None);
1050        let result = rule.check(&ctx).unwrap();
1051        assert!(
1052            result.is_empty(),
1053            "Quarto cross-references (@fig-, @tbl-, @sec-, @eq-) should not trigger MD051 warnings. Got {} warnings",
1054            result.len()
1055        );
1056
1057        // Test that normal anchors still work
1058        let content_with_anchor = r#"# Test
1059
1060See [link](#test) for details."#;
1061        let ctx_anchor = LintContext::new(content_with_anchor, crate::config::MarkdownFlavor::Quarto, None);
1062        let result_anchor = rule.check(&ctx_anchor).unwrap();
1063        assert!(result_anchor.is_empty(), "Valid anchor should not trigger warning");
1064
1065        // Test that invalid anchors are still flagged
1066        let content_invalid = r#"# Test
1067
1068See [link](#nonexistent) for details."#;
1069        let ctx_invalid = LintContext::new(content_invalid, crate::config::MarkdownFlavor::Quarto, None);
1070        let result_invalid = rule.check(&ctx_invalid).unwrap();
1071        assert_eq!(result_invalid.len(), 1, "Invalid anchor should still trigger warning");
1072    }
1073
1074    #[test]
1075    fn test_jsx_in_heading_anchor() {
1076        // Issue #510: JSX/HTML tags in headings should be stripped for anchor generation
1077        let rule = MD051LinkFragments::new();
1078
1079        // Self-closing JSX tag
1080        let content = "# Test\n\n### `retentionPolicy`<Component />\n\n[link](#retentionpolicy)\n";
1081        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1082        let result = rule.check(&ctx).unwrap();
1083        assert!(
1084            result.is_empty(),
1085            "JSX self-closing tag should be stripped from anchor: got {result:?}"
1086        );
1087
1088        // JSX with attributes
1089        let content2 =
1090            "### retentionPolicy<HeaderTag type=\"danger\" text=\"required\" />\n\n[link](#retentionpolicy)\n";
1091        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
1092        let result2 = rule.check(&ctx2).unwrap();
1093        assert!(
1094            result2.is_empty(),
1095            "JSX tag with attributes should be stripped from anchor: got {result2:?}"
1096        );
1097
1098        // HTML tags with inner text preserved
1099        let content3 = "### Test <span>extra</span>\n\n[link](#test-extra)\n";
1100        let ctx3 = LintContext::new(content3, crate::config::MarkdownFlavor::Standard, None);
1101        let result3 = rule.check(&ctx3).unwrap();
1102        assert!(
1103            result3.is_empty(),
1104            "HTML tag content should be preserved in anchor: got {result3:?}"
1105        );
1106    }
1107
1108    // Cross-file validation tests
1109    #[test]
1110    fn test_cross_file_scope() {
1111        let rule = MD051LinkFragments::new();
1112        assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
1113    }
1114
1115    #[test]
1116    fn test_contribute_to_index_extracts_headings() {
1117        let rule = MD051LinkFragments::new();
1118        let content = "# First Heading\n\n# Second { #custom }\n\n## Third";
1119        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1120
1121        let mut file_index = FileIndex::new();
1122        rule.contribute_to_index(&ctx, &mut file_index);
1123
1124        assert_eq!(file_index.headings.len(), 3);
1125        assert_eq!(file_index.headings[0].text, "First Heading");
1126        assert_eq!(file_index.headings[0].auto_anchor, "first-heading");
1127        assert!(file_index.headings[0].custom_anchor.is_none());
1128
1129        assert_eq!(file_index.headings[1].text, "Second");
1130        assert_eq!(file_index.headings[1].custom_anchor, Some("custom".to_string()));
1131
1132        assert_eq!(file_index.headings[2].text, "Third");
1133    }
1134
1135    #[test]
1136    fn test_contribute_to_index_extracts_cross_file_links() {
1137        let rule = MD051LinkFragments::new();
1138        let content = "See [docs](other.md#installation) and [more](../guide.md#getting-started)";
1139        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1140
1141        let mut file_index = FileIndex::new();
1142        rule.contribute_to_index(&ctx, &mut file_index);
1143
1144        assert_eq!(file_index.cross_file_links.len(), 2);
1145        assert_eq!(file_index.cross_file_links[0].target_path, "other.md");
1146        assert_eq!(file_index.cross_file_links[0].fragment, "installation");
1147        assert_eq!(file_index.cross_file_links[1].target_path, "../guide.md");
1148        assert_eq!(file_index.cross_file_links[1].fragment, "getting-started");
1149    }
1150
1151    #[test]
1152    fn test_cross_file_check_valid_fragment() {
1153        use crate::workspace_index::WorkspaceIndex;
1154
1155        let rule = MD051LinkFragments::new();
1156
1157        // Build workspace index with target file
1158        let mut workspace_index = WorkspaceIndex::new();
1159        let mut target_file_index = FileIndex::new();
1160        target_file_index.add_heading(HeadingIndex {
1161            text: "Installation Guide".to_string(),
1162            auto_anchor: "installation-guide".to_string(),
1163            custom_anchor: None,
1164            line: 1,
1165            is_setext: false,
1166        });
1167        workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
1168
1169        // Create a FileIndex for the file being checked
1170        let mut current_file_index = FileIndex::new();
1171        current_file_index.add_cross_file_link(CrossFileLinkIndex {
1172            target_path: "install.md".to_string(),
1173            fragment: "installation-guide".to_string(),
1174            line: 3,
1175            column: 5,
1176        });
1177
1178        let warnings = rule
1179            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
1180            .unwrap();
1181
1182        // Should find no warnings since fragment exists
1183        assert!(warnings.is_empty());
1184    }
1185
1186    #[test]
1187    fn test_cross_file_check_invalid_fragment() {
1188        use crate::workspace_index::WorkspaceIndex;
1189
1190        let rule = MD051LinkFragments::new();
1191
1192        // Build workspace index with target file
1193        let mut workspace_index = WorkspaceIndex::new();
1194        let mut target_file_index = FileIndex::new();
1195        target_file_index.add_heading(HeadingIndex {
1196            text: "Installation Guide".to_string(),
1197            auto_anchor: "installation-guide".to_string(),
1198            custom_anchor: None,
1199            line: 1,
1200            is_setext: false,
1201        });
1202        workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
1203
1204        // Create a FileIndex with a cross-file link pointing to non-existent fragment
1205        let mut current_file_index = FileIndex::new();
1206        current_file_index.add_cross_file_link(CrossFileLinkIndex {
1207            target_path: "install.md".to_string(),
1208            fragment: "nonexistent".to_string(),
1209            line: 3,
1210            column: 5,
1211        });
1212
1213        let warnings = rule
1214            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
1215            .unwrap();
1216
1217        // Should find one warning since fragment doesn't exist
1218        assert_eq!(warnings.len(), 1);
1219        assert!(warnings[0].message.contains("nonexistent"));
1220        assert!(warnings[0].message.contains("install.md"));
1221    }
1222
1223    #[test]
1224    fn test_cross_file_check_custom_anchor_match() {
1225        use crate::workspace_index::WorkspaceIndex;
1226
1227        let rule = MD051LinkFragments::new();
1228
1229        // Build workspace index with target file that has custom anchor
1230        let mut workspace_index = WorkspaceIndex::new();
1231        let mut target_file_index = FileIndex::new();
1232        target_file_index.add_heading(HeadingIndex {
1233            text: "Installation Guide".to_string(),
1234            auto_anchor: "installation-guide".to_string(),
1235            custom_anchor: Some("install".to_string()),
1236            line: 1,
1237            is_setext: false,
1238        });
1239        workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
1240
1241        // Link uses custom anchor
1242        let mut current_file_index = FileIndex::new();
1243        current_file_index.add_cross_file_link(CrossFileLinkIndex {
1244            target_path: "install.md".to_string(),
1245            fragment: "install".to_string(),
1246            line: 3,
1247            column: 5,
1248        });
1249
1250        let warnings = rule
1251            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
1252            .unwrap();
1253
1254        // Should find no warnings since custom anchor matches
1255        assert!(warnings.is_empty());
1256    }
1257
1258    #[test]
1259    fn test_cross_file_check_target_not_in_workspace() {
1260        use crate::workspace_index::WorkspaceIndex;
1261
1262        let rule = MD051LinkFragments::new();
1263
1264        // Empty workspace index
1265        let workspace_index = WorkspaceIndex::new();
1266
1267        // Link to file not in workspace
1268        let mut current_file_index = FileIndex::new();
1269        current_file_index.add_cross_file_link(CrossFileLinkIndex {
1270            target_path: "external.md".to_string(),
1271            fragment: "heading".to_string(),
1272            line: 3,
1273            column: 5,
1274        });
1275
1276        let warnings = rule
1277            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
1278            .unwrap();
1279
1280        // Should not warn about files not in workspace
1281        assert!(warnings.is_empty());
1282    }
1283
1284    #[test]
1285    fn test_wikilinks_skipped_in_check() {
1286        // Wikilinks should not trigger MD051 warnings for missing fragments
1287        let rule = MD051LinkFragments::new();
1288
1289        let content = r#"# Test Document
1290
1291## Valid Heading
1292
1293[[Microsoft#Windows OS]]
1294[[SomePage#section]]
1295[[page|Display Text]]
1296[[path/to/page#section]]
1297"#;
1298        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1299        let result = rule.check(&ctx).unwrap();
1300
1301        assert!(
1302            result.is_empty(),
1303            "Wikilinks should not trigger MD051 warnings. Got: {result:?}"
1304        );
1305    }
1306
1307    #[test]
1308    fn test_wikilinks_not_added_to_cross_file_index() {
1309        // Wikilinks should not be added to the cross-file link index
1310        let rule = MD051LinkFragments::new();
1311
1312        let content = r#"# Test Document
1313
1314[[Microsoft#Windows OS]]
1315[[SomePage#section]]
1316[Regular Link](other.md#section)
1317"#;
1318        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1319
1320        let mut file_index = FileIndex::new();
1321        rule.contribute_to_index(&ctx, &mut file_index);
1322
1323        // Should only have one cross-file link (the regular markdown link)
1324        // Wikilinks should not be added
1325        let cross_file_links = &file_index.cross_file_links;
1326        assert_eq!(
1327            cross_file_links.len(),
1328            1,
1329            "Only regular markdown links should be indexed, not wikilinks. Got: {cross_file_links:?}"
1330        );
1331        assert_eq!(file_index.cross_file_links[0].target_path, "other.md");
1332        assert_eq!(file_index.cross_file_links[0].fragment, "section");
1333    }
1334
1335    #[test]
1336    fn test_pandoc_flavor_skips_citations() {
1337        // Pandoc citations ([@key]) are bibliography references, not link fragments.
1338        // MD051 should skip them under Pandoc flavor, mirroring the Quarto skip behavior
1339        // tested in test_quarto_cross_references.
1340        let rule = MD051LinkFragments::new();
1341        let content = "# Test Document\n\nSee [@smith2020] for details.\n";
1342        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Pandoc, None);
1343        let result = rule.check(&ctx).unwrap();
1344        assert!(
1345            result.is_empty(),
1346            "MD051 should skip Pandoc citations under Pandoc flavor: {result:?}"
1347        );
1348    }
1349
1350    #[test]
1351    fn md051_pandoc_resolves_pandoc_slug_diverging_from_github() {
1352        // The Pandoc heading slug for `# 5. Five Things` is `5.-five-things` (the
1353        // dot is preserved per Pandoc's rule of keeping `.`/`_`/`-`), whereas the
1354        // GitHub anchor for the same heading is `5-five-things` (the dot is
1355        // stripped). A link to `#5.-five-things` would be flagged under the
1356        // GitHub default but must be accepted under Pandoc-compatible flavors via
1357        // the `has_pandoc_slug` short-circuit.
1358        use crate::config::MarkdownFlavor;
1359        let rule = MD051LinkFragments::new();
1360        let content = "# 5. Five Things\n\nSee [details](#5.-five-things).\n";
1361
1362        // Sanity check: under Standard flavor (GitHub anchor style), the
1363        // divergent fragment is reported as an unknown anchor.
1364        let ctx_std = LintContext::new(content, MarkdownFlavor::Standard, None);
1365        let std_result = rule.check(&ctx_std).unwrap();
1366        assert_eq!(
1367            std_result.len(),
1368            1,
1369            "Standard flavor should flag the Pandoc-style fragment: {std_result:?}"
1370        );
1371
1372        // Under Pandoc flavor, the Pandoc slug guard should resolve it.
1373        let ctx_pandoc = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1374        let pandoc_result = rule.check(&ctx_pandoc).unwrap();
1375        assert!(
1376            pandoc_result.is_empty(),
1377            "Pandoc flavor should resolve `#5.-five-things` against the heading slug: {pandoc_result:?}"
1378        );
1379    }
1380
1381    /// A link whose text contains an email address must still be checked under
1382    /// Pandoc — the `@` embedded in a word is not a citation marker, so the
1383    /// citation guard must not silence MD051 on a missing fragment.
1384    #[test]
1385    fn md051_pandoc_flags_missing_fragment_with_email_in_link_text() {
1386        use crate::config::MarkdownFlavor;
1387        let rule = MD051LinkFragments::new();
1388        let content = "# Title\n\n[contact user@example.com](#missing)\n";
1389
1390        let ctx_std = LintContext::new(content, MarkdownFlavor::Standard, None);
1391        let std_result = rule.check(&ctx_std).unwrap();
1392        assert_eq!(
1393            std_result.len(),
1394            1,
1395            "Standard flavor must flag the missing fragment: {std_result:?}"
1396        );
1397
1398        let ctx_pandoc = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1399        let pandoc_result = rule.check(&ctx_pandoc).unwrap();
1400        assert_eq!(
1401            pandoc_result.len(),
1402            1,
1403            "Pandoc flavor must also flag the missing fragment — link text with embedded email is not a citation: {pandoc_result:?}"
1404        );
1405    }
1406
1407    /// `[see @smith2020](#missing)` is a Markdown link, not a citation —
1408    /// Pandoc prefers the link interpretation when `[...]` is immediately
1409    /// followed by `(...)`. MD051 must still flag the missing fragment.
1410    #[test]
1411    fn md051_pandoc_flags_missing_fragment_with_citation_in_link_text() {
1412        use crate::config::MarkdownFlavor;
1413        let rule = MD051LinkFragments::new();
1414        let content = "# Title\n\n[see @smith2020](#missing)\n";
1415
1416        let ctx_std = LintContext::new(content, MarkdownFlavor::Standard, None);
1417        let std_result = rule.check(&ctx_std).unwrap();
1418        assert_eq!(
1419            std_result.len(),
1420            1,
1421            "Standard flavor must flag the missing fragment: {std_result:?}"
1422        );
1423
1424        let ctx_pandoc = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1425        let pandoc_result = rule.check(&ctx_pandoc).unwrap();
1426        assert_eq!(
1427            pandoc_result.len(),
1428            1,
1429            "Pandoc flavor must flag the missing fragment — `[label](url)` is a link, not a citation: {pandoc_result:?}"
1430        );
1431    }
1432
1433    /// Pandoc's auto_identifiers extension disambiguates duplicate headings by
1434    /// appending `-1`, `-2`, etc. A link to `#a.-1` must resolve against the
1435    /// second `# A.` heading.
1436    #[test]
1437    fn md051_pandoc_resolves_duplicate_heading_suffix_slug() {
1438        use crate::config::MarkdownFlavor;
1439        let rule = MD051LinkFragments::new();
1440        let content = "# A.\n\nfirst\n\n# A.\n\nsecond\n\n[first](#a.) and [second](#a.-1).\n";
1441
1442        let ctx_pandoc = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1443        let pandoc_result = rule.check(&ctx_pandoc).unwrap();
1444        assert!(
1445            pandoc_result.is_empty(),
1446            "Pandoc flavor should resolve `#a.` and `#a.-1` against duplicate headings: {pandoc_result:?}"
1447        );
1448
1449        let ctx_quarto = LintContext::new(content, MarkdownFlavor::Quarto, None);
1450        let quarto_result = rule.check(&ctx_quarto).unwrap();
1451        assert!(
1452            quarto_result.is_empty(),
1453            "Quarto flavor should also resolve duplicate-heading suffix slugs: {quarto_result:?}"
1454        );
1455    }
1456
1457    /// A link to `#a.-2` with only two `# A.` headings must still be flagged —
1458    /// only `-1` exists when there are two duplicates.
1459    #[test]
1460    fn md051_pandoc_flags_overshoot_duplicate_suffix() {
1461        use crate::config::MarkdownFlavor;
1462        let rule = MD051LinkFragments::new();
1463        let content = "# A.\n\n# A.\n\n[overshoot](#a.-2)\n";
1464
1465        let ctx_pandoc = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1466        let pandoc_result = rule.check(&ctx_pandoc).unwrap();
1467        assert_eq!(
1468            pandoc_result.len(),
1469            1,
1470            "Pandoc must flag `#a.-2` when only `-1` exists (two duplicates): {pandoc_result:?}"
1471        );
1472    }
1473}
rumdl_lib/rules/md051_link_fragments.rs

rumdl_lib/rules/
md051_link_fragments.rs