rumdl_lib/rules/
md051_link_fragments.rs

1use crate::rule::{CrossFileScope, FixCapability, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::rule_config_serde::RuleConfig;
3use crate::utils::anchor_styles::AnchorStyle;
4use crate::workspace_index::{CrossFileLinkIndex, FileIndex, HeadingIndex};
5use pulldown_cmark::LinkType;
6use regex::Regex;
7use serde::{Deserialize, Serialize};
8use std::collections::{HashMap, HashSet};
9use std::path::{Component, Path, PathBuf};
10use std::sync::LazyLock;
11
12/// Configuration for MD051 (Link fragments)
13#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
14#[serde(rename_all = "kebab-case")]
15pub struct MD051Config {
16    /// Anchor generation style to match the target platform
17    #[serde(default, alias = "anchor_style")]
18    pub anchor_style: AnchorStyle,
19
20    /// Match link fragments against headings case-insensitively.
21    ///
22    /// rumdl defaults to `true` (permissive matching), which deviates from
23    /// markdownlint's default of `false`. Set this to `false` for strict
24    /// markdownlint parity.
25    #[serde(default = "default_ignore_case", alias = "ignore_case")]
26    pub ignore_case: bool,
27
28    /// Optional regex applied to the fragment text (without the leading `#`).
29    /// Fragments that match are skipped — useful for runtime-generated anchors
30    /// (e.g., footnote IDs) that aren't visible to the linter.
31    #[serde(default, alias = "ignored_pattern")]
32    pub ignored_pattern: Option<String>,
33}
34
/// Serde default for `MD051Config::ignore_case`: matching is case-insensitive
/// unless the user opts out.
fn default_ignore_case() -> bool {
    true
}
38
39impl Default for MD051Config {
40    fn default() -> Self {
41        Self {
42            anchor_style: AnchorStyle::default(),
43            ignore_case: true,
44            ignored_pattern: None,
45        }
46    }
47}
48
49impl RuleConfig for MD051Config {
50    const RULE_NAME: &'static str = "MD051";
51}
52// HTML tags with id or name attributes (supports any HTML element, not just <a>)
53// This pattern only captures the first id/name attribute in a tag
54static HTML_ANCHOR_PATTERN: LazyLock<Regex> =
55    LazyLock::new(|| Regex::new(r#"\b(?:id|name)\s*=\s*["']([^"']+)["']"#).unwrap());
56
57// Attribute anchor pattern for kramdown/MkDocs { #id } syntax
58// Matches {#id} or { #id } with optional spaces, supports multiple anchors
59// Also supports classes and attributes: { #id .class key=value }
60static ATTR_ANCHOR_PATTERN: LazyLock<Regex> =
61    LazyLock::new(|| Regex::new(r#"\{\s*#([a-zA-Z0-9_][a-zA-Z0-9_-]*)[^}]*\}"#).unwrap());
62
63// Material for MkDocs setting anchor pattern: <!-- md:setting NAME -->
64// Used in headings to generate anchors for configuration option references
65static MD_SETTING_PATTERN: LazyLock<Regex> =
66    LazyLock::new(|| Regex::new(r"<!--\s*md:setting\s+([^\s]+)\s*-->").unwrap());
67
/// Lexically normalize a path by resolving `.` and `..` components.
///
/// No filesystem access is performed, so symlinks are not followed.
///
/// - `.` components are dropped.
/// - `..` removes the preceding normal component when there is one.
/// - `..` directly after a root or prefix is dropped (there is nothing above the root).
/// - `..` that cannot be resolved (the path starts with it, or it climbs above
///   the start of a relative path) is preserved, so `../a` stays `../a`
///   instead of silently collapsing to `a`.
fn normalize_path(path: &Path) -> PathBuf {
    let mut result = PathBuf::new();
    for component in path.components() {
        match component {
            Component::CurDir => {} // Skip .
            Component::ParentDir => match result.components().next_back() {
                // A real directory precedes us: step out of it.
                Some(Component::Normal(_)) => {
                    result.pop();
                }
                // Already at the root/prefix: `..` is a no-op there.
                Some(Component::RootDir | Component::Prefix(_)) => {}
                // Nothing left to pop (or only unresolved `..` so far): keep the `..`
                // so the path still points outside its starting directory.
                Some(Component::ParentDir | Component::CurDir) | None => {
                    result.push(Component::ParentDir.as_os_str());
                }
            },
            c => result.push(c.as_os_str()),
        }
    }
    result
}
82
83/// Rule MD051: Link fragments
84///
85/// See [docs/md051.md](../../docs/md051.md) for full documentation, configuration, and examples.
86///
87/// This rule validates that link anchors (the part after #) point to existing headings.
88/// Supports both same-document anchors and cross-file fragment links when linting a workspace.
89#[derive(Clone)]
90pub struct MD051LinkFragments {
91    config: MD051Config,
92    /// Pre-compiled `ignored_pattern` regex. `None` if the user did not set the
93    /// option, or if the pattern failed to compile (a `log::warn!` is emitted
94    /// once at construction time so the user can fix the config).
95    ignored_pattern_regex: Option<Regex>,
96}
97
/// Anchors harvested from one document, stored both lowercased and in their
/// original case.
///
/// The `*_exact` sets stay empty unless `ignore_case = false`, so the default
/// permissive path pays for no additional allocations.
struct AnchorSets {
    /// Lowercased anchors from headings and `{#id}` attributes.
    markdown_headings: HashSet<String>,
    /// Case-preserving counterpart of `markdown_headings`.
    markdown_headings_exact: HashSet<String>,
    /// Lowercased `id`/`name` values from HTML tags.
    html_anchors: HashSet<String>,
    /// Case-preserving counterpart of `html_anchors`.
    html_anchors_exact: HashSet<String>,
}
108
109impl Default for MD051LinkFragments {
110    fn default() -> Self {
111        Self::new()
112    }
113}
114
115impl MD051LinkFragments {
116    pub fn new() -> Self {
117        Self::from_config_struct(MD051Config::default())
118    }
119
120    /// Create with specific anchor style (other options use defaults)
121    pub fn with_anchor_style(style: AnchorStyle) -> Self {
122        Self::from_config_struct(MD051Config {
123            anchor_style: style,
124            ..MD051Config::default()
125        })
126    }
127
128    /// Create from a fully-populated config struct.
129    ///
130    /// Compiles `ignored_pattern` once. An invalid regex is logged via
131    /// `log::warn!` and the rule falls back to "no filter" so linting still
132    /// works rather than silently swallowing every fragment.
133    pub fn from_config_struct(config: MD051Config) -> Self {
134        let ignored_pattern_regex = config
135            .ignored_pattern
136            .as_deref()
137            .and_then(|pattern| match Regex::new(pattern) {
138                Ok(re) => Some(re),
139                Err(err) => {
140                    log::warn!(
141                        "Invalid ignored_pattern regex for MD051 ('{pattern}'): {err}. Falling back to no filter."
142                    );
143                    None
144                }
145            });
146        Self {
147            config,
148            ignored_pattern_regex,
149        }
150    }
151
152    /// Parse ATX heading content from blockquote inner text.
153    /// Strips the leading `# ` marker, optional closing hash sequence, and extracts custom IDs.
154    /// Returns `(clean_text, custom_id)` or None if not a heading.
155    fn parse_blockquote_heading(bq_content: &str) -> Option<(String, Option<String>)> {
156        static BQ_ATX_HEADING_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(#{1,6})\s+(.*)$").unwrap());
157
158        let trimmed = bq_content.trim();
159        let caps = BQ_ATX_HEADING_RE.captures(trimmed)?;
160        let mut rest = caps.get(2).map_or("", |m| m.as_str()).to_string();
161
162        // Strip optional closing hash sequence (CommonMark: trailing `#`s preceded by a space)
163        let rest_trimmed = rest.trim_end();
164        if let Some(last_hash_pos) = rest_trimmed.rfind('#') {
165            let after_hashes = &rest_trimmed[last_hash_pos..];
166            if after_hashes.chars().all(|c| c == '#') {
167                // Find where the consecutive trailing hashes start
168                let mut hash_start = last_hash_pos;
169                while hash_start > 0 && rest_trimmed.as_bytes()[hash_start - 1] == b'#' {
170                    hash_start -= 1;
171                }
172                // Must be preceded by whitespace (or be the entire content)
173                if hash_start == 0
174                    || rest_trimmed
175                        .as_bytes()
176                        .get(hash_start - 1)
177                        .is_some_and(u8::is_ascii_whitespace)
178                {
179                    rest = rest_trimmed[..hash_start].trim_end().to_string();
180                }
181            }
182        }
183
184        let (clean_text, custom_id) = crate::utils::header_id_utils::extract_header_id(&rest);
185        Some((clean_text, custom_id))
186    }
187
188    /// Insert a heading fragment with deduplication.
189    /// When `use_underscore_dedup` is true (Python-Markdown/MkDocs), the primary suffix
190    /// uses `_N` and `-N` is registered as a fallback. Otherwise, only `-N` is used.
191    ///
192    /// Empty fragments (from CJK-only headings) are handled specially for Python-Markdown:
193    /// the first empty slug gets `_1`, the second `_2`, etc. (matching Python-Markdown's
194    /// `unique()` function which always enters the dedup loop for falsy IDs).
195    fn insert_deduplicated_fragment(
196        fragment: String,
197        fragment_counts: &mut HashMap<String, usize>,
198        markdown_headings: &mut HashSet<String>,
199        mut markdown_headings_exact: Option<&mut HashSet<String>>,
200        use_underscore_dedup: bool,
201    ) {
202        // Slugs from generate_fragment are already lowercase, so the exact set
203        // ends up identical to the lowercased set for slugs. The exact set is
204        // only meaningfully different for case-preserving custom IDs (handled
205        // by the caller). Skipping the parallel inserts when the caller passes
206        // None avoids unnecessary allocations on the default ignore_case=true path.
207        let mut also_insert_exact = |form: &str| {
208            if let Some(set) = markdown_headings_exact.as_deref_mut() {
209                set.insert(form.to_string());
210            }
211        };
212
213        if fragment.is_empty() {
214            if !use_underscore_dedup {
215                return;
216            }
217            // Python-Markdown: empty slug → _1, _2, _3, ...
218            let count = fragment_counts.entry(fragment).or_insert(0);
219            *count += 1;
220            let formed = format!("_{count}");
221            also_insert_exact(&formed);
222            markdown_headings.insert(formed);
223            return;
224        }
225        if let Some(count) = fragment_counts.get_mut(&fragment) {
226            let suffix = *count;
227            *count += 1;
228            if use_underscore_dedup {
229                // Python-Markdown primary: heading_1, heading_2
230                let underscore_form = format!("{fragment}_{suffix}");
231                also_insert_exact(&underscore_form);
232                markdown_headings.insert(underscore_form);
233                // Also accept GitHub-style for compatibility
234                let dash_form = format!("{fragment}-{suffix}");
235                also_insert_exact(&dash_form);
236                markdown_headings.insert(dash_form);
237            } else {
238                // GitHub-style: heading-1, heading-2
239                let form = format!("{fragment}-{suffix}");
240                also_insert_exact(&form);
241                markdown_headings.insert(form);
242            }
243        } else {
244            fragment_counts.insert(fragment.clone(), 1);
245            also_insert_exact(&fragment);
246            markdown_headings.insert(fragment);
247        }
248    }
249
250    /// Add a heading to the cross-file index with proper deduplication.
251    /// When `use_underscore_dedup` is true (Python-Markdown/MkDocs), the primary anchor
252    /// uses `_N` and `-N` is registered as a fallback alias.
253    ///
254    /// Empty fragments (from CJK-only headings) get `_1`, `_2`, etc. in Python-Markdown mode.
255    fn add_heading_to_index(
256        fragment: &str,
257        text: &str,
258        custom_anchor: Option<String>,
259        line: usize,
260        fragment_counts: &mut HashMap<String, usize>,
261        file_index: &mut FileIndex,
262        use_underscore_dedup: bool,
263    ) {
264        if fragment.is_empty() {
265            if !use_underscore_dedup {
266                return;
267            }
268            // Python-Markdown: empty slug → _1, _2, _3, ...
269            let count = fragment_counts.entry(fragment.to_string()).or_insert(0);
270            *count += 1;
271            file_index.add_heading(HeadingIndex {
272                text: text.to_string(),
273                auto_anchor: format!("_{count}"),
274                custom_anchor,
275                line,
276                is_setext: false,
277            });
278            return;
279        }
280        if let Some(count) = fragment_counts.get_mut(fragment) {
281            let suffix = *count;
282            *count += 1;
283            let (primary, alias) = if use_underscore_dedup {
284                // Python-Markdown primary: heading_1; GitHub fallback: heading-1
285                (format!("{fragment}_{suffix}"), Some(format!("{fragment}-{suffix}")))
286            } else {
287                // GitHub-style primary: heading-1
288                (format!("{fragment}-{suffix}"), None)
289            };
290            file_index.add_heading(HeadingIndex {
291                text: text.to_string(),
292                auto_anchor: primary,
293                custom_anchor,
294                line,
295                is_setext: false,
296            });
297            if let Some(alias_anchor) = alias {
298                let heading_idx = file_index.headings.len() - 1;
299                file_index.add_anchor_alias(&alias_anchor, heading_idx);
300            }
301        } else {
302            fragment_counts.insert(fragment.to_string(), 1);
303            file_index.add_heading(HeadingIndex {
304                text: text.to_string(),
305                auto_anchor: fragment.to_string(),
306                custom_anchor,
307                line,
308                is_setext: false,
309            });
310        }
311    }
312
313    /// Extract all valid heading anchors from the document.
314    ///
315    /// Returns parallel lowercase + case-preserving sets so the same-document
316    /// check can honor `ignore_case` consistently with cross-file lookups.
317    /// The `*_exact` sets are only populated when `ignore_case = false` to
318    /// avoid unnecessary allocations on the default permissive path.
319    fn extract_headings_from_context(&self, ctx: &crate::lint_context::LintContext) -> AnchorSets {
320        let track_exact = !self.config.ignore_case;
321        let mut markdown_headings = HashSet::with_capacity(32);
322        let mut markdown_headings_exact = if track_exact {
323            HashSet::with_capacity(32)
324        } else {
325            HashSet::new()
326        };
327        let mut html_anchors = HashSet::with_capacity(16);
328        let mut html_anchors_exact = if track_exact {
329            HashSet::with_capacity(16)
330        } else {
331            HashSet::new()
332        };
333        let mut fragment_counts = std::collections::HashMap::new();
334        let use_underscore_dedup = self.config.anchor_style == AnchorStyle::PythonMarkdown;
335
336        for line_info in &ctx.lines {
337            if line_info.in_front_matter {
338                continue;
339            }
340
341            // Skip code blocks for anchor extraction
342            if line_info.in_code_block {
343                continue;
344            }
345
346            let content = line_info.content(ctx.content);
347            let bytes = content.as_bytes();
348
349            // Extract HTML anchor tags with id/name attributes
350            if bytes.contains(&b'<') && (content.contains("id=") || content.contains("name=")) {
351                // HTML spec: only the first id attribute per element is valid
352                // Process element by element to handle multiple id attributes correctly
353                let mut pos = 0;
354                while pos < content.len() {
355                    if let Some(start) = content[pos..].find('<') {
356                        let tag_start = pos + start;
357                        if let Some(end) = content[tag_start..].find('>') {
358                            let tag_end = tag_start + end + 1;
359                            let tag = &content[tag_start..tag_end];
360
361                            // Extract first id or name attribute from this tag
362                            if let Some(caps) = HTML_ANCHOR_PATTERN.find(tag) {
363                                let matched_text = caps.as_str();
364                                if let Some(caps) = HTML_ANCHOR_PATTERN.captures(matched_text)
365                                    && let Some(id_match) = caps.get(1)
366                                {
367                                    let id = id_match.as_str();
368                                    if !id.is_empty() {
369                                        html_anchors.insert(id.to_lowercase());
370                                        if track_exact {
371                                            html_anchors_exact.insert(id.to_string());
372                                        }
373                                    }
374                                }
375                            }
376                            pos = tag_end;
377                        } else {
378                            break;
379                        }
380                    } else {
381                        break;
382                    }
383                }
384            }
385
386            // Extract attribute anchors { #id } from non-heading lines
387            // Headings already have custom_id extracted below
388            if line_info.heading.is_none() && content.contains('{') && content.contains('#') {
389                for caps in ATTR_ANCHOR_PATTERN.captures_iter(content) {
390                    if let Some(id_match) = caps.get(1) {
391                        let id = id_match.as_str();
392                        markdown_headings.insert(id.to_lowercase());
393                        if track_exact {
394                            markdown_headings_exact.insert(id.to_string());
395                        }
396                    }
397                }
398            }
399
400            // Extract heading anchors from blockquote content
401            // Blockquote headings (e.g., "> ## Heading") are not detected by the main heading parser
402            // because the regex operates on the full line, but they still generate valid anchors
403            if line_info.heading.is_none()
404                && let Some(bq) = &line_info.blockquote
405                && let Some((clean_text, custom_id)) = Self::parse_blockquote_heading(&bq.content)
406            {
407                if let Some(id) = custom_id {
408                    markdown_headings.insert(id.to_lowercase());
409                    if track_exact {
410                        markdown_headings_exact.insert(id);
411                    }
412                }
413                let fragment = self.config.anchor_style.generate_fragment(&clean_text);
414                Self::insert_deduplicated_fragment(
415                    fragment,
416                    &mut fragment_counts,
417                    &mut markdown_headings,
418                    track_exact.then_some(&mut markdown_headings_exact),
419                    use_underscore_dedup,
420                );
421            }
422
423            // Extract markdown heading anchors
424            if let Some(heading) = &line_info.heading {
425                // Custom ID from {#custom-id} syntax
426                if let Some(custom_id) = &heading.custom_id {
427                    markdown_headings.insert(custom_id.to_lowercase());
428                    if track_exact {
429                        markdown_headings_exact.insert(custom_id.clone());
430                    }
431                }
432
433                // Generate fragment directly from heading text
434                // Note: HTML stripping was removed because it interfered with arrow patterns
435                // like <-> and placeholders like <FILE>. The anchor styles handle these correctly.
436                let fragment = self.config.anchor_style.generate_fragment(&heading.text);
437
438                Self::insert_deduplicated_fragment(
439                    fragment,
440                    &mut fragment_counts,
441                    &mut markdown_headings,
442                    track_exact.then_some(&mut markdown_headings_exact),
443                    use_underscore_dedup,
444                );
445            }
446        }
447
448        AnchorSets {
449            markdown_headings,
450            markdown_headings_exact,
451            html_anchors,
452            html_anchors_exact,
453        }
454    }
455
456    /// Fast check if URL is external (doesn't need to be validated)
457    #[inline]
458    fn is_external_url_fast(url: &str) -> bool {
459        // Quick prefix checks for common protocols
460        url.starts_with("http://")
461            || url.starts_with("https://")
462            || url.starts_with("ftp://")
463            || url.starts_with("mailto:")
464            || url.starts_with("tel:")
465            || url.starts_with("//")
466    }
467
468    /// Resolve a path by trying markdown extensions if it has no extension
469    ///
470    /// For extension-less paths (e.g., `page`), returns a list of paths to try:
471    /// 1. The original path (in case it's already in the index)
472    /// 2. The path with each markdown extension (e.g., `page.md`, `page.markdown`, etc.)
473    ///
474    /// For paths with extensions, returns just the original path.
475    #[inline]
476    fn resolve_path_with_extensions(path: &Path, extensions: &[&str]) -> Vec<PathBuf> {
477        if path.extension().is_none() {
478            // Extension-less path - try with markdown extensions
479            let mut paths = Vec::with_capacity(extensions.len() + 1);
480            // First try the exact path (in case it's already in the index)
481            paths.push(path.to_path_buf());
482            // Then try with each markdown extension
483            for ext in extensions {
484                let path_with_ext = path.with_extension(&ext[1..]); // Remove leading dot
485                paths.push(path_with_ext);
486            }
487            paths
488        } else {
489            // Path has extension - use as-is
490            vec![path.to_path_buf()]
491        }
492    }
493
494    /// Check if a path part (without fragment) is an extension-less path
495    ///
496    /// Extension-less paths are potential cross-file links that need resolution
497    /// with markdown extensions (e.g., `page#section` -> `page.md#section`).
498    ///
499    /// We recognize them as extension-less if:
500    /// 1. Path has no extension (no dot)
501    /// 2. Path is not empty
502    /// 3. Path doesn't look like a query parameter or special syntax
503    /// 4. Path contains at least one alphanumeric character (valid filename)
504    /// 5. Path contains only valid path characters (alphanumeric, slashes, hyphens, underscores)
505    ///
506    /// Optimized: single pass through characters to check both conditions.
507    #[inline]
508    fn is_extensionless_path(path_part: &str) -> bool {
509        // Quick rejections for common non-extension-less cases
510        if path_part.is_empty()
511            || path_part.contains('.')
512            || path_part.contains('?')
513            || path_part.contains('&')
514            || path_part.contains('=')
515        {
516            return false;
517        }
518
519        // Single pass: check for alphanumeric and validate all characters
520        let mut has_alphanumeric = false;
521        for c in path_part.chars() {
522            if c.is_alphanumeric() {
523                has_alphanumeric = true;
524            } else if !matches!(c, '/' | '\\' | '-' | '_') {
525                // Invalid character found - early exit
526                return false;
527            }
528        }
529
530        // Must have at least one alphanumeric character to be a valid filename
531        has_alphanumeric
532    }
533
534    /// Check if URL is a cross-file link (contains a file path before #)
535    #[inline]
536    fn is_cross_file_link(url: &str) -> bool {
537        if let Some(fragment_pos) = url.find('#') {
538            let path_part = &url[..fragment_pos];
539
540            // If there's no path part, it's just a fragment (#heading)
541            if path_part.is_empty() {
542                return false;
543            }
544
545            // Check for Liquid syntax used by Jekyll and other static site generators
546            // Liquid tags: {% ... %} for control flow and includes
547            // Liquid variables: {{ ... }} for outputting values
548            // These are template directives that reference external content and should be skipped
549            // We check for proper bracket order to avoid false positives
550            if let Some(tag_start) = path_part.find("{%")
551                && path_part[tag_start + 2..].contains("%}")
552            {
553                return true;
554            }
555            if let Some(var_start) = path_part.find("{{")
556                && path_part[var_start + 2..].contains("}}")
557            {
558                return true;
559            }
560
561            // Check if it's an absolute path (starts with /)
562            // These are links to other pages on the same site
563            if path_part.starts_with('/') {
564                return true;
565            }
566
567            // Check if it looks like a file path:
568            // - Contains a file extension (dot followed by letters)
569            // - Contains path separators
570            // - Contains relative path indicators
571            // - OR is an extension-less path with a fragment (GitHub-style: page#section)
572            let has_extension = path_part.contains('.')
573                && (
574                    // Has file extension pattern (handle query parameters by splitting on them first)
575                    {
576                    let clean_path = path_part.split('?').next().unwrap_or(path_part);
577                    // Handle files starting with dot
578                    if let Some(after_dot) = clean_path.strip_prefix('.') {
579                        let dots_count = clean_path.matches('.').count();
580                        if dots_count == 1 {
581                            // Could be ".ext" (file extension) or ".hidden" (hidden file)
582                            // Treat short alphanumeric suffixes as file extensions
583                            !after_dot.is_empty() && after_dot.len() <= 10 &&
584                            after_dot.chars().all(|c| c.is_ascii_alphanumeric())
585                        } else {
586                            // Hidden file with extension like ".hidden.txt"
587                            clean_path.split('.').next_back().is_some_and(|ext| {
588                                !ext.is_empty() && ext.len() <= 10 && ext.chars().all(|c| c.is_ascii_alphanumeric())
589                            })
590                        }
591                    } else {
592                        // Regular file path
593                        clean_path.split('.').next_back().is_some_and(|ext| {
594                            !ext.is_empty() && ext.len() <= 10 && ext.chars().all(|c| c.is_ascii_alphanumeric())
595                        })
596                    }
597                } ||
598                // Or contains path separators
599                path_part.contains('/') || path_part.contains('\\') ||
600                // Or starts with relative path indicators
601                path_part.starts_with("./") || path_part.starts_with("../")
602                );
603
604            // Extension-less paths with fragments are potential cross-file links
605            // This supports GitHub-style links like [link](page#section) that resolve to page.md#section
606            let is_extensionless = Self::is_extensionless_path(path_part);
607
608            has_extension || is_extensionless
609        } else {
610            false
611        }
612    }
613}
614
615impl Rule for MD051LinkFragments {
616    fn name(&self) -> &'static str {
617        "MD051"
618    }
619
620    fn description(&self) -> &'static str {
621        "Link fragments should reference valid headings"
622    }
623
624    fn fix_capability(&self) -> FixCapability {
625        FixCapability::Unfixable
626    }
627
628    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
629        // Skip if no link fragments present
630        if !ctx.likely_has_links_or_images() {
631            return true;
632        }
633        // Check for # character (fragments)
634        !ctx.has_char('#')
635    }
636
637    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
638        let mut warnings = Vec::new();
639
640        if ctx.content.is_empty() || ctx.links.is_empty() || self.should_skip(ctx) {
641            return Ok(warnings);
642        }
643
644        let AnchorSets {
645            markdown_headings,
646            markdown_headings_exact,
647            html_anchors,
648            html_anchors_exact,
649        } = self.extract_headings_from_context(ctx);
650        let ignored_pattern = self.ignored_pattern_regex.as_ref();
651
652        for link in &ctx.links {
653            if link.is_reference {
654                continue;
655            }
656
657            // Skip links inside PyMdown blocks (MkDocs flavor)
658            if ctx.line_info(link.line).is_some_and(|info| info.in_pymdown_block) {
659                continue;
660            }
661
662            // Skip wiki-links - they reference other files and may have their own fragment validation
663            if matches!(link.link_type, LinkType::WikiLink { .. }) {
664                continue;
665            }
666
667            // Skip links inside Jinja templates
668            if ctx.is_in_jinja_range(link.byte_offset) {
669                continue;
670            }
671
672            // Skip Pandoc/Quarto citations ([@citation], @citation)
673            // Citations are bibliography references, not link fragments
674            if ctx.flavor.is_pandoc_compatible() && ctx.is_in_citation(link.byte_offset) {
675                continue;
676            }
677
678            // Skip links inside shortcodes ({{< ... >}} or {{% ... %}})
679            // Shortcodes may contain template syntax that looks like fragment links
680            if ctx.is_in_shortcode(link.byte_offset) {
681                continue;
682            }
683
684            let url = &link.url;
685
686            // Skip links without fragments or external URLs
687            if !url.contains('#') || Self::is_external_url_fast(url) {
688                continue;
689            }
690
691            // Skip mdbook template placeholders ({{#VARIABLE}})
692            // mdbook uses {{#VARIABLE}} syntax where # is part of the template, not a fragment
693            if url.contains("{{#") && url.contains("}}") {
694                continue;
695            }
696
697            // Resolve link fragments against Pandoc heading slugs. Pandoc/Quarto
698            // auto-generate slugs that diverge from GitHub style for headings that
699            // contain punctuation (e.g. `# 5. Five Things` becomes `5.-five-things`
700            // under Pandoc but `5-five-things` under GitHub). Treat such fragments
701            // as resolved when running under a Pandoc-compatible flavor.
702            if ctx.flavor.is_pandoc_compatible()
703                && let Some(frag) = url.strip_prefix('#')
704                && ctx.has_pandoc_slug(frag)
705            {
706                continue;
707            }
708
709            // Skip Quarto/RMarkdown cross-references (@fig-, @tbl-, @sec-, @eq-, etc.)
710            // These are special cross-reference syntax, not HTML anchors
711            // Format: @prefix-identifier or just @identifier
712            if url.starts_with('@') {
713                continue;
714            }
715
716            // Cross-file links are valid if the file exists (not checked here)
717            if Self::is_cross_file_link(url) {
718                continue;
719            }
720
721            let Some(fragment_pos) = url.find('#') else {
722                continue;
723            };
724
725            let fragment = &url[fragment_pos + 1..];
726
727            // Skip Liquid template variables and filters
728            if (url.contains("{{") && fragment.contains('|')) || fragment.ends_with("}}") || fragment.ends_with("%}") {
729                continue;
730            }
731
732            if fragment.is_empty() {
733                continue;
734            }
735
736            // Skip MkDocs runtime-generated anchors:
737            // - #fn:NAME, #fnref:NAME from the footnotes extension
738            // - #+key.path or #+key:value from Material for MkDocs option references
739            //   (e.g., #+type:abstract, #+toc.slugify, #+pymdownx.highlight.anchor_linenums)
740            if ctx.flavor == crate::config::MarkdownFlavor::MkDocs
741                && (fragment.starts_with("fn:")
742                    || fragment.starts_with("fnref:")
743                    || (fragment.starts_with('+') && (fragment.contains('.') || fragment.contains(':'))))
744            {
745                continue;
746            }
747
748            // Skip fragments matching the user-configured ignored_pattern
749            if ignored_pattern.is_some_and(|re| re.is_match(fragment)) {
750                continue;
751            }
752
753            // Validate fragment against document headings. Both HTML and
754            // markdown anchors honor the `ignore_case` option, mirroring
755            // markdownlint and the cross-file path.
756            let found = if self.config.ignore_case {
757                let lower = fragment.to_lowercase();
758                html_anchors.contains(&lower) || markdown_headings.contains(&lower)
759            } else {
760                html_anchors_exact.contains(fragment) || markdown_headings_exact.contains(fragment)
761            };
762
763            if !found {
764                warnings.push(LintWarning {
765                    rule_name: Some(self.name().to_string()),
766                    message: format!("Link anchor '#{fragment}' does not exist in document headings"),
767                    line: link.line,
768                    column: link.start_col + 1,
769                    end_line: link.line,
770                    end_column: link.end_col + 1,
771                    severity: Severity::Error,
772                    fix: None,
773                });
774            }
775        }
776
777        Ok(warnings)
778    }
779
780    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
781        // MD051 does not provide auto-fix
782        // Link fragment corrections require human judgment to avoid incorrect fixes
783        Ok(ctx.content.to_string())
784    }
785
786    fn as_any(&self) -> &dyn std::any::Any {
787        self
788    }
789
790    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
791    where
792        Self: Sized,
793    {
794        let mut rule_config = crate::rule_config_serde::load_rule_config::<MD051Config>(config);
795
796        // When no explicit anchor style is configured (the user didn't override the default),
797        // and a flavor is active, fall back to the flavor's native anchor generation.
798        let explicit_style_present = config
799            .rules
800            .get("MD051")
801            .is_some_and(|rc| rc.values.contains_key("anchor-style") || rc.values.contains_key("anchor_style"));
802        if !explicit_style_present {
803            rule_config.anchor_style = match config.global.flavor {
804                crate::config::MarkdownFlavor::MkDocs => AnchorStyle::PythonMarkdown,
805                crate::config::MarkdownFlavor::Kramdown => AnchorStyle::KramdownGfm,
806                _ => AnchorStyle::GitHub,
807            };
808        }
809
810        Box::new(MD051LinkFragments::from_config_struct(rule_config))
811    }
812
813    fn category(&self) -> RuleCategory {
814        RuleCategory::Link
815    }
816
817    fn cross_file_scope(&self) -> CrossFileScope {
818        CrossFileScope::Workspace
819    }
820
821    fn contribute_to_index(&self, ctx: &crate::lint_context::LintContext, file_index: &mut FileIndex) {
822        let mut fragment_counts = HashMap::new();
823        let use_underscore_dedup = self.config.anchor_style == AnchorStyle::PythonMarkdown;
824
825        // Extract headings, HTML anchors, and attribute anchors (for other files to reference)
826        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
827            if line_info.in_front_matter {
828                continue;
829            }
830
831            // Skip code blocks for anchor extraction
832            if line_info.in_code_block {
833                continue;
834            }
835
836            let content = line_info.content(ctx.content);
837
838            // Extract HTML anchors (id or name attributes on any element)
839            if content.contains('<') && (content.contains("id=") || content.contains("name=")) {
840                let mut pos = 0;
841                while pos < content.len() {
842                    if let Some(start) = content[pos..].find('<') {
843                        let tag_start = pos + start;
844                        if let Some(end) = content[tag_start..].find('>') {
845                            let tag_end = tag_start + end + 1;
846                            let tag = &content[tag_start..tag_end];
847
848                            if let Some(caps) = HTML_ANCHOR_PATTERN.captures(tag)
849                                && let Some(id_match) = caps.get(1)
850                            {
851                                file_index.add_html_anchor(id_match.as_str());
852                            }
853                            pos = tag_end;
854                        } else {
855                            break;
856                        }
857                    } else {
858                        break;
859                    }
860                }
861            }
862
863            // Extract attribute anchors { #id } on non-heading lines
864            // Headings already have custom_id extracted via heading.custom_id
865            if line_info.heading.is_none() && content.contains('{') && content.contains('#') {
866                for caps in ATTR_ANCHOR_PATTERN.captures_iter(content) {
867                    if let Some(id_match) = caps.get(1) {
868                        file_index.add_attribute_anchor(id_match.as_str());
869                    }
870                }
871            }
872
873            // Extract heading anchors from blockquote content
874            if line_info.heading.is_none()
875                && let Some(bq) = &line_info.blockquote
876                && let Some((clean_text, custom_id)) = Self::parse_blockquote_heading(&bq.content)
877            {
878                let fragment = self.config.anchor_style.generate_fragment(&clean_text);
879                Self::add_heading_to_index(
880                    &fragment,
881                    &clean_text,
882                    custom_id,
883                    line_idx + 1,
884                    &mut fragment_counts,
885                    file_index,
886                    use_underscore_dedup,
887                );
888            }
889
890            // Extract heading anchors
891            if let Some(heading) = &line_info.heading {
892                let fragment = self.config.anchor_style.generate_fragment(&heading.text);
893
894                Self::add_heading_to_index(
895                    &fragment,
896                    &heading.text,
897                    heading.custom_id.clone(),
898                    line_idx + 1,
899                    &mut fragment_counts,
900                    file_index,
901                    use_underscore_dedup,
902                );
903
904                // Extract Material for MkDocs setting anchors from headings.
905                // These are rendered as anchors at build time by Material's JS.
906                // Most references use #+key.path format (handled by the skip logic in check()),
907                // but this extraction enables cross-file validation for direct #key.path references.
908                if ctx.flavor == crate::config::MarkdownFlavor::MkDocs
909                    && let Some(caps) = MD_SETTING_PATTERN.captures(content)
910                    && let Some(name) = caps.get(1)
911                {
912                    file_index.add_html_anchor(name.as_str());
913                }
914            }
915        }
916
917        // Extract cross-file links (for validation against other files)
918        for link in &ctx.links {
919            if link.is_reference {
920                continue;
921            }
922
923            // Skip links inside PyMdown blocks (MkDocs flavor)
924            if ctx.line_info(link.line).is_some_and(|info| info.in_pymdown_block) {
925                continue;
926            }
927
928            // Skip wiki-links - they use a different linking system and are not validated
929            // as relative file paths
930            if matches!(link.link_type, LinkType::WikiLink { .. }) {
931                continue;
932            }
933
934            let url = &link.url;
935
936            // Skip external URLs
937            if Self::is_external_url_fast(url) {
938                continue;
939            }
940
941            // Only process cross-file links with fragments
942            if Self::is_cross_file_link(url)
943                && let Some(fragment_pos) = url.find('#')
944            {
945                let path_part = &url[..fragment_pos];
946                let fragment = &url[fragment_pos + 1..];
947
948                // Skip empty fragments or template syntax
949                if fragment.is_empty() || fragment.contains("{{") || fragment.contains("{%") {
950                    continue;
951                }
952
953                file_index.add_cross_file_link(CrossFileLinkIndex {
954                    target_path: path_part.to_string(),
955                    fragment: fragment.to_string(),
956                    line: link.line,
957                    column: link.start_col + 1,
958                });
959            }
960        }
961    }
962
963    fn cross_file_check(
964        &self,
965        file_path: &Path,
966        file_index: &FileIndex,
967        workspace_index: &crate::workspace_index::WorkspaceIndex,
968    ) -> LintResult {
969        let mut warnings = Vec::new();
970
971        // Supported markdown file extensions (with leading dot, matching MD057)
972        const MARKDOWN_EXTENSIONS: &[&str] = &[
973            ".md",
974            ".markdown",
975            ".mdx",
976            ".mkd",
977            ".mkdn",
978            ".mdown",
979            ".mdwn",
980            ".qmd",
981            ".rmd",
982        ];
983
984        let ignored_pattern = self.ignored_pattern_regex.as_ref();
985        let ignore_case = self.config.ignore_case;
986
987        // Check each cross-file link in this file
988        for cross_link in &file_index.cross_file_links {
989            // Skip cross-file links without fragments - nothing to validate
990            if cross_link.fragment.is_empty() {
991                continue;
992            }
993
994            // Honor `ignored-pattern`: skip fragments matching the configured regex.
995            if ignored_pattern.is_some_and(|re| re.is_match(&cross_link.fragment)) {
996                continue;
997            }
998
999            // Resolve the target file path relative to the current file
1000            let base_target_path = if let Some(parent) = file_path.parent() {
1001                parent.join(&cross_link.target_path)
1002            } else {
1003                Path::new(&cross_link.target_path).to_path_buf()
1004            };
1005
1006            // Normalize the path (remove . and ..)
1007            let base_target_path = normalize_path(&base_target_path);
1008
1009            // For extension-less paths, try resolving with markdown extensions
1010            // This handles GitHub-style links like [link](page#section) -> page.md#section
1011            let target_paths_to_try = Self::resolve_path_with_extensions(&base_target_path, MARKDOWN_EXTENSIONS);
1012
1013            // Try to find the target file in the workspace index
1014            let mut target_file_index = None;
1015
1016            for target_path in &target_paths_to_try {
1017                if let Some(index) = workspace_index.get_file(target_path) {
1018                    target_file_index = Some(index);
1019                    break;
1020                }
1021            }
1022
1023            if let Some(target_file_index) = target_file_index {
1024                // Check if the fragment matches any heading in the target file (O(1) lookup)
1025                if !target_file_index.has_anchor_with_case(&cross_link.fragment, ignore_case) {
1026                    warnings.push(LintWarning {
1027                        rule_name: Some(self.name().to_string()),
1028                        line: cross_link.line,
1029                        column: cross_link.column,
1030                        end_line: cross_link.line,
1031                        end_column: cross_link.column + cross_link.target_path.len() + 1 + cross_link.fragment.len(),
1032                        message: format!(
1033                            "Link fragment '{}' not found in '{}'",
1034                            cross_link.fragment, cross_link.target_path
1035                        ),
1036                        severity: Severity::Error,
1037                        fix: None,
1038                    });
1039                }
1040            }
1041            // If target file not in index, skip (could be external file or not in workspace)
1042        }
1043
1044        Ok(warnings)
1045    }
1046
1047    fn default_config_section(&self) -> Option<(String, toml::Value)> {
1048        let table = crate::rule_config_serde::config_schema_table(&MD051Config::default())?;
1049        if table.is_empty() {
1050            None
1051        } else {
1052            Some((MD051Config::RULE_NAME.to_string(), toml::Value::Table(table)))
1053        }
1054    }
1055}
1056
1057#[cfg(test)]
1058mod tests {
1059    use super::*;
1060    use crate::lint_context::LintContext;
1061
1062    #[test]
1063    fn test_quarto_cross_references() {
1064        let rule = MD051LinkFragments::new();
1065
1066        // Test that Quarto cross-references are skipped
1067        let content = r#"# Test Document
1068
1069## Figures
1070
1071See [@fig-plot] for the visualization.
1072
1073More details in [@tbl-results] and [@sec-methods].
1074
1075The equation [@eq-regression] shows the relationship.
1076
1077Reference to [@lst-code] for implementation."#;
1078        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Quarto, None);
1079        let result = rule.check(&ctx).unwrap();
1080        assert!(
1081            result.is_empty(),
1082            "Quarto cross-references (@fig-, @tbl-, @sec-, @eq-) should not trigger MD051 warnings. Got {} warnings",
1083            result.len()
1084        );
1085
1086        // Test that normal anchors still work
1087        let content_with_anchor = r#"# Test
1088
1089See [link](#test) for details."#;
1090        let ctx_anchor = LintContext::new(content_with_anchor, crate::config::MarkdownFlavor::Quarto, None);
1091        let result_anchor = rule.check(&ctx_anchor).unwrap();
1092        assert!(result_anchor.is_empty(), "Valid anchor should not trigger warning");
1093
1094        // Test that invalid anchors are still flagged
1095        let content_invalid = r#"# Test
1096
1097See [link](#nonexistent) for details."#;
1098        let ctx_invalid = LintContext::new(content_invalid, crate::config::MarkdownFlavor::Quarto, None);
1099        let result_invalid = rule.check(&ctx_invalid).unwrap();
1100        assert_eq!(result_invalid.len(), 1, "Invalid anchor should still trigger warning");
1101    }
1102
1103    #[test]
1104    fn test_jsx_in_heading_anchor() {
1105        // Issue #510: JSX/HTML tags in headings should be stripped for anchor generation
1106        let rule = MD051LinkFragments::new();
1107
1108        // Self-closing JSX tag
1109        let content = "# Test\n\n### `retentionPolicy`<Component />\n\n[link](#retentionpolicy)\n";
1110        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1111        let result = rule.check(&ctx).unwrap();
1112        assert!(
1113            result.is_empty(),
1114            "JSX self-closing tag should be stripped from anchor: got {result:?}"
1115        );
1116
1117        // JSX with attributes
1118        let content2 =
1119            "### retentionPolicy<HeaderTag type=\"danger\" text=\"required\" />\n\n[link](#retentionpolicy)\n";
1120        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
1121        let result2 = rule.check(&ctx2).unwrap();
1122        assert!(
1123            result2.is_empty(),
1124            "JSX tag with attributes should be stripped from anchor: got {result2:?}"
1125        );
1126
1127        // HTML tags with inner text preserved
1128        let content3 = "### Test <span>extra</span>\n\n[link](#test-extra)\n";
1129        let ctx3 = LintContext::new(content3, crate::config::MarkdownFlavor::Standard, None);
1130        let result3 = rule.check(&ctx3).unwrap();
1131        assert!(
1132            result3.is_empty(),
1133            "HTML tag content should be preserved in anchor: got {result3:?}"
1134        );
1135    }
1136
1137    // Cross-file validation tests
1138    #[test]
1139    fn test_cross_file_scope() {
1140        let rule = MD051LinkFragments::new();
1141        assert_eq!(rule.cross_file_scope(), CrossFileScope::Workspace);
1142    }
1143
1144    #[test]
1145    fn test_contribute_to_index_extracts_headings() {
1146        let rule = MD051LinkFragments::new();
1147        let content = "# First Heading\n\n# Second { #custom }\n\n## Third";
1148        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1149
1150        let mut file_index = FileIndex::new();
1151        rule.contribute_to_index(&ctx, &mut file_index);
1152
1153        assert_eq!(file_index.headings.len(), 3);
1154        assert_eq!(file_index.headings[0].text, "First Heading");
1155        assert_eq!(file_index.headings[0].auto_anchor, "first-heading");
1156        assert!(file_index.headings[0].custom_anchor.is_none());
1157
1158        assert_eq!(file_index.headings[1].text, "Second");
1159        assert_eq!(file_index.headings[1].custom_anchor, Some("custom".to_string()));
1160
1161        assert_eq!(file_index.headings[2].text, "Third");
1162    }
1163
1164    #[test]
1165    fn test_contribute_to_index_extracts_cross_file_links() {
1166        let rule = MD051LinkFragments::new();
1167        let content = "See [docs](other.md#installation) and [more](../guide.md#getting-started)";
1168        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1169
1170        let mut file_index = FileIndex::new();
1171        rule.contribute_to_index(&ctx, &mut file_index);
1172
1173        assert_eq!(file_index.cross_file_links.len(), 2);
1174        assert_eq!(file_index.cross_file_links[0].target_path, "other.md");
1175        assert_eq!(file_index.cross_file_links[0].fragment, "installation");
1176        assert_eq!(file_index.cross_file_links[1].target_path, "../guide.md");
1177        assert_eq!(file_index.cross_file_links[1].fragment, "getting-started");
1178    }
1179
1180    #[test]
1181    fn test_cross_file_check_valid_fragment() {
1182        use crate::workspace_index::WorkspaceIndex;
1183
1184        let rule = MD051LinkFragments::new();
1185
1186        // Build workspace index with target file
1187        let mut workspace_index = WorkspaceIndex::new();
1188        let mut target_file_index = FileIndex::new();
1189        target_file_index.add_heading(HeadingIndex {
1190            text: "Installation Guide".to_string(),
1191            auto_anchor: "installation-guide".to_string(),
1192            custom_anchor: None,
1193            line: 1,
1194            is_setext: false,
1195        });
1196        workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
1197
1198        // Create a FileIndex for the file being checked
1199        let mut current_file_index = FileIndex::new();
1200        current_file_index.add_cross_file_link(CrossFileLinkIndex {
1201            target_path: "install.md".to_string(),
1202            fragment: "installation-guide".to_string(),
1203            line: 3,
1204            column: 5,
1205        });
1206
1207        let warnings = rule
1208            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
1209            .unwrap();
1210
1211        // Should find no warnings since fragment exists
1212        assert!(warnings.is_empty());
1213    }
1214
1215    #[test]
1216    fn test_cross_file_check_invalid_fragment() {
1217        use crate::workspace_index::WorkspaceIndex;
1218
1219        let rule = MD051LinkFragments::new();
1220
1221        // Build workspace index with target file
1222        let mut workspace_index = WorkspaceIndex::new();
1223        let mut target_file_index = FileIndex::new();
1224        target_file_index.add_heading(HeadingIndex {
1225            text: "Installation Guide".to_string(),
1226            auto_anchor: "installation-guide".to_string(),
1227            custom_anchor: None,
1228            line: 1,
1229            is_setext: false,
1230        });
1231        workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
1232
1233        // Create a FileIndex with a cross-file link pointing to non-existent fragment
1234        let mut current_file_index = FileIndex::new();
1235        current_file_index.add_cross_file_link(CrossFileLinkIndex {
1236            target_path: "install.md".to_string(),
1237            fragment: "nonexistent".to_string(),
1238            line: 3,
1239            column: 5,
1240        });
1241
1242        let warnings = rule
1243            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
1244            .unwrap();
1245
1246        // Should find one warning since fragment doesn't exist
1247        assert_eq!(warnings.len(), 1);
1248        assert!(warnings[0].message.contains("nonexistent"));
1249        assert!(warnings[0].message.contains("install.md"));
1250    }
1251
1252    #[test]
1253    fn test_cross_file_check_custom_anchor_match() {
1254        use crate::workspace_index::WorkspaceIndex;
1255
1256        let rule = MD051LinkFragments::new();
1257
1258        // Build workspace index with target file that has custom anchor
1259        let mut workspace_index = WorkspaceIndex::new();
1260        let mut target_file_index = FileIndex::new();
1261        target_file_index.add_heading(HeadingIndex {
1262            text: "Installation Guide".to_string(),
1263            auto_anchor: "installation-guide".to_string(),
1264            custom_anchor: Some("install".to_string()),
1265            line: 1,
1266            is_setext: false,
1267        });
1268        workspace_index.insert_file(PathBuf::from("docs/install.md"), target_file_index);
1269
1270        // Link uses custom anchor
1271        let mut current_file_index = FileIndex::new();
1272        current_file_index.add_cross_file_link(CrossFileLinkIndex {
1273            target_path: "install.md".to_string(),
1274            fragment: "install".to_string(),
1275            line: 3,
1276            column: 5,
1277        });
1278
1279        let warnings = rule
1280            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
1281            .unwrap();
1282
1283        // Should find no warnings since custom anchor matches
1284        assert!(warnings.is_empty());
1285    }
1286
1287    #[test]
1288    fn test_cross_file_check_target_not_in_workspace() {
1289        use crate::workspace_index::WorkspaceIndex;
1290
1291        let rule = MD051LinkFragments::new();
1292
1293        // Empty workspace index
1294        let workspace_index = WorkspaceIndex::new();
1295
1296        // Link to file not in workspace
1297        let mut current_file_index = FileIndex::new();
1298        current_file_index.add_cross_file_link(CrossFileLinkIndex {
1299            target_path: "external.md".to_string(),
1300            fragment: "heading".to_string(),
1301            line: 3,
1302            column: 5,
1303        });
1304
1305        let warnings = rule
1306            .cross_file_check(Path::new("docs/readme.md"), &current_file_index, &workspace_index)
1307            .unwrap();
1308
1309        // Should not warn about files not in workspace
1310        assert!(warnings.is_empty());
1311    }
1312
1313    #[test]
1314    fn test_wikilinks_skipped_in_check() {
1315        // Wikilinks should not trigger MD051 warnings for missing fragments
1316        let rule = MD051LinkFragments::new();
1317
1318        let content = r#"# Test Document
1319
1320## Valid Heading
1321
1322[[Microsoft#Windows OS]]
1323[[SomePage#section]]
1324[[page|Display Text]]
1325[[path/to/page#section]]
1326"#;
1327        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1328        let result = rule.check(&ctx).unwrap();
1329
1330        assert!(
1331            result.is_empty(),
1332            "Wikilinks should not trigger MD051 warnings. Got: {result:?}"
1333        );
1334    }
1335
1336    #[test]
1337    fn test_wikilinks_not_added_to_cross_file_index() {
1338        // Wikilinks should not be added to the cross-file link index
1339        let rule = MD051LinkFragments::new();
1340
1341        let content = r#"# Test Document
1342
1343[[Microsoft#Windows OS]]
1344[[SomePage#section]]
1345[Regular Link](other.md#section)
1346"#;
1347        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1348
1349        let mut file_index = FileIndex::new();
1350        rule.contribute_to_index(&ctx, &mut file_index);
1351
1352        // Should only have one cross-file link (the regular markdown link)
1353        // Wikilinks should not be added
1354        let cross_file_links = &file_index.cross_file_links;
1355        assert_eq!(
1356            cross_file_links.len(),
1357            1,
1358            "Only regular markdown links should be indexed, not wikilinks. Got: {cross_file_links:?}"
1359        );
1360        assert_eq!(file_index.cross_file_links[0].target_path, "other.md");
1361        assert_eq!(file_index.cross_file_links[0].fragment, "section");
1362    }
1363
1364    #[test]
1365    fn test_pandoc_flavor_skips_citations() {
1366        // Pandoc citations ([@key]) are bibliography references, not link fragments.
1367        // MD051 should skip them under Pandoc flavor, mirroring the Quarto skip behavior
1368        // tested in test_quarto_cross_references.
1369        let rule = MD051LinkFragments::new();
1370        let content = "# Test Document\n\nSee [@smith2020] for details.\n";
1371        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Pandoc, None);
1372        let result = rule.check(&ctx).unwrap();
1373        assert!(
1374            result.is_empty(),
1375            "MD051 should skip Pandoc citations under Pandoc flavor: {result:?}"
1376        );
1377    }
1378
1379    #[test]
1380    fn md051_pandoc_resolves_pandoc_slug_diverging_from_github() {
1381        // The Pandoc heading slug for `# 5. Five Things` is `5.-five-things` (the
1382        // dot is preserved per Pandoc's rule of keeping `.`/`_`/`-`), whereas the
1383        // GitHub anchor for the same heading is `5-five-things` (the dot is
1384        // stripped). A link to `#5.-five-things` would be flagged under the
1385        // GitHub default but must be accepted under Pandoc-compatible flavors via
1386        // the `has_pandoc_slug` short-circuit.
1387        use crate::config::MarkdownFlavor;
1388        let rule = MD051LinkFragments::new();
1389        let content = "# 5. Five Things\n\nSee [details](#5.-five-things).\n";
1390
1391        // Sanity check: under Standard flavor (GitHub anchor style), the
1392        // divergent fragment is reported as an unknown anchor.
1393        let ctx_std = LintContext::new(content, MarkdownFlavor::Standard, None);
1394        let std_result = rule.check(&ctx_std).unwrap();
1395        assert_eq!(
1396            std_result.len(),
1397            1,
1398            "Standard flavor should flag the Pandoc-style fragment: {std_result:?}"
1399        );
1400
1401        // Under Pandoc flavor, the Pandoc slug guard should resolve it.
1402        let ctx_pandoc = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1403        let pandoc_result = rule.check(&ctx_pandoc).unwrap();
1404        assert!(
1405            pandoc_result.is_empty(),
1406            "Pandoc flavor should resolve `#5.-five-things` against the heading slug: {pandoc_result:?}"
1407        );
1408    }
1409
1410    /// A link whose text contains an email address must still be checked under
1411    /// Pandoc — the `@` embedded in a word is not a citation marker, so the
1412    /// citation guard must not silence MD051 on a missing fragment.
1413    #[test]
1414    fn md051_pandoc_flags_missing_fragment_with_email_in_link_text() {
1415        use crate::config::MarkdownFlavor;
1416        let rule = MD051LinkFragments::new();
1417        let content = "# Title\n\n[contact user@example.com](#missing)\n";
1418
1419        let ctx_std = LintContext::new(content, MarkdownFlavor::Standard, None);
1420        let std_result = rule.check(&ctx_std).unwrap();
1421        assert_eq!(
1422            std_result.len(),
1423            1,
1424            "Standard flavor must flag the missing fragment: {std_result:?}"
1425        );
1426
1427        let ctx_pandoc = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1428        let pandoc_result = rule.check(&ctx_pandoc).unwrap();
1429        assert_eq!(
1430            pandoc_result.len(),
1431            1,
1432            "Pandoc flavor must also flag the missing fragment — link text with embedded email is not a citation: {pandoc_result:?}"
1433        );
1434    }
1435
1436    /// `[see @smith2020](#missing)` is a Markdown link, not a citation —
1437    /// Pandoc prefers the link interpretation when `[...]` is immediately
1438    /// followed by `(...)`. MD051 must still flag the missing fragment.
1439    #[test]
1440    fn md051_pandoc_flags_missing_fragment_with_citation_in_link_text() {
1441        use crate::config::MarkdownFlavor;
1442        let rule = MD051LinkFragments::new();
1443        let content = "# Title\n\n[see @smith2020](#missing)\n";
1444
1445        let ctx_std = LintContext::new(content, MarkdownFlavor::Standard, None);
1446        let std_result = rule.check(&ctx_std).unwrap();
1447        assert_eq!(
1448            std_result.len(),
1449            1,
1450            "Standard flavor must flag the missing fragment: {std_result:?}"
1451        );
1452
1453        let ctx_pandoc = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1454        let pandoc_result = rule.check(&ctx_pandoc).unwrap();
1455        assert_eq!(
1456            pandoc_result.len(),
1457            1,
1458            "Pandoc flavor must flag the missing fragment — `[label](url)` is a link, not a citation: {pandoc_result:?}"
1459        );
1460    }
1461
1462    /// Pandoc's auto_identifiers extension disambiguates duplicate headings by
1463    /// appending `-1`, `-2`, etc. A link to `#a.-1` must resolve against the
1464    /// second `# A.` heading.
1465    #[test]
1466    fn md051_pandoc_resolves_duplicate_heading_suffix_slug() {
1467        use crate::config::MarkdownFlavor;
1468        let rule = MD051LinkFragments::new();
1469        let content = "# A.\n\nfirst\n\n# A.\n\nsecond\n\n[first](#a.) and [second](#a.-1).\n";
1470
1471        let ctx_pandoc = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1472        let pandoc_result = rule.check(&ctx_pandoc).unwrap();
1473        assert!(
1474            pandoc_result.is_empty(),
1475            "Pandoc flavor should resolve `#a.` and `#a.-1` against duplicate headings: {pandoc_result:?}"
1476        );
1477
1478        let ctx_quarto = LintContext::new(content, MarkdownFlavor::Quarto, None);
1479        let quarto_result = rule.check(&ctx_quarto).unwrap();
1480        assert!(
1481            quarto_result.is_empty(),
1482            "Quarto flavor should also resolve duplicate-heading suffix slugs: {quarto_result:?}"
1483        );
1484    }
1485
1486    /// A link to `#a.-2` with only two `# A.` headings must still be flagged —
1487    /// only `-1` exists when there are two duplicates.
1488    #[test]
1489    fn md051_pandoc_flags_overshoot_duplicate_suffix() {
1490        use crate::config::MarkdownFlavor;
1491        let rule = MD051LinkFragments::new();
1492        let content = "# A.\n\n# A.\n\n[overshoot](#a.-2)\n";
1493
1494        let ctx_pandoc = LintContext::new(content, MarkdownFlavor::Pandoc, None);
1495        let pandoc_result = rule.check(&ctx_pandoc).unwrap();
1496        assert_eq!(
1497            pandoc_result.len(),
1498            1,
1499            "Pandoc must flag `#a.-2` when only `-1` exists (two duplicates): {pandoc_result:?}"
1500        );
1501    }
1502}
rumdl_lib/rules/md051_link_fragments.rs

rumdl_lib/rules/
md051_link_fragments.rs