Skip to main content

rumdl_lib/rules/
md080_heading_anchor_collision.rs

1//! Rule MD080: Heading anchors must be unique.
2//!
3//! Two headings whose generated URL-safe anchor (slug) is identical produce a
4//! collision: a `[text](#slug)` link and, under the MDXG virtual-page model,
5//! the page identifier derived from an H1/H2 title can only resolve to the
6//! *first* occurrence. GitHub/MkDocs paper over this by auto-suffixing the
7//! later anchor (`slug-1`), which is functional but surprising and breaks any
8//! hand-written `#slug` link that meant the second heading.
9//!
10//! This is distinct from:
11//! - **MD024** (duplicate heading *text*) - misses distinct texts that
12//!   slugify identically (`Setup & Run` vs `Setup Run`, `C++` vs `C`).
13//! - **MD051** (broken/missing fragment *targets*) - this flags *ambiguous*
14//!   targets, where the reference resolves but not unambiguously.
15//!
16//! Diagnostic only: renaming a heading is a semantic choice, so there is no
17//! auto-fix. Opt-in, because the collision is functional under platform
18//! auto-suffixing and flagging it changes established lint output.
19
20use crate::lint_context::LintContext;
21use crate::rule::{FixCapability, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
22use crate::rule_config_serde::RuleConfig;
23use crate::utils::anchor_styles::AnchorStyle;
24use crate::utils::range_utils::calculate_match_range;
25use serde::{Deserialize, Serialize};
26use std::collections::HashMap;
27
/// Serde default for `MD080Config::levels`: every heading level, H1 through H6.
fn default_levels() -> Vec<u8> {
    (1..=6).collect()
}
31
/// Configuration for MD080 (Heading anchor collision).
///
/// Keys are (de)serialized in kebab-case, so the `anchor_style` field is
/// written as `anchor-style`; the snake_case spelling `anchor_style` is also
/// accepted on input through a serde alias.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "kebab-case")]
pub struct MD080Config {
    /// Anchor generation style to match the target platform.
    ///
    /// Falls back to `AnchorStyle::default()` when the key is absent.
    #[serde(default, alias = "anchor_style")]
    pub anchor_style: AnchorStyle,

    /// Heading levels whose anchors must be unique. Defaults to all levels
    /// (any heading can be a fragment target). Set to `[1, 2]` to check only
    /// the MDXG virtual-page identifiers derived from H1/H2 titles.
    ///
    /// Headings at a level not listed here are ignored entirely: they are
    /// neither reported nor recorded as a first occurrence.
    #[serde(default = "default_levels")]
    pub levels: Vec<u8>,
}
46
47impl Default for MD080Config {
48    fn default() -> Self {
49        Self {
50            anchor_style: AnchorStyle::default(),
51            levels: default_levels(),
52        }
53    }
54}
55
impl RuleConfig for MD080Config {
    /// Rule identifier this configuration belongs to; also used as the name
    /// of the default config section emitted for the rule.
    const RULE_NAME: &'static str = "MD080";
}
59
/// Rule MD080: reports headings whose effective anchor duplicates the anchor
/// of an earlier heading in the same document.
#[derive(Debug, Clone, Default)]
pub struct MD080HeadingAnchorCollision {
    /// Anchor style and heading-level filter applied while checking.
    config: MD080Config,
}
64
65impl MD080HeadingAnchorCollision {
66    pub fn new() -> Self {
67        Self::default()
68    }
69
70    pub fn from_config_struct(config: MD080Config) -> Self {
71        Self { config }
72    }
73
74    /// The anchor a heading actually resolves to. An explicit `{#custom-id}`
75    /// wins over the generated slug (it is what platforms emit) and is
76    /// compared in its emitted case: HTML `id` matching is case-sensitive, so
77    /// `{#API}` and `{#api}` are distinct anchors. Generated slugs are already
78    /// case-normalized by the anchor style.
79    fn effective_anchor(&self, text: &str, custom_id: Option<&str>) -> String {
80        match custom_id {
81            Some(id) => id.to_string(),
82            None => self.config.anchor_style.generate_fragment(text),
83        }
84    }
85
86    /// Resolve a heading's anchor and either record it as the first occurrence
87    /// or, if some earlier heading already produced the same anchor, emit a
88    /// collision warning pointing back at that first heading.
89    #[allow(clippy::too_many_arguments)]
90    fn record(
91        &self,
92        text: &str,
93        custom_id: Option<&str>,
94        level: u8,
95        line_num: usize,
96        content: &str,
97        seen: &mut HashMap<String, usize>,
98        warnings: &mut Vec<LintWarning>,
99    ) {
100        if !self.config.levels.contains(&level) {
101            return;
102        }
103
104        let anchor = self.effective_anchor(text, custom_id);
105        if anchor.is_empty() {
106            return;
107        }
108
109        if let Some(&first_line) = seen.get(&anchor) {
110            let (start_line, start_col, end_line, end_col) =
111                calculate_match_range(line_num, content, content.find(text).unwrap_or(0), text.len());
112            warnings.push(LintWarning {
113                rule_name: Some(self.name().to_string()),
114                severity: Severity::Warning,
115                line: start_line,
116                column: start_col,
117                end_line,
118                end_column: end_col,
119                message: format!(
120                    "Heading anchor '{anchor}' collides with the heading at line {first_line}; \
121                     fragment links and any derived page identifier resolve only to the first occurrence"
122                ),
123                fix: None,
124            });
125        } else {
126            seen.insert(anchor, line_num);
127        }
128    }
129}
130
131impl Rule for MD080HeadingAnchorCollision {
132    fn name(&self) -> &'static str {
133        "MD080"
134    }
135
136    fn description(&self) -> &'static str {
137        "Heading anchors must be unique"
138    }
139
140    fn check(&self, ctx: &LintContext) -> LintResult {
141        let mut warnings = Vec::new();
142        // anchor -> 1-based line of the first heading that produced it.
143        let mut seen: HashMap<String, usize> = HashMap::new();
144
145        for (idx, line_info) in ctx.lines.iter().enumerate() {
146            if line_info.in_front_matter || line_info.in_code_block {
147                continue;
148            }
149            let line_num = idx + 1;
150            let content = line_info.content(ctx.content);
151
152            // Regular ATX/Setext headings parsed by the line scanner.
153            if let Some(heading) = &line_info.heading {
154                if heading.is_valid && !heading.text.is_empty() {
155                    self.record(
156                        &heading.text,
157                        heading.custom_id.as_deref(),
158                        heading.level,
159                        line_num,
160                        content,
161                        &mut seen,
162                        &mut warnings,
163                    );
164                }
165                continue;
166            }
167
168            // Blockquote headings (`> ## Intro`) are not seen by the line
169            // scanner but still emit fragment anchors - mirror MD051 so the
170            // two rules agree on what targets exist.
171            if let Some(bq) = &line_info.blockquote
172                && let Some((clean_text, custom_id)) =
173                    crate::utils::header_id_utils::parse_blockquote_atx_heading(&bq.content)
174                && !clean_text.is_empty()
175            {
176                let level = bq
177                    .content
178                    .trim_start()
179                    .bytes()
180                    .take_while(|&b| b == b'#')
181                    .count()
182                    .clamp(1, 6) as u8;
183                self.record(
184                    &clean_text,
185                    custom_id.as_deref(),
186                    level,
187                    line_num,
188                    content,
189                    &mut seen,
190                    &mut warnings,
191                );
192            }
193        }
194
195        Ok(warnings)
196    }
197
198    fn fix_capability(&self) -> FixCapability {
199        // Renaming a heading (and every link that targets it) is a semantic
200        // decision the linter must not make automatically, so the fix
201        // coordinator must treat MD080 as diagnostic-only.
202        FixCapability::Unfixable
203    }
204
205    fn fix(&self, _ctx: &LintContext) -> Result<String, LintError> {
206        Err(LintError::FixFailed("MD080 has no auto-fix".to_string()))
207    }
208
209    fn category(&self) -> RuleCategory {
210        RuleCategory::Heading
211    }
212
213    fn as_any(&self) -> &dyn std::any::Any {
214        self
215    }
216
217    fn default_config_section(&self) -> Option<(String, toml::Value)> {
218        let table = crate::rule_config_serde::config_schema_table(&MD080Config::default())?;
219        if table.is_empty() {
220            None
221        } else {
222            Some((MD080Config::RULE_NAME.to_string(), toml::Value::Table(table)))
223        }
224    }
225
226    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
227    where
228        Self: Sized,
229    {
230        let mut rule_config = crate::rule_config_serde::load_rule_config::<MD080Config>(config);
231
232        // Mirror MD051: when the user has not pinned an anchor style, follow
233        // the active flavor's native anchor generation.
234        let explicit_style_present = config
235            .rules
236            .get("MD080")
237            .is_some_and(|rc| rc.values.contains_key("anchor-style") || rc.values.contains_key("anchor_style"));
238        if !explicit_style_present {
239            rule_config.anchor_style = match config.global.flavor {
240                crate::config::MarkdownFlavor::MkDocs => AnchorStyle::PythonMarkdown,
241                crate::config::MarkdownFlavor::Kramdown => AnchorStyle::KramdownGfm,
242                _ => AnchorStyle::GitHub,
243            };
244        }
245
246        Box::new(MD080HeadingAnchorCollision::from_config_struct(rule_config))
247    }
248}
249
// Unit tests for MD080. Every test lints a small inline markdown document and
// inspects the warnings returned by `check`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;

    /// Lints `content` with the rule's default configuration under the
    /// Standard flavor and returns the warnings.
    fn check(content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        MD080HeadingAnchorCollision::new().check(&ctx).unwrap()
    }

    /// Same as `check`, but with an explicit rule configuration.
    fn check_with(config: MD080Config, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        MD080HeadingAnchorCollision::from_config_struct(config)
            .check(&ctx)
            .unwrap()
    }

    #[test]
    fn flags_distinct_text_same_github_slug() {
        // "Setup & Run" and "Setup  Run" (note the two spaces) are distinct
        // heading texts; under GitHub-style slugs they collide on
        // `setup--run`.
        let w = check("# Setup & Run\n\n# Setup  Run\n");
        assert_eq!(w.len(), 1, "got: {w:?}");
        assert!(w[0].message.contains("collides with the heading at line 1"));
        // The warning is attached to the later heading, not the first one.
        assert_eq!(w[0].line, 3);
    }

    #[test]
    fn flags_punctuation_only_difference() {
        // "C++" -> "c", "C" -> "c" under GitHub.
        let w = check("# C++\n\n## C\n");
        assert_eq!(w.len(), 1, "got: {w:?}");
    }

    #[test]
    fn flags_same_text_across_levels() {
        // Same text at different levels: MD024 with allow_different_nesting
        // would NOT flag this, but the anchor `#intro` is genuinely ambiguous.
        let w = check("# Intro\n\nbody\n\n## Intro\n");
        assert_eq!(w.len(), 1, "distinct-level slug collision must flag: {w:?}");
        assert_eq!(w[0].line, 5);
    }

    #[test]
    fn no_warning_when_slugs_differ() {
        assert!(check("# Alpha\n\n## Beta\n\n### Gamma\n").is_empty());
    }

    #[test]
    fn flags_three_way_collision_once_per_extra() {
        // The first heading defines the anchor; each of the two later
        // headings yields exactly one warning.
        let w = check("# Dup\n\n## Dup\n\n### Dup\n");
        assert_eq!(w.len(), 2, "first defines, each later collides: {w:?}");
        assert_eq!(w[0].line, 3);
        assert_eq!(w[1].line, 5);
    }

    #[test]
    fn flags_colliding_custom_ids() {
        // Different visible text, identical explicit ids => collision on the id.
        let w = check("# Alpha {#dup}\n\n## Beta {#dup}\n");
        assert_eq!(w.len(), 1, "got: {w:?}");
        assert!(w[0].message.contains("'dup'"));
    }

    #[test]
    fn custom_id_disambiguates_same_text() {
        // Same visible text but explicit distinct ids => no collision.
        let w = check("# Repeat {#first}\n\n## Repeat {#second}\n");
        assert!(w.is_empty(), "explicit ids disambiguate: {w:?}");
    }

    #[test]
    fn ignores_headings_in_code_fences() {
        // A `# Title` line inside a fenced code block is not a heading.
        let w = check("# Title\n\n```\n# Title\n```\n");
        assert!(w.is_empty(), "fenced `# Title` is not a heading: {w:?}");
    }

    #[test]
    fn ignores_front_matter() {
        let w = check("---\ntitle: Title\n---\n\n# Title\n\n## Title\n");
        // Two real headings still collide; front matter must not add a third.
        assert_eq!(w.len(), 1, "got: {w:?}");
        assert_eq!(w[0].line, 7);
    }

    #[test]
    fn levels_filter_restricts_scope() {
        // H3 collision is ignored when only H1/H2 page ids are checked.
        let cfg = MD080Config {
            anchor_style: AnchorStyle::GitHub,
            levels: vec![1, 2],
        };
        let w = check_with(cfg, "# Page\n\n### Dup\n\n### Dup\n");
        assert!(w.is_empty(), "H3 collisions excluded by levels=[1,2]: {w:?}");
    }

    #[test]
    fn anchor_style_changes_collision_outcome() {
        // "a_b" vs "ab": GitHub preserves `_` (slugs `a_b` / `ab`, distinct),
        // Kramdown strips `_` (both become `ab`, a collision).
        let content = "# a_b\n\n## ab\n";
        assert!(
            check_with(
                MD080Config {
                    anchor_style: AnchorStyle::GitHub,
                    levels: default_levels()
                },
                content
            )
            .is_empty(),
            "GitHub keeps the underscore, slugs stay distinct"
        );
        assert_eq!(
            check_with(
                MD080Config {
                    anchor_style: AnchorStyle::Kramdown,
                    levels: default_levels()
                },
                content
            )
            .len(),
            1,
            "Kramdown removes `_`, so both headings slug to `ab`"
        );
    }

    #[test]
    fn flags_setext_heading_collision() {
        // Setext headings produce fragment anchors too; a Setext H1 and an
        // ATX H2 with the same slug collide just like two ATX headings.
        let w = check("Intro\n=====\n\nbody\n\n## Intro\n");
        assert_eq!(w.len(), 1, "setext + atx slug collision must flag: {w:?}");
        assert_eq!(w[0].line, 6);
    }

    #[test]
    fn custom_id_case_is_significant() {
        // HTML id matching is case-sensitive: {#API} and {#api} are distinct
        // anchors, so they must NOT be reported as a collision.
        let w = check("# Alpha {#API}\n\n## Beta {#api}\n");
        assert!(w.is_empty(), "custom ids differing only in case are distinct: {w:?}");
    }

    #[test]
    fn flags_blockquote_heading_collision() {
        // A blockquoted ATX heading still emits a fragment anchor (mirrors
        // MD051), so it collides with a same-slug top-level heading.
        let w = check("> ## Intro\n\n## Intro\n");
        assert_eq!(w.len(), 1, "blockquote heading slug collision must flag: {w:?}");
        assert_eq!(w[0].line, 3);
    }

    #[test]
    fn blockquote_in_html_block_mirrors_md051_anchor_model() {
        // MD080 deliberately mirrors MD051's view of which fragment targets
        // exist. MD051 records the anchor for a blockquoted `> ## Intro` even
        // inside a plain `<div>` block (its anchor-extraction loop only skips
        // front matter and code blocks), so `[x](#intro)` resolves there.
        // MD080 must therefore agree that a later real `## Intro` collides on
        // `#intro` - diverging would make the two rules contradict each other
        // about whether the target exists.
        let w = check("<div>\n> ## Intro\n</div>\n\n## Intro\n");
        assert_eq!(w.len(), 1, "must agree with MD051's anchor model: {w:?}");
        assert_eq!(w[0].line, 5);
    }

    #[test]
    fn no_auto_fix_offered() {
        // Neither the individual warning nor the rule-level `fix` entry point
        // offers a fix.
        let w = check("# Dup\n\n## Dup\n");
        assert!(w[0].fix.is_none());
        let ctx = LintContext::new("# Dup\n\n## Dup\n", MarkdownFlavor::Standard, None);
        assert!(MD080HeadingAnchorCollision::new().fix(&ctx).is_err());
    }

    #[test]
    fn empty_document_is_clean() {
        // No headings at all => nothing to collide.
        assert!(check("").is_empty());
        assert!(check("Just prose, no headings.\n").is_empty());
    }
}