1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
//! Rule MD080: Heading anchors must be unique.
//!
//! Two headings whose generated URL-safe anchor (slug) is identical produce a
//! collision: a `[text](#slug)` link and, under the MDXG virtual-page model,
//! the page identifier derived from an H1/H2 title can only resolve to the
//! *first* occurrence. GitHub/MkDocs paper over this by auto-suffixing the
//! later anchor (`slug-1`), which is functional but surprising and breaks any
//! hand-written `#slug` link that meant the second heading.
//!
//! This is distinct from:
//! - **MD024** (duplicate heading *text*) - misses distinct texts that
//! slugify identically (`Setup & Run` vs `Setup Run`, `C++` vs `C`).
//! - **MD051** (broken/missing fragment *targets*) - this flags *ambiguous*
//! targets, where the reference resolves but not unambiguously.
//!
//! Diagnostic only: renaming a heading is a semantic choice, so there is no
//! auto-fix. Opt-in, because the collision is functional under platform
//! auto-suffixing and flagging it changes established lint output.
use crate::lint_context::LintContext;
use crate::rule::{FixCapability, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
use crate::rule_config_serde::RuleConfig;
use crate::utils::anchor_styles::AnchorStyle;
use crate::utils::range_utils::calculate_match_range;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Default value for `MD080Config::levels`: every heading level, H1 through H6.
fn default_levels() -> Vec<u8> {
    (1..=6).collect()
}
/// Configuration for MD080 (Heading anchor collision)
///
/// Field names are (de)serialized in kebab-case, so `anchor_style` is spelled
/// `anchor-style` in configuration files.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "kebab-case")]
pub struct MD080Config {
/// Anchor generation style to match the target platform.
///
/// Falls back to `AnchorStyle::default()` when the key is omitted. The
/// snake_case spelling `anchor_style` is accepted as an alias on input.
#[serde(default, alias = "anchor_style")]
pub anchor_style: AnchorStyle,
/// Heading levels whose anchors must be unique. Defaults to all levels
/// (any heading can be a fragment target). Set to `[1, 2]` to check only
/// the MDXG virtual-page identifiers derived from H1/H2 titles.
///
/// Headings whose level is not listed here are skipped entirely: they are
/// neither reported nor remembered as a first occurrence.
#[serde(default = "default_levels")]
pub levels: Vec<u8>,
}
impl Default for MD080Config {
    /// Default configuration: the default anchor style, with every heading
    /// level (as returned by `default_levels`) taking part in the check.
    fn default() -> Self {
        let anchor_style = AnchorStyle::default();
        let levels = default_levels();
        Self { anchor_style, levels }
    }
}
/// Ties this configuration type to the rule it configures.
impl RuleConfig for MD080Config {
// Rule identifier; `default_config_section` uses it as the name of the
// generated configuration section.
const RULE_NAME: &'static str = "MD080";
}
/// Rule MD080: warns when a heading's effective anchor (its explicit
/// `{#custom-id}` or, failing that, its generated slug) has already been
/// produced by an earlier heading in the same document.
#[derive(Debug, Clone, Default)]
pub struct MD080HeadingAnchorCollision {
// Supplies the anchor style used to generate slugs and the set of heading
// levels that take part in the uniqueness check.
config: MD080Config,
}
impl MD080HeadingAnchorCollision {
pub fn new() -> Self {
Self::default()
}
pub fn from_config_struct(config: MD080Config) -> Self {
Self { config }
}
/// The anchor a heading actually resolves to. An explicit `{#custom-id}`
/// wins over the generated slug (it is what platforms emit) and is
/// compared in its emitted case: HTML `id` matching is case-sensitive, so
/// `{#API}` and `{#api}` are distinct anchors. Generated slugs are already
/// case-normalized by the anchor style.
fn effective_anchor(&self, text: &str, custom_id: Option<&str>) -> String {
match custom_id {
Some(id) => id.to_string(),
None => self.config.anchor_style.generate_fragment(text),
}
}
/// Resolve a heading's anchor and either record it as the first occurrence
/// or, if some earlier heading already produced the same anchor, emit a
/// collision warning pointing back at that first heading.
#[allow(clippy::too_many_arguments)]
fn record(
&self,
text: &str,
custom_id: Option<&str>,
level: u8,
line_num: usize,
content: &str,
seen: &mut HashMap<String, usize>,
warnings: &mut Vec<LintWarning>,
) {
if !self.config.levels.contains(&level) {
return;
}
let anchor = self.effective_anchor(text, custom_id);
if anchor.is_empty() {
return;
}
if let Some(&first_line) = seen.get(&anchor) {
let (start_line, start_col, end_line, end_col) =
calculate_match_range(line_num, content, content.find(text).unwrap_or(0), text.len());
warnings.push(LintWarning {
rule_name: Some(self.name().to_string()),
severity: Severity::Warning,
line: start_line,
column: start_col,
end_line,
end_column: end_col,
message: format!(
"Heading anchor '{anchor}' collides with the heading at line {first_line}; \
fragment links and any derived page identifier resolve only to the first occurrence"
),
fix: None,
});
} else {
seen.insert(anchor, line_num);
}
}
}
impl Rule for MD080HeadingAnchorCollision {
    /// Identifier of this rule.
    fn name(&self) -> &'static str {
        "MD080"
    }

    /// One-line summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Heading anchors must be unique"
    }

    /// Walks the document top to bottom and returns one warning for every
    /// heading whose effective anchor was already produced by an earlier
    /// heading. Lines inside front matter or code blocks are skipped.
    fn check(&self, ctx: &LintContext) -> LintResult {
        // Maps each anchor to the 1-based line of the heading that first produced it.
        let mut first_seen: HashMap<String, usize> = HashMap::new();
        let mut collisions = Vec::new();

        let candidate_lines = ctx
            .lines
            .iter()
            .enumerate()
            .filter(|(_, info)| !(info.in_front_matter || info.in_code_block));

        for (idx, line_info) in candidate_lines {
            let line_num = idx + 1;
            let content = line_info.content(ctx.content);

            match &line_info.heading {
                // Regular ATX/Setext headings parsed by the line scanner.
                // Invalid or empty ones are dropped without falling through
                // to the blockquote handling below.
                Some(heading) => {
                    if heading.is_valid && !heading.text.is_empty() {
                        self.record(
                            &heading.text,
                            heading.custom_id.as_deref(),
                            heading.level,
                            line_num,
                            content,
                            &mut first_seen,
                            &mut collisions,
                        );
                    }
                }
                // Blockquote headings (`> ## Intro`) are not seen by the line
                // scanner but still emit fragment anchors - mirror MD051 so
                // the two rules agree on what targets exist.
                None => {
                    let Some(bq) = &line_info.blockquote else {
                        continue;
                    };
                    let Some((clean_text, custom_id)) =
                        crate::utils::header_id_utils::parse_blockquote_atx_heading(&bq.content)
                    else {
                        continue;
                    };
                    if clean_text.is_empty() {
                        continue;
                    }

                    // The level is the length of the leading `#` run, forced
                    // into the 1..=6 range.
                    let marker_len = bq
                        .content
                        .trim_start()
                        .bytes()
                        .take_while(|&b| b == b'#')
                        .count();
                    let level = marker_len.clamp(1, 6) as u8;

                    self.record(
                        &clean_text,
                        custom_id.as_deref(),
                        level,
                        line_num,
                        content,
                        &mut first_seen,
                        &mut collisions,
                    );
                }
            }
        }

        Ok(collisions)
    }

    /// MD080 never rewrites the document.
    fn fix_capability(&self) -> FixCapability {
        // Renaming a heading (and every link that targets it) is a semantic
        // decision the linter must not make automatically, so the fix
        // coordinator must treat MD080 as diagnostic-only.
        FixCapability::Unfixable
    }

    /// Always fails: there is no automatic fix for an anchor collision.
    fn fix(&self, _ctx: &LintContext) -> Result<String, LintError> {
        let reason = String::from("MD080 has no auto-fix");
        Err(LintError::FixFailed(reason))
    }

    /// MD080 belongs to the heading rules.
    fn category(&self) -> RuleCategory {
        RuleCategory::Heading
    }

    /// Exposes the rule as `Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    /// Returns the default configuration as a `(rule name, TOML table)` pair,
    /// or `None` when no schema table is available or the table is empty.
    fn default_config_section(&self) -> Option<(String, toml::Value)> {
        let defaults = MD080Config::default();
        let table = crate::rule_config_serde::config_schema_table(&defaults)?;
        (!table.is_empty()).then(|| (MD080Config::RULE_NAME.to_string(), toml::Value::Table(table)))
    }

    /// Builds the rule from the global configuration.
    ///
    /// When the `MD080` section sets neither `anchor-style` nor
    /// `anchor_style`, the anchor style is derived from the active Markdown
    /// flavor instead of the configuration default.
    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
    where
        Self: Sized,
    {
        let mut rule_config = crate::rule_config_serde::load_rule_config::<MD080Config>(config);

        // Mirror MD051: when the user has not pinned an anchor style, follow
        // the active flavor's native anchor generation.
        let style_pinned = config.rules.get("MD080").is_some_and(|rc| {
            ["anchor-style", "anchor_style"]
                .iter()
                .any(|key| rc.values.contains_key(*key))
        });

        if !style_pinned {
            let flavor_style = match config.global.flavor {
                crate::config::MarkdownFlavor::MkDocs => AnchorStyle::PythonMarkdown,
                crate::config::MarkdownFlavor::Kramdown => AnchorStyle::KramdownGfm,
                _ => AnchorStyle::GitHub,
            };
            rule_config.anchor_style = flavor_style;
        }

        Box::new(Self::from_config_struct(rule_config))
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;

    /// Lints `content` with the default MD080 configuration.
    fn check(content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        MD080HeadingAnchorCollision::new().check(&ctx).unwrap()
    }

    /// Lints `content` with an explicit MD080 configuration.
    fn check_with(config: MD080Config, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        MD080HeadingAnchorCollision::from_config_struct(config)
            .check(&ctx)
            .unwrap()
    }

    #[test]
    fn flags_distinct_text_same_github_slug() {
        // Two different heading texts that the default anchor style is
        // expected to reduce to one and the same slug.
        // NOTE(review): github.com itself appears to render `Setup & Run` as
        // `setup--run` and `Setup Run` as `setup-run`, which would be two
        // distinct anchors. Confirm that the default `AnchorStyle` really
        // maps both texts to a single slug.
        let w = check("# Setup & Run\n\n# Setup Run\n");
        assert_eq!(w.len(), 1, "got: {w:?}");
        assert!(w[0].message.contains("collides with the heading at line 1"));
        assert_eq!(w[0].line, 3);
    }

    #[test]
    fn flags_punctuation_only_difference() {
        // "C++" -> "c", "C" -> "c" under GitHub.
        let w = check("# C++\n\n## C\n");
        assert_eq!(w.len(), 1, "got: {w:?}");
    }

    #[test]
    fn flags_same_text_across_levels() {
        // Same text at different levels: MD024 with allow_different_nesting
        // would NOT flag this, but the anchor `#intro` is genuinely ambiguous.
        let w = check("# Intro\n\nbody\n\n## Intro\n");
        assert_eq!(w.len(), 1, "distinct-level slug collision must flag: {w:?}");
        assert_eq!(w[0].line, 5);
    }

    #[test]
    fn no_warning_when_slugs_differ() {
        assert!(check("# Alpha\n\n## Beta\n\n### Gamma\n").is_empty());
    }

    #[test]
    fn flags_three_way_collision_once_per_extra() {
        let w = check("# Dup\n\n## Dup\n\n### Dup\n");
        assert_eq!(w.len(), 2, "first defines, each later collides: {w:?}");
        assert_eq!(w[0].line, 3);
        assert_eq!(w[1].line, 5);
    }

    #[test]
    fn flags_colliding_custom_ids() {
        let w = check("# Alpha {#dup}\n\n## Beta {#dup}\n");
        assert_eq!(w.len(), 1, "got: {w:?}");
        assert!(w[0].message.contains("'dup'"));
    }

    #[test]
    fn custom_id_disambiguates_same_text() {
        // Same visible text but explicit distinct ids => no collision.
        let w = check("# Repeat {#first}\n\n## Repeat {#second}\n");
        assert!(w.is_empty(), "explicit ids disambiguate: {w:?}");
    }

    #[test]
    fn ignores_headings_in_code_fences() {
        let w = check("# Title\n\n```\n# Title\n```\n");
        assert!(w.is_empty(), "fenced `# Title` is not a heading: {w:?}");
    }

    #[test]
    fn ignores_front_matter() {
        let w = check("---\ntitle: Title\n---\n\n# Title\n\n## Title\n");
        // Two real headings still collide; front matter must not add a third.
        assert_eq!(w.len(), 1, "got: {w:?}");
        assert_eq!(w[0].line, 7);
    }

    #[test]
    fn levels_filter_restricts_scope() {
        // H3 collision is ignored when only H1/H2 page ids are checked.
        let cfg = MD080Config {
            anchor_style: AnchorStyle::GitHub,
            levels: vec![1, 2],
        };
        let w = check_with(cfg, "# Page\n\n### Dup\n\n### Dup\n");
        assert!(w.is_empty(), "H3 collisions excluded by levels=[1,2]: {w:?}");
    }

    #[test]
    fn anchor_style_changes_collision_outcome() {
        // "a_b" vs "ab": GitHub preserves `_` (slugs `a_b` / `ab`, distinct),
        // Kramdown strips `_` (both become `ab`, a collision).
        let content = "# a_b\n\n## ab\n";
        assert!(
            check_with(
                MD080Config {
                    anchor_style: AnchorStyle::GitHub,
                    levels: default_levels()
                },
                content
            )
            .is_empty(),
            "GitHub keeps the underscore, slugs stay distinct"
        );
        assert_eq!(
            check_with(
                MD080Config {
                    anchor_style: AnchorStyle::Kramdown,
                    levels: default_levels()
                },
                content
            )
            .len(),
            1,
            "Kramdown removes `_`, so both headings slug to `ab`"
        );
    }

    #[test]
    fn flags_setext_heading_collision() {
        // Setext headings produce fragment anchors too; a Setext H1 and an
        // ATX H2 with the same slug collide just like two ATX headings.
        let w = check("Intro\n=====\n\nbody\n\n## Intro\n");
        assert_eq!(w.len(), 1, "setext + atx slug collision must flag: {w:?}");
        assert_eq!(w[0].line, 6);
    }

    #[test]
    fn custom_id_case_is_significant() {
        // HTML id matching is case-sensitive: {#API} and {#api} are distinct
        // anchors, so they must NOT be reported as a collision.
        let w = check("# Alpha {#API}\n\n## Beta {#api}\n");
        assert!(w.is_empty(), "custom ids differing only in case are distinct: {w:?}");
    }

    #[test]
    fn flags_blockquote_heading_collision() {
        // A blockquoted ATX heading still emits a fragment anchor (mirrors
        // MD051), so it collides with a same-slug top-level heading.
        let w = check("> ## Intro\n\n## Intro\n");
        assert_eq!(w.len(), 1, "blockquote heading slug collision must flag: {w:?}");
        assert_eq!(w[0].line, 3);
    }

    #[test]
    fn blockquote_in_html_block_mirrors_md051_anchor_model() {
        // MD080 deliberately mirrors MD051's view of which fragment targets
        // exist. MD051 records the anchor for a blockquoted `> ## Intro` even
        // inside a plain `<div>` block (its anchor-extraction loop only skips
        // front matter and code blocks), so `[x](#intro)` resolves there.
        // MD080 must therefore agree that a later real `## Intro` collides on
        // `#intro` - diverging would make the two rules contradict each other
        // about whether the target exists.
        let w = check("<div>\n> ## Intro\n</div>\n\n## Intro\n");
        assert_eq!(w.len(), 1, "must agree with MD051's anchor model: {w:?}");
        assert_eq!(w[0].line, 5);
    }

    #[test]
    fn no_auto_fix_offered() {
        let content = "# Dup\n\n## Dup\n";

        // Assert the count first so a regression reports the warnings instead
        // of dying on an out-of-bounds index.
        let w = check(content);
        assert_eq!(w.len(), 1, "got: {w:?}");
        assert!(w[0].fix.is_none());

        let rule = MD080HeadingAnchorCollision::new();
        assert!(
            matches!(rule.fix_capability(), FixCapability::Unfixable),
            "MD080 must advertise itself as diagnostic-only"
        );

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert!(rule.fix(&ctx).is_err());
    }

    #[test]
    fn empty_document_is_clean() {
        assert!(check("").is_empty());
        assert!(check("Just prose, no headings.\n").is_empty());
    }
}