mdbook_lint_core/rules/standard/
md024.rs

//! MD024: Multiple headings with the same content
//!
//! This rule reports headings whose text duplicates that of an earlier heading in the same
//! document. Text is compared case-insensitively, with runs of whitespace collapsed.
4
5use crate::error::Result;
6use crate::rule::{AstRule, RuleCategory, RuleMetadata};
7use crate::{
8    Document,
9    violation::{Severity, Violation},
10};
11use comrak::nodes::{AstNode, NodeValue};
12use std::collections::HashMap;
13
/// Lint rule MD024 (`no-duplicate-heading`): reports headings whose text repeats the text of
/// an earlier heading in the same document.
#[derive(Debug, Clone)]
pub struct MD024 {
    /// When `true`, a heading is only compared against earlier headings of its own level
    /// instead of against every earlier heading in the document (default: `false`).
    siblings_only: bool,
}
19
20impl MD024 {
21    /// Create a new MD024 rule with default settings
22    pub fn new() -> Self {
23        Self {
24            siblings_only: false,
25        }
26    }
27
28    /// Create a new MD024 rule with custom settings
29    #[allow(dead_code)]
30    pub fn with_siblings_only(siblings_only: bool) -> Self {
31        Self { siblings_only }
32    }
33}
34
35impl Default for MD024 {
36    fn default() -> Self {
37        Self::new()
38    }
39}
40
41impl AstRule for MD024 {
42    fn id(&self) -> &'static str {
43        "MD024"
44    }
45
46    fn name(&self) -> &'static str {
47        "no-duplicate-heading"
48    }
49
50    fn description(&self) -> &'static str {
51        "Multiple headings with the same content"
52    }
53
54    fn metadata(&self) -> RuleMetadata {
55        RuleMetadata::stable(RuleCategory::Content).introduced_in("mdbook-lint v0.1.0")
56    }
57
58    fn check_ast<'a>(&self, document: &Document, ast: &'a AstNode<'a>) -> Result<Vec<Violation>> {
59        let mut violations = Vec::new();
60
61        if self.siblings_only {
62            // Check for duplicates only at the same heading level
63            self.check_siblings_only(document, ast, &mut violations)?;
64        } else {
65            // Check for duplicates across all heading levels
66            self.check_all_levels(document, ast, &mut violations)?;
67        }
68
69        Ok(violations)
70    }
71}
72
73impl MD024 {
74    /// Check for duplicate headings across all levels
75    fn check_all_levels<'a>(
76        &self,
77        document: &Document,
78        ast: &'a AstNode<'a>,
79        violations: &mut Vec<Violation>,
80    ) -> Result<()> {
81        let mut seen_headings: HashMap<String, (usize, usize)> = HashMap::new();
82
83        for node in ast.descendants() {
84            if let NodeValue::Heading(_heading) = &node.data.borrow().value
85                && let Some((line, column)) = document.node_position(node)
86            {
87                let heading_text = document.node_text(node);
88                let heading_text = heading_text.trim();
89
90                // Skip empty headings
91                if heading_text.is_empty() {
92                    continue;
93                }
94
95                // Normalize heading text for comparison (case-insensitive, whitespace normalized)
96                let normalized_text = self.normalize_heading_text(heading_text);
97
98                if let Some((first_line, _first_column)) = seen_headings.get(&normalized_text) {
99                    violations.push(self.create_violation(
100                        format!(
101                            "Duplicate heading content: '{heading_text}' (first occurrence at line {first_line})"
102                        ),
103                        line,
104                        column,
105                        Severity::Warning,
106                    ));
107                } else {
108                    seen_headings.insert(normalized_text, (line, column));
109                }
110            }
111        }
112
113        Ok(())
114    }
115
116    /// Check for duplicate headings only at the same level
117    fn check_siblings_only<'a>(
118        &self,
119        document: &Document,
120        ast: &'a AstNode<'a>,
121        violations: &mut Vec<Violation>,
122    ) -> Result<()> {
123        // Group headings by level, then check for duplicates within each level
124        let mut headings_by_level: HashMap<u8, HashMap<String, (usize, usize)>> = HashMap::new();
125
126        for node in ast.descendants() {
127            if let NodeValue::Heading(heading) = &node.data.borrow().value
128                && let Some((line, column)) = document.node_position(node)
129            {
130                let heading_text = document.node_text(node);
131                let heading_text = heading_text.trim();
132
133                // Skip empty headings
134                if heading_text.is_empty() {
135                    continue;
136                }
137
138                let level = heading.level;
139                let normalized_text = self.normalize_heading_text(heading_text);
140
141                let level_map = headings_by_level.entry(level).or_default();
142
143                if let Some((first_line, _first_column)) = level_map.get(&normalized_text) {
144                    violations.push(self.create_violation(
145                        format!(
146                            "Duplicate heading content at level {level}: '{heading_text}' (first occurrence at line {first_line})"
147                        ),
148                        line,
149                        column,
150                        Severity::Warning,
151                    ));
152                } else {
153                    level_map.insert(normalized_text, (line, column));
154                }
155            }
156        }
157
158        Ok(())
159    }
160
161    /// Normalize heading text for comparison
162    fn normalize_heading_text(&self, text: &str) -> String {
163        // Convert to lowercase and normalize whitespace for comparison
164        text.to_lowercase()
165            .split_whitespace()
166            .collect::<Vec<&str>>()
167            .join(" ")
168    }
169}
170
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Document;
    use crate::rule::Rule;
    use std::path::PathBuf;

    /// Parses `content` as `test.md`, runs `rule` over it and returns the reported violations.
    fn lint(rule: &MD024, content: &str) -> Vec<Violation> {
        let document = Document::new(content.to_string(), PathBuf::from("test.md")).unwrap();
        rule.check(&document).unwrap()
    }

    /// A document whose headings are all distinct produces no violations.
    #[test]
    fn test_md024_no_violations() {
        let content = r#"# Unique First Heading
## Unique Second Heading
### Unique Third Heading
## Another Unique Second Heading
### Another Unique Third Heading
"#;
        let violations = lint(&MD024::new(), content);

        assert_eq!(violations.len(), 0);
    }

    /// A repeated heading is reported at its own line and cites the first occurrence.
    #[test]
    fn test_md024_duplicate_headings_violation() {
        let content = r#"# Introduction
## Getting Started
### Installation
## Getting Started
### Configuration
"#;
        let violations = lint(&MD024::new(), content);

        assert_eq!(violations.len(), 1);
        assert!(violations[0].message.contains("Duplicate heading content"));
        assert!(violations[0].message.contains("Getting Started"));
        assert!(violations[0].message.contains("first occurrence at line 2"));
        assert_eq!(violations[0].line, 4);
    }

    /// Headings that differ only in letter case count as duplicates.
    #[test]
    fn test_md024_case_insensitive_duplicates() {
        let content = r#"# Getting Started
## Configuration
### getting started
## CONFIGURATION
"#;
        let violations = lint(&MD024::new(), content);

        assert_eq!(violations.len(), 2);
        assert!(violations[0].message.contains("getting started"));
        assert!(violations[1].message.contains("CONFIGURATION"));
    }

    /// Headings that differ only in the amount of inner whitespace count as duplicates.
    #[test]
    fn test_md024_whitespace_normalization() {
        let content = r#"# Getting   Started
## Multiple    Spaces
### Getting Started
## Multiple Spaces
"#;
        let violations = lint(&MD024::new(), content);

        assert_eq!(violations.len(), 2);
        assert!(violations[0].message.contains("Getting Started"));
        assert!(violations[1].message.contains("Multiple Spaces"));
    }

    /// In siblings-only mode, equal text at different heading levels is not reported.
    #[test]
    fn test_md024_siblings_only_mode() {
        let content = r#"# Main Heading
## Introduction
### Introduction
## Configuration
### Configuration
"#;
        let violations = lint(&MD024::with_siblings_only(true), content);

        // Should only detect duplicates at the same level
        // Both "Introduction" headings are at different levels (## vs ###), so no violations
        // Both "Configuration" headings are at different levels (## vs ###), so no violations
        assert_eq!(violations.len(), 0);
    }

    /// In siblings-only mode, a repeat at the same level under the same parent is reported.
    #[test]
    fn test_md024_siblings_only_with_same_level_duplicates() {
        let content = r#"# Main Heading
## Introduction
## Configuration
## Introduction
### Different Level Introduction
"#;
        let violations = lint(&MD024::with_siblings_only(true), content);

        // Should detect the duplicate "Introduction" at level 2, but ignore the level 3 one
        assert_eq!(violations.len(), 1);
        assert!(
            violations[0]
                .message
                .contains("Duplicate heading content at level 2")
        );
        assert!(violations[0].message.contains("Introduction"));
        assert_eq!(violations[0].line, 4);
    }

    /// Every repeat is reported, including a second repeat of the same heading.
    #[test]
    fn test_md024_multiple_duplicates() {
        let content = r#"# Main
## Section A
### Subsection
## Section B
### Subsection
## Section A
### Another Subsection
### Subsection
"#;
        let violations = lint(&MD024::new(), content);

        assert_eq!(violations.len(), 3);

        // One violation per repeated heading, in document order: the second "Subsection",
        // the second "Section A" and the third "Subsection".
        let reported_lines: Vec<_> = violations.iter().map(|v| v.line).collect();
        assert_eq!(reported_lines, [5, 6, 8]);

        let messages: Vec<&str> = violations.iter().map(|v| v.message.as_str()).collect();
        assert!(
            messages
                .iter()
                .any(|m| m.contains("Section A") && m.contains("line 2"))
        );
        // Both repeats of "Subsection" must point back at the first occurrence on line 3.
        assert_eq!(
            messages
                .iter()
                .filter(|m| m.contains("'Subsection'") && m.contains("line 3"))
                .count(),
            2
        );
    }

    /// Headings without text are never treated as duplicates of each other.
    #[test]
    fn test_md024_empty_headings_ignored() {
        let content = r#"# Main Heading
##
###
## Valid Heading
###
## Valid Heading
"#;
        let violations = lint(&MD024::new(), content);

        // Should only detect the duplicate "Valid Heading", not the empty ones
        assert_eq!(violations.len(), 1);
        assert!(violations[0].message.contains("Valid Heading"));
    }

    /// ATX and setext headings with the same text are duplicates of each other.
    #[test]
    fn test_md024_mixed_heading_types() {
        let content = r#"# ATX Heading

Setext Heading
==============

## Another Section

ATX Heading
-----------

### Final Section
"#;
        let violations = lint(&MD024::new(), content);

        // Should detect duplicate "ATX Heading" regardless of heading style
        assert_eq!(violations.len(), 1);
        assert!(violations[0].message.contains("ATX Heading"));
    }

    /// Inline formatting inside a heading does not hide a duplicate.
    #[test]
    fn test_md024_headings_with_formatting() {
        let content = r#"# Introduction to **Markdown**
## Getting Started
### Introduction to Markdown
## *Getting* Started
"#;
        let violations = lint(&MD024::new(), content);

        // Should detect duplicates based on text content, ignoring markdown formatting
        // document.node_text() correctly extracts plain text without formatting markers
        assert_eq!(violations.len(), 2); // Both pairs are duplicates when formatting is ignored
        assert!(violations[0].message.contains("Introduction to Markdown"));
        assert!(violations[1].message.contains("Getting Started"));
    }

    /// Duplicates are found across distant sections of a longer document.
    #[test]
    fn test_md024_long_document_with_sections() {
        let content = r#"# User Guide

## Installation
### Prerequisites
### Download
### Setup

## Configuration
### Basic Settings
### Advanced Settings

## Usage
### Getting Started
### Advanced Features

## Troubleshooting
### Common Issues
### Getting Started

## Reference
### API Documentation
### Configuration
"#;
        let violations = lint(&MD024::new(), content);

        assert_eq!(violations.len(), 2);

        // Should detect "Getting Started" and "Configuration" duplicates
        let violation_texts: Vec<String> = violations.iter().map(|v| v.message.clone()).collect();
        assert!(
            violation_texts
                .iter()
                .any(|m| m.contains("Getting Started"))
        );
        assert!(violation_texts.iter().any(|m| m.contains("Configuration")));
    }
}