rumdl_lib/rules/
md018_no_missing_space_atx.rs

//! Rule MD018: No missing space after ATX heading marker
//!
//! See [docs/md018.md](../../docs/md018.md) for full documentation, configuration, and examples.
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::utils::range_utils::calculate_single_line_range;
6use lazy_static::lazy_static;
7use regex::Regex;
8
9lazy_static! {
10    // Pattern to detect emoji hashtags like #️⃣
11    static ref EMOJI_HASHTAG_PATTERN: Regex = Regex::new(r"^#️⃣|^#⃣").unwrap();
12
13    // Pattern to detect Unicode hashtag symbols that shouldn't be treated as headings
14    static ref UNICODE_HASHTAG_PATTERN: Regex = Regex::new(r"^#[\u{FE0F}\u{20E3}]").unwrap();
15}
16
/// Rule MD018: flags ATX headings whose `#` marker is not followed by whitespace.
///
/// The rule is a stateless unit struct, so `Default`, `Clone` and `Debug` are all derived;
/// `Default::default()` yields the same value as `MD018NoMissingSpaceAtx::new()`.
#[derive(Debug, Clone, Default)]
pub struct MD018NoMissingSpaceAtx;
25
26impl MD018NoMissingSpaceAtx {
27    pub fn new() -> Self {
28        Self
29    }
30
31    /// Check if an ATX heading line is missing space after the marker
32    fn check_atx_heading_line(&self, line: &str) -> Option<(usize, String)> {
33        // Look for ATX marker at start of line (with optional indentation)
34        let trimmed_line = line.trim_start();
35        let indent = line.len() - trimmed_line.len();
36
37        if !trimmed_line.starts_with('#') {
38            return None;
39        }
40
41        // Skip emoji hashtags and Unicode hashtag patterns
42        if EMOJI_HASHTAG_PATTERN.is_match(trimmed_line) || UNICODE_HASHTAG_PATTERN.is_match(trimmed_line) {
43            return None;
44        }
45
46        // Count the number of hashes
47        let hash_count = trimmed_line.chars().take_while(|&c| c == '#').count();
48        if hash_count == 0 || hash_count > 6 {
49            return None;
50        }
51
52        // Check what comes after the hashes
53        let after_hashes = &trimmed_line[hash_count..];
54
55        // Skip if what follows the hashes is an emoji modifier or variant selector
56        if after_hashes
57            .chars()
58            .next()
59            .is_some_and(|ch| matches!(ch, '\u{FE0F}' | '\u{20E3}' | '\u{FE0E}'))
60        {
61            return None;
62        }
63
64        // If there's content immediately after hashes (no space), it needs fixing
65        if !after_hashes.is_empty() && !after_hashes.starts_with(' ') && !after_hashes.starts_with('\t') {
66            // Additional checks to avoid false positives
67            let content = after_hashes.trim();
68
69            // Skip if it's just more hashes (horizontal rule)
70            if content.chars().all(|c| c == '#') {
71                return None;
72            }
73
74            // Skip if content is too short to be meaningful
75            if content.len() < 2 {
76                return None;
77            }
78
79            // Skip if it starts with emphasis markers
80            if content.starts_with('*') || content.starts_with('_') {
81                return None;
82            }
83
84            // Skip if it looks like a hashtag (e.g., #tag, #123)
85            // But only skip if it's lowercase or a number to avoid skipping headings like #Summary
86            if hash_count == 1 && !content.is_empty() {
87                let first_char = content.chars().next();
88                if let Some(ch) = first_char {
89                    // Skip if it's a lowercase letter or number (common hashtag pattern)
90                    // Don't skip uppercase as those are likely headings
91                    if (ch.is_lowercase() || ch.is_numeric()) && !content.contains(' ') {
92                        return None;
93                    }
94                }
95            }
96
97            // This looks like a malformed heading that needs a space
98            let fixed = format!("{}{} {}", " ".repeat(indent), "#".repeat(hash_count), after_hashes);
99            return Some((indent + hash_count, fixed));
100        }
101
102        None
103    }
104
105    // Calculate the byte range for a specific line in the content
106    fn get_line_byte_range(&self, content: &str, line_num: usize) -> std::ops::Range<usize> {
107        let mut current_line = 1;
108        let mut start_byte = 0;
109
110        for (i, c) in content.char_indices() {
111            if current_line == line_num && c == '\n' {
112                return start_byte..i;
113            } else if c == '\n' {
114                current_line += 1;
115                if current_line == line_num {
116                    start_byte = i + 1;
117                }
118            }
119        }
120
121        // If we're looking for the last line and it doesn't end with a newline
122        if current_line == line_num {
123            return start_byte..content.len();
124        }
125
126        // Fallback if line not found (shouldn't happen)
127        0..0
128    }
129}
130
131impl Rule for MD018NoMissingSpaceAtx {
132    fn name(&self) -> &'static str {
133        "MD018"
134    }
135
136    fn description(&self) -> &'static str {
137        "No space after hash in heading"
138    }
139
140    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
141        let mut warnings = Vec::new();
142
143        // Check all lines that have ATX headings from cached info
144        for (line_num, line_info) in ctx.lines.iter().enumerate() {
145            if let Some(heading) = &line_info.heading {
146                // Only check ATX headings
147                if matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
148                    // Check if there's a space after the marker
149                    let line = &line_info.content;
150                    let trimmed = line.trim_start();
151
152                    // Skip emoji hashtags and Unicode hashtag patterns
153                    if EMOJI_HASHTAG_PATTERN.is_match(trimmed) || UNICODE_HASHTAG_PATTERN.is_match(trimmed) {
154                        continue;
155                    }
156
157                    if trimmed.len() > heading.marker.len() {
158                        let after_marker = &trimmed[heading.marker.len()..];
159                        if !after_marker.is_empty() && !after_marker.starts_with(' ') && !after_marker.starts_with('\t')
160                        {
161                            // Missing space after ATX marker
162                            let hash_end_col = line_info.indent + heading.marker.len() + 1; // 1-indexed
163                            let (start_line, start_col, end_line, end_col) = calculate_single_line_range(
164                                line_num + 1, // Convert to 1-indexed
165                                hash_end_col,
166                                0, // Zero-width to indicate missing space
167                            );
168
169                            warnings.push(LintWarning {
170                                rule_name: Some(self.name()),
171                                message: format!("No space after {} in heading", "#".repeat(heading.level as usize)),
172                                line: start_line,
173                                column: start_col,
174                                end_line,
175                                end_column: end_col,
176                                severity: Severity::Warning,
177                                fix: Some(Fix {
178                                    range: self.get_line_byte_range(ctx.content, line_num + 1),
179                                    replacement: format!(
180                                        "{}{} {}",
181                                        " ".repeat(line_info.indent),
182                                        heading.marker,
183                                        after_marker
184                                    ),
185                                }),
186                            });
187                        }
188                    }
189                }
190            } else if !line_info.in_code_block && !line_info.is_blank {
191                // Check for malformed headings that weren't detected as proper headings
192                if let Some((hash_end_pos, fixed_line)) = self.check_atx_heading_line(&line_info.content) {
193                    let (start_line, start_col, end_line, end_col) = calculate_single_line_range(
194                        line_num + 1,     // Convert to 1-indexed
195                        hash_end_pos + 1, // 1-indexed column
196                        0,                // Zero-width to indicate missing space
197                    );
198
199                    warnings.push(LintWarning {
200                        rule_name: Some(self.name()),
201                        message: "No space after hash in heading".to_string(),
202                        line: start_line,
203                        column: start_col,
204                        end_line,
205                        end_column: end_col,
206                        severity: Severity::Warning,
207                        fix: Some(Fix {
208                            range: self.get_line_byte_range(ctx.content, line_num + 1),
209                            replacement: fixed_line,
210                        }),
211                    });
212                }
213            }
214        }
215
216        Ok(warnings)
217    }
218
219    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
220        let mut lines = Vec::new();
221
222        for line_info in ctx.lines.iter() {
223            let mut fixed = false;
224
225            if let Some(heading) = &line_info.heading {
226                // Fix ATX headings missing space
227                if matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
228                    let line = &line_info.content;
229                    let trimmed = line.trim_start();
230
231                    // Skip emoji hashtags and Unicode hashtag patterns
232                    if EMOJI_HASHTAG_PATTERN.is_match(trimmed) || UNICODE_HASHTAG_PATTERN.is_match(trimmed) {
233                        continue;
234                    }
235
236                    if trimmed.len() > heading.marker.len() {
237                        let after_marker = &trimmed[heading.marker.len()..];
238                        if !after_marker.is_empty() && !after_marker.starts_with(' ') && !after_marker.starts_with('\t')
239                        {
240                            // Add space after marker
241                            lines.push(format!(
242                                "{}{} {}",
243                                " ".repeat(line_info.indent),
244                                heading.marker,
245                                after_marker
246                            ));
247                            fixed = true;
248                        }
249                    }
250                }
251            } else if !line_info.in_code_block && !line_info.is_blank {
252                // Fix malformed headings
253                if let Some((_, fixed_line)) = self.check_atx_heading_line(&line_info.content) {
254                    lines.push(fixed_line);
255                    fixed = true;
256                }
257            }
258
259            if !fixed {
260                lines.push(line_info.content.clone());
261            }
262        }
263
264        // Reconstruct content preserving line endings
265        let mut result = lines.join("\n");
266        if ctx.content.ends_with('\n') && !result.ends_with('\n') {
267            result.push('\n');
268        }
269
270        Ok(result)
271    }
272
273    /// Get the category of this rule for selective processing
274    fn category(&self) -> RuleCategory {
275        RuleCategory::Heading
276    }
277
278    /// Check if this rule should be skipped
279    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
280        // Skip if no lines contain hash symbols
281        !ctx.lines.iter().any(|line| line.content.contains('#'))
282    }
283
284    fn as_any(&self) -> &dyn std::any::Any {
285        self
286    }
287
288    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
289    where
290        Self: Sized,
291    {
292        Box::new(MD018NoMissingSpaceAtx::new())
293    }
294}
295
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Lints `content` with the standard flavor and returns the flagged 1-indexed lines,
    /// in the order the warnings were produced.
    fn flagged_lines(content: &str) -> Vec<usize> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let warnings = MD018NoMissingSpaceAtx::new().check(&ctx).unwrap();
        warnings.iter().map(|warning| warning.line).collect()
    }

    #[test]
    fn test_basic_functionality() {
        // Every marker is followed by a space: nothing to report.
        let well_formed = flagged_lines("# Heading 1\n## Heading 2\n### Heading 3");
        assert!(well_formed.is_empty());

        // Lines 1 and 3 are missing the space; line 2 is fine.
        let missing_space = flagged_lines("#Heading 1\n## Heading 2\n###Heading 3");
        assert_eq!(missing_space, vec![1, 3]);
    }

    #[test]
    fn test_malformed_heading_detection() {
        let rule = MD018NoMissingSpaceAtx::new();

        // Lines that must be recognized as headings with a missing space.
        let malformed = [
            "##Introduction",
            "###Background",
            "####Details",
            "#Summary",
            "######Conclusion",
            "##Table of Contents",
        ];
        for line in malformed {
            assert!(rule.check_atx_heading_line(line).is_some());
        }

        // Lines that must be left alone.
        let ignored = [
            "###",           // Just hashes
            "#",             // Single hash
            "##a",           // Too short
            "#*emphasis",    // Emphasis marker
            "#######TooBig", // More than 6 hashes
        ];
        for line in ignored {
            assert!(rule.check_atx_heading_line(line).is_none());
        }
    }

    #[test]
    fn test_malformed_heading_with_context() {
        // Full document mixing malformed headings with indented and fenced code blocks.
        let content = r#"# Test Document

##Introduction
This should be detected.

    ##CodeBlock
This should NOT be detected (indented code block).

```
##FencedCodeBlock
This should NOT be detected (fenced code block).
```

##Conclusion
This should be detected.
"#;

        let detected_lines = flagged_lines(content);

        // Malformed headings are reported...
        for expected in [3, 14] {
            // ##Introduction, ##Conclusion
            assert!(detected_lines.contains(&expected));
        }
        // ...but anything inside a code block is not.
        for unexpected in [6, 10] {
            // ##CodeBlock, ##FencedCodeBlock
            assert!(!detected_lines.contains(&unexpected));
        }
    }

    #[test]
    fn test_malformed_heading_fix() {
        let content = r#"##Introduction
This is a test.

###Background
More content."#;

        let expected = r#"## Introduction
This is a test.

### Background
More content."#;

        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let fixed = MD018NoMissingSpaceAtx::new().fix(&ctx).unwrap();

        assert_eq!(fixed, expected);
    }

    #[test]
    fn test_mixed_proper_and_malformed_headings() {
        let content = r#"# Proper Heading

##Malformed Heading

## Another Proper Heading

###Another Malformed

#### Proper with space
"#;

        let detected_lines = flagged_lines(content);

        // Only the two malformed headings are reported.
        assert_eq!(detected_lines.len(), 2);
        assert!(detected_lines.contains(&3)); // ##Malformed Heading
        assert!(detected_lines.contains(&7)); // ###Another Malformed
    }
}