rumdl_lib/rules/
md018_no_missing_space_atx.rs

1/// Rule MD018: No missing space after ATX heading marker
2///
3/// See [docs/md018.md](../../docs/md018.md) for full documentation, configuration, and examples.
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::utils::range_utils::calculate_single_line_range;
6use crate::utils::regex_cache::get_cached_regex;
7
// Emoji and Unicode hashtag patterns
// Both patterns are anchored with `^`; in this file they are matched against
// lines whose leading whitespace has already been trimmed, so that a `#` that
// is really part of an emoji sequence is not reported as a heading marker.
//
// Alternation of two literal `#`-based emoji spellings at the start of the line
// (presumably the keycap number-sign emoji with and without a variation
// selector - TODO confirm the exact code points in the literal).
const EMOJI_HASHTAG_PATTERN_STR: &str = r"^#️⃣|^#⃣";
// `#` at the start of the line immediately followed by U+FE0F or U+20E3.
const UNICODE_HASHTAG_PATTERN_STR: &str = r"^#[\u{FE0F}\u{20E3}]";
11
/// Lint rule MD018: reports ATX headings whose `#` marker is not followed by
/// whitespace (for example `##Title`) and offers a fix that inserts the space.
///
/// The rule is a stateless unit struct; it carries no configuration.
#[derive(Clone, Debug)]
pub struct MD018NoMissingSpaceAtx;
14
15impl Default for MD018NoMissingSpaceAtx {
16    fn default() -> Self {
17        Self::new()
18    }
19}
20
21impl MD018NoMissingSpaceAtx {
22    pub fn new() -> Self {
23        Self
24    }
25
26    /// Check if an ATX heading line is missing space after the marker
27    fn check_atx_heading_line(&self, line: &str) -> Option<(usize, String)> {
28        // Look for ATX marker at start of line (with optional indentation)
29        let trimmed_line = line.trim_start();
30        let indent = line.len() - trimmed_line.len();
31
32        if !trimmed_line.starts_with('#') {
33            return None;
34        }
35
36        // Skip emoji hashtags and Unicode hashtag patterns
37        let is_emoji = get_cached_regex(EMOJI_HASHTAG_PATTERN_STR)
38            .map(|re| re.is_match(trimmed_line))
39            .unwrap_or(false);
40        let is_unicode = get_cached_regex(UNICODE_HASHTAG_PATTERN_STR)
41            .map(|re| re.is_match(trimmed_line))
42            .unwrap_or(false);
43        if is_emoji || is_unicode {
44            return None;
45        }
46
47        // Count the number of hashes
48        let hash_count = trimmed_line.chars().take_while(|&c| c == '#').count();
49        if hash_count == 0 || hash_count > 6 {
50            return None;
51        }
52
53        // Check what comes after the hashes
54        let after_hashes = &trimmed_line[hash_count..];
55
56        // Skip if what follows the hashes is an emoji modifier or variant selector
57        if after_hashes
58            .chars()
59            .next()
60            .is_some_and(|ch| matches!(ch, '\u{FE0F}' | '\u{20E3}' | '\u{FE0E}'))
61        {
62            return None;
63        }
64
65        // If there's content immediately after hashes (no space), it needs fixing
66        if !after_hashes.is_empty() && !after_hashes.starts_with(' ') && !after_hashes.starts_with('\t') {
67            // Additional checks to avoid false positives
68            let content = after_hashes.trim();
69
70            // Skip if it's just more hashes (horizontal rule)
71            if content.chars().all(|c| c == '#') {
72                return None;
73            }
74
75            // Skip if content is too short to be meaningful
76            if content.len() < 2 {
77                return None;
78            }
79
80            // Skip if it starts with emphasis markers
81            if content.starts_with('*') || content.starts_with('_') {
82                return None;
83            }
84
85            // Skip if it looks like a hashtag (e.g., #tag, #123)
86            // But only skip if it's lowercase or a number to avoid skipping headings like #Summary
87            if hash_count == 1 && !content.is_empty() {
88                let first_char = content.chars().next();
89                if let Some(ch) = first_char {
90                    // Skip if it's a lowercase letter or number (common hashtag pattern)
91                    // Don't skip uppercase as those are likely headings
92                    if (ch.is_lowercase() || ch.is_numeric()) && !content.contains(' ') {
93                        return None;
94                    }
95                }
96            }
97
98            // This looks like a malformed heading that needs a space
99            let fixed = format!("{}{} {}", " ".repeat(indent), "#".repeat(hash_count), after_hashes);
100            return Some((indent + hash_count, fixed));
101        }
102
103        None
104    }
105
106    // Calculate the byte range for a specific line in the content
107    fn get_line_byte_range(&self, content: &str, line_num: usize) -> std::ops::Range<usize> {
108        let mut current_line = 1;
109        let mut start_byte = 0;
110
111        for (i, c) in content.char_indices() {
112            if current_line == line_num && c == '\n' {
113                return start_byte..i;
114            } else if c == '\n' {
115                current_line += 1;
116                if current_line == line_num {
117                    start_byte = i + 1;
118                }
119            }
120        }
121
122        // If we're looking for the last line and it doesn't end with a newline
123        if current_line == line_num {
124            return start_byte..content.len();
125        }
126
127        // Fallback if line not found (shouldn't happen)
128        0..0
129    }
130}
131
132impl Rule for MD018NoMissingSpaceAtx {
133    fn name(&self) -> &'static str {
134        "MD018"
135    }
136
137    fn description(&self) -> &'static str {
138        "No space after hash in heading"
139    }
140
141    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
142        let mut warnings = Vec::new();
143
144        // Check all lines that have ATX headings from cached info
145        for (line_num, line_info) in ctx.lines.iter().enumerate() {
146            if let Some(heading) = &line_info.heading {
147                // Only check ATX headings
148                if matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
149                    // Check if there's a space after the marker
150                    let line = &line_info.content;
151                    let trimmed = line.trim_start();
152
153                    // Skip emoji hashtags and Unicode hashtag patterns
154                    let is_emoji = get_cached_regex(EMOJI_HASHTAG_PATTERN_STR)
155                        .map(|re| re.is_match(trimmed))
156                        .unwrap_or(false);
157                    let is_unicode = get_cached_regex(UNICODE_HASHTAG_PATTERN_STR)
158                        .map(|re| re.is_match(trimmed))
159                        .unwrap_or(false);
160                    if is_emoji || is_unicode {
161                        continue;
162                    }
163
164                    if trimmed.len() > heading.marker.len() {
165                        let after_marker = &trimmed[heading.marker.len()..];
166                        if !after_marker.is_empty() && !after_marker.starts_with(' ') && !after_marker.starts_with('\t')
167                        {
168                            // Missing space after ATX marker
169                            let hash_end_col = line_info.indent + heading.marker.len() + 1; // 1-indexed
170                            let (start_line, start_col, end_line, end_col) = calculate_single_line_range(
171                                line_num + 1, // Convert to 1-indexed
172                                hash_end_col,
173                                0, // Zero-width to indicate missing space
174                            );
175
176                            warnings.push(LintWarning {
177                                rule_name: Some(self.name()),
178                                message: format!("No space after {} in heading", "#".repeat(heading.level as usize)),
179                                line: start_line,
180                                column: start_col,
181                                end_line,
182                                end_column: end_col,
183                                severity: Severity::Warning,
184                                fix: Some(Fix {
185                                    range: self.get_line_byte_range(ctx.content, line_num + 1),
186                                    replacement: format!(
187                                        "{}{} {}",
188                                        " ".repeat(line_info.indent),
189                                        heading.marker,
190                                        after_marker
191                                    ),
192                                }),
193                            });
194                        }
195                    }
196                }
197            } else if !line_info.in_code_block && !line_info.is_blank {
198                // Check for malformed headings that weren't detected as proper headings
199                if let Some((hash_end_pos, fixed_line)) = self.check_atx_heading_line(&line_info.content) {
200                    let (start_line, start_col, end_line, end_col) = calculate_single_line_range(
201                        line_num + 1,     // Convert to 1-indexed
202                        hash_end_pos + 1, // 1-indexed column
203                        0,                // Zero-width to indicate missing space
204                    );
205
206                    warnings.push(LintWarning {
207                        rule_name: Some(self.name()),
208                        message: "No space after hash in heading".to_string(),
209                        line: start_line,
210                        column: start_col,
211                        end_line,
212                        end_column: end_col,
213                        severity: Severity::Warning,
214                        fix: Some(Fix {
215                            range: self.get_line_byte_range(ctx.content, line_num + 1),
216                            replacement: fixed_line,
217                        }),
218                    });
219                }
220            }
221        }
222
223        Ok(warnings)
224    }
225
226    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
227        let mut lines = Vec::new();
228
229        for line_info in ctx.lines.iter() {
230            let mut fixed = false;
231
232            if let Some(heading) = &line_info.heading {
233                // Fix ATX headings missing space
234                if matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
235                    let line = &line_info.content;
236                    let trimmed = line.trim_start();
237
238                    // Skip emoji hashtags and Unicode hashtag patterns
239                    let is_emoji = get_cached_regex(EMOJI_HASHTAG_PATTERN_STR)
240                        .map(|re| re.is_match(trimmed))
241                        .unwrap_or(false);
242                    let is_unicode = get_cached_regex(UNICODE_HASHTAG_PATTERN_STR)
243                        .map(|re| re.is_match(trimmed))
244                        .unwrap_or(false);
245                    if is_emoji || is_unicode {
246                        continue;
247                    }
248
249                    if trimmed.len() > heading.marker.len() {
250                        let after_marker = &trimmed[heading.marker.len()..];
251                        if !after_marker.is_empty() && !after_marker.starts_with(' ') && !after_marker.starts_with('\t')
252                        {
253                            // Add space after marker
254                            lines.push(format!(
255                                "{}{} {}",
256                                " ".repeat(line_info.indent),
257                                heading.marker,
258                                after_marker
259                            ));
260                            fixed = true;
261                        }
262                    }
263                }
264            } else if !line_info.in_code_block && !line_info.is_blank {
265                // Fix malformed headings
266                if let Some((_, fixed_line)) = self.check_atx_heading_line(&line_info.content) {
267                    lines.push(fixed_line);
268                    fixed = true;
269                }
270            }
271
272            if !fixed {
273                lines.push(line_info.content.clone());
274            }
275        }
276
277        // Reconstruct content preserving line endings
278        let mut result = lines.join("\n");
279        if ctx.content.ends_with('\n') && !result.ends_with('\n') {
280            result.push('\n');
281        }
282
283        Ok(result)
284    }
285
286    /// Get the category of this rule for selective processing
287    fn category(&self) -> RuleCategory {
288        RuleCategory::Heading
289    }
290
291    /// Check if this rule should be skipped
292    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
293        // Fast path: check if document likely has headings
294        if !ctx.likely_has_headings() {
295            return true;
296        }
297        // Verify lines with hash symbols exist
298        !ctx.lines.iter().any(|line| line.content.contains('#'))
299    }
300
301    fn as_any(&self) -> &dyn std::any::Any {
302        self
303    }
304
305    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
306    where
307        Self: Sized,
308    {
309        Box::new(MD018NoMissingSpaceAtx::new())
310    }
311}
312
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Lints `content` with the standard flavor and returns the 1-indexed line
    /// of every warning, in the order the rule reported them.
    fn flagged_lines(content: &str) -> Vec<usize> {
        let rule = MD018NoMissingSpaceAtx::new();
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let warnings = rule.check(&ctx).unwrap();
        let lines: Vec<usize> = warnings.iter().map(|warning| warning.line).collect();
        lines
    }

    #[test]
    fn test_basic_functionality() {
        // Headings that already have the space produce no warnings.
        assert!(flagged_lines("# Heading 1\n## Heading 2\n### Heading 3").is_empty());

        // Exactly the two headings without a space are flagged, in order.
        assert_eq!(
            flagged_lines("#Heading 1\n## Heading 2\n###Heading 3"),
            vec![1, 3]
        );
    }

    #[test]
    fn test_malformed_heading_detection() {
        let rule = MD018NoMissingSpaceAtx::new();

        // Lines that must be recognised as headings missing their space.
        let malformed = [
            "##Introduction",
            "###Background",
            "####Details",
            "#Summary",
            "######Conclusion",
            "##Table of Contents",
        ];
        for line in malformed {
            assert!(
                rule.check_atx_heading_line(line).is_some(),
                "expected {line:?} to be detected"
            );
        }

        // Lines that must be left alone, with the reason for each.
        let ignored = [
            ("###", "just hashes"),
            ("#", "single hash"),
            ("##a", "too short"),
            ("#*emphasis", "emphasis marker"),
            ("#######TooBig", "more than 6 hashes"),
        ];
        for (line, reason) in ignored {
            assert!(
                rule.check_atx_heading_line(line).is_none(),
                "expected {line:?} to be ignored ({reason})"
            );
        }
    }

    #[test]
    fn test_malformed_heading_with_context() {
        // Full document mixing malformed headings with both kinds of code block.
        let content = r#"# Test Document

##Introduction
This should be detected.

    ##CodeBlock
This should NOT be detected (indented code block).

```
##FencedCodeBlock
This should NOT be detected (fenced code block).
```

##Conclusion
This should be detected.
"#;

        let detected = flagged_lines(content);

        // Malformed headings are reported...
        assert!(detected.contains(&3)); // ##Introduction
        assert!(detected.contains(&14)); // ##Conclusion
        // ...but anything inside a code block is not.
        assert!(!detected.contains(&6)); // ##CodeBlock
        assert!(!detected.contains(&10)); // ##FencedCodeBlock
    }

    #[test]
    fn test_malformed_heading_fix() {
        let input = r#"##Introduction
This is a test.

###Background
More content."#;

        let expected = r#"## Introduction
This is a test.

### Background
More content."#;

        let ctx = LintContext::new(input, crate::config::MarkdownFlavor::Standard);
        let fixed = MD018NoMissingSpaceAtx::new().fix(&ctx).unwrap();

        assert_eq!(fixed, expected);
    }

    #[test]
    fn test_mixed_proper_and_malformed_headings() {
        let content = r#"# Proper Heading

##Malformed Heading

## Another Proper Heading

###Another Malformed

#### Proper with space
"#;

        let detected = flagged_lines(content);

        // Only the two malformed headings are reported.
        assert_eq!(detected.len(), 2);
        assert!(detected.contains(&3)); // ##Malformed Heading
        assert!(detected.contains(&7)); // ###Another Malformed
    }
}