rumdl_lib/rules/
md018_no_missing_space_atx.rs

1/// Rule MD018: No missing space after ATX heading marker
2///
3/// See [docs/md018.md](../../docs/md018.md) for full documentation, configuration, and examples.
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::utils::range_utils::calculate_single_line_range;
6use crate::utils::regex_cache::get_cached_regex;
7
// Emoji and Unicode hashtag patterns
//
// Both constants are regex sources that this file passes to `get_cached_regex`. They are
// anchored with `^` and are matched against lines whose leading whitespace has already been
// trimmed, so they only fire when the hashtag sequence is the first thing on the line.

// Matches a line starting with a keycap-style hashtag emoji written literally
// (`#` followed by combining emoji marks).
const EMOJI_HASHTAG_PATTERN_STR: &str = r"^#️⃣|^#⃣";
// Matches a line starting with `#` immediately followed by U+FE0F or U+20E3.
const UNICODE_HASHTAG_PATTERN_STR: &str = r"^#[\u{FE0F}\u{20E3}]";
11
/// Rule MD018: reports ATX headings whose `#` marker is not followed by a space.
///
/// The rule is a stateless unit struct: it carries no configuration, so every instance
/// behaves identically.
#[derive(Debug, Clone)]
pub struct MD018NoMissingSpaceAtx;
14
15impl Default for MD018NoMissingSpaceAtx {
16    fn default() -> Self {
17        Self::new()
18    }
19}
20
21impl MD018NoMissingSpaceAtx {
22    pub fn new() -> Self {
23        Self
24    }
25
26    /// Check if an ATX heading line is missing space after the marker
27    fn check_atx_heading_line(&self, line: &str) -> Option<(usize, String)> {
28        // Look for ATX marker at start of line (with optional indentation)
29        let trimmed_line = line.trim_start();
30        let indent = line.len() - trimmed_line.len();
31
32        if !trimmed_line.starts_with('#') {
33            return None;
34        }
35
36        // Skip emoji hashtags and Unicode hashtag patterns
37        let is_emoji = get_cached_regex(EMOJI_HASHTAG_PATTERN_STR)
38            .map(|re| re.is_match(trimmed_line))
39            .unwrap_or(false);
40        let is_unicode = get_cached_regex(UNICODE_HASHTAG_PATTERN_STR)
41            .map(|re| re.is_match(trimmed_line))
42            .unwrap_or(false);
43        if is_emoji || is_unicode {
44            return None;
45        }
46
47        // Count the number of hashes
48        let hash_count = trimmed_line.chars().take_while(|&c| c == '#').count();
49        if hash_count == 0 || hash_count > 6 {
50            return None;
51        }
52
53        // Check what comes after the hashes
54        let after_hashes = &trimmed_line[hash_count..];
55
56        // Skip if what follows the hashes is an emoji modifier or variant selector
57        if after_hashes
58            .chars()
59            .next()
60            .is_some_and(|ch| matches!(ch, '\u{FE0F}' | '\u{20E3}' | '\u{FE0E}'))
61        {
62            return None;
63        }
64
65        // If there's content immediately after hashes (no space), it needs fixing
66        if !after_hashes.is_empty() && !after_hashes.starts_with(' ') && !after_hashes.starts_with('\t') {
67            // Additional checks to avoid false positives
68            let content = after_hashes.trim();
69
70            // Skip if it's just more hashes (horizontal rule)
71            if content.chars().all(|c| c == '#') {
72                return None;
73            }
74
75            // Skip if content is too short to be meaningful
76            if content.len() < 2 {
77                return None;
78            }
79
80            // Skip if it starts with emphasis markers
81            if content.starts_with('*') || content.starts_with('_') {
82                return None;
83            }
84
85            // Skip if it looks like a hashtag (e.g., #tag, #123)
86            // But only skip if it's lowercase or a number to avoid skipping headings like #Summary
87            if hash_count == 1 && !content.is_empty() {
88                let first_char = content.chars().next();
89                if let Some(ch) = first_char {
90                    // Skip if it's a lowercase letter or number (common hashtag pattern)
91                    // Don't skip uppercase as those are likely headings
92                    if (ch.is_lowercase() || ch.is_numeric()) && !content.contains(' ') {
93                        return None;
94                    }
95                }
96            }
97
98            // This looks like a malformed heading that needs a space
99            let fixed = format!("{}{} {}", " ".repeat(indent), "#".repeat(hash_count), after_hashes);
100            return Some((indent + hash_count, fixed));
101        }
102
103        None
104    }
105
106    // Calculate the byte range for a specific line in the content
107    fn get_line_byte_range(&self, content: &str, line_num: usize) -> std::ops::Range<usize> {
108        let mut current_line = 1;
109        let mut start_byte = 0;
110
111        for (i, c) in content.char_indices() {
112            if current_line == line_num && c == '\n' {
113                return start_byte..i;
114            } else if c == '\n' {
115                current_line += 1;
116                if current_line == line_num {
117                    start_byte = i + 1;
118                }
119            }
120        }
121
122        // If we're looking for the last line and it doesn't end with a newline
123        if current_line == line_num {
124            return start_byte..content.len();
125        }
126
127        // Fallback if line not found (shouldn't happen)
128        0..0
129    }
130}
131
132impl Rule for MD018NoMissingSpaceAtx {
133    fn name(&self) -> &'static str {
134        "MD018"
135    }
136
137    fn description(&self) -> &'static str {
138        "No space after hash in heading"
139    }
140
141    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
142        let mut warnings = Vec::new();
143
144        // Check all lines that have ATX headings from cached info
145        for (line_num, line_info) in ctx.lines.iter().enumerate() {
146            // Skip lines inside HTML blocks (e.g., CSS selectors like #id)
147            if line_info.in_html_block {
148                continue;
149            }
150
151            if let Some(heading) = &line_info.heading {
152                // Only check ATX headings
153                if matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
154                    // Check if there's a space after the marker
155                    let line = line_info.content(ctx.content);
156                    let trimmed = line.trim_start();
157
158                    // Skip emoji hashtags and Unicode hashtag patterns
159                    let is_emoji = get_cached_regex(EMOJI_HASHTAG_PATTERN_STR)
160                        .map(|re| re.is_match(trimmed))
161                        .unwrap_or(false);
162                    let is_unicode = get_cached_regex(UNICODE_HASHTAG_PATTERN_STR)
163                        .map(|re| re.is_match(trimmed))
164                        .unwrap_or(false);
165                    if is_emoji || is_unicode {
166                        continue;
167                    }
168
169                    if trimmed.len() > heading.marker.len() {
170                        let after_marker = &trimmed[heading.marker.len()..];
171                        if !after_marker.is_empty() && !after_marker.starts_with(' ') && !after_marker.starts_with('\t')
172                        {
173                            // Missing space after ATX marker
174                            let hash_end_col = line_info.indent + heading.marker.len() + 1; // 1-indexed
175                            let (start_line, start_col, end_line, end_col) = calculate_single_line_range(
176                                line_num + 1, // Convert to 1-indexed
177                                hash_end_col,
178                                0, // Zero-width to indicate missing space
179                            );
180
181                            warnings.push(LintWarning {
182                                rule_name: Some(self.name().to_string()),
183                                message: format!("No space after {} in heading", "#".repeat(heading.level as usize)),
184                                line: start_line,
185                                column: start_col,
186                                end_line,
187                                end_column: end_col,
188                                severity: Severity::Warning,
189                                fix: Some(Fix {
190                                    range: self.get_line_byte_range(ctx.content, line_num + 1),
191                                    replacement: format!(
192                                        "{}{} {}",
193                                        " ".repeat(line_info.indent),
194                                        heading.marker,
195                                        after_marker
196                                    ),
197                                }),
198                            });
199                        }
200                    }
201                }
202            } else if !line_info.in_code_block && !line_info.is_blank {
203                // Check for malformed headings that weren't detected as proper headings
204                if let Some((hash_end_pos, fixed_line)) = self.check_atx_heading_line(line_info.content(ctx.content)) {
205                    let (start_line, start_col, end_line, end_col) = calculate_single_line_range(
206                        line_num + 1,     // Convert to 1-indexed
207                        hash_end_pos + 1, // 1-indexed column
208                        0,                // Zero-width to indicate missing space
209                    );
210
211                    warnings.push(LintWarning {
212                        rule_name: Some(self.name().to_string()),
213                        message: "No space after hash in heading".to_string(),
214                        line: start_line,
215                        column: start_col,
216                        end_line,
217                        end_column: end_col,
218                        severity: Severity::Warning,
219                        fix: Some(Fix {
220                            range: self.get_line_byte_range(ctx.content, line_num + 1),
221                            replacement: fixed_line,
222                        }),
223                    });
224                }
225            }
226        }
227
228        Ok(warnings)
229    }
230
231    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
232        let mut lines = Vec::new();
233
234        for line_info in ctx.lines.iter() {
235            let mut fixed = false;
236
237            if let Some(heading) = &line_info.heading {
238                // Fix ATX headings missing space
239                if matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
240                    let line = line_info.content(ctx.content);
241                    let trimmed = line.trim_start();
242
243                    // Skip emoji hashtags and Unicode hashtag patterns
244                    let is_emoji = get_cached_regex(EMOJI_HASHTAG_PATTERN_STR)
245                        .map(|re| re.is_match(trimmed))
246                        .unwrap_or(false);
247                    let is_unicode = get_cached_regex(UNICODE_HASHTAG_PATTERN_STR)
248                        .map(|re| re.is_match(trimmed))
249                        .unwrap_or(false);
250                    if is_emoji || is_unicode {
251                        continue;
252                    }
253
254                    if trimmed.len() > heading.marker.len() {
255                        let after_marker = &trimmed[heading.marker.len()..];
256                        if !after_marker.is_empty() && !after_marker.starts_with(' ') && !after_marker.starts_with('\t')
257                        {
258                            // Add space after marker
259                            lines.push(format!(
260                                "{}{} {}",
261                                " ".repeat(line_info.indent),
262                                heading.marker,
263                                after_marker
264                            ));
265                            fixed = true;
266                        }
267                    }
268                }
269            } else if !line_info.in_code_block && !line_info.is_blank {
270                // Fix malformed headings
271                if let Some((_, fixed_line)) = self.check_atx_heading_line(line_info.content(ctx.content)) {
272                    lines.push(fixed_line);
273                    fixed = true;
274                }
275            }
276
277            if !fixed {
278                lines.push(line_info.content(ctx.content).to_string());
279            }
280        }
281
282        // Reconstruct content preserving line endings
283        let mut result = lines.join("\n");
284        if ctx.content.ends_with('\n') && !result.ends_with('\n') {
285            result.push('\n');
286        }
287
288        Ok(result)
289    }
290
291    /// Get the category of this rule for selective processing
292    fn category(&self) -> RuleCategory {
293        RuleCategory::Heading
294    }
295
296    /// Check if this rule should be skipped
297    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
298        // Fast path: check if document likely has headings
299        !ctx.likely_has_headings()
300    }
301
302    fn as_any(&self) -> &dyn std::any::Any {
303        self
304    }
305
306    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
307    where
308        Self: Sized,
309    {
310        Box::new(MD018NoMissingSpaceAtx::new())
311    }
312}
313
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Lints `content` as standard-flavor Markdown and returns the 1-indexed line of each
    /// MD018 warning, in the order the rule produced them.
    fn flagged_lines(content: &str) -> Vec<usize> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let warnings = MD018NoMissingSpaceAtx::new().check(&ctx).unwrap();
        warnings.iter().map(|w| w.line).collect()
    }

    #[test]
    fn test_basic_functionality() {
        // Every marker is followed by a space: nothing to report.
        assert!(flagged_lines("# Heading 1\n## Heading 2\n### Heading 3").is_empty());

        // Lines 1 and 3 are missing the space; line 2 is fine.
        assert_eq!(
            flagged_lines("#Heading 1\n## Heading 2\n###Heading 3"),
            vec![1, 3]
        );
    }

    #[test]
    fn test_malformed_heading_detection() {
        let rule = MD018NoMissingSpaceAtx::new();

        let malformed = [
            "##Introduction",
            "###Background",
            "####Details",
            "#Summary",
            "######Conclusion",
            "##Table of Contents",
        ];
        for line in malformed {
            assert!(
                rule.check_atx_heading_line(line).is_some(),
                "{line:?} should be detected"
            );
        }

        let ignored = [
            "###",           // Just hashes
            "#",             // Single hash
            "##a",           // Too short
            "#*emphasis",    // Emphasis marker
            "#######TooBig", // More than 6 hashes
        ];
        for line in ignored {
            assert!(
                rule.check_atx_heading_line(line).is_none(),
                "{line:?} should not be detected"
            );
        }
    }

    #[test]
    fn test_malformed_heading_with_context() {
        // Malformed headings mixed with indented and fenced code blocks.
        let content = r#"# Test Document

##Introduction
This should be detected.

    ##CodeBlock
This should NOT be detected (indented code block).

```
##FencedCodeBlock
This should NOT be detected (fenced code block).
```

##Conclusion
This should be detected.
"#;

        let detected = flagged_lines(content);

        // Headings outside code blocks are reported...
        for expected in [3, 14] {
            // ##Introduction, ##Conclusion
            assert!(detected.contains(&expected));
        }
        // ...while anything inside a code block is left alone.
        for unexpected in [6, 10] {
            // ##CodeBlock, ##FencedCodeBlock
            assert!(!detected.contains(&unexpected));
        }
    }

    #[test]
    fn test_malformed_heading_fix() {
        let input = r#"##Introduction
This is a test.

###Background
More content."#;

        let expected = r#"## Introduction
This is a test.

### Background
More content."#;

        let ctx = LintContext::new(input, crate::config::MarkdownFlavor::Standard);
        let fixed = MD018NoMissingSpaceAtx::new().fix(&ctx).unwrap();

        assert_eq!(fixed, expected);
    }

    #[test]
    fn test_mixed_proper_and_malformed_headings() {
        let content = r#"# Proper Heading

##Malformed Heading

## Another Proper Heading

###Another Malformed

#### Proper with space
"#;

        let detected = flagged_lines(content);

        // Only the two malformed headings are reported.
        assert_eq!(detected.len(), 2);
        assert!(detected.contains(&3)); // ##Malformed Heading
        assert!(detected.contains(&7)); // ###Another Malformed
    }

    #[test]
    fn test_css_selectors_in_html_blocks() {
        // CSS id selectors inside <style> tags (common in Quarto/RMarkdown files) start with
        // `#` but are not headings.
        let content = r#"# Proper Heading

<style>
#slide-1 ol li {
    margin-top: 0;
}

#special-slide ol li {
    margin-top: 2em;
}
</style>

## Another Heading
"#;

        assert_eq!(
            flagged_lines(content).len(),
            0,
            "CSS selectors in <style> blocks should not be flagged as malformed headings"
        );
    }

    #[test]
    fn test_js_code_in_script_blocks() {
        // Lines such as `#element` inside <script> tags must not be reported.
        let content = r#"# Heading

<script>
const element = document.querySelector('#main-content');
#another-comment
</script>

## Another Heading
"#;

        assert_eq!(
            flagged_lines(content).len(),
            0,
            "JavaScript code in <script> blocks should not be flagged as malformed headings"
        );
    }
}