rumdl_lib/rules/
md018_no_missing_space_atx.rs

1/// Rule MD018: No missing space after ATX heading marker
2///
3/// See [docs/md018.md](../../docs/md018.md) for full documentation, configuration, and examples.
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::utils::range_utils::calculate_single_line_range;
6use crate::utils::regex_cache::get_cached_regex;
7
// Emoji and Unicode hashtag patterns
//
// Both are regex sources that this rule resolves through `get_cached_regex` and runs against a
// line with its leading whitespace removed, so `^` anchors at the first non-blank character.

/// Matches a line beginning with one of the two literal keycap number-sign sequences below
/// (presumably `#` + U+FE0F + U+20E3 and `#` + U+20E3 — confirm against the literal's bytes).
const EMOJI_HASHTAG_PATTERN_STR: &str = r"^#️⃣|^#⃣";
/// Matches a line beginning with `#` immediately followed by U+FE0F (variation selector-16)
/// or U+20E3 (combining enclosing keycap).
const UNICODE_HASHTAG_PATTERN_STR: &str = r"^#[\u{FE0F}\u{20E3}]";
11
/// Rule MD018: reports ATX headings whose `#` marker is not followed by a space.
///
/// The rule is a unit struct with no configuration or state, so `Default` and `Debug` are
/// derived rather than written by hand.
#[derive(Debug, Clone, Default)]
pub struct MD018NoMissingSpaceAtx;
20
21impl MD018NoMissingSpaceAtx {
22    pub fn new() -> Self {
23        Self
24    }
25
26    /// Check if an ATX heading line is missing space after the marker
27    fn check_atx_heading_line(&self, line: &str) -> Option<(usize, String)> {
28        // Look for ATX marker at start of line (with optional indentation)
29        let trimmed_line = line.trim_start();
30        let indent = line.len() - trimmed_line.len();
31
32        if !trimmed_line.starts_with('#') {
33            return None;
34        }
35
36        // Skip emoji hashtags and Unicode hashtag patterns
37        let is_emoji = get_cached_regex(EMOJI_HASHTAG_PATTERN_STR)
38            .map(|re| re.is_match(trimmed_line))
39            .unwrap_or(false);
40        let is_unicode = get_cached_regex(UNICODE_HASHTAG_PATTERN_STR)
41            .map(|re| re.is_match(trimmed_line))
42            .unwrap_or(false);
43        if is_emoji || is_unicode {
44            return None;
45        }
46
47        // Count the number of hashes
48        let hash_count = trimmed_line.chars().take_while(|&c| c == '#').count();
49        if hash_count == 0 || hash_count > 6 {
50            return None;
51        }
52
53        // Check what comes after the hashes
54        let after_hashes = &trimmed_line[hash_count..];
55
56        // Skip if what follows the hashes is an emoji modifier or variant selector
57        if after_hashes
58            .chars()
59            .next()
60            .is_some_and(|ch| matches!(ch, '\u{FE0F}' | '\u{20E3}' | '\u{FE0E}'))
61        {
62            return None;
63        }
64
65        // If there's content immediately after hashes (no space), it needs fixing
66        if !after_hashes.is_empty() && !after_hashes.starts_with(' ') && !after_hashes.starts_with('\t') {
67            // Additional checks to avoid false positives
68            let content = after_hashes.trim();
69
70            // Skip if it's just more hashes (horizontal rule)
71            if content.chars().all(|c| c == '#') {
72                return None;
73            }
74
75            // Skip if content is too short to be meaningful
76            if content.len() < 2 {
77                return None;
78            }
79
80            // Skip if it starts with emphasis markers
81            if content.starts_with('*') || content.starts_with('_') {
82                return None;
83            }
84
85            // Skip if it looks like a hashtag (e.g., #tag, #123)
86            // But only skip if it's lowercase or a number to avoid skipping headings like #Summary
87            if hash_count == 1 && !content.is_empty() {
88                let first_char = content.chars().next();
89                if let Some(ch) = first_char {
90                    // Skip if it's a lowercase letter or number (common hashtag pattern)
91                    // Don't skip uppercase as those are likely headings
92                    if (ch.is_lowercase() || ch.is_numeric()) && !content.contains(' ') {
93                        return None;
94                    }
95                }
96            }
97
98            // This looks like a malformed heading that needs a space
99            let fixed = format!("{}{} {}", " ".repeat(indent), "#".repeat(hash_count), after_hashes);
100            return Some((indent + hash_count, fixed));
101        }
102
103        None
104    }
105
106    // Calculate the byte range for a specific line in the content
107    fn get_line_byte_range(&self, content: &str, line_num: usize) -> std::ops::Range<usize> {
108        let mut current_line = 1;
109        let mut start_byte = 0;
110
111        for (i, c) in content.char_indices() {
112            if current_line == line_num && c == '\n' {
113                return start_byte..i;
114            } else if c == '\n' {
115                current_line += 1;
116                if current_line == line_num {
117                    start_byte = i + 1;
118                }
119            }
120        }
121
122        // If we're looking for the last line and it doesn't end with a newline
123        if current_line == line_num {
124            return start_byte..content.len();
125        }
126
127        // Fallback if line not found (shouldn't happen)
128        0..0
129    }
130}
131
132impl Rule for MD018NoMissingSpaceAtx {
133    fn name(&self) -> &'static str {
134        "MD018"
135    }
136
137    fn description(&self) -> &'static str {
138        "No space after hash in heading"
139    }
140
141    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
142        let mut warnings = Vec::new();
143
144        // Check all lines that have ATX headings from cached info
145        for (line_num, line_info) in ctx.lines.iter().enumerate() {
146            // Skip lines inside HTML blocks (e.g., CSS selectors like #id)
147            if line_info.in_html_block {
148                continue;
149            }
150
151            if let Some(heading) = &line_info.heading {
152                // Only check ATX headings
153                if matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
154                    // Check if there's a space after the marker
155                    let line = line_info.content(ctx.content);
156                    let trimmed = line.trim_start();
157
158                    // Skip emoji hashtags and Unicode hashtag patterns
159                    let is_emoji = get_cached_regex(EMOJI_HASHTAG_PATTERN_STR)
160                        .map(|re| re.is_match(trimmed))
161                        .unwrap_or(false);
162                    let is_unicode = get_cached_regex(UNICODE_HASHTAG_PATTERN_STR)
163                        .map(|re| re.is_match(trimmed))
164                        .unwrap_or(false);
165                    if is_emoji || is_unicode {
166                        continue;
167                    }
168
169                    if trimmed.len() > heading.marker.len() {
170                        let after_marker = &trimmed[heading.marker.len()..];
171                        if !after_marker.is_empty() && !after_marker.starts_with(' ') && !after_marker.starts_with('\t')
172                        {
173                            // Skip hashtag-like patterns (e.g., #tag, #123, #29039)
174                            // But only for single-hash patterns to avoid skipping ##Heading
175                            // This prevents false positives on GitHub issue refs and social hashtags
176                            if heading.level == 1 {
177                                let content = after_marker.trim();
178                                // Get first "word" (up to space, comma, or closing paren)
179                                let first_word: String = content
180                                    .chars()
181                                    .take_while(|c| !c.is_whitespace() && *c != ',' && *c != ')')
182                                    .collect();
183                                if let Some(first_char) = first_word.chars().next() {
184                                    // Skip if first word starts with lowercase or number (hashtag/issue ref)
185                                    // Don't skip uppercase as those are likely intended headings
186                                    if first_char.is_lowercase() || first_char.is_numeric() {
187                                        continue;
188                                    }
189                                }
190                            }
191
192                            // Missing space after ATX marker
193                            let hash_end_col = line_info.indent + heading.marker.len() + 1; // 1-indexed
194                            let (start_line, start_col, end_line, end_col) = calculate_single_line_range(
195                                line_num + 1, // Convert to 1-indexed
196                                hash_end_col,
197                                0, // Zero-width to indicate missing space
198                            );
199
200                            warnings.push(LintWarning {
201                                rule_name: Some(self.name().to_string()),
202                                message: format!("No space after {} in heading", "#".repeat(heading.level as usize)),
203                                line: start_line,
204                                column: start_col,
205                                end_line,
206                                end_column: end_col,
207                                severity: Severity::Warning,
208                                fix: Some(Fix {
209                                    range: self.get_line_byte_range(ctx.content, line_num + 1),
210                                    replacement: {
211                                        // Preserve original indentation (including tabs)
212                                        let line = line_info.content(ctx.content);
213                                        let original_indent = &line[..line_info.indent];
214                                        format!("{original_indent}{} {after_marker}", heading.marker)
215                                    },
216                                }),
217                            });
218                        }
219                    }
220                }
221            } else if !line_info.in_code_block && !line_info.is_blank {
222                // Check for malformed headings that weren't detected as proper headings
223                if let Some((hash_end_pos, fixed_line)) = self.check_atx_heading_line(line_info.content(ctx.content)) {
224                    let (start_line, start_col, end_line, end_col) = calculate_single_line_range(
225                        line_num + 1,     // Convert to 1-indexed
226                        hash_end_pos + 1, // 1-indexed column
227                        0,                // Zero-width to indicate missing space
228                    );
229
230                    warnings.push(LintWarning {
231                        rule_name: Some(self.name().to_string()),
232                        message: "No space after hash in heading".to_string(),
233                        line: start_line,
234                        column: start_col,
235                        end_line,
236                        end_column: end_col,
237                        severity: Severity::Warning,
238                        fix: Some(Fix {
239                            range: self.get_line_byte_range(ctx.content, line_num + 1),
240                            replacement: fixed_line,
241                        }),
242                    });
243                }
244            }
245        }
246
247        Ok(warnings)
248    }
249
250    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
251        let mut lines = Vec::new();
252
253        for line_info in ctx.lines.iter() {
254            let mut fixed = false;
255
256            if let Some(heading) = &line_info.heading {
257                // Fix ATX headings missing space
258                if matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
259                    let line = line_info.content(ctx.content);
260                    let trimmed = line.trim_start();
261
262                    // Skip emoji hashtags and Unicode hashtag patterns
263                    let is_emoji = get_cached_regex(EMOJI_HASHTAG_PATTERN_STR)
264                        .map(|re| re.is_match(trimmed))
265                        .unwrap_or(false);
266                    let is_unicode = get_cached_regex(UNICODE_HASHTAG_PATTERN_STR)
267                        .map(|re| re.is_match(trimmed))
268                        .unwrap_or(false);
269                    if is_emoji || is_unicode {
270                        continue;
271                    }
272
273                    if trimmed.len() > heading.marker.len() {
274                        let after_marker = &trimmed[heading.marker.len()..];
275                        if !after_marker.is_empty() && !after_marker.starts_with(' ') && !after_marker.starts_with('\t')
276                        {
277                            // Add space after marker, preserving original indentation (including tabs)
278                            let line = line_info.content(ctx.content);
279                            let original_indent = &line[..line_info.indent];
280                            lines.push(format!("{original_indent}{} {after_marker}", heading.marker));
281                            fixed = true;
282                        }
283                    }
284                }
285            } else if !line_info.in_code_block && !line_info.is_blank {
286                // Fix malformed headings
287                if let Some((_, fixed_line)) = self.check_atx_heading_line(line_info.content(ctx.content)) {
288                    lines.push(fixed_line);
289                    fixed = true;
290                }
291            }
292
293            if !fixed {
294                lines.push(line_info.content(ctx.content).to_string());
295            }
296        }
297
298        // Reconstruct content preserving line endings
299        let mut result = lines.join("\n");
300        if ctx.content.ends_with('\n') && !result.ends_with('\n') {
301            result.push('\n');
302        }
303
304        Ok(result)
305    }
306
307    /// Get the category of this rule for selective processing
308    fn category(&self) -> RuleCategory {
309        RuleCategory::Heading
310    }
311
312    /// Check if this rule should be skipped
313    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
314        // Fast path: check if document likely has headings
315        !ctx.likely_has_headings()
316    }
317
318    fn as_any(&self) -> &dyn std::any::Any {
319        self
320    }
321
322    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
323    where
324        Self: Sized,
325    {
326        Box::new(MD018NoMissingSpaceAtx::new())
327    }
328}
329
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// Well-formed headings pass; headings without a space are reported on their own lines.
    #[test]
    fn test_basic_functionality() {
        let rule = MD018NoMissingSpaceAtx;

        // Every marker is followed by a space: nothing to report.
        let well_formed = LintContext::new(
            "# Heading 1\n## Heading 2\n### Heading 3",
            MarkdownFlavor::Standard,
            None,
        );
        assert!(rule.check(&well_formed).unwrap().is_empty());

        // Lines 1 and 3 lack the space; line 2 is fine.
        let missing_space = LintContext::new(
            "#Heading 1\n## Heading 2\n###Heading 3",
            MarkdownFlavor::Standard,
            None,
        );
        let flagged: Vec<usize> = rule
            .check(&missing_space)
            .unwrap()
            .iter()
            .map(|warning| warning.line)
            .collect();
        assert_eq!(flagged, vec![1, 3]);
    }

    /// Exercises `check_atx_heading_line` directly on single lines.
    #[test]
    fn test_malformed_heading_detection() {
        let rule = MD018NoMissingSpaceAtx::new();

        let malformed = [
            "##Introduction",
            "###Background",
            "####Details",
            "#Summary",
            "######Conclusion",
            "##Table of Contents",
        ];
        for input in malformed {
            assert!(rule.check_atx_heading_line(input).is_some());
        }

        let ignored = [
            "###",           // Just hashes
            "#",             // Single hash
            "##a",           // Too short
            "#*emphasis",    // Emphasis marker
            "#######TooBig", // More than 6 hashes
        ];
        for input in ignored {
            assert!(rule.check_atx_heading_line(input).is_none());
        }
    }

    /// Malformed headings are found in running text but not inside code blocks.
    #[test]
    fn test_malformed_heading_with_context() {
        let rule = MD018NoMissingSpaceAtx::new();

        let content = r#"# Test Document

##Introduction
This should be detected.

    ##CodeBlock
This should NOT be detected (indented code block).

```
##FencedCodeBlock
This should NOT be detected (fenced code block).
```

##Conclusion
This should be detected.
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        let flagged: Vec<usize> = rule.check(&ctx).unwrap().iter().map(|warning| warning.line).collect();

        // Line 3 is ##Introduction, line 14 is ##Conclusion.
        for expected in [3, 14] {
            assert!(flagged.contains(&expected));
        }
        // Line 6 is the indented ##CodeBlock, line 10 the fenced ##FencedCodeBlock.
        for code_line in [6, 10] {
            assert!(!flagged.contains(&code_line));
        }
    }

    /// `fix` inserts the missing space and leaves the other lines alone.
    #[test]
    fn test_malformed_heading_fix() {
        let rule = MD018NoMissingSpaceAtx::new();

        let input = r#"##Introduction
This is a test.

###Background
More content."#;

        let expected = r#"## Introduction
This is a test.

### Background
More content."#;

        let ctx = LintContext::new(input, MarkdownFlavor::Standard, None);
        assert_eq!(rule.fix(&ctx).unwrap(), expected);
    }

    /// Only the headings lacking a space are reported when both kinds are mixed.
    #[test]
    fn test_mixed_proper_and_malformed_headings() {
        let rule = MD018NoMissingSpaceAtx::new();

        let content = r#"# Proper Heading

##Malformed Heading

## Another Proper Heading

###Another Malformed

#### Proper with space
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 2);

        let flagged: Vec<usize> = warnings.iter().map(|warning| warning.line).collect();
        // Line 3 is ##Malformed Heading, line 7 is ###Another Malformed.
        for expected in [3, 7] {
            assert!(flagged.contains(&expected));
        }
    }

    /// CSS id selectors inside `<style>` (common in Quarto/RMarkdown files) are not headings.
    #[test]
    fn test_css_selectors_in_html_blocks() {
        let rule = MD018NoMissingSpaceAtx::new();

        let content = r#"# Proper Heading

<style>
#slide-1 ol li {
    margin-top: 0;
}

#special-slide ol li {
    margin-top: 2em;
}
</style>

## Another Heading
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        let warning_count = rule.check(&ctx).unwrap().len();

        assert_eq!(
            warning_count, 0,
            "CSS selectors in <style> blocks should not be flagged as malformed headings"
        );
    }

    /// `#`-prefixed text inside `<script>` is not a heading either.
    #[test]
    fn test_js_code_in_script_blocks() {
        let rule = MD018NoMissingSpaceAtx::new();

        let content = r#"# Heading

<script>
const element = document.querySelector('#main-content');
#another-comment
</script>

## Another Heading
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        let warning_count = rule.check(&ctx).unwrap().len();

        assert_eq!(
            warning_count, 0,
            "JavaScript code in <script> blocks should not be flagged as malformed headings"
        );
    }

    /// Single-hash hashtags and issue refs are skipped; everything else is still reported.
    #[test]
    fn test_github_issue_refs_and_hashtags_skipped() {
        let rule = MD018NoMissingSpaceAtx::new();

        let skipped = [
            // Issue refs (start with a number)
            "#29039)",
            "#123",
            "#12345",
            // Hashtags starting with lowercase
            "#tag",
            "#hashtag",
            "#javascript",
        ];
        for input in skipped {
            assert!(
                rule.check_atx_heading_line(input).is_none(),
                "{input} should not be detected as malformed heading"
            );
        }

        let reported = [
            // Uppercase single-hash (likely intended heading)
            "#Summary",
            "#Introduction",
            "#API",
            // Multi-hash patterns are never social hashtags
            "##introduction",
            "###section",
            "##123",
        ];
        for input in reported {
            assert!(
                rule.check_atx_heading_line(input).is_some(),
                "{input} SHOULD be detected as malformed heading"
            );
        }
    }

    /// Issue refs on list continuation lines (real-world case from Deno's Releases.md).
    #[test]
    fn test_issue_refs_in_list_continuations() {
        let rule = MD018NoMissingSpaceAtx::new();

        let cases = [
            (
                "- fix(compile): temporary fallback\n  #29039)",
                "#29039) in list continuation",
            ),
            // Multiple issue refs
            ("- fix: issue (#28986, #29005,\n  #29024, #29039)", "Issue refs in list"),
        ];

        for (content, subject) in cases {
            let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
            let result = rule.check(&ctx).unwrap();
            assert!(result.is_empty(), "{subject} should not be flagged. Got: {result:?}");
        }
    }
}