rumdl_lib/rules/
md018_no_missing_space_atx.rs

1/// Rule MD018: No missing space after ATX heading marker
2///
3/// See [docs/md018.md](../../docs/md018.md) for full documentation, configuration, and examples.
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::utils::range_utils::calculate_single_line_range;
6use crate::utils::regex_cache::get_cached_regex;
7
// Emoji and Unicode hashtag patterns.
//
// Both patterns are anchored with `^` and are only ever matched against text whose
// leading whitespace has already been trimmed, so they test the very first `#` of a line.
// A match means the `#` is the base of a keycap-style emoji, not a heading marker.
//
// Literal form: `#` followed by the keycap emoji characters as typed in the source
// (presumably `#` + U+FE0F + U+20E3, and `#` + U+20E3 — confirm by inspecting the bytes).
const EMOJI_HASHTAG_PATTERN_STR: &str = r"^#️⃣|^#⃣";
// Escaped form: `#` immediately followed by U+FE0F or U+20E3.
const UNICODE_HASHTAG_PATTERN_STR: &str = r"^#[\u{FE0F}\u{20E3}]";
11
/// Lint rule MD018: an ATX heading marker (`#` … `######`) must be followed by a space.
///
/// The rule has no configuration, so it is a unit struct; every instance is identical.
#[derive(Debug, Clone)]
pub struct MD018NoMissingSpaceAtx;
14
15impl Default for MD018NoMissingSpaceAtx {
16    fn default() -> Self {
17        Self::new()
18    }
19}
20
21impl MD018NoMissingSpaceAtx {
22    pub fn new() -> Self {
23        Self
24    }
25
26    /// Check if an ATX heading line is missing space after the marker
27    fn check_atx_heading_line(&self, line: &str) -> Option<(usize, String)> {
28        // Look for ATX marker at start of line (with optional indentation)
29        let trimmed_line = line.trim_start();
30        let indent = line.len() - trimmed_line.len();
31
32        if !trimmed_line.starts_with('#') {
33            return None;
34        }
35
36        // Skip emoji hashtags and Unicode hashtag patterns
37        let is_emoji = get_cached_regex(EMOJI_HASHTAG_PATTERN_STR)
38            .map(|re| re.is_match(trimmed_line))
39            .unwrap_or(false);
40        let is_unicode = get_cached_regex(UNICODE_HASHTAG_PATTERN_STR)
41            .map(|re| re.is_match(trimmed_line))
42            .unwrap_or(false);
43        if is_emoji || is_unicode {
44            return None;
45        }
46
47        // Count the number of hashes
48        let hash_count = trimmed_line.chars().take_while(|&c| c == '#').count();
49        if hash_count == 0 || hash_count > 6 {
50            return None;
51        }
52
53        // Check what comes after the hashes
54        let after_hashes = &trimmed_line[hash_count..];
55
56        // Skip if what follows the hashes is an emoji modifier or variant selector
57        if after_hashes
58            .chars()
59            .next()
60            .is_some_and(|ch| matches!(ch, '\u{FE0F}' | '\u{20E3}' | '\u{FE0E}'))
61        {
62            return None;
63        }
64
65        // If there's content immediately after hashes (no space), it needs fixing
66        if !after_hashes.is_empty() && !after_hashes.starts_with(' ') && !after_hashes.starts_with('\t') {
67            // Additional checks to avoid false positives
68            let content = after_hashes.trim();
69
70            // Skip if it's just more hashes (horizontal rule)
71            if content.chars().all(|c| c == '#') {
72                return None;
73            }
74
75            // Skip if content is too short to be meaningful
76            if content.len() < 2 {
77                return None;
78            }
79
80            // Skip if it starts with emphasis markers
81            if content.starts_with('*') || content.starts_with('_') {
82                return None;
83            }
84
85            // Skip if it looks like a hashtag (e.g., #tag, #123)
86            // But only skip if it's lowercase or a number to avoid skipping headings like #Summary
87            if hash_count == 1 && !content.is_empty() {
88                let first_char = content.chars().next();
89                if let Some(ch) = first_char {
90                    // Skip if it's a lowercase letter or number (common hashtag pattern)
91                    // Don't skip uppercase as those are likely headings
92                    if (ch.is_lowercase() || ch.is_numeric()) && !content.contains(' ') {
93                        return None;
94                    }
95                }
96            }
97
98            // This looks like a malformed heading that needs a space
99            let fixed = format!("{}{} {}", " ".repeat(indent), "#".repeat(hash_count), after_hashes);
100            return Some((indent + hash_count, fixed));
101        }
102
103        None
104    }
105
106    // Calculate the byte range for a specific line in the content
107    fn get_line_byte_range(&self, content: &str, line_num: usize) -> std::ops::Range<usize> {
108        let mut current_line = 1;
109        let mut start_byte = 0;
110
111        for (i, c) in content.char_indices() {
112            if current_line == line_num && c == '\n' {
113                return start_byte..i;
114            } else if c == '\n' {
115                current_line += 1;
116                if current_line == line_num {
117                    start_byte = i + 1;
118                }
119            }
120        }
121
122        // If we're looking for the last line and it doesn't end with a newline
123        if current_line == line_num {
124            return start_byte..content.len();
125        }
126
127        // Fallback if line not found (shouldn't happen)
128        0..0
129    }
130}
131
132impl Rule for MD018NoMissingSpaceAtx {
133    fn name(&self) -> &'static str {
134        "MD018"
135    }
136
137    fn description(&self) -> &'static str {
138        "No space after hash in heading"
139    }
140
141    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
142        let mut warnings = Vec::new();
143
144        // Check all lines that have ATX headings from cached info
145        for (line_num, line_info) in ctx.lines.iter().enumerate() {
146            // Skip lines inside HTML blocks (e.g., CSS selectors like #id)
147            if line_info.in_html_block {
148                continue;
149            }
150
151            if let Some(heading) = &line_info.heading {
152                // Only check ATX headings
153                if matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
154                    // Check if there's a space after the marker
155                    let line = line_info.content(ctx.content);
156                    let trimmed = line.trim_start();
157
158                    // Skip emoji hashtags and Unicode hashtag patterns
159                    let is_emoji = get_cached_regex(EMOJI_HASHTAG_PATTERN_STR)
160                        .map(|re| re.is_match(trimmed))
161                        .unwrap_or(false);
162                    let is_unicode = get_cached_regex(UNICODE_HASHTAG_PATTERN_STR)
163                        .map(|re| re.is_match(trimmed))
164                        .unwrap_or(false);
165                    if is_emoji || is_unicode {
166                        continue;
167                    }
168
169                    if trimmed.len() > heading.marker.len() {
170                        let after_marker = &trimmed[heading.marker.len()..];
171                        if !after_marker.is_empty() && !after_marker.starts_with(' ') && !after_marker.starts_with('\t')
172                        {
173                            // Skip hashtag-like patterns (e.g., #tag, #123, #29039)
174                            // But only for single-hash patterns to avoid skipping ##Heading
175                            // This prevents false positives on GitHub issue refs and social hashtags
176                            if heading.level == 1 {
177                                let content = after_marker.trim();
178                                // Get first "word" (up to space, comma, or closing paren)
179                                let first_word: String = content
180                                    .chars()
181                                    .take_while(|c| !c.is_whitespace() && *c != ',' && *c != ')')
182                                    .collect();
183                                if let Some(first_char) = first_word.chars().next() {
184                                    // Skip if first word starts with lowercase or number (hashtag/issue ref)
185                                    // Don't skip uppercase as those are likely intended headings
186                                    if first_char.is_lowercase() || first_char.is_numeric() {
187                                        continue;
188                                    }
189                                }
190                            }
191
192                            // Missing space after ATX marker
193                            let hash_end_col = line_info.indent + heading.marker.len() + 1; // 1-indexed
194                            let (start_line, start_col, end_line, end_col) = calculate_single_line_range(
195                                line_num + 1, // Convert to 1-indexed
196                                hash_end_col,
197                                0, // Zero-width to indicate missing space
198                            );
199
200                            warnings.push(LintWarning {
201                                rule_name: Some(self.name().to_string()),
202                                message: format!("No space after {} in heading", "#".repeat(heading.level as usize)),
203                                line: start_line,
204                                column: start_col,
205                                end_line,
206                                end_column: end_col,
207                                severity: Severity::Warning,
208                                fix: Some(Fix {
209                                    range: self.get_line_byte_range(ctx.content, line_num + 1),
210                                    replacement: format!(
211                                        "{}{} {}",
212                                        " ".repeat(line_info.indent),
213                                        heading.marker,
214                                        after_marker
215                                    ),
216                                }),
217                            });
218                        }
219                    }
220                }
221            } else if !line_info.in_code_block && !line_info.is_blank {
222                // Check for malformed headings that weren't detected as proper headings
223                if let Some((hash_end_pos, fixed_line)) = self.check_atx_heading_line(line_info.content(ctx.content)) {
224                    let (start_line, start_col, end_line, end_col) = calculate_single_line_range(
225                        line_num + 1,     // Convert to 1-indexed
226                        hash_end_pos + 1, // 1-indexed column
227                        0,                // Zero-width to indicate missing space
228                    );
229
230                    warnings.push(LintWarning {
231                        rule_name: Some(self.name().to_string()),
232                        message: "No space after hash in heading".to_string(),
233                        line: start_line,
234                        column: start_col,
235                        end_line,
236                        end_column: end_col,
237                        severity: Severity::Warning,
238                        fix: Some(Fix {
239                            range: self.get_line_byte_range(ctx.content, line_num + 1),
240                            replacement: fixed_line,
241                        }),
242                    });
243                }
244            }
245        }
246
247        Ok(warnings)
248    }
249
250    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
251        let mut lines = Vec::new();
252
253        for line_info in ctx.lines.iter() {
254            let mut fixed = false;
255
256            if let Some(heading) = &line_info.heading {
257                // Fix ATX headings missing space
258                if matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
259                    let line = line_info.content(ctx.content);
260                    let trimmed = line.trim_start();
261
262                    // Skip emoji hashtags and Unicode hashtag patterns
263                    let is_emoji = get_cached_regex(EMOJI_HASHTAG_PATTERN_STR)
264                        .map(|re| re.is_match(trimmed))
265                        .unwrap_or(false);
266                    let is_unicode = get_cached_regex(UNICODE_HASHTAG_PATTERN_STR)
267                        .map(|re| re.is_match(trimmed))
268                        .unwrap_or(false);
269                    if is_emoji || is_unicode {
270                        continue;
271                    }
272
273                    if trimmed.len() > heading.marker.len() {
274                        let after_marker = &trimmed[heading.marker.len()..];
275                        if !after_marker.is_empty() && !after_marker.starts_with(' ') && !after_marker.starts_with('\t')
276                        {
277                            // Add space after marker
278                            lines.push(format!(
279                                "{}{} {}",
280                                " ".repeat(line_info.indent),
281                                heading.marker,
282                                after_marker
283                            ));
284                            fixed = true;
285                        }
286                    }
287                }
288            } else if !line_info.in_code_block && !line_info.is_blank {
289                // Fix malformed headings
290                if let Some((_, fixed_line)) = self.check_atx_heading_line(line_info.content(ctx.content)) {
291                    lines.push(fixed_line);
292                    fixed = true;
293                }
294            }
295
296            if !fixed {
297                lines.push(line_info.content(ctx.content).to_string());
298            }
299        }
300
301        // Reconstruct content preserving line endings
302        let mut result = lines.join("\n");
303        if ctx.content.ends_with('\n') && !result.ends_with('\n') {
304            result.push('\n');
305        }
306
307        Ok(result)
308    }
309
310    /// Get the category of this rule for selective processing
311    fn category(&self) -> RuleCategory {
312        RuleCategory::Heading
313    }
314
315    /// Check if this rule should be skipped
316    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
317        // Fast path: check if document likely has headings
318        !ctx.likely_has_headings()
319    }
320
321    fn as_any(&self) -> &dyn std::any::Any {
322        self
323    }
324
325    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
326    where
327        Self: Sized,
328    {
329        Box::new(MD018NoMissingSpaceAtx::new())
330    }
331}
332
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Runs MD018 over `content` using the standard Markdown flavor.
    fn lint(content: &str) -> LintResult {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        MD018NoMissingSpaceAtx::new().check(&ctx)
    }

    /// 1-indexed line numbers of every warning produced for `content`, in report order.
    fn flagged_lines(content: &str) -> Vec<usize> {
        lint(content).unwrap().iter().map(|w| w.line).collect()
    }

    #[test]
    fn test_basic_functionality() {
        // Every marker is followed by a space: nothing to report.
        assert!(lint("# Heading 1\n## Heading 2\n### Heading 3").unwrap().is_empty());

        // Lines 1 and 3 are missing the space; line 2 is fine.
        assert_eq!(flagged_lines("#Heading 1\n## Heading 2\n###Heading 3"), vec![1, 3]);
    }

    #[test]
    fn test_malformed_heading_detection() {
        let rule = MD018NoMissingSpaceAtx::new();

        let malformed = [
            "##Introduction",
            "###Background",
            "####Details",
            "#Summary",
            "######Conclusion",
            "##Table of Contents",
        ];
        for input in malformed {
            assert!(rule.check_atx_heading_line(input).is_some());
        }

        let ignored = [
            "###",           // Just hashes
            "#",             // Single hash
            "##a",           // Too short
            "#*emphasis",    // Emphasis marker
            "#######TooBig", // More than 6 hashes
        ];
        for input in ignored {
            assert!(rule.check_atx_heading_line(input).is_none());
        }
    }

    #[test]
    fn test_malformed_heading_with_context() {
        // Document mixing real malformed headings with look-alikes inside code blocks.
        let content = r#"# Test Document

##Introduction
This should be detected.

    ##CodeBlock
This should NOT be detected (indented code block).

```
##FencedCodeBlock
This should NOT be detected (fenced code block).
```

##Conclusion
This should be detected.
"#;

        let detected_lines = flagged_lines(content);

        // Malformed headings outside code are reported...
        for expected in [3usize, 14] {
            assert!(detected_lines.contains(&expected));
        }
        // ...while the indented (line 6) and fenced (line 10) look-alikes are not.
        for unexpected in [6usize, 10] {
            assert!(!detected_lines.contains(&unexpected));
        }
    }

    #[test]
    fn test_malformed_heading_fix() {
        let content = r#"##Introduction
This is a test.

###Background
More content."#;

        let expected = r#"## Introduction
This is a test.

### Background
More content."#;

        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let fixed = MD018NoMissingSpaceAtx::new().fix(&ctx).unwrap();

        assert_eq!(fixed, expected);
    }

    #[test]
    fn test_mixed_proper_and_malformed_headings() {
        let content = r#"# Proper Heading

##Malformed Heading

## Another Proper Heading

###Another Malformed

#### Proper with space
"#;

        let detected_lines = flagged_lines(content);

        // Only the two headings without a space are reported.
        assert_eq!(detected_lines.len(), 2);
        assert!(detected_lines.contains(&3)); // ##Malformed Heading
        assert!(detected_lines.contains(&7)); // ###Another Malformed
    }

    #[test]
    fn test_css_selectors_in_html_blocks() {
        // `#id` selectors inside <style> (common in Quarto/RMarkdown) are not headings.
        let content = r#"# Proper Heading

<style>
#slide-1 ol li {
    margin-top: 0;
}

#special-slide ol li {
    margin-top: 2em;
}
</style>

## Another Heading
"#;

        let result = lint(content).unwrap();

        assert_eq!(
            result.len(),
            0,
            "CSS selectors in <style> blocks should not be flagged as malformed headings"
        );
    }

    #[test]
    fn test_js_code_in_script_blocks() {
        // `#element`-style text inside <script> is not a heading either.
        let content = r#"# Heading

<script>
const element = document.querySelector('#main-content');
#another-comment
</script>

## Another Heading
"#;

        let result = lint(content).unwrap();

        assert_eq!(
            result.len(),
            0,
            "JavaScript code in <script> blocks should not be flagged as malformed headings"
        );
    }

    #[test]
    fn test_github_issue_refs_and_hashtags_skipped() {
        let rule = MD018NoMissingSpaceAtx::new();

        // Issue refs (leading digit) and lowercase hashtags are not headings.
        let skipped = ["#29039)", "#123", "#12345", "#tag", "#hashtag", "#javascript"];
        for input in skipped {
            assert!(
                rule.check_atx_heading_line(input).is_none(),
                "{input} should not be detected as malformed heading"
            );
        }

        // Uppercase single-hash lines are likely intended headings, and multi-hash
        // markers are never social hashtags, so all of these are reported.
        let detected = [
            "#Summary",
            "#Introduction",
            "#API",
            "##introduction",
            "###section",
            "##123",
        ];
        for input in detected {
            assert!(
                rule.check_atx_heading_line(input).is_some(),
                "{input} SHOULD be detected as malformed heading"
            );
        }
    }

    #[test]
    fn test_issue_refs_in_list_continuations() {
        // Real-world example from Deno Releases.md: an issue ref on a continuation line.
        let result = lint("- fix(compile): temporary fallback\n  #29039)").unwrap();
        assert!(
            result.is_empty(),
            "#29039) in list continuation should not be flagged. Got: {result:?}"
        );

        // Several issue refs wrapped across lines.
        let result = lint("- fix: issue (#28986, #29005,\n  #29024, #29039)").unwrap();
        assert!(
            result.is_empty(),
            "Issue refs in list should not be flagged. Got: {result:?}"
        );
    }
}