Skip to main content

rumdl_lib/rules/
md020_no_missing_space_closed_atx.rs

//! Rule MD020: No missing space inside closed ATX heading
//!
//! See [docs/md020.md](../../docs/md020.md) for full documentation, configuration, and examples.
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::utils::range_utils::calculate_single_line_range;
6use regex::Regex;
7use std::sync::LazyLock;
8
9// Closed ATX heading patterns
10// [^#\s\\] before closing hashes prevents matching escaped hashes like C\# (C-sharp)
11static CLOSED_ATX_NO_SPACE_PATTERN: LazyLock<Regex> =
12    LazyLock::new(|| Regex::new(r"^(\s*)(#+)([^#\s].*?)([^#\s\\])(#+)(\s*(?:\{#[^}]+\})?\s*)$").unwrap());
13static CLOSED_ATX_NO_SPACE_START_PATTERN: LazyLock<Regex> =
14    LazyLock::new(|| Regex::new(r"^(\s*)(#+)([^#\s].*?)\s(#+)(\s*(?:\{#[^}]+\})?\s*)$").unwrap());
15static CLOSED_ATX_NO_SPACE_END_PATTERN: LazyLock<Regex> =
16    LazyLock::new(|| Regex::new(r"^(\s*)(#+)\s(.*?)([^#\s\\])(#+)(\s*(?:\{#[^}]+\})?\s*)$").unwrap());
17
/// Lint rule MD020: missing space inside the hashes of a closed ATX heading
/// (for example `#Heading#`, `#Heading #` or `# Heading#`).
///
/// This is a unit struct: it carries no configuration or state.
#[derive(Debug, Clone)]
pub struct MD020NoMissingSpaceClosedAtx;
20
21impl Default for MD020NoMissingSpaceClosedAtx {
22    fn default() -> Self {
23        Self::new()
24    }
25}
26
27impl MD020NoMissingSpaceClosedAtx {
28    pub fn new() -> Self {
29        Self
30    }
31
32    fn is_closed_atx_heading_without_space(&self, line: &str) -> bool {
33        CLOSED_ATX_NO_SPACE_PATTERN.is_match(line)
34            || CLOSED_ATX_NO_SPACE_START_PATTERN.is_match(line)
35            || CLOSED_ATX_NO_SPACE_END_PATTERN.is_match(line)
36    }
37
38    fn fix_closed_atx_heading(&self, line: &str) -> String {
39        if let Some(captures) = CLOSED_ATX_NO_SPACE_PATTERN.captures(line) {
40            let indentation = &captures[1];
41            let opening_hashes = &captures[2];
42            let content = &captures[3];
43            let last_char = &captures[4];
44            let closing_hashes = &captures[5];
45            let custom_id = &captures[6];
46            format!("{indentation}{opening_hashes} {content}{last_char} {closing_hashes}{custom_id}")
47        } else if let Some(captures) = CLOSED_ATX_NO_SPACE_START_PATTERN.captures(line) {
48            let indentation = &captures[1];
49            let opening_hashes = &captures[2];
50            let content = &captures[3];
51            let closing_hashes = &captures[4];
52            let custom_id = &captures[5];
53            format!("{indentation}{opening_hashes} {content} {closing_hashes}{custom_id}")
54        } else if let Some(captures) = CLOSED_ATX_NO_SPACE_END_PATTERN.captures(line) {
55            let indentation = &captures[1];
56            let opening_hashes = &captures[2];
57            let content = &captures[3];
58            let last_char = &captures[4];
59            let closing_hashes = &captures[5];
60            let custom_id = &captures[6];
61            format!("{indentation}{opening_hashes} {content}{last_char} {closing_hashes}{custom_id}")
62        } else {
63            line.to_string()
64        }
65    }
66}
67
68impl Rule for MD020NoMissingSpaceClosedAtx {
69    fn name(&self) -> &'static str {
70        "MD020"
71    }
72
73    fn description(&self) -> &'static str {
74        "No space inside hashes on closed heading"
75    }
76
77    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
78        let mut warnings = Vec::new();
79
80        // Check all closed ATX headings from cached info
81        for (line_num, line_info) in ctx.lines.iter().enumerate() {
82            if let Some(heading) = &line_info.heading {
83                // Skip headings indented 4+ spaces (they're code blocks)
84                if line_info.visual_indent >= 4 {
85                    continue;
86                }
87
88                // Check all ATX headings (both properly closed and malformed)
89                if matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
90                    let line = line_info.content(ctx.content);
91
92                    // Check if line matches closed ATX pattern without space
93                    // This will detect both properly closed headings with missing space
94                    // and malformed attempts at closed headings like "# Heading#"
95                    if self.is_closed_atx_heading_without_space(line) {
96                        let line_range = ctx.line_index.line_content_range(line_num + 1);
97
98                        let mut start_col = 1;
99                        let mut length = 1;
100                        let mut message = String::new();
101
102                        if let Some(captures) = CLOSED_ATX_NO_SPACE_PATTERN.captures(line) {
103                            // Missing space at both start and end: #Heading#
104                            let opening_hashes = captures.get(2).unwrap();
105                            message = format!(
106                                "Missing space inside hashes on closed heading (with {} at start and end)",
107                                "#".repeat(opening_hashes.as_str().len())
108                            );
109                            // Highlight the position right after the opening hashes
110                            // Convert byte offset to character count for correct Unicode handling
111                            start_col = line[..opening_hashes.end()].chars().count() + 1;
112                            length = 1;
113                        } else if let Some(captures) = CLOSED_ATX_NO_SPACE_START_PATTERN.captures(line) {
114                            // Missing space at start: #Heading #
115                            let opening_hashes = captures.get(2).unwrap();
116                            message = format!(
117                                "Missing space after {} at start of closed heading",
118                                "#".repeat(opening_hashes.as_str().len())
119                            );
120                            // Highlight the position right after the opening hashes
121                            // Convert byte offset to character count for correct Unicode handling
122                            start_col = line[..opening_hashes.end()].chars().count() + 1;
123                            length = 1;
124                        } else if let Some(captures) = CLOSED_ATX_NO_SPACE_END_PATTERN.captures(line) {
125                            // Missing space at end: # Heading#
126                            let content = captures.get(3).unwrap();
127                            let closing_hashes = captures.get(5).unwrap();
128                            message = format!(
129                                "Missing space before {} at end of closed heading",
130                                "#".repeat(closing_hashes.as_str().len())
131                            );
132                            // Highlight the last character before the closing hashes
133                            // Convert byte offset to character count for correct Unicode handling
134                            start_col = line[..content.end()].chars().count() + 1;
135                            length = 1;
136                        }
137
138                        let (start_line, start_col_calc, end_line, end_col) =
139                            calculate_single_line_range(line_num + 1, start_col, length);
140
141                        warnings.push(LintWarning {
142                            rule_name: Some(self.name().to_string()),
143                            message,
144                            line: start_line,
145                            column: start_col_calc,
146                            end_line,
147                            end_column: end_col,
148                            severity: Severity::Warning,
149                            fix: Some(Fix::new(line_range, self.fix_closed_atx_heading(line))),
150                        });
151                    }
152                }
153            }
154        }
155
156        Ok(warnings)
157    }
158
159    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
160        let mut lines = Vec::new();
161
162        for (i, line_info) in ctx.lines.iter().enumerate() {
163            let line_num = i + 1;
164            // If rule is disabled for this line, keep original
165            if ctx.inline_config().is_rule_disabled(self.name(), line_num) {
166                lines.push(line_info.content(ctx.content).to_string());
167                continue;
168            }
169
170            let mut fixed = false;
171
172            if let Some(heading) = &line_info.heading {
173                // Skip headings indented 4+ spaces (they're code blocks)
174                if line_info.visual_indent >= 4 {
175                    lines.push(line_info.content(ctx.content).to_string());
176                    continue;
177                }
178
179                // Fix ATX headings without space (both properly closed and malformed)
180                if matches!(heading.style, crate::lint_context::HeadingStyle::ATX)
181                    && self.is_closed_atx_heading_without_space(line_info.content(ctx.content))
182                {
183                    lines.push(self.fix_closed_atx_heading(line_info.content(ctx.content)));
184                    fixed = true;
185                }
186            }
187
188            if !fixed {
189                lines.push(line_info.content(ctx.content).to_string());
190            }
191        }
192
193        // Reconstruct content preserving line endings
194        let mut result = lines.join("\n");
195        if ctx.content.ends_with('\n') && !result.ends_with('\n') {
196            result.push('\n');
197        }
198
199        Ok(result)
200    }
201
202    /// Get the category of this rule for selective processing
203    fn category(&self) -> RuleCategory {
204        RuleCategory::Heading
205    }
206
207    /// Check if this rule should be skipped
208    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
209        ctx.content.is_empty() || !ctx.likely_has_headings()
210    }
211
212    fn as_any(&self) -> &dyn std::any::Any {
213        self
214    }
215
216    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
217    where
218        Self: Sized,
219    {
220        Box::new(MD020NoMissingSpaceClosedAtx::new())
221    }
222}
223
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Correctly spaced closed headings produce no warnings; headings missing the space
    /// before the closing hashes are flagged on their own lines.
    #[test]
    fn test_basic_functionality() {
        let rule = MD020NoMissingSpaceClosedAtx;

        // Test with correct spacing
        let content = "# Heading 1 #\n## Heading 2 ##\n### Heading 3 ###";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty());

        // Test with missing spaces
        let content = "# Heading 1#\n## Heading 2 ##\n### Heading 3###";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 2); // Should flag the two headings with missing spaces
        assert_eq!(result[0].line, 1);
        assert_eq!(result[1].line, 3);
    }

    /// Reported columns are 1-based character positions, not byte offsets.
    #[test]
    fn test_multibyte_char_column_position() {
        let rule = MD020NoMissingSpaceClosedAtx;

        // Space missing on both sides: the warning points just after the opening hashes.
        // "##" is two ASCII characters, so the byte offset and the character count agree
        // and the column is 2 + 1 = 3; the 2-byte "Ü" comes after that position.
        let content = "##Ünited##";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 1);
        assert_eq!(
            result[0].column, 3,
            "Column should point right after the opening hashes"
        );

        // Space missing only before the closing hashes: the warning points at the last
        // content character, which sits after the 2-byte "Ü", so bytes and characters differ.
        let content = "## Ü test##";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 1);
        // Regex group 3 (content without its last character) is "Ü tes" and ends at byte 9.
        // line[..9] = "## Ü tes" is 8 characters, so the column is 8 + 1 = 9.
        // A byte-based calculation would give 9 + 1 = 10.
        assert_eq!(
            result[0].column, 9,
            "Column should use character position, not byte offset"
        );
    }
}