Skip to main content

rumdl_lib/rules/
md020_no_missing_space_closed_atx.rs

1/// Rule MD020: No missing space inside closed ATX heading
2///
3/// See [docs/md020.md](../../docs/md020.md) for full documentation, configuration, and examples.
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::utils::range_utils::calculate_single_line_range;
6use regex::Regex;
7use std::sync::LazyLock;
8
// Closed ATX heading patterns
//
// All three patterns are anchored to the whole line and share the same final capture group,
// `(\s*(?:\{#[^}]+\})?\s*)`: optional whitespace, an optional `{#...}` block, and optional
// trailing whitespace (bound to `custom_id` where the groups are consumed).
//
// [^#\s\\] before closing hashes prevents matching escaped hashes like C\# (C-sharp)

// Missing space on both sides, e.g. `#Heading#`.
// Groups: 1 = indentation, 2 = opening hashes, 3 = content without its last character,
// 4 = last content character, 5 = closing hashes, 6 = trailing part.
static CLOSED_ATX_NO_SPACE_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^(\s*)(#+)([^#\s].*?)([^#\s\\])(#+)(\s*(?:\{#[^}]+\})?\s*)$").unwrap());
// Missing space after the opening hashes only, e.g. `#Heading #`.
// Groups: 1 = indentation, 2 = opening hashes, 3 = content, 4 = closing hashes, 5 = trailing part.
// The single whitespace character in front of the closing hashes is matched but not captured.
static CLOSED_ATX_NO_SPACE_START_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^(\s*)(#+)([^#\s].*?)\s(#+)(\s*(?:\{#[^}]+\})?\s*)$").unwrap());
// Missing space before the closing hashes only, e.g. `# Heading#`.
// Groups: 1 = indentation, 2 = opening hashes, 3 = content without its last character,
// 4 = last content character, 5 = closing hashes, 6 = trailing part.
// The single whitespace character after the opening hashes is matched but not captured.
static CLOSED_ATX_NO_SPACE_END_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^(\s*)(#+)\s(.*?)([^#\s\\])(#+)(\s*(?:\{#[^}]+\})?\s*)$").unwrap());
17
/// Rule MD020: reports closed ATX headings (`# Heading #`) that are missing the space
/// between the hashes and the heading text, on either or both sides.
///
/// The rule is a stateless unit struct; it has no configuration.
#[derive(Debug, Clone)]
pub struct MD020NoMissingSpaceClosedAtx;
20
21impl Default for MD020NoMissingSpaceClosedAtx {
22    fn default() -> Self {
23        Self::new()
24    }
25}
26
27impl MD020NoMissingSpaceClosedAtx {
28    pub fn new() -> Self {
29        Self
30    }
31
32    fn is_closed_atx_heading_without_space(&self, line: &str) -> bool {
33        CLOSED_ATX_NO_SPACE_PATTERN.is_match(line)
34            || CLOSED_ATX_NO_SPACE_START_PATTERN.is_match(line)
35            || CLOSED_ATX_NO_SPACE_END_PATTERN.is_match(line)
36    }
37
38    fn fix_closed_atx_heading(&self, line: &str) -> String {
39        if let Some(captures) = CLOSED_ATX_NO_SPACE_PATTERN.captures(line) {
40            let indentation = &captures[1];
41            let opening_hashes = &captures[2];
42            let content = &captures[3];
43            let last_char = &captures[4];
44            let closing_hashes = &captures[5];
45            let custom_id = &captures[6];
46            format!("{indentation}{opening_hashes} {content}{last_char} {closing_hashes}{custom_id}")
47        } else if let Some(captures) = CLOSED_ATX_NO_SPACE_START_PATTERN.captures(line) {
48            let indentation = &captures[1];
49            let opening_hashes = &captures[2];
50            let content = &captures[3];
51            let closing_hashes = &captures[4];
52            let custom_id = &captures[5];
53            format!("{indentation}{opening_hashes} {content} {closing_hashes}{custom_id}")
54        } else if let Some(captures) = CLOSED_ATX_NO_SPACE_END_PATTERN.captures(line) {
55            let indentation = &captures[1];
56            let opening_hashes = &captures[2];
57            let content = &captures[3];
58            let last_char = &captures[4];
59            let closing_hashes = &captures[5];
60            let custom_id = &captures[6];
61            format!("{indentation}{opening_hashes} {content}{last_char} {closing_hashes}{custom_id}")
62        } else {
63            line.to_string()
64        }
65    }
66}
67
68impl Rule for MD020NoMissingSpaceClosedAtx {
69    fn name(&self) -> &'static str {
70        "MD020"
71    }
72
73    fn description(&self) -> &'static str {
74        "No space inside hashes on closed heading"
75    }
76
77    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
78        let mut warnings = Vec::new();
79
80        // Check all closed ATX headings from cached info
81        for (line_num, line_info) in ctx.lines.iter().enumerate() {
82            if let Some(heading) = &line_info.heading {
83                // Skip headings indented 4+ spaces (they're code blocks)
84                if line_info.visual_indent >= 4 {
85                    continue;
86                }
87
88                // Check all ATX headings (both properly closed and malformed)
89                if matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
90                    let line = line_info.content(ctx.content);
91
92                    // Check if line matches closed ATX pattern without space
93                    // This will detect both properly closed headings with missing space
94                    // and malformed attempts at closed headings like "# Heading#"
95                    if self.is_closed_atx_heading_without_space(line) {
96                        let line_range = ctx.line_index.line_content_range(line_num + 1);
97
98                        let mut start_col = 1;
99                        let mut length = 1;
100                        let mut message = String::new();
101
102                        if let Some(captures) = CLOSED_ATX_NO_SPACE_PATTERN.captures(line) {
103                            // Missing space at both start and end: #Heading#
104                            let opening_hashes = captures.get(2).unwrap();
105                            message = format!(
106                                "Missing space inside hashes on closed heading (with {} at start and end)",
107                                "#".repeat(opening_hashes.as_str().len())
108                            );
109                            // Highlight the position right after the opening hashes
110                            // Convert byte offset to character count for correct Unicode handling
111                            start_col = line[..opening_hashes.end()].chars().count() + 1;
112                            length = 1;
113                        } else if let Some(captures) = CLOSED_ATX_NO_SPACE_START_PATTERN.captures(line) {
114                            // Missing space at start: #Heading #
115                            let opening_hashes = captures.get(2).unwrap();
116                            message = format!(
117                                "Missing space after {} at start of closed heading",
118                                "#".repeat(opening_hashes.as_str().len())
119                            );
120                            // Highlight the position right after the opening hashes
121                            // Convert byte offset to character count for correct Unicode handling
122                            start_col = line[..opening_hashes.end()].chars().count() + 1;
123                            length = 1;
124                        } else if let Some(captures) = CLOSED_ATX_NO_SPACE_END_PATTERN.captures(line) {
125                            // Missing space at end: # Heading#
126                            let content = captures.get(3).unwrap();
127                            let closing_hashes = captures.get(5).unwrap();
128                            message = format!(
129                                "Missing space before {} at end of closed heading",
130                                "#".repeat(closing_hashes.as_str().len())
131                            );
132                            // Highlight the last character before the closing hashes
133                            // Convert byte offset to character count for correct Unicode handling
134                            start_col = line[..content.end()].chars().count() + 1;
135                            length = 1;
136                        }
137
138                        let (start_line, start_col_calc, end_line, end_col) =
139                            calculate_single_line_range(line_num + 1, start_col, length);
140
141                        warnings.push(LintWarning {
142                            rule_name: Some(self.name().to_string()),
143                            message,
144                            line: start_line,
145                            column: start_col_calc,
146                            end_line,
147                            end_column: end_col,
148                            severity: Severity::Warning,
149                            fix: Some(Fix {
150                                range: line_range,
151                                replacement: self.fix_closed_atx_heading(line),
152                            }),
153                        });
154                    }
155                }
156            }
157        }
158
159        Ok(warnings)
160    }
161
162    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
163        let mut lines = Vec::new();
164
165        for (i, line_info) in ctx.lines.iter().enumerate() {
166            let line_num = i + 1;
167            // If rule is disabled for this line, keep original
168            if ctx.inline_config().is_rule_disabled(self.name(), line_num) {
169                lines.push(line_info.content(ctx.content).to_string());
170                continue;
171            }
172
173            let mut fixed = false;
174
175            if let Some(heading) = &line_info.heading {
176                // Skip headings indented 4+ spaces (they're code blocks)
177                if line_info.visual_indent >= 4 {
178                    lines.push(line_info.content(ctx.content).to_string());
179                    continue;
180                }
181
182                // Fix ATX headings without space (both properly closed and malformed)
183                if matches!(heading.style, crate::lint_context::HeadingStyle::ATX)
184                    && self.is_closed_atx_heading_without_space(line_info.content(ctx.content))
185                {
186                    lines.push(self.fix_closed_atx_heading(line_info.content(ctx.content)));
187                    fixed = true;
188                }
189            }
190
191            if !fixed {
192                lines.push(line_info.content(ctx.content).to_string());
193            }
194        }
195
196        // Reconstruct content preserving line endings
197        let mut result = lines.join("\n");
198        if ctx.content.ends_with('\n') && !result.ends_with('\n') {
199            result.push('\n');
200        }
201
202        Ok(result)
203    }
204
205    /// Get the category of this rule for selective processing
206    fn category(&self) -> RuleCategory {
207        RuleCategory::Heading
208    }
209
210    /// Check if this rule should be skipped
211    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
212        ctx.content.is_empty() || !ctx.likely_has_headings()
213    }
214
215    fn as_any(&self) -> &dyn std::any::Any {
216        self
217    }
218
219    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
220    where
221        Self: Sized,
222    {
223        Box::new(MD020NoMissingSpaceClosedAtx::new())
224    }
225}
226
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Properly spaced closed headings produce no warnings; headings missing the space
    /// before the closing hashes are flagged on their own lines.
    #[test]
    fn test_basic_functionality() {
        let rule = MD020NoMissingSpaceClosedAtx;

        // Test with correct spacing
        let content = "# Heading 1 #\n## Heading 2 ##\n### Heading 3 ###";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty());

        // Test with missing spaces
        let content = "# Heading 1#\n## Heading 2 ##\n### Heading 3###";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 2); // Should flag the two headings with missing spaces
        assert_eq!(result[0].line, 1);
        assert_eq!(result[1].line, 3);
    }

    /// Reported columns must be character positions, not byte offsets.
    #[test]
    fn test_multibyte_char_column_position() {
        let rule = MD020NoMissingSpaceClosedAtx;

        // Case 1: the multi-byte character comes AFTER the highlighted position.
        // "##Ünited##": the opening "##" ends at byte 2 and is pure ASCII, so the position
        // right after it is column 3 whether counted in bytes or in characters.
        let content = "##Ünited##";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 1);
        assert_eq!(
            result[0].column, 3,
            "Column should point right after the opening hashes"
        );

        // Case 2: the multi-byte character comes BEFORE the highlighted position.
        // "## Ü test##": regex group 3 (content) is "Ü tes" and ends at byte 9
        // ("## " = 3 bytes, "Ü" = 2 bytes, " tes" = 4 bytes).
        // line[..9] = "## Ü tes" = 8 characters, so start_col = 8 + 1 = 9 (the final "t").
        // Using the byte offset directly would give 9 + 1 = 10 (wrong for non-ASCII).
        let content = "## Ü test##";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 1);
        assert_eq!(
            result[0].column, 9,
            "Column should use character position, not byte offset"
        );
    }
}