Skip to main content

rumdl_lib/rules/
md020_no_missing_space_closed_atx.rs

1/// Rule MD020: No missing space inside closed ATX heading
2///
3/// See [docs/md020.md](../../docs/md020.md) for full documentation, configuration, and examples.
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::utils::range_utils::calculate_single_line_range;
6use crate::utils::regex_cache::get_cached_fancy_regex;
7
// Closed ATX heading patterns
// Use negative lookbehind (?<!\\) to avoid matching escaped hashes like C\# (C-sharp)
//
// All three patterns are anchored to the whole line and share the same tail: a run of
// closing hashes followed by optional whitespace and an optional `{#custom-id}` block.
// The capture-group numbers listed below are what the rest of this file indexes into.

// Space missing on both sides, e.g. `#Heading#`.
// Groups: 1 = indentation, 2 = opening hashes, 3 = content without its last character,
// 4 = last content character, 5 = closing hashes, 6 = trailing whitespace / custom id.
const CLOSED_ATX_NO_SPACE_PATTERN_STR: &str = r"^(\s*)(#+)([^#\s].*?)([^#\s\\])(?<!\\)(#+)(\s*(?:\{#[^}]+\})?\s*)$";
// Space missing after the opening hashes only, e.g. `#Heading #`.
// Groups: 1 = indentation, 2 = opening hashes, 3 = content, 4 = closing hashes,
// 5 = trailing whitespace / custom id.
const CLOSED_ATX_NO_SPACE_START_PATTERN_STR: &str = r"^(\s*)(#+)([^#\s].*?)\s(?<!\\)(#+)(\s*(?:\{#[^}]+\})?\s*)$";
// Space missing before the closing hashes only, e.g. `# Heading#`.
// Groups: 1 = indentation, 2 = opening hashes, 3 = content without its last character,
// 4 = last content character, 5 = closing hashes, 6 = trailing whitespace / custom id.
const CLOSED_ATX_NO_SPACE_END_PATTERN_STR: &str = r"^(\s*)(#+)\s(.*?)([^#\s\\])(?<!\\)(#+)(\s*(?:\{#[^}]+\})?\s*)$";
13
/// Lint rule MD020: reports closed ATX headings (`# Heading #`) that are missing the
/// space between the heading text and the opening and/or closing hashes.
///
/// The rule is a unit struct: it holds no configuration and no per-run state.
#[derive(Debug, Clone)]
pub struct MD020NoMissingSpaceClosedAtx;
16
17impl Default for MD020NoMissingSpaceClosedAtx {
18    fn default() -> Self {
19        Self::new()
20    }
21}
22
23impl MD020NoMissingSpaceClosedAtx {
24    pub fn new() -> Self {
25        Self
26    }
27
28    fn is_closed_atx_heading_without_space(&self, line: &str) -> bool {
29        get_cached_fancy_regex(CLOSED_ATX_NO_SPACE_PATTERN_STR)
30            .map(|re| re.is_match(line).unwrap_or(false))
31            .unwrap_or(false)
32            || get_cached_fancy_regex(CLOSED_ATX_NO_SPACE_START_PATTERN_STR)
33                .map(|re| re.is_match(line).unwrap_or(false))
34                .unwrap_or(false)
35            || get_cached_fancy_regex(CLOSED_ATX_NO_SPACE_END_PATTERN_STR)
36                .map(|re| re.is_match(line).unwrap_or(false))
37                .unwrap_or(false)
38    }
39
40    fn fix_closed_atx_heading(&self, line: &str) -> String {
41        if let Some(captures) = get_cached_fancy_regex(CLOSED_ATX_NO_SPACE_PATTERN_STR)
42            .ok()
43            .and_then(|re| re.captures(line).ok().flatten())
44        {
45            let indentation = &captures[1];
46            let opening_hashes = &captures[2];
47            let content = &captures[3];
48            let last_char = &captures[4];
49            let closing_hashes = &captures[5];
50            let custom_id = &captures[6];
51            format!("{indentation}{opening_hashes} {content}{last_char} {closing_hashes}{custom_id}")
52        } else if let Some(captures) = get_cached_fancy_regex(CLOSED_ATX_NO_SPACE_START_PATTERN_STR)
53            .ok()
54            .and_then(|re| re.captures(line).ok().flatten())
55        {
56            let indentation = &captures[1];
57            let opening_hashes = &captures[2];
58            let content = &captures[3];
59            let closing_hashes = &captures[4];
60            let custom_id = &captures[5];
61            format!("{indentation}{opening_hashes} {content} {closing_hashes}{custom_id}")
62        } else if let Some(captures) = get_cached_fancy_regex(CLOSED_ATX_NO_SPACE_END_PATTERN_STR)
63            .ok()
64            .and_then(|re| re.captures(line).ok().flatten())
65        {
66            let indentation = &captures[1];
67            let opening_hashes = &captures[2];
68            let content = &captures[3];
69            let last_char = &captures[4];
70            let closing_hashes = &captures[5];
71            let custom_id = &captures[6];
72            format!("{indentation}{opening_hashes} {content}{last_char} {closing_hashes}{custom_id}")
73        } else {
74            line.to_string()
75        }
76    }
77}
78
79impl Rule for MD020NoMissingSpaceClosedAtx {
80    fn name(&self) -> &'static str {
81        "MD020"
82    }
83
84    fn description(&self) -> &'static str {
85        "No space inside hashes on closed heading"
86    }
87
88    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
89        let mut warnings = Vec::new();
90
91        // Check all closed ATX headings from cached info
92        for (line_num, line_info) in ctx.lines.iter().enumerate() {
93            if let Some(heading) = &line_info.heading {
94                // Skip headings indented 4+ spaces (they're code blocks)
95                if line_info.visual_indent >= 4 {
96                    continue;
97                }
98
99                // Check all ATX headings (both properly closed and malformed)
100                if matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
101                    let line = line_info.content(ctx.content);
102
103                    // Check if line matches closed ATX pattern without space
104                    // This will detect both properly closed headings with missing space
105                    // and malformed attempts at closed headings like "# Heading#"
106                    if self.is_closed_atx_heading_without_space(line) {
107                        let line_range = ctx.line_index.line_content_range(line_num + 1);
108
109                        let mut start_col = 1;
110                        let mut length = 1;
111                        let mut message = String::new();
112
113                        if let Some(captures) = get_cached_fancy_regex(CLOSED_ATX_NO_SPACE_PATTERN_STR)
114                            .ok()
115                            .and_then(|re| re.captures(line).ok().flatten())
116                        {
117                            // Missing space at both start and end: #Heading#
118                            let opening_hashes = captures.get(2).unwrap();
119                            message = format!(
120                                "Missing space inside hashes on closed heading (with {} at start and end)",
121                                "#".repeat(opening_hashes.as_str().len())
122                            );
123                            // Highlight the position right after the opening hashes
124                            // Convert byte offset to character count for correct Unicode handling
125                            start_col = line[..opening_hashes.end()].chars().count() + 1;
126                            length = 1;
127                        } else if let Some(captures) = get_cached_fancy_regex(CLOSED_ATX_NO_SPACE_START_PATTERN_STR)
128                            .ok()
129                            .and_then(|re| re.captures(line).ok().flatten())
130                        {
131                            // Missing space at start: #Heading #
132                            let opening_hashes = captures.get(2).unwrap();
133                            message = format!(
134                                "Missing space after {} at start of closed heading",
135                                "#".repeat(opening_hashes.as_str().len())
136                            );
137                            // Highlight the position right after the opening hashes
138                            // Convert byte offset to character count for correct Unicode handling
139                            start_col = line[..opening_hashes.end()].chars().count() + 1;
140                            length = 1;
141                        } else if let Some(captures) = get_cached_fancy_regex(CLOSED_ATX_NO_SPACE_END_PATTERN_STR)
142                            .ok()
143                            .and_then(|re| re.captures(line).ok().flatten())
144                        {
145                            // Missing space at end: # Heading#
146                            let content = captures.get(3).unwrap();
147                            let closing_hashes = captures.get(5).unwrap();
148                            message = format!(
149                                "Missing space before {} at end of closed heading",
150                                "#".repeat(closing_hashes.as_str().len())
151                            );
152                            // Highlight the last character before the closing hashes
153                            // Convert byte offset to character count for correct Unicode handling
154                            start_col = line[..content.end()].chars().count() + 1;
155                            length = 1;
156                        }
157
158                        let (start_line, start_col_calc, end_line, end_col) =
159                            calculate_single_line_range(line_num + 1, start_col, length);
160
161                        warnings.push(LintWarning {
162                            rule_name: Some(self.name().to_string()),
163                            message,
164                            line: start_line,
165                            column: start_col_calc,
166                            end_line,
167                            end_column: end_col,
168                            severity: Severity::Warning,
169                            fix: Some(Fix {
170                                range: line_range,
171                                replacement: self.fix_closed_atx_heading(line),
172                            }),
173                        });
174                    }
175                }
176            }
177        }
178
179        Ok(warnings)
180    }
181
182    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
183        let mut lines = Vec::new();
184
185        for line_info in ctx.lines.iter() {
186            let mut fixed = false;
187
188            if let Some(heading) = &line_info.heading {
189                // Skip headings indented 4+ spaces (they're code blocks)
190                if line_info.visual_indent >= 4 {
191                    lines.push(line_info.content(ctx.content).to_string());
192                    continue;
193                }
194
195                // Fix ATX headings without space (both properly closed and malformed)
196                if matches!(heading.style, crate::lint_context::HeadingStyle::ATX)
197                    && self.is_closed_atx_heading_without_space(line_info.content(ctx.content))
198                {
199                    lines.push(self.fix_closed_atx_heading(line_info.content(ctx.content)));
200                    fixed = true;
201                }
202            }
203
204            if !fixed {
205                lines.push(line_info.content(ctx.content).to_string());
206            }
207        }
208
209        // Reconstruct content preserving line endings
210        let mut result = lines.join("\n");
211        if ctx.content.ends_with('\n') && !result.ends_with('\n') {
212            result.push('\n');
213        }
214
215        Ok(result)
216    }
217
218    /// Get the category of this rule for selective processing
219    fn category(&self) -> RuleCategory {
220        RuleCategory::Heading
221    }
222
223    /// Check if this rule should be skipped
224    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
225        ctx.content.is_empty() || !ctx.likely_has_headings()
226    }
227
228    fn as_any(&self) -> &dyn std::any::Any {
229        self
230    }
231
232    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
233    where
234        Self: Sized,
235    {
236        Box::new(MD020NoMissingSpaceClosedAtx::new())
237    }
238}
239
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Correctly spaced closed headings produce no warnings; headings missing the
    /// space before the closing hashes are flagged on their own lines.
    #[test]
    fn test_basic_functionality() {
        let rule = MD020NoMissingSpaceClosedAtx;

        // Test with correct spacing
        let content = "# Heading 1 #\n## Heading 2 ##\n### Heading 3 ###";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty());

        // Test with missing spaces
        let content = "# Heading 1#\n## Heading 2 ##\n### Heading 3###";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 2); // Should flag the two headings with missing spaces
        assert_eq!(result[0].line, 1);
        assert_eq!(result[1].line, 3);
    }

    /// Reported columns must be character positions, not UTF-8 byte offsets.
    #[test]
    fn test_multibyte_char_column_position() {
        let rule = MD020NoMissingSpaceClosedAtx;

        // Case 1: the multi-byte character comes AFTER the highlighted position.
        // In "##Ünited##" the opening hashes end at byte 2, which is also character 2,
        // so byte-based and character-based columns agree: column 3.
        let content = "##Ünited##";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 1);
        assert_eq!(
            result[0].column, 3,
            "Column should point right after the opening hashes"
        );

        // Case 2: the multi-byte character comes BEFORE the highlighted position.
        // "Ü" is 2 bytes in UTF-8 but 1 character. In "## Ü test##" the content group
        // ("Ü tes") ends at byte 9, and line[..9] = "## Ü tes" is 8 characters,
        // so the expected column is 8 + 1 = 9. A byte-based computation would
        // report 9 + 1 = 10 instead.
        let content = "## Ü test##";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 1);
        assert_eq!(
            result[0].column, 9,
            "Column should use character position, not byte offset"
        );
    }
}