quickmark_core/rules/
md021.rs

1use once_cell::sync::Lazy;
2use regex::Regex;
3use std::collections::HashSet;
4use std::rc::Rc;
5use tree_sitter::Node;
6
7use crate::linter::{range_from_tree_sitter, Context, RuleLinter, RuleViolation};
8
9use super::{Rule, RuleType};
10
11static CLOSED_ATX_REGEX: Lazy<Regex> = Lazy::new(|| {
12    // Match closed ATX headings but exclude escaped hashes (consistent with original markdownlint)
13    // The pattern ensures that the closing hashes are not escaped
14    Regex::new(r"^(#+)([ \t]*)([^# \t\\]|[^# \t][^#]*?[^# \t\\])([ \t]*)(#+)(\s*)$")
15        .expect("Invalid regex for MD021")
16});
17
18pub(crate) struct MD021Linter {
19    context: Rc<Context>,
20    violations: Vec<RuleViolation>,
21}
22
23impl MD021Linter {
24    pub fn new(context: Rc<Context>) -> Self {
25        Self {
26            context,
27            violations: Vec::new(),
28        }
29    }
30
31    fn analyze_all_lines(&mut self) {
32        let lines = self.context.lines.borrow();
33
34        // Get line numbers that should be ignored (inside code blocks or HTML blocks)
35        let ignore_lines = self.get_ignore_lines();
36
37        for (line_index, line) in lines.iter().enumerate() {
38            if ignore_lines.contains(&(line_index + 1)) {
39                continue; // Skip lines in code blocks or HTML blocks
40            }
41
42            if let Some(mut line_violations) = self.check_line(line, line_index) {
43                self.violations.append(&mut line_violations);
44            }
45        }
46    }
47
48    /// Get line numbers that should be ignored (inside code blocks or HTML blocks)
49    fn get_ignore_lines(&self) -> HashSet<usize> {
50        let mut ignore_lines = HashSet::new();
51        let node_cache = self.context.node_cache.borrow();
52
53        for node_type in ["fenced_code_block", "indented_code_block", "html_block"] {
54            if let Some(blocks) = node_cache.get(node_type) {
55                for node_info in blocks {
56                    for line_num in (node_info.line_start + 1)..=(node_info.line_end + 1) {
57                        ignore_lines.insert(line_num);
58                    }
59                }
60            }
61        }
62
63        ignore_lines
64    }
65
66    fn check_line(&self, line: &str, line_index: usize) -> Option<Vec<RuleViolation>> {
67        let mut violations = Vec::new();
68
69        if let Some(captures) = CLOSED_ATX_REGEX.captures(line) {
70            let opening_spaces = captures.get(2).unwrap().as_str();
71            let closing_spaces = captures.get(4).unwrap().as_str();
72
73            // Check for multiple spaces after opening hashes
74            if opening_spaces.len() > 1 {
75                let start_col = captures.get(2).unwrap().start();
76                violations.push(RuleViolation::new(
77                    &MD021,
78                    format!(
79                        "Multiple spaces inside hashes on closed atx style heading [Expected: 1; Actual: {}]",
80                        opening_spaces.len()
81                    ),
82                    self.context.file_path.clone(),
83                    // The location points to the second space, which is the beginning of the violation.
84                    range_from_tree_sitter(&tree_sitter::Range {
85                        start_byte: 0, // Not accurate, but line/col is used
86                        end_byte: 0,
87                        start_point: tree_sitter::Point { row: line_index, column: start_col + 2 },
88                        end_point: tree_sitter::Point { row: line_index, column: start_col + 3 },
89                    }),
90                ));
91            }
92
93            // Check for multiple spaces before closing hashes
94            if closing_spaces.len() > 1 {
95                let start_col = captures.get(4).unwrap().start();
96                violations.push(RuleViolation::new(
97                    &MD021,
98                    format!(
99                        "Multiple spaces inside hashes on closed atx style heading [Expected: 1; Actual: {}]",
100                        closing_spaces.len()
101                    ),
102                    self.context.file_path.clone(),
103                    // The location points to the second space, which is the beginning of the violation.
104                    range_from_tree_sitter(&tree_sitter::Range {
105                        start_byte: 0, // Not accurate, but line/col is used
106                        end_byte: 0,
107                        start_point: tree_sitter::Point { row: line_index, column: start_col + 2 },
108                        end_point: tree_sitter::Point { row: line_index, column: start_col + 3 },
109                    }),
110                ));
111            }
112        }
113
114        if violations.is_empty() {
115            None
116        } else {
117            Some(violations)
118        }
119    }
120}
121
122impl RuleLinter for MD021Linter {
123    fn feed(&mut self, node: &Node) {
124        if node.kind() == "document" {
125            self.analyze_all_lines();
126        }
127    }
128
129    fn finalize(&mut self) -> Vec<RuleViolation> {
130        std::mem::take(&mut self.violations)
131    }
132}
133
134pub const MD021: Rule = Rule {
135    id: "MD021",
136    alias: "no-multiple-space-closed-atx",
137    tags: &["headings", "atx_closed", "spaces"],
138    description: "Multiple spaces inside hashes on closed atx style heading",
139    rule_type: RuleType::Line,
140    required_nodes: &[],
141    new_linter: |context| Box::new(MD021Linter::new(context)),
142};
143
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::RuleSeverity;
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_rules;

    /// Config with MD021 reported as an error and the `heading-style` /
    /// `heading-increment` rules switched off.
    fn test_config() -> crate::config::QuickmarkConfig {
        test_config_with_rules(vec![
            ("no-multiple-space-closed-atx", RuleSeverity::Error),
            ("heading-style", RuleSeverity::Off),
            ("heading-increment", RuleSeverity::Off),
        ])
    }

    /// Lints `input` and asserts that it yields exactly `expected` violations,
    /// all of them reported by MD021.
    #[track_caller]
    fn assert_md021_count(input: &str, expected: usize) {
        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
        let violations = linter.analyze();

        assert_eq!(violations.len(), expected);

        for violation in &violations {
            assert_eq!(violation.rule().id, "MD021");
        }
    }

    #[test]
    fn test_md021_multiple_spaces_after_opening_hashes() {
        // One violation per line, each for the whitespace after the opening hashes.
        assert_md021_count(
            "##  Heading with multiple spaces after opening ##\n###   Another heading ###\n####    Yet another heading ####\n",
            3,
        );
    }

    #[test]
    fn test_md021_multiple_spaces_before_closing_hashes() {
        // One violation per line, each for the whitespace before the closing hashes.
        assert_md021_count(
            "## Heading with multiple spaces before closing  ##\n### Another heading with spaces before closing   ###\n#### Yet another heading    ####\n",
            3,
        );
    }

    #[test]
    fn test_md021_multiple_spaces_both_sides() {
        // 4 violations: 2 for opening spaces, 2 for closing spaces.
        assert_md021_count(
            "##  Heading with multiple spaces on both sides  ##\n###   Another heading with multiple spaces   ###\n",
            4,
        );
    }

    #[test]
    fn test_md021_correct_single_spaces() {
        // A single space on each side is correct, so nothing is reported.
        assert_md021_count(
            "# Heading with correct spacing #\n## Another heading with correct spacing ##\n### Third heading with correct spacing ###\n#### Fourth heading ####\n",
            0,
        );
    }

    #[test]
    fn test_md021_only_applies_to_closed_headings() {
        // Only the closed headings are reported, not the regular ATX headings:
        // one violation for opening spaces, one for closing spaces.
        assert_md021_count(
            "# Regular ATX heading\n##  Regular ATX heading with multiple spaces\n### Regular ATX heading\n##  Closed heading with multiple spaces ##\n### Another closed heading with multiple spaces  ###\n",
            2,
        );
    }

    #[test]
    fn test_md021_no_spaces_around_hashes() {
        // MD021 only cares about multiple spaces, not missing spaces.
        assert_md021_count(
            "##Heading with no spaces##\n###Another heading with no spaces###\n",
            0,
        );
    }

    #[test]
    fn test_md021_mixed_tabs_and_spaces() {
        // Any whitespace run longer than one character is reported:
        // line 1: 1 (opening), line 2: 1 (closing), line 3: 2 (both) => 4.
        assert_md021_count(
            "##\t\tHeading with tabs after opening ##\n## Heading with spaces before closing\t\t##\n###  \tMixed tabs and spaces   ###\n",
            4,
        );
    }

    #[test]
    fn test_md021_edge_case_single_hash() {
        // 3 violations: line 1 opening, line 2 opening, line 2 closing.
        assert_md021_count(
            "#  Heading with single hash and multiple spaces #\n#   Another single hash heading   #\n",
            3,
        );
    }

    #[test]
    fn test_md021_escaped_hash_not_detected() {
        // Escaped closing hashes do not form a closed ATX heading for MD021,
        // so these lines must not be reported.
        assert_md021_count(
            "## Multiple spaces before escaped hash  \\##\n### Multiple spaces with escaped hash  \\###\n####  Yet another escaped hash  \\####\n",
            0,
        );
    }

    #[test]
    fn test_md021_column_positions_accuracy() {
        let config = test_config();

        // The reported column is the second whitespace character, counted from 1.
        let input = "##  Two spaces after opening ##\n### Three spaces before closing   ###\n";
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();

        assert_eq!(violations.len(), 2);

        // First violation: opening spaces on the first line.
        // Line: "##  Two spaces after opening ##"
        // Column should be 4 (the second space).
        assert_eq!(violations[0].location().range.start.line, 0);
        assert_eq!(violations[0].location().range.start.character, 4);

        // Second violation: closing spaces on the second line.
        // Line: "### Three spaces before closing   ###"
        // Column should be 33 (the second space).
        assert_eq!(violations[1].location().range.start.line, 1);
        assert_eq!(violations[1].location().range.start.character, 33);
    }

    #[test]
    fn test_md021_mixed_tabs_spaces_comprehensive() {
        let config = test_config();

        // Various combinations of tabs and spaces.
        let input = "##\t\tTab after opening ##\n##  \tSpace then tab ##\n##\t Mixed tab and space\t##\n###\t  Tab and spaces  \t###\n";
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();

        // Expected violations:
        // Line 1: 1 violation (opening: 2 tabs)
        // Line 2: 1 violation (opening: 2 spaces + 1 tab = 3 chars)
        // Line 3: 1 violation (opening: 1 tab + 1 space = 2 chars)
        // Line 4: 2 violations (opening: 1 tab + 2 spaces = 3 chars, closing: 2 spaces + 1 tab = 3 chars)
        assert_eq!(violations.len(), 5);

        for violation in &violations {
            assert_eq!(violation.rule().id, "MD021");
            // Every message reports an actual count, and none of them is exactly 1.
            assert!(violation.message().contains("Actual:"));
            assert!(!violation.message().contains("Actual: 1]"));
        }
    }

    #[test]
    fn test_md021_single_vs_multiple_hash_combinations() {
        // Expected violations:
        // Line 1: 1 violation (opening: 2 spaces)
        // Line 2: 1 violation (opening: 3 spaces)
        // Line 3: 1 violation (opening: 4 spaces)
        // Line 4: 1 violation (closing: 2 spaces)
        // Line 5: 2 violations (opening and closing: 2 spaces each)
        // Line 6: 2 violations (opening and closing: 3 spaces each)
        assert_md021_count(
            "#  Single hash with multiple opening spaces #\n##   Double hash with multiple opening spaces ##\n###    Triple hash with multiple opening spaces ###\n# Single hash with multiple closing spaces  #\n##  Double hash with multiple closing spaces  ##\n###   Triple hash with multiple closing spaces   ###\n",
            8,
        );
    }

    #[test]
    fn test_md021_boundary_conditions() {
        let config = test_config();

        // Boundary: exactly 1 space (valid) vs 2+ spaces (invalid).
        let input = "# Exactly one space on both sides #\n##  Exactly two spaces after opening ##\n## Exactly two spaces before closing  ##\n###   Three spaces both sides   ###\n";
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();

        // Lines 2 and 3 contribute one violation each, line 4 contributes two.
        assert_eq!(violations.len(), 4);

        // The single-space first line must not appear among the violations.
        for violation in &violations {
            assert_ne!(violation.location().range.start.line, 0);
        }
    }

    #[test]
    fn test_md021_violation_message_format() {
        let config = test_config();

        // Violation messages must carry the actual whitespace counts.
        let input = "##  Two spaces ##\n###   Three spaces   ###\n####    Four spaces    ####\n";
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();

        // Line 1: 1 violation (opening), line 2: 2 violations, line 3: 2 violations.
        assert_eq!(violations.len(), 5);

        let messages: Vec<String> = violations.iter().map(|v| v.message().to_string()).collect();

        // Each distinct count shows up in at least one message.
        assert!(messages.iter().any(|m| m.contains("Actual: 2]")));
        assert!(messages.iter().any(|m| m.contains("Actual: 3]")));
        assert!(messages.iter().any(|m| m.contains("Actual: 4]")));
    }

    #[test]
    fn test_md021_regex_edge_cases() {
        // Edge cases that might confuse the regex. Expected violations:
        // Line 1: none (correct spacing)
        // Line 2: 1 (opening: 2 spaces; internal spacing is not checked)
        // Line 3: 1 (opening: 3 spaces; no whitespace at all before ###)
        // Line 4: 1 (opening: 4 spaces; the hash counts need not match, and the
        //            single space before the closing hashes is fine)
        // Line 5: none (content containing `#` does not match the regex)
        assert_md021_count(
            "## Normal heading ##\n##  Heading with  multiple  internal  spaces ##\n###   Heading with trailing hash###\n####    Heading with unmatched hashes ###\n##### Heading with content containing # symbols #####\n",
            3,
        );
    }

    #[test]
    fn test_md021_parity_comprehensive() {
        let config = test_config();

        // Cases mirroring the comprehensive test file scenarios.
        let input = "##  Two spaces after opening ##\n###   Three spaces after opening ###\n## Two spaces before closing  ##\n### Three spaces before closing   ###\n##  Both sides have multiple  ##\n#  Multiple spaces after single hash #\n##\tTab after opening\t##\n##    Many spaces    ##\n###     Even more spaces     ###\n";
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();

        // Expected violations:
        // Line 1: 1 (opening: 2 spaces)
        // Line 2: 1 (opening: 3 spaces)
        // Line 3: 1 (closing: 2 spaces)
        // Line 4: 1 (closing: 3 spaces)
        // Line 5: 2 (opening: 2 spaces, closing: 2 spaces)
        // Line 6: 1 (opening: 2 spaces)
        // Line 7: 0 (exactly 1 tab on both sides is valid)
        // Line 8: 2 (opening: 4 spaces, closing: 4 spaces)
        // Line 9: 2 (opening: 5 spaces, closing: 5 spaces)
        assert_eq!(violations.len(), 11);

        for violation in &violations {
            assert_eq!(violation.rule().id, "MD021");
            assert!(violation
                .message()
                .contains("Multiple spaces inside hashes on closed atx style heading"));
        }

        // Sanity-check the reported columns: non-zero and within the line lengths used above.
        for violation in &violations {
            assert!(violation.location().range.start.character > 0);
            assert!(violation.location().range.start.character < 50);
        }
    }

    #[test]
    fn test_md021_only_closed_not_setext() {
        // Only the closed ATX heading is reported: opening and closing spaces.
        assert_md021_count(
            "Setext Heading 1\n================\n\nSetext Heading 2\n----------------\n\n##  Closed ATX heading  ##\n",
            2,
        );
    }
}