quickmark_core/rules/
md020.rs

1use once_cell::sync::Lazy;
2use regex::Regex;
3use std::collections::HashSet;
4use std::rc::Rc;
5use tree_sitter::Node;
6
7use crate::linter::{range_from_tree_sitter, Context, RuleLinter, RuleViolation};
8
9use super::{Rule, RuleType};
10
11static CLOSED_ATX_REGEX: Lazy<Regex> = Lazy::new(|| {
12    Regex::new(r"^(#+)([ \t]*)([^# \t\\]|[^# \t][^#]*?[^# \t\\])([ \t]*)((?:\\#)?)(#+)(\s*)$")
13        .expect("Invalid regex for MD020")
14});
15
16pub(crate) struct MD020Linter {
17    context: Rc<Context>,
18    violations: Vec<RuleViolation>,
19}
20
21impl MD020Linter {
22    pub fn new(context: Rc<Context>) -> Self {
23        Self {
24            context,
25            violations: Vec::new(),
26        }
27    }
28
29    fn analyze_all_lines(&mut self) {
30        let lines = self.context.lines.borrow();
31
32        // Get line numbers that should be ignored (inside code blocks or HTML blocks)
33        let ignore_lines = self.get_ignore_lines();
34
35        for (line_index, line) in lines.iter().enumerate() {
36            if ignore_lines.contains(&(line_index + 1)) {
37                continue; // Skip lines in code blocks or HTML blocks
38            }
39
40            if let Some(violation) = self.check_line(line, line_index) {
41                self.violations.push(violation);
42            }
43        }
44    }
45
46    /// Get line numbers that should be ignored (inside code blocks or HTML blocks)
47    fn get_ignore_lines(&self) -> HashSet<usize> {
48        let mut ignore_lines = HashSet::new();
49        let node_cache = self.context.node_cache.borrow();
50
51        for node_type in ["fenced_code_block", "indented_code_block", "html_block"] {
52            if let Some(blocks) = node_cache.get(node_type) {
53                for node_info in blocks {
54                    for line_num in (node_info.line_start + 1)..=(node_info.line_end + 1) {
55                        ignore_lines.insert(line_num);
56                    }
57                }
58            }
59        }
60
61        ignore_lines
62    }
63
64    fn check_line(&self, line: &str, line_index: usize) -> Option<RuleViolation> {
65        if let Some(captures) = CLOSED_ATX_REGEX.captures(line) {
66            let left_space = captures.get(2).unwrap().as_str();
67            let right_space = captures.get(4).unwrap().as_str();
68            let right_escape = captures.get(5).unwrap().as_str();
69
70            let missing_left_space = left_space.is_empty();
71            let missing_right_space = right_space.is_empty() || !right_escape.is_empty();
72
73            if missing_left_space || missing_right_space {
74                return Some(self.create_violation_for_line(line, line_index));
75            }
76        }
77        None
78    }
79
80    fn create_violation_for_line(&self, line: &str, line_index: usize) -> RuleViolation {
81        RuleViolation::new(
82            &MD020,
83            MD020.description.to_string(),
84            self.context.file_path.clone(),
85            range_from_tree_sitter(&tree_sitter::Range {
86                start_byte: 0,
87                end_byte: line.len(),
88                start_point: tree_sitter::Point {
89                    row: line_index,
90                    column: 0,
91                },
92                end_point: tree_sitter::Point {
93                    row: line_index,
94                    column: line.len(),
95                },
96            }),
97        )
98    }
99}
100
101impl RuleLinter for MD020Linter {
102    fn feed(&mut self, node: &Node) {
103        // For line-based rules, we analyze all lines at once when we see the document node.
104        if node.kind() == "document" {
105            self.analyze_all_lines();
106        }
107    }
108
109    fn finalize(&mut self) -> Vec<RuleViolation> {
110        std::mem::take(&mut self.violations)
111    }
112}
113
114pub const MD020: Rule = Rule {
115    id: "MD020",
116    alias: "no-missing-space-closed-atx",
117    tags: &["headings", "atx_closed", "spaces"],
118    description: "No space inside hashes on closed atx style heading",
119    rule_type: RuleType::Line,
120    required_nodes: &[], // Line-based rules don't require specific nodes
121    new_linter: |context| Box::new(MD020Linter::new(context)),
122};
123
#[cfg(test)]
mod test {
    use crate::config::RuleSeverity;
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_rules;
    use std::path::PathBuf;

    /// Configuration with only the MD020 rule enabled, at error severity.
    fn test_config() -> crate::config::QuickmarkConfig {
        test_config_with_rules(vec![("no-missing-space-closed-atx", RuleSeverity::Error)])
    }

    /// Lints `input` as `test.md` and asserts the number of reported violations.
    #[track_caller]
    fn assert_violation_count(input: &str, expected: usize) {
        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
        assert_eq!(linter.analyze().len(), expected);
    }

    /// Lints `input` as `test.md` and asserts that exactly one violation
    /// carrying the MD020 message is reported.
    #[track_caller]
    fn assert_single_md020_violation(input: &str) {
        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
        let violations = linter.analyze();
        assert_eq!(violations.len(), 1);
        assert!(violations[0].message().contains("No space inside hashes"));
    }

    #[test]
    fn test_md020_missing_space_left_side() {
        assert_single_md020_violation("#Heading 1#");
    }

    #[test]
    fn test_md020_missing_space_right_side() {
        assert_single_md020_violation("# Heading 1#");
    }

    #[test]
    fn test_md020_missing_space_both_sides() {
        assert_single_md020_violation("##Heading 2##");
    }

    #[test]
    fn test_md020_correct_spacing() {
        assert_violation_count("# Heading 1 #\n## Heading 2 ##\n### Heading 3 ###", 0);
    }

    #[test]
    fn test_md020_open_atx_headings_ignored() {
        assert_violation_count("# Open Heading 1\n## Open Heading 2\n### Open Heading 3", 0);
    }

    #[test]
    fn test_md020_setext_headings_ignored() {
        assert_violation_count(
            "Setext Heading 1\n================\n\nSetext Heading 2\n----------------",
            0,
        );
    }

    #[test]
    fn test_md020_escaped_hash() {
        // An escaped hash directly before the closing hashes is reported.
        assert_single_md020_violation("## Heading \\##");
    }

    #[test]
    fn test_md020_escaped_hash_with_space() {
        assert_violation_count("## Heading \\# ##", 0);
    }

    #[test]
    fn test_md020_multiple_violations_in_file() {
        assert_violation_count(
            "#Heading 1#\n\n## Heading 2##\n\n###Heading 3###\n\n#### Correct Heading ####",
            3,
        );
    }

    #[test]
    fn test_md020_code_blocks_ignored() {
        assert_violation_count(
            "```\n#BadHeading#\n##AnotherBad##\n```\n\n    #IndentedCodeBad#\n\n# Good Heading #",
            0,
        );
    }

    #[test]
    fn test_md020_html_flow_ignored() {
        assert_violation_count(
            "<div>\n#BadHeading#\n##AnotherBad##\n</div>\n\n# Good Heading #",
            0,
        );
    }

    #[test]
    fn test_md020_trailing_spaces() {
        assert_violation_count(
            "# Heading 1 #   \n## Heading 2 ##\t\n### Heading 3 ###\n",
            0,
        );
    }

    #[test]
    fn test_md020_unbalanced_closing_hashes() {
        // Only the second one violates (missing space before #).
        assert_violation_count(
            "# Heading 1 ########\n## Heading 2##########\n### Heading 3 #",
            1,
        );
    }

    #[test]
    fn test_md020_tabs_as_spaces() {
        assert_violation_count(
            "#\tHeading 1\t#\n##\t\tHeading 2\t##\n###   Heading 3   ###",
            0,
        );
    }

    #[test]
    fn test_md020_mixed_whitespace() {
        assert_violation_count(
            "# \tHeading 1 \t#\n##  Heading 2\t ##\n### \t Heading 3 \t ###",
            0,
        );
    }

    #[test]
    fn test_md020_content_with_hashes() {
        assert_violation_count(
            "# Heading with # hash #\n## Another # heading ##\n### Multiple ## hashes ###",
            0,
        );
    }

    #[test]
    fn test_md020_empty_heading() {
        // Empty headings should be ignored or handled by other rules.
        assert_violation_count("# #\n## ##\n### ###", 0);
    }

    #[test]
    fn test_md020_complex_content() {
        assert_violation_count(
            "# Complex *italic* **bold** `code` content #\n## Link [text](url) content ##\n### Image ![alt](src) content ###",
            0,
        );
    }
}