quickmark_core/rules/
md012.rs

1use serde::Deserialize;
2use std::rc::Rc;
3
4use tree_sitter::Node;
5
6use crate::{
7    linter::{range_from_tree_sitter, RuleViolation},
8    rules::{Context, Rule, RuleLinter, RuleType},
9};
10
11// MD012-specific configuration types
12#[derive(Debug, PartialEq, Clone, Deserialize)]
13pub struct MD012MultipleBlankLinesTable {
14    #[serde(default)]
15    pub maximum: usize,
16}
17
18impl Default for MD012MultipleBlankLinesTable {
19    fn default() -> Self {
20        Self { maximum: 1 }
21    }
22}
23
/// MD012 Multiple Consecutive Blank Lines Rule Linter
///
/// **SINGLE-USE CONTRACT**: This linter is designed for one-time use only.
/// After processing a document (via feed() calls and finalize()), the linter
/// should be discarded. The violations state is not cleared between uses.
pub(crate) struct MD012Linter {
    /// Shared document context; the linter reads its configuration, line
    /// list, node cache and file path from here.
    context: Rc<Context>,
    /// Violations recorded while analyzing the document; moved out to the
    /// caller by `finalize()`.
    violations: Vec<RuleViolation>,
}
33
34impl MD012Linter {
35    pub fn new(context: Rc<Context>) -> Self {
36        Self {
37            context,
38            violations: Vec::new(),
39        }
40    }
41
42    /// Analyze all lines and store all violations for reporting via finalize()
43    /// Context cache is already initialized by MultiRuleLinter
44    fn analyze_all_lines(&mut self) {
45        let settings = &self.context.config.linters.settings.multiple_blank_lines;
46        let lines = self.context.lines.borrow();
47        let maximum = settings.maximum;
48
49        // Create a boolean mask for lines that are part of code blocks.
50        // This is more performant than a HashSet for dense data like line numbers
51        // due to better cache locality and no hashing overhead.
52        let mut code_block_mask = vec![false; lines.len()];
53        self.populate_code_block_mask(&mut code_block_mask);
54
55        let mut consecutive_blanks = 0;
56
57        for (line_index, line) in lines.iter().enumerate() {
58            let is_blank = line.trim().is_empty();
59            // Use the boolean mask for an O(1) lookup.
60            let is_in_code_block = code_block_mask.get(line_index).copied().unwrap_or(false);
61
62            if is_blank && !is_in_code_block {
63                consecutive_blanks += 1;
64
65                // Report violation immediately when maximum is exceeded
66                // This matches markdownlint behavior of reporting each position
67                if consecutive_blanks > maximum {
68                    let violation = self.create_violation(line_index, consecutive_blanks, maximum);
69                    self.violations.push(violation);
70                }
71            } else {
72                consecutive_blanks = 0;
73            }
74        }
75
76        // Note: No additional end-of-document check needed because violations
77        // are reported immediately during the loop when each blank line is processed
78    }
79
80    /// Populates a boolean slice indicating which lines are part of code blocks.
81    ///
82    /// This is performant as it uses the pre-parsed node cache and a contiguous
83    /// memory block (`Vec<bool>`) for marking lines, leading to better cache
84    /// performance than a `HashSet`. It uses 0-based indexing consistently.
85    ///
86    /// Note: Works around a tree-sitter-md issue where fenced code blocks
87    /// incorrectly include a blank line immediately after the closing fence.
88    fn populate_code_block_mask(&self, mask: &mut [bool]) {
89        let node_cache = self.context.node_cache.borrow();
90        let lines = self.context.lines.borrow();
91
92        // Handle indented code blocks
93        if let Some(indented_blocks) = node_cache.get("indented_code_block") {
94            for node_info in indented_blocks {
95                for line_num in node_info.line_start..=node_info.line_end {
96                    if let Some(is_in_block) = mask.get_mut(line_num) {
97                        *is_in_block = true;
98                    }
99                }
100            }
101        }
102
103        // Handle fenced code blocks with workaround for tree-sitter issue
104        if let Some(fenced_blocks) = node_cache.get("fenced_code_block") {
105            for node_info in fenced_blocks {
106                let mut end_line = node_info.line_end;
107
108                // Workaround: If the last line in the range is blank and doesn't contain
109                // a closing fence, exclude it (it's likely incorrectly included by tree-sitter)
110                if let Some(last_line) = lines.get(end_line) {
111                    if last_line.trim().is_empty() {
112                        // Check if the previous line contains a closing fence
113                        if let Some(prev_line) = lines.get(end_line.saturating_sub(1)) {
114                            if prev_line.trim().starts_with("```") {
115                                // The previous line is the closing fence, so this blank line
116                                // should not be part of the code block
117                                end_line = end_line.saturating_sub(1);
118                            }
119                        }
120                    }
121                }
122
123                for line_num in node_info.line_start..=end_line {
124                    if let Some(is_in_block) = mask.get_mut(line_num) {
125                        *is_in_block = true;
126                    }
127                }
128            }
129        }
130    }
131
132    /// Creates a RuleViolation with a correctly calculated range.
133    fn create_violation(
134        &self,
135        line_index: usize,
136        consecutive_blanks: usize,
137        maximum: usize,
138    ) -> RuleViolation {
139        let message = format!(
140            "Multiple consecutive blank lines [Expected: {maximum} or fewer; Actual: {consecutive_blanks}]"
141        );
142
143        RuleViolation::new(
144            &MD012,
145            message,
146            self.context.file_path.clone(),
147            range_from_tree_sitter(&tree_sitter::Range {
148                // FIXME: Byte offsets are not correctly calculated because line start offsets are
149                // unavailable here. To fix this, the `Context` should provide a way to resolve
150                // a line index to its starting byte offset in the source file.
151                // The current implementation of `0` is incorrect and may result in
152                // incorrect highlighting in some tools.
153                start_byte: 0,
154                end_byte: 0,
155                start_point: tree_sitter::Point {
156                    row: line_index,
157                    column: 0,
158                },
159                end_point: tree_sitter::Point {
160                    row: line_index,
161                    column: 0,
162                },
163            }),
164        )
165    }
166}
167
168impl RuleLinter for MD012Linter {
169    fn feed(&mut self, node: &Node) {
170        // This rule is line-based and only needs to run once.
171        // We trigger the analysis on seeing the top-level `document` node.
172        if node.kind() == "document" {
173            self.analyze_all_lines();
174        }
175    }
176
177    fn finalize(&mut self) -> Vec<RuleViolation> {
178        std::mem::take(&mut self.violations)
179    }
180}
181
/// Rule descriptor for MD012 (`no-multiple-blanks`): the rule's identifiers,
/// tags and description, plus the factory that builds an `MD012Linter` from
/// a context.
pub const MD012: Rule = Rule {
    id: "MD012",
    alias: "no-multiple-blanks",
    tags: &["blank_lines", "whitespace"],
    description: "Multiple consecutive blank lines",
    rule_type: RuleType::Line,
    // This is a line-based rule and does not require specific nodes from the AST.
    // The logic runs once for the entire file content.
    required_nodes: &[],
    new_linter: |context| Box::new(MD012Linter::new(context)),
};
193
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::{LintersSettingsTable, RuleSeverity};
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::{test_config_with_rules, test_config_with_settings};

    /// Configuration with MD012 enabled, the heading rules switched off, and
    /// default rule settings.
    fn test_config() -> crate::config::QuickmarkConfig {
        let rules = vec![
            ("no-multiple-blanks", RuleSeverity::Error),
            ("heading-style", RuleSeverity::Off),
            ("heading-increment", RuleSeverity::Off),
        ];
        test_config_with_rules(rules)
    }

    /// Same rule selection as `test_config`, but with the given MD012 settings.
    fn test_config_with_multiple_blanks(
        multiple_blanks_config: crate::config::MD012MultipleBlankLinesTable,
    ) -> crate::config::QuickmarkConfig {
        let rules = vec![
            ("no-multiple-blanks", RuleSeverity::Error),
            ("heading-style", RuleSeverity::Off),
            ("heading-increment", RuleSeverity::Off),
        ];
        let settings = LintersSettingsTable {
            multiple_blank_lines: multiple_blanks_config,
            ..Default::default()
        };
        test_config_with_settings(rules, settings)
    }

    #[test]
    fn test_no_violations_single_line() {
        let input = "Single line document";

        let violations =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input)
                .analyze();
        assert!(violations.is_empty());
    }

    #[test]
    fn test_no_violations_no_blank_lines() {
        let input = r#"Line one
Line two
Line three"#;

        let violations =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input)
                .analyze();
        assert!(violations.is_empty());
    }

    #[test]
    fn test_no_violations_single_blank_line() {
        let input = r#"Line one

Line two"#;

        let violations =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input)
                .analyze();
        assert!(violations.is_empty());
    }

    #[test]
    fn test_violation_two_consecutive_blank_lines() {
        let input = r#"Line one


Line two"#;

        let violations =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input)
                .analyze();
        assert_eq!(violations.len(), 1);

        let first = &violations[0];
        assert_eq!(first.rule().id, "MD012");
        assert!(first.message().contains("Multiple consecutive blank lines"));
    }

    #[test]
    fn test_violation_three_consecutive_blank_lines() {
        let input = r#"Line one



Line two"#;

        let violations =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input)
                .analyze();
        // One violation at the 2nd blank and one at the 3rd (markdownlint behavior)
        assert_eq!(violations.len(), 2);
        assert!(violations.iter().all(|v| v.rule().id == "MD012"));
    }

    #[test]
    fn test_violation_multiple_locations() {
        let input = r#"Line one


Line two


Line three"#;

        let violations =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input)
                .analyze();
        assert_eq!(violations.len(), 2);
        assert!(violations.iter().all(|v| v.rule().id == "MD012"));
    }

    #[test]
    fn test_custom_maximum_two() {
        let config =
            test_config_with_multiple_blanks(crate::config::MD012MultipleBlankLinesTable {
                maximum: 2,
            });

        // A run of two blank lines stays within the limit
        let input_allowed = r#"Line one


Line two"#;
        let violations = MultiRuleLinter::new_for_document(
            PathBuf::from("test.md"),
            config.clone(),
            input_allowed,
        )
        .analyze();
        assert!(violations.is_empty());

        // A run of three blank lines exceeds it
        let input_violation = r#"Line one



Line two"#;
        let violations =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input_violation)
                .analyze();
        assert_eq!(violations.len(), 1);
    }

    #[test]
    fn test_custom_maximum_zero() {
        let config =
            test_config_with_multiple_blanks(crate::config::MD012MultipleBlankLinesTable {
                maximum: 0,
            });

        // With a maximum of zero, a single blank line already violates
        let input = r#"Line one

Line two"#;
        let violations =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input).analyze();
        assert_eq!(violations.len(), 1);
    }

    #[test]
    fn test_code_blocks_excluded() {
        let config = test_config();

        // Indented code block
        let input_indented = r#"Normal text

    Code line 1


    Code line 2

Normal text again"#;

        let violations = MultiRuleLinter::new_for_document(
            PathBuf::from("test.md"),
            config.clone(),
            input_indented,
        )
        .analyze();
        // Blank lines inside the indented code block are not reported
        assert!(violations.is_empty());

        // Fenced code block
        let input_fenced = r#"Normal text

```
Code line 1


Code line 2
```

Normal text again"#;

        let violations =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input_fenced)
                .analyze();
        // Blank lines inside the fenced code block are not reported
        assert!(violations.is_empty());
    }

    #[test]
    fn test_code_blocks_with_surrounding_violations() {
        let input = r#"Normal text


```
Code with blank lines


Inside
```


More normal text"#;

        let violations =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input)
                .analyze();
        // Only the blank runs outside the code block are reported
        assert_eq!(violations.len(), 2);
    }

    #[test]
    fn test_blank_lines_with_spaces() {
        // A line holding only spaces still counts as blank;
        // here the second blank line has 2 spaces
        let input = "Line one\n\n  \n\nLine two";

        let violations =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input)
                .analyze();
        // 3 consecutive blank lines = 2 violations (at the 2nd and 3rd blank)
        assert_eq!(violations.len(), 2);
    }

    #[test]
    fn test_trailing_newline_edge_case() {
        // Covers files that end with newlines, which create an implicit empty
        // line. This was the root cause of the parity issue with markdownlint:
        // markdownlint counts the implicit line created by a trailing newline,
        // but Rust's str.lines() doesn't include it.
        let config = test_config();

        // Single trailing newline: no blank lines, so nothing to report
        let input_single = "Line one\nLine two\n";
        let violations = MultiRuleLinter::new_for_document(
            PathBuf::from("test.md"),
            config.clone(),
            input_single,
        )
        .analyze();
        assert_eq!(
            violations.len(),
            0,
            "Single trailing newline should not violate"
        );

        // Two trailing newlines: one explicit blank + one implicit blank = 2
        // consecutive blanks, which exceeds the maximum of 1
        let input_double = "Line one\nLine two\n\n";
        let violations = MultiRuleLinter::new_for_document(
            PathBuf::from("test.md"),
            config.clone(),
            input_double,
        )
        .analyze();
        assert_eq!(
            violations.len(),
            1,
            "Double trailing newline (two consecutive blanks) should violate"
        );

        // Three trailing newlines: two explicit blanks + one implicit blank = 3
        // consecutive blanks, reported at the 2nd and at the 3rd blank
        let input_triple = "Line one\nLine two\n\n\n";
        let violations =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input_triple)
                .analyze();
        assert_eq!(
            violations.len(),
            2,
            "Triple trailing newline (three consecutive blanks) should create 2 violations"
        );

        for reported in &violations {
            assert_eq!(reported.rule().id, "MD012");
            assert!(reported
                .message()
                .contains("Multiple consecutive blank lines"));
        }
    }

    #[test]
    fn test_beginning_and_end_of_document() {
        let config = test_config();

        // A blank run at the very start of the document is reported
        let input_beginning = "\n\nLine one\nLine two";
        let violations = MultiRuleLinter::new_for_document(
            PathBuf::from("test.md"),
            config.clone(),
            input_beginning,
        )
        .analyze();
        // 2 blank lines = 1 violation (when the 2nd blank line is reached)
        assert_eq!(violations.len(), 1);

        // A blank run at the very end of the document is reported
        let input_end = "Line one\nLine two\n\n\n";
        let violations =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input_end)
                .analyze();
        // 3 blank lines (including the implicit one from trailing newline) = 2 violations
        assert_eq!(violations.len(), 2);
    }
}
522}