rumdl_lib/rules/
md028_no_blanks_blockquote.rs

1/// Rule MD028: No blank lines inside blockquotes
2///
3/// This rule flags blank lines that appear to be inside a blockquote but lack the > marker.
4/// It uses heuristics to distinguish between paragraph breaks within a blockquote
5/// and intentional separators between distinct blockquotes.
6/// See [docs/md028.md](../../docs/md028.md) for full documentation, configuration, and examples.
7use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
8use crate::utils::document_structure::{DocumentStructure, DocumentStructureExtensions};
9use crate::utils::range_utils::{LineIndex, calculate_line_range};
10
/// Rule MD028: reports blank lines that interrupt a blockquote.
///
/// A blank line is reported when the nearest non-blank lines on both sides are
/// blockquote lines with identical leading indentation and the following one is
/// nested at least as deeply as the preceding one. The rule holds no state;
/// every helper is an associated function over the document's lines.
#[derive(Clone)]
pub struct MD028NoBlanksBlockquote;

impl MD028NoBlanksBlockquote {
    /// Returns `true` when the first non-whitespace character of `line` is `>`.
    #[inline]
    fn is_blockquote_line(line: &str) -> bool {
        // `trim_start` only walks the leading whitespace, so no separate
        // "does the line contain '>' at all" pre-check is needed.
        line.trim_start().starts_with('>')
    }

    /// Parses the blockquote prefix of `line`.
    ///
    /// Returns `(level, whitespace_end)` where `whitespace_end` is the byte
    /// length of the leading run of spaces/tabs and `level` is the number of
    /// `>` markers that follow it (markers may be separated by spaces/tabs).
    fn get_blockquote_info(line: &str) -> (usize, usize) {
        let is_indent = |b: u8| b == b' ' || b == b'\t';
        let bytes = line.as_bytes();

        let whitespace_end = bytes.iter().take_while(|&&b| is_indent(b)).count();
        let level = bytes[whitespace_end..]
            .iter()
            .take_while(|&&b| b == b'>' || is_indent(b))
            .filter(|&&b| b == b'>')
            .count();

        (level, whitespace_end)
    }

    /// Walks `indices` in order and returns the first index whose line is a
    /// blockquote line.
    ///
    /// Blank lines are stepped over. The walk gives up (`None`) as soon as a
    /// non-blank, non-blockquote line is met: such content separates two
    /// quotes, so nothing beyond it can belong to the same blockquote and
    /// there is no reason to keep scanning the rest of the document.
    fn adjacent_quote_line(lines: &[&str], indices: impl Iterator<Item = usize>) -> Option<usize> {
        for idx in indices {
            let line = lines[idx];
            if Self::is_blockquote_line(line) {
                return Some(idx);
            }
            if !line.trim().is_empty() {
                return None;
            }
        }
        None
    }

    /// Decides whether the blank line at `blank_idx` sits inside one blockquote.
    ///
    /// All blank lines between two quote lines are treated alike, however many
    /// there are. The blank is considered "inside" when:
    /// 1. only blank lines separate it from a blockquote line on each side,
    /// 2. the following quote is not shallower than the preceding one
    ///    (a deeper one may be a nested continuation), and
    /// 3. both quote lines share the same leading indentation.
    ///
    /// On success returns `(level, indent)` of the preceding quote line, which
    /// is exactly what is needed to build the replacement text.
    fn enclosing_quote<'a>(lines: &[&'a str], blank_idx: usize) -> Option<(usize, &'a str)> {
        let prev_idx = Self::adjacent_quote_line(lines, (0..blank_idx).rev())?;
        let next_idx = Self::adjacent_quote_line(lines, blank_idx + 1..lines.len())?;

        let prev_line = lines[prev_idx];
        let next_line = lines[next_idx];
        let (prev_level, prev_whitespace_end) = Self::get_blockquote_info(prev_line);
        let (next_level, next_whitespace_end) = Self::get_blockquote_info(next_line);

        // Dropping back to a shallower level is treated as a new context.
        if next_level < prev_level {
            return None;
        }

        // The indices only span ASCII spaces/tabs, so slicing cannot split a char.
        let prev_indent = &prev_line[..prev_whitespace_end];
        let next_indent = &next_line[..next_whitespace_end];
        if prev_indent == next_indent {
            Some((prev_level, prev_indent))
        } else {
            None
        }
    }

    /// Checks whether the line at `index` is a blank line inside a blockquote.
    ///
    /// Returns `None` for non-blank lines and for blank lines that merely
    /// separate distinct blockquotes or ordinary content. Otherwise returns
    /// `(level, replacement)` where `level` is the nesting depth of the
    /// preceding quote line and `replacement` is that line's indentation
    /// followed by `level` `>` markers.
    ///
    /// Panics if `index` is out of bounds for `lines`.
    fn is_problematic_blank_line(lines: &[&str], index: usize) -> Option<(usize, String)> {
        if !lines[index].trim().is_empty() {
            return None;
        }

        let (level, indent) = Self::enclosing_quote(lines, index)?;

        let mut fix = String::with_capacity(indent.len() + level);
        fix.push_str(indent);
        fix.extend(std::iter::repeat('>').take(level));
        Some((level, fix))
    }
}
168
169impl Default for MD028NoBlanksBlockquote {
170    fn default() -> Self {
171        Self
172    }
173}
174
175impl Rule for MD028NoBlanksBlockquote {
176    fn name(&self) -> &'static str {
177        "MD028"
178    }
179
180    fn description(&self) -> &'static str {
181        "Blank line inside blockquote"
182    }
183
184    fn as_maybe_document_structure(&self) -> Option<&dyn crate::rule::MaybeDocumentStructure> {
185        Some(self)
186    }
187
188    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
189        // Early return for content without blockquotes
190        if !ctx.content.contains('>') {
191            return Ok(Vec::new());
192        }
193
194        let line_index = LineIndex::new(ctx.content.to_string());
195        let mut warnings = Vec::new();
196
197        // Get all lines
198        let lines: Vec<&str> = ctx.content.lines().collect();
199
200        // Pre-scan to find blank lines and blockquote lines for faster processing
201        let mut blank_line_indices = Vec::new();
202        let mut has_blockquotes = false;
203
204        for (line_idx, line) in lines.iter().enumerate() {
205            // Skip lines in code blocks
206            if line_idx < ctx.lines.len() && ctx.lines[line_idx].in_code_block {
207                continue;
208            }
209
210            if line.trim().is_empty() {
211                blank_line_indices.push(line_idx);
212            } else if Self::is_blockquote_line(line) {
213                has_blockquotes = true;
214            }
215        }
216
217        // If no blockquotes found, no need to check blank lines
218        if !has_blockquotes {
219            return Ok(Vec::new());
220        }
221
222        // Only check blank lines that could be problematic
223        for &line_idx in &blank_line_indices {
224            let line_num = line_idx + 1;
225
226            // Check if this is a problematic blank line inside a blockquote
227            if let Some((level, fix_content)) = Self::is_problematic_blank_line(&lines, line_idx) {
228                let line = lines[line_idx];
229                let (start_line, start_col, end_line, end_col) = calculate_line_range(line_num, line);
230
231                warnings.push(LintWarning {
232                    rule_name: Some(self.name()),
233                    message: format!("Blank line inside blockquote (level {level})"),
234                    line: start_line,
235                    column: start_col,
236                    end_line,
237                    end_column: end_col,
238                    severity: Severity::Warning,
239                    fix: Some(Fix {
240                        range: line_index.line_col_to_byte_range_with_length(line_num, 1, line.len()),
241                        replacement: fix_content,
242                    }),
243                });
244            }
245        }
246
247        Ok(warnings)
248    }
249
250    /// Optimized check using document structure
251    fn check_with_structure(
252        &self,
253        ctx: &crate::lint_context::LintContext,
254        _structure: &DocumentStructure,
255    ) -> LintResult {
256        // Just delegate to the main check method
257        self.check(ctx)
258    }
259
260    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
261        let mut result = Vec::with_capacity(ctx.lines.len());
262        let lines: Vec<&str> = ctx.content.lines().collect();
263
264        for (line_idx, line) in lines.iter().enumerate() {
265            // Check if this blank line needs fixing
266            if let Some((_, fix_content)) = Self::is_problematic_blank_line(&lines, line_idx) {
267                result.push(fix_content);
268            } else {
269                result.push(line.to_string());
270            }
271        }
272
273        Ok(result.join("\n") + if ctx.content.ends_with('\n') { "\n" } else { "" })
274    }
275
276    /// Get the category of this rule for selective processing
277    fn category(&self) -> RuleCategory {
278        RuleCategory::Blockquote
279    }
280
281    /// Check if this rule should be skipped
282    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
283        !ctx.content.contains('>')
284    }
285
286    fn as_any(&self) -> &dyn std::any::Any {
287        self
288    }
289
290    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
291    where
292        Self: Sized,
293    {
294        Box::new(MD028NoBlanksBlockquote)
295    }
296}
297
298impl DocumentStructureExtensions for MD028NoBlanksBlockquote {
299    fn has_relevant_elements(
300        &self,
301        _ctx: &crate::lint_context::LintContext,
302        doc_structure: &DocumentStructure,
303    ) -> bool {
304        !doc_structure.blockquotes.is_empty()
305    }
306}
307
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// Builds a standard-flavor lint context for `content`.
    fn context(content: &str) -> LintContext {
        LintContext::new(content, MarkdownFlavor::Standard)
    }

    /// Runs the rule's `check` over `content` and returns its warnings.
    fn lint(content: &str) -> Vec<LintWarning> {
        let rule = MD028NoBlanksBlockquote;
        rule.check(&context(content)).unwrap()
    }

    /// Runs the rule's `fix` over `content` and returns the fixed text.
    fn apply_fix(content: &str) -> String {
        let rule = MD028NoBlanksBlockquote;
        rule.fix(&context(content)).unwrap()
    }

    #[test]
    fn test_no_blockquotes() {
        let warnings = lint("This is regular text\n\nWith blank lines\n\nBut no blockquotes");
        assert!(warnings.is_empty(), "Should not flag content without blockquotes");
    }

    #[test]
    fn test_valid_blockquote_no_blanks() {
        let warnings = lint("> This is a blockquote\n> With multiple lines\n> But no blank lines");
        assert!(warnings.is_empty(), "Should not flag blockquotes without blank lines");
    }

    #[test]
    fn test_blockquote_with_empty_line_marker() {
        // A line consisting of just `>` is a valid empty quote line.
        let warnings = lint("> First line\n>\n> Third line");
        assert!(warnings.is_empty(), "Should not flag lines with just > marker");
    }

    #[test]
    fn test_blockquote_with_empty_line_marker_and_space() {
        // `>` followed by a space is equally valid.
        let warnings = lint("> First line\n> \n> Third line");
        assert!(warnings.is_empty(), "Should not flag lines with > and space");
    }

    #[test]
    fn test_blank_line_in_blockquote() {
        // A truly blank line (no `>`) between quote lines must be reported.
        let warnings = lint("> First line\n\n> Third line");
        assert_eq!(warnings.len(), 1, "Should flag truly blank line inside blockquote");
        assert_eq!(warnings[0].line, 2);
        assert!(warnings[0].message.contains("Blank line inside blockquote"));
    }

    #[test]
    fn test_multiple_blank_lines() {
        // Every blank line between the two quote lines is reported.
        let warnings = lint("> First\n\n\n> Fourth");
        assert_eq!(warnings.len(), 2, "Should flag each blank line within the blockquote");
        assert_eq!(warnings[0].line, 2);
        assert_eq!(warnings[1].line, 3);
    }

    #[test]
    fn test_nested_blockquote_blank() {
        let warnings = lint(">> Nested quote\n\n>> More nested");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
    }

    #[test]
    fn test_nested_blockquote_with_marker() {
        // A `>>` line is a valid empty nested quote line.
        let warnings = lint(">> Nested quote\n>>\n>> More nested");
        assert!(warnings.is_empty(), "Should not flag lines with >> marker");
    }

    #[test]
    fn test_fix_single_blank() {
        assert_eq!(apply_fix("> First\n\n> Third"), "> First\n>\n> Third");
    }

    #[test]
    fn test_fix_nested_blank() {
        assert_eq!(apply_fix(">> Nested\n\n>> More"), ">> Nested\n>>\n>> More");
    }

    #[test]
    fn test_fix_with_indentation() {
        assert_eq!(
            apply_fix("  > Indented quote\n\n  > More"),
            "  > Indented quote\n  >\n  > More"
        );
    }

    #[test]
    fn test_mixed_levels() {
        // Line 2: blank between `>` and `>>` (level 1 -> 2) counts as inside level 1.
        // Line 4: blank between `>>` and `>` (level 2 -> 1) is NOT inside a blockquote.
        let warnings = lint("> Level 1\n\n>> Level 2\n\n> Level 1 again");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
    }

    #[test]
    fn test_blockquote_with_code_block() {
        // Line 5 carries a `>` marker, so it is not a blank line.
        let warnings = lint("> Quote with code:\n> ```\n> code\n> ```\n>\n> More quote");
        assert!(warnings.is_empty(), "Should not flag line with > marker");
    }

    #[test]
    fn test_category() {
        let rule = MD028NoBlanksBlockquote;
        assert_eq!(rule.category(), RuleCategory::Blockquote);
    }

    #[test]
    fn test_should_skip() {
        let rule = MD028NoBlanksBlockquote;

        let without_quote = context("No blockquotes here");
        assert!(rule.should_skip(&without_quote));

        let with_quote = context("> Has blockquote");
        assert!(!rule.should_skip(&with_quote));
    }

    #[test]
    fn test_empty_content() {
        assert!(lint("").is_empty());
    }

    #[test]
    fn test_blank_after_blockquote() {
        let warnings = lint("> Quote\n\nNot a quote");
        assert!(warnings.is_empty(), "Blank line after blockquote ends is valid");
    }

    #[test]
    fn test_blank_before_blockquote() {
        let warnings = lint("Not a quote\n\n> Quote");
        assert!(warnings.is_empty(), "Blank line before blockquote starts is valid");
    }

    #[test]
    fn test_preserve_trailing_newline() {
        let with_newline = apply_fix("> Quote\n\n> More\n");
        assert!(with_newline.ends_with('\n'));

        let without_newline = apply_fix("> Quote\n\n> More");
        assert!(!without_newline.ends_with('\n'));
    }

    #[test]
    fn test_document_structure_extension() {
        let rule = MD028NoBlanksBlockquote;

        let quoted = "> test";
        assert!(rule.has_relevant_elements(&context(quoted), &DocumentStructure::new(quoted)));

        let plain = "no blockquote";
        assert!(!rule.has_relevant_elements(&context(plain), &DocumentStructure::new(plain)));
    }

    #[test]
    fn test_deeply_nested_blank() {
        let content = ">>> Deep nest\n\n>>> More deep";
        assert_eq!(lint(content).len(), 1);
        assert_eq!(apply_fix(content), ">>> Deep nest\n>>>\n>>> More deep");
    }

    #[test]
    fn test_deeply_nested_with_marker() {
        // A `>>>` line is a valid empty deeply nested quote line.
        let warnings = lint(">>> Deep nest\n>>>\n>>> More deep");
        assert!(warnings.is_empty(), "Should not flag lines with >>> marker");
    }

    #[test]
    fn test_complex_blockquote_structure() {
        // The `>` line is a marker line, not a blank line.
        let warnings = lint("> Level 1\n> > Nested properly\n>\n> Back to level 1");
        assert!(warnings.is_empty(), "Should not flag line with > marker");
    }

    #[test]
    fn test_complex_with_blank() {
        // Going from `> >` back to `>` is a context change, so the blank
        // line between them is not reported.
        let warnings = lint("> Level 1\n> > Nested\n\n> Back to level 1");
        assert_eq!(
            warnings.len(),
            0,
            "Blank between different nesting levels is not inside blockquote"
        );
    }
}