rumdl_lib/rules/
md027_multiple_spaces_blockquote.rs

1use crate::utils::range_utils::{LineIndex, calculate_match_range};
2
3use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
4use lazy_static::lazy_static;
5use regex::Regex;
6
lazy_static! {
    // Matches a quote line with two or more whitespace characters after a single `>`:
    // group 1 = leading indent, group 2 = the whitespace run, group 3 = the remaining text.
    // NOTE(review): not referenced in the visible part of this file — confirm it is still needed.
    static ref BLOCKQUOTE_MULTIPLE_SPACES: Regex = Regex::new(r"^(\s*)>(\s{2,})(.*)$").unwrap();

    // Patterns for malformed blockquote attempts where the user's intent is clear.
    // Each entry pairs a regex (group 1 = indent, group 2 = text after the markers) with a tag;
    // `extract_blockquote_fix_from_match` dispatches on the tag string, so the two must stay in sync.
    static ref MALFORMED_BLOCKQUOTE_PATTERNS: Vec<(Regex, &'static str)> = vec![
        // Two markers immediately followed by text: >>text
        (Regex::new(r"^(\s*)>>([^\s>].*|$)").unwrap(), "missing spaces in nested blockquote"),

        // Three markers immediately followed by text: >>>text
        (Regex::new(r"^(\s*)>>>([^\s>].*|$)").unwrap(), "missing spaces in deeply nested blockquote"),

        // Marker, whitespace, then a second marker immediately followed by text: > >text
        (Regex::new(r"^(\s*)>\s+>([^\s>].*|$)").unwrap(), "extra blockquote marker"),

        // Four or more whitespace characters, then a marker immediately followed by text: (spaces)>text
        (Regex::new(r"^(\s{4,})>([^\s].*|$)").unwrap(), "indented blockquote missing space"),
    ];
}
26
/// Rule MD027: no multiple spaces after the blockquote marker (`>`).
///
/// See [docs/md027.md](../../docs/md027.md) for full documentation, configuration, and examples.
#[derive(Clone, Debug, Default)]
pub struct MD027MultipleSpacesBlockquote;
33
34impl Rule for MD027MultipleSpacesBlockquote {
35    fn name(&self) -> &'static str {
36        "MD027"
37    }
38
39    fn description(&self) -> &'static str {
40        "Multiple spaces after quote marker (>)"
41    }
42
43    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
44        let mut warnings = Vec::new();
45
46        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
47            let line_num = line_idx + 1;
48
49            // Skip lines in code blocks
50            if line_info.in_code_block {
51                continue;
52            }
53
54            // Check if this line is a blockquote using cached info
55            if let Some(blockquote) = &line_info.blockquote {
56                // Part 1: Check for multiple spaces after the blockquote marker
57                if blockquote.has_multiple_spaces_after_marker {
58                    // Find where the extra spaces start in the line
59                    // We need to find the position after the markers and first space/tab
60                    let mut byte_pos = 0;
61                    let mut found_markers = 0;
62                    let mut found_first_space = false;
63
64                    for (i, ch) in line_info.content.char_indices() {
65                        if found_markers < blockquote.nesting_level {
66                            if ch == '>' {
67                                found_markers += 1;
68                            }
69                        } else if !found_first_space && (ch == ' ' || ch == '\t') {
70                            // This is the first space/tab after markers
71                            found_first_space = true;
72                        } else if found_first_space && (ch == ' ' || ch == '\t') {
73                            // This is where extra spaces start
74                            byte_pos = i;
75                            break;
76                        }
77                    }
78
79                    // Count how many extra spaces/tabs there are
80                    let extra_spaces_bytes = line_info.content[byte_pos..]
81                        .chars()
82                        .take_while(|&c| c == ' ' || c == '\t')
83                        .fold(0, |acc, ch| acc + ch.len_utf8());
84
85                    if extra_spaces_bytes > 0 {
86                        let (start_line, start_col, end_line, end_col) =
87                            calculate_match_range(line_num, &line_info.content, byte_pos, extra_spaces_bytes);
88
89                        warnings.push(LintWarning {
90                            rule_name: Some(self.name()),
91                            line: start_line,
92                            column: start_col,
93                            end_line,
94                            end_column: end_col,
95                            message: "Multiple spaces after quote marker (>)".to_string(),
96                            severity: Severity::Warning,
97                            fix: Some(Fix {
98                                range: {
99                                    let line_index = LineIndex::new(ctx.content.to_string());
100                                    let start_byte = line_index.line_col_to_byte_range(line_num, start_col).start;
101                                    let end_byte = line_index.line_col_to_byte_range(line_num, end_col).start;
102                                    start_byte..end_byte
103                                },
104                                replacement: "".to_string(), // Remove the extra spaces
105                            }),
106                        });
107                    }
108                }
109            } else {
110                // Part 2: Check for malformed blockquote attempts on non-blockquote lines
111                let malformed_attempts = self.detect_malformed_blockquote_attempts(&line_info.content);
112                for (start, len, fixed_line, description) in malformed_attempts {
113                    let (start_line, start_col, end_line, end_col) =
114                        calculate_match_range(line_num, &line_info.content, start, len);
115
116                    warnings.push(LintWarning {
117                        rule_name: Some(self.name()),
118                        line: start_line,
119                        column: start_col,
120                        end_line,
121                        end_column: end_col,
122                        message: format!("Malformed quote: {description}"),
123                        severity: Severity::Warning,
124                        fix: Some(Fix {
125                            range: {
126                                let line_index = LineIndex::new(ctx.content.to_string());
127                                line_index.line_col_to_byte_range(line_num, 1)
128                            },
129                            replacement: fixed_line,
130                        }),
131                    });
132                }
133            }
134        }
135
136        Ok(warnings)
137    }
138
139    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
140        let mut result = Vec::with_capacity(ctx.lines.len());
141
142        for line_info in &ctx.lines {
143            if let Some(blockquote) = &line_info.blockquote {
144                // Fix blockquotes with multiple spaces after the marker
145                if blockquote.has_multiple_spaces_after_marker {
146                    // Rebuild the line with exactly one space after the markers
147                    let fixed_line = format!(
148                        "{}{} {}",
149                        blockquote.indent,
150                        ">".repeat(blockquote.nesting_level),
151                        blockquote.content
152                    );
153                    result.push(fixed_line);
154                } else {
155                    result.push(line_info.content.clone());
156                }
157            } else {
158                // Check for malformed blockquote attempts
159                let malformed_attempts = self.detect_malformed_blockquote_attempts(&line_info.content);
160                if !malformed_attempts.is_empty() {
161                    // Use the first fix (there should only be one per line)
162                    let (_, _, fixed_line, _) = &malformed_attempts[0];
163                    result.push(fixed_line.clone());
164                } else {
165                    result.push(line_info.content.clone());
166                }
167            }
168        }
169
170        // Preserve trailing newline if original content had one
171        Ok(result.join("\n") + if ctx.content.ends_with('\n') { "\n" } else { "" })
172    }
173
174    fn as_any(&self) -> &dyn std::any::Any {
175        self
176    }
177
178    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
179    where
180        Self: Sized,
181    {
182        Box::new(MD027MultipleSpacesBlockquote)
183    }
184
185    /// Check if this rule should be skipped
186    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
187        ctx.content.is_empty() || !ctx.content.contains('>')
188    }
189}
190
191impl MD027MultipleSpacesBlockquote {
192    /// Detect malformed blockquote attempts where user intent is clear
193    fn detect_malformed_blockquote_attempts(&self, line: &str) -> Vec<(usize, usize, String, String)> {
194        let mut results = Vec::new();
195
196        for (pattern, issue_type) in MALFORMED_BLOCKQUOTE_PATTERNS.iter() {
197            if let Some(cap) = pattern.captures(line) {
198                let match_obj = cap.get(0).unwrap();
199                let start = match_obj.start();
200                let len = match_obj.len();
201
202                // Extract potential blockquote components
203                if let Some((fixed_line, description)) = self.extract_blockquote_fix_from_match(&cap, issue_type, line)
204                {
205                    // Only proceed if this looks like a genuine blockquote attempt
206                    if self.looks_like_blockquote_attempt(line, &fixed_line) {
207                        results.push((start, len, fixed_line, description));
208                    }
209                }
210            }
211        }
212
213        results
214    }
215
216    /// Extract the proper blockquote format from a malformed match
217    fn extract_blockquote_fix_from_match(
218        &self,
219        cap: &regex::Captures,
220        issue_type: &str,
221        _original_line: &str,
222    ) -> Option<(String, String)> {
223        match issue_type {
224            "missing spaces in nested blockquote" => {
225                // >>text -> > > text
226                let indent = cap.get(1).map_or("", |m| m.as_str());
227                let content = cap.get(2).map_or("", |m| m.as_str());
228                Some((
229                    format!("{}> > {}", indent, content.trim()),
230                    "Missing spaces in nested blockquote".to_string(),
231                ))
232            }
233            "missing spaces in deeply nested blockquote" => {
234                // >>>text -> > > > text
235                let indent = cap.get(1).map_or("", |m| m.as_str());
236                let content = cap.get(2).map_or("", |m| m.as_str());
237                Some((
238                    format!("{}> > > {}", indent, content.trim()),
239                    "Missing spaces in deeply nested blockquote".to_string(),
240                ))
241            }
242            "extra blockquote marker" => {
243                // > >text -> > text
244                let indent = cap.get(1).map_or("", |m| m.as_str());
245                let content = cap.get(2).map_or("", |m| m.as_str());
246                Some((
247                    format!("{}> {}", indent, content.trim()),
248                    "Extra blockquote marker".to_string(),
249                ))
250            }
251            "indented blockquote missing space" => {
252                // (spaces)>text -> (spaces)> text
253                let indent = cap.get(1).map_or("", |m| m.as_str());
254                let content = cap.get(2).map_or("", |m| m.as_str());
255                Some((
256                    format!("{}> {}", indent, content.trim()),
257                    "Indented blockquote missing space".to_string(),
258                ))
259            }
260            _ => None,
261        }
262    }
263
264    /// Check if the pattern looks like a genuine blockquote attempt
265    fn looks_like_blockquote_attempt(&self, original: &str, fixed: &str) -> bool {
266        // Basic heuristics to avoid false positives
267
268        // 1. Content should not be too short (avoid flagging things like ">>>" alone)
269        let trimmed_original = original.trim();
270        if trimmed_original.len() < 5 {
271            // More restrictive
272            return false;
273        }
274
275        // 2. Should contain some text content after the markers
276        let content_after_markers = trimmed_original.trim_start_matches('>').trim_start_matches(' ');
277        if content_after_markers.is_empty() || content_after_markers.len() < 3 {
278            // More restrictive
279            return false;
280        }
281
282        // 3. Content should contain some alphabetic characters (not just symbols)
283        if !content_after_markers.chars().any(|c| c.is_alphabetic()) {
284            return false;
285        }
286
287        // 4. Fixed version should actually be a valid blockquote
288        // Check if it starts with optional whitespace followed by >
289        let blockquote_pattern = regex::Regex::new(r"^\s*>").unwrap();
290        if !blockquote_pattern.is_match(fixed) {
291            return false;
292        }
293
294        // 5. Avoid flagging things that might be code or special syntax
295        if content_after_markers.starts_with('#') // Headers
296            || content_after_markers.starts_with('[') // Links
297            || content_after_markers.starts_with('`') // Code
298            || content_after_markers.starts_with("http") // URLs
299            || content_after_markers.starts_with("www.") // URLs
300            || content_after_markers.starts_with("ftp")
301        // URLs
302        {
303            return false;
304        }
305
306        // 6. Content should look like prose, not code or markup
307        let word_count = content_after_markers.split_whitespace().count();
308        if word_count < 3 {
309            // Should be at least 3 words to look like prose
310            return false;
311        }
312
313        true
314    }
315}
316
// Unit tests for MD027: whitespace after blockquote markers, tab handling, the fixer, and the
// private malformed-attempt helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    // Well-formed single-level and nested quotes produce no warnings.
    #[test]
    fn test_valid_blockquote() {
        let rule = MD027MultipleSpacesBlockquote;
        let content = "> This is a blockquote\n> > Nested quote";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty(), "Valid blockquotes should not be flagged");
    }

    // Two and three spaces after `>` are each reported, starting at the first extra space.
    #[test]
    fn test_multiple_spaces_after_marker() {
        let rule = MD027MultipleSpacesBlockquote;
        let content = ">  This has two spaces\n>   This has three spaces";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 1);
        assert_eq!(result[0].column, 3); // Points to the extra space (after > and first space)
        assert_eq!(result[0].message, "Multiple spaces after quote marker (>)");
        assert_eq!(result[1].line, 2);
        assert_eq!(result[1].column, 3);
    }

    #[test]
    fn test_nested_multiple_spaces() {
        let rule = MD027MultipleSpacesBlockquote;
        // Extra spaces are reported both after a single `>` and after a `>>` nested marker
        let content = ">  Two spaces after marker\n>>  Two spaces in nested blockquote";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 2);
        assert!(result[0].message.contains("Multiple spaces"));
        assert!(result[1].message.contains("Multiple spaces"));
    }

    #[test]
    fn test_malformed_nested_quote() {
        let rule = MD027MultipleSpacesBlockquote;
        // LintContext sees >>text as a valid nested blockquote with no space after marker,
        // so MD027 reports nothing for it (neither as malformed nor as multiple spaces)
        let content = ">>This is a nested blockquote without space after markers";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        // This should not be flagged at all since >>text is valid CommonMark
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_malformed_deeply_nested() {
        let rule = MD027MultipleSpacesBlockquote;
        // LintContext sees >>>text as a valid triple-nested blockquote
        let content = ">>>This is deeply nested without spaces after markers";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        // This should not be flagged - >>>text is valid CommonMark
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_extra_quote_marker() {
        let rule = MD027MultipleSpacesBlockquote;
        // "> >text" is parsed as single-level blockquote with ">text" as content
        // This is valid CommonMark and not detected as malformed
        let content = "> >This looks like nested but is actually single level with >This as content";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_indented_missing_space() {
        let rule = MD027MultipleSpacesBlockquote;
        // Three spaces of indent still leaves this a blockquote
        // (four or more would make it an indented code block instead)
        let content = "   >This has 3 spaces indent and no space after marker";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        // LintContext sees this as a blockquote with no space after marker
        // MD027 doesn't flag this as malformed
        assert_eq!(result.len(), 0);
    }

    // The fixer collapses any run of spaces after `>` to exactly one.
    #[test]
    fn test_fix_multiple_spaces() {
        let rule = MD027MultipleSpacesBlockquote;
        let content = ">  Two spaces\n>   Three spaces";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, "> Two spaces\n> Three spaces");
    }

    #[test]
    fn test_fix_malformed_quotes() {
        let rule = MD027MultipleSpacesBlockquote;
        // These are valid nested blockquotes, not malformed
        let content = ">>Nested without spaces\n>>>Deeply nested without spaces";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let fixed = rule.fix(&ctx).unwrap();
        // No fix needed - these are valid
        assert_eq!(fixed, content);
    }

    #[test]
    fn test_fix_extra_marker() {
        let rule = MD027MultipleSpacesBlockquote;
        // This is valid - single blockquote with >Extra as content
        let content = "> >Extra marker here";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let fixed = rule.fix(&ctx).unwrap();
        // No fix needed
        assert_eq!(fixed, content);
    }

    // Quote-like lines inside a fenced code block are never reported by `check`.
    #[test]
    fn test_code_block_ignored() {
        let rule = MD027MultipleSpacesBlockquote;
        let content = "```\n>  This is in a code block\n```";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty(), "Code blocks should be ignored");
    }

    // Lines consisting only of markers are not reported.
    #[test]
    fn test_short_content_not_flagged() {
        let rule = MD027MultipleSpacesBlockquote;
        let content = ">>>\n>>";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty(), "Very short content should not be flagged");
    }

    // Header-, link-, code- and URL-like content after `>>` is not reported.
    #[test]
    fn test_non_prose_not_flagged() {
        let rule = MD027MultipleSpacesBlockquote;
        let content = ">>#header\n>>[link]\n>>`code`\n>>http://example.com";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty(), "Non-prose content should not be flagged");
    }

    // The fixer keeps a trailing newline when present and does not invent one otherwise.
    #[test]
    fn test_preserve_trailing_newline() {
        let rule = MD027MultipleSpacesBlockquote;
        let content = ">  Two spaces\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, "> Two spaces\n");

        let content_no_newline = ">  Two spaces";
        let ctx2 = LintContext::new(content_no_newline, crate::config::MarkdownFlavor::Standard);
        let fixed2 = rule.fix(&ctx2).unwrap();
        assert_eq!(fixed2, "> Two spaces");
    }

    // Only the line with extra spaces is reported when mixed with valid quote lines.
    #[test]
    fn test_mixed_issues() {
        let rule = MD027MultipleSpacesBlockquote;
        let content = ">  Multiple spaces here\n>>Normal nested quote\n> Normal quote";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1, "Should only flag the multiple spaces");
        assert_eq!(result[0].line, 1);
    }

    // Exercises the prose heuristics directly, bypassing LintContext.
    #[test]
    fn test_looks_like_blockquote_attempt() {
        let rule = MD027MultipleSpacesBlockquote;

        // Should return true for genuine attempts
        assert!(rule.looks_like_blockquote_attempt(
            ">>This is a real blockquote attempt with text",
            "> > This is a real blockquote attempt with text"
        ));

        // Should return false for too short
        assert!(!rule.looks_like_blockquote_attempt(">>>", "> > >"));

        // Should return false for no alphabetic content
        assert!(!rule.looks_like_blockquote_attempt(">>123", "> > 123"));

        // Should return false for code-like content
        assert!(!rule.looks_like_blockquote_attempt(">>#header", "> > #header"));
    }

    // Exercises the repair builder directly with a hand-built capture.
    #[test]
    fn test_extract_blockquote_fix() {
        let rule = MD027MultipleSpacesBlockquote;
        let regex = Regex::new(r"^(\s*)>>([^\s>].*|$)").unwrap();
        let cap = regex.captures(">>content").unwrap();

        let result = rule.extract_blockquote_fix_from_match(&cap, "missing spaces in nested blockquote", ">>content");
        assert!(result.is_some());
        let (fixed, desc) = result.unwrap();
        assert_eq!(fixed, "> > content");
        assert!(desc.contains("Missing spaces"));
    }

    #[test]
    fn test_empty_blockquote() {
        let rule = MD027MultipleSpacesBlockquote;
        let content = ">\n>  \n> content";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        // Empty blockquotes with multiple spaces should still be flagged
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    // The fixer keeps the indent that precedes the marker.
    #[test]
    fn test_fix_preserves_indentation() {
        let rule = MD027MultipleSpacesBlockquote;
        let content = "  >  Indented with multiple spaces";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, "  > Indented with multiple spaces");
    }

    #[test]
    fn test_tabs_after_marker() {
        let rule = MD027MultipleSpacesBlockquote;
        // Tab after marker - should be flagged as multiple spaces
        let content = ">\tTab after marker";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1, "Tab after marker should be flagged");
        assert_eq!(result[0].message, "Multiple spaces after quote marker (>)");

        // Tab followed by a space after marker
        let content2 = ">\t Space then tab";
        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard);
        let result2 = rule.check(&ctx2).unwrap();
        assert_eq!(result2.len(), 1, "Tab and space should be flagged");

        // Two tabs after marker
        let content3 = ">\t\tTwo tabs";
        let ctx3 = LintContext::new(content3, crate::config::MarkdownFlavor::Standard);
        let result3 = rule.check(&ctx3).unwrap();
        assert_eq!(result3.len(), 1, "Two tabs should be flagged");
    }

    #[test]
    fn test_mixed_spaces_and_tabs() {
        let rule = MD027MultipleSpacesBlockquote;
        // Space then tab
        let content = "> \tSpace then tab";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].column, 3); // Points to the tab

        // Tab then space
        let content2 = ">\t Tab then space";
        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard);
        let result2 = rule.check(&ctx2).unwrap();
        assert_eq!(result2.len(), 1);
        assert_eq!(result2[0].column, 3); // Points to the space after tab
    }

    #[test]
    fn test_fix_tabs() {
        let rule = MD027MultipleSpacesBlockquote;
        // Fix should replace the tabs with a single space
        let content = ">\t\tTwo tabs";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, "> Two tabs");

        // Fix mixed spaces and tabs
        let content2 = "> \t Mixed";
        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard);
        let fixed2 = rule.fix(&ctx2).unwrap();
        assert_eq!(fixed2, "> Mixed");
    }
}