rumdl_lib/rules/
md027_multiple_spaces_blockquote.rs

1use crate::utils::range_utils::{LineIndex, calculate_match_range};
2
3use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
4use lazy_static::lazy_static;
5use regex::Regex;
6
7lazy_static! {
8    // Pattern to match quote lines with multiple spaces after >
9    static ref BLOCKQUOTE_MULTIPLE_SPACES: Regex = Regex::new(r"^(\s*)>(\s{2,})(.*)$").unwrap();
10
11    // New patterns for detecting malformed blockquote attempts where user intent is clear
12    static ref MALFORMED_BLOCKQUOTE_PATTERNS: Vec<(Regex, &'static str)> = vec![
13        // Double > without space: >>text (looks like nested but missing spaces)
14        (Regex::new(r"^(\s*)>>([^\s>].*|$)").unwrap(), "missing spaces in nested blockquote"),
15
16        // Triple > without space: >>>text
17        (Regex::new(r"^(\s*)>>>([^\s>].*|$)").unwrap(), "missing spaces in deeply nested blockquote"),
18
19        // Space then > then text: > >text (extra > by mistake)
20        (Regex::new(r"^(\s*)>\s+>([^\s>].*|$)").unwrap(), "extra blockquote marker"),
21
22        // Multiple spaces then >: (spaces)>text (indented blockquote without space)
23        (Regex::new(r"^(\s{4,})>([^\s].*|$)").unwrap(), "indented blockquote missing space"),
24    ];
25}
26
/// Rule MD027: No multiple spaces after blockquote symbol
///
/// Reports blockquote lines where more than one whitespace character follows
/// the `>` marker(s), and lines that look like blockquote attempts with
/// missing or misplaced spaces around the markers.
///
/// The rule carries no configuration or state, so it is a unit struct.
///
/// See [docs/md027.md](../../docs/md027.md) for full documentation, configuration, and examples.
#[derive(Debug, Default, Clone)]
pub struct MD027MultipleSpacesBlockquote;
33
34impl Rule for MD027MultipleSpacesBlockquote {
35    fn name(&self) -> &'static str {
36        "MD027"
37    }
38
39    fn description(&self) -> &'static str {
40        "Multiple spaces after quote marker (>)"
41    }
42
43    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
44        let mut warnings = Vec::new();
45
46        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
47            let line_num = line_idx + 1;
48
49            // Skip lines in code blocks
50            if line_info.in_code_block {
51                continue;
52            }
53
54            // Check if this line is a blockquote using cached info
55            if let Some(blockquote) = &line_info.blockquote {
56                // Part 1: Check for multiple spaces after the blockquote marker
57                if blockquote.has_multiple_spaces_after_marker {
58                    // Find where the extra spaces start in the line
59                    // We need to find the position after the markers and first space/tab
60                    let mut byte_pos = 0;
61                    let mut found_markers = 0;
62                    let mut found_first_space = false;
63
64                    for (i, ch) in line_info.content.char_indices() {
65                        if found_markers < blockquote.nesting_level {
66                            if ch == '>' {
67                                found_markers += 1;
68                            }
69                        } else if !found_first_space && (ch == ' ' || ch == '\t') {
70                            // This is the first space/tab after markers
71                            found_first_space = true;
72                        } else if found_first_space && (ch == ' ' || ch == '\t') {
73                            // This is where extra spaces start
74                            byte_pos = i;
75                            break;
76                        }
77                    }
78
79                    // Count how many extra spaces/tabs there are
80                    let extra_spaces_bytes = line_info.content[byte_pos..]
81                        .chars()
82                        .take_while(|&c| c == ' ' || c == '\t')
83                        .fold(0, |acc, ch| acc + ch.len_utf8());
84
85                    if extra_spaces_bytes > 0 {
86                        let (start_line, start_col, end_line, end_col) =
87                            calculate_match_range(line_num, &line_info.content, byte_pos, extra_spaces_bytes);
88
89                        warnings.push(LintWarning {
90                            rule_name: Some(self.name().to_string()),
91                            line: start_line,
92                            column: start_col,
93                            end_line,
94                            end_column: end_col,
95                            message: "Multiple spaces after quote marker (>)".to_string(),
96                            severity: Severity::Warning,
97                            fix: Some(Fix {
98                                range: {
99                                    let line_index = LineIndex::new(ctx.content.to_string());
100                                    let start_byte = line_index.line_col_to_byte_range(line_num, start_col).start;
101                                    let end_byte = line_index.line_col_to_byte_range(line_num, end_col).start;
102                                    start_byte..end_byte
103                                },
104                                replacement: "".to_string(), // Remove the extra spaces
105                            }),
106                        });
107                    }
108                }
109            } else {
110                // Part 2: Check for malformed blockquote attempts on non-blockquote lines
111                let malformed_attempts = self.detect_malformed_blockquote_attempts(&line_info.content);
112                for (start, len, fixed_line, description) in malformed_attempts {
113                    let (start_line, start_col, end_line, end_col) =
114                        calculate_match_range(line_num, &line_info.content, start, len);
115
116                    warnings.push(LintWarning {
117                        rule_name: Some(self.name().to_string()),
118                        line: start_line,
119                        column: start_col,
120                        end_line,
121                        end_column: end_col,
122                        message: format!("Malformed quote: {description}"),
123                        severity: Severity::Warning,
124                        fix: Some(Fix {
125                            range: {
126                                let line_index = LineIndex::new(ctx.content.to_string());
127                                line_index.line_col_to_byte_range(line_num, 1)
128                            },
129                            replacement: fixed_line,
130                        }),
131                    });
132                }
133            }
134        }
135
136        Ok(warnings)
137    }
138
139    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
140        let mut result = Vec::with_capacity(ctx.lines.len());
141
142        for line_info in &ctx.lines {
143            if let Some(blockquote) = &line_info.blockquote {
144                // Fix blockquotes with multiple spaces after the marker
145                if blockquote.has_multiple_spaces_after_marker {
146                    // Rebuild the line with exactly one space after the markers
147                    // But don't add a space if the content is empty to avoid MD009 conflicts
148                    let fixed_line = if blockquote.content.is_empty() {
149                        format!("{}{}", blockquote.indent, ">".repeat(blockquote.nesting_level))
150                    } else {
151                        format!(
152                            "{}{} {}",
153                            blockquote.indent,
154                            ">".repeat(blockquote.nesting_level),
155                            blockquote.content
156                        )
157                    };
158                    result.push(fixed_line);
159                } else {
160                    result.push(line_info.content.clone());
161                }
162            } else {
163                // Check for malformed blockquote attempts
164                let malformed_attempts = self.detect_malformed_blockquote_attempts(&line_info.content);
165                if !malformed_attempts.is_empty() {
166                    // Use the first fix (there should only be one per line)
167                    let (_, _, fixed_line, _) = &malformed_attempts[0];
168                    result.push(fixed_line.clone());
169                } else {
170                    result.push(line_info.content.clone());
171                }
172            }
173        }
174
175        // Preserve trailing newline if original content had one
176        Ok(result.join("\n") + if ctx.content.ends_with('\n') { "\n" } else { "" })
177    }
178
179    fn as_any(&self) -> &dyn std::any::Any {
180        self
181    }
182
183    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
184    where
185        Self: Sized,
186    {
187        Box::new(MD027MultipleSpacesBlockquote)
188    }
189
190    /// Check if this rule should be skipped
191    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
192        ctx.content.is_empty() || !ctx.likely_has_blockquotes()
193    }
194}
195
196impl MD027MultipleSpacesBlockquote {
197    /// Detect malformed blockquote attempts where user intent is clear
198    fn detect_malformed_blockquote_attempts(&self, line: &str) -> Vec<(usize, usize, String, String)> {
199        let mut results = Vec::new();
200
201        for (pattern, issue_type) in MALFORMED_BLOCKQUOTE_PATTERNS.iter() {
202            if let Some(cap) = pattern.captures(line) {
203                let match_obj = cap.get(0).unwrap();
204                let start = match_obj.start();
205                let len = match_obj.len();
206
207                // Extract potential blockquote components
208                if let Some((fixed_line, description)) = self.extract_blockquote_fix_from_match(&cap, issue_type, line)
209                {
210                    // Only proceed if this looks like a genuine blockquote attempt
211                    if self.looks_like_blockquote_attempt(line, &fixed_line) {
212                        results.push((start, len, fixed_line, description));
213                    }
214                }
215            }
216        }
217
218        results
219    }
220
221    /// Extract the proper blockquote format from a malformed match
222    fn extract_blockquote_fix_from_match(
223        &self,
224        cap: &regex::Captures,
225        issue_type: &str,
226        _original_line: &str,
227    ) -> Option<(String, String)> {
228        match issue_type {
229            "missing spaces in nested blockquote" => {
230                // >>text -> > > text
231                let indent = cap.get(1).map_or("", |m| m.as_str());
232                let content = cap.get(2).map_or("", |m| m.as_str());
233                Some((
234                    format!("{}> > {}", indent, content.trim()),
235                    "Missing spaces in nested blockquote".to_string(),
236                ))
237            }
238            "missing spaces in deeply nested blockquote" => {
239                // >>>text -> > > > text
240                let indent = cap.get(1).map_or("", |m| m.as_str());
241                let content = cap.get(2).map_or("", |m| m.as_str());
242                Some((
243                    format!("{}> > > {}", indent, content.trim()),
244                    "Missing spaces in deeply nested blockquote".to_string(),
245                ))
246            }
247            "extra blockquote marker" => {
248                // > >text -> > text
249                let indent = cap.get(1).map_or("", |m| m.as_str());
250                let content = cap.get(2).map_or("", |m| m.as_str());
251                Some((
252                    format!("{}> {}", indent, content.trim()),
253                    "Extra blockquote marker".to_string(),
254                ))
255            }
256            "indented blockquote missing space" => {
257                // (spaces)>text -> (spaces)> text
258                let indent = cap.get(1).map_or("", |m| m.as_str());
259                let content = cap.get(2).map_or("", |m| m.as_str());
260                Some((
261                    format!("{}> {}", indent, content.trim()),
262                    "Indented blockquote missing space".to_string(),
263                ))
264            }
265            _ => None,
266        }
267    }
268
269    /// Check if the pattern looks like a genuine blockquote attempt
270    fn looks_like_blockquote_attempt(&self, original: &str, fixed: &str) -> bool {
271        // Basic heuristics to avoid false positives
272
273        // 1. Content should not be too short (avoid flagging things like ">>>" alone)
274        let trimmed_original = original.trim();
275        if trimmed_original.len() < 5 {
276            // More restrictive
277            return false;
278        }
279
280        // 2. Should contain some text content after the markers
281        let content_after_markers = trimmed_original.trim_start_matches('>').trim_start_matches(' ');
282        if content_after_markers.is_empty() || content_after_markers.len() < 3 {
283            // More restrictive
284            return false;
285        }
286
287        // 3. Content should contain some alphabetic characters (not just symbols)
288        if !content_after_markers.chars().any(|c| c.is_alphabetic()) {
289            return false;
290        }
291
292        // 4. Fixed version should actually be a valid blockquote
293        // Check if it starts with optional whitespace followed by >
294        let blockquote_pattern = regex::Regex::new(r"^\s*>").unwrap();
295        if !blockquote_pattern.is_match(fixed) {
296            return false;
297        }
298
299        // 5. Avoid flagging things that might be code or special syntax
300        if content_after_markers.starts_with('#') // Headers
301            || content_after_markers.starts_with('[') // Links
302            || content_after_markers.starts_with('`') // Code
303            || content_after_markers.starts_with("http") // URLs
304            || content_after_markers.starts_with("www.") // URLs
305            || content_after_markers.starts_with("ftp")
306        // URLs
307        {
308            return false;
309        }
310
311        // 6. Content should look like prose, not code or markup
312        let word_count = content_after_markers.split_whitespace().count();
313        if word_count < 3 {
314            // Should be at least 3 words to look like prose
315            return false;
316        }
317
318        true
319    }
320}
321
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Runs the rule's `check` on `content` with the standard Markdown flavor.
    fn lint(content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD027MultipleSpacesBlockquote.check(&ctx).unwrap()
    }

    /// Runs the rule's `fix` on `content` with the standard Markdown flavor.
    fn apply_fix(content: &str) -> String {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD027MultipleSpacesBlockquote.fix(&ctx).unwrap()
    }

    #[test]
    fn test_valid_blockquote() {
        let result = lint("> This is a blockquote\n> > Nested quote");
        assert!(result.is_empty(), "Valid blockquotes should not be flagged");
    }

    #[test]
    fn test_multiple_spaces_after_marker() {
        let result = lint(">  This has two spaces\n>   This has three spaces");
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 1);
        assert_eq!(result[0].column, 3); // First surplus space (after `>` and the separator)
        assert_eq!(result[0].message, "Multiple spaces after quote marker (>)");
        assert_eq!(result[1].line, 2);
        assert_eq!(result[1].column, 3);
    }

    #[test]
    fn test_nested_multiple_spaces() {
        // Two spaces after the marker(s), once at a single level and once after `>>`
        let result = lint(">  Two spaces after marker\n>>  Two spaces in nested blockquote");
        assert_eq!(result.len(), 2);
        assert!(result[0].message.contains("Multiple spaces"));
        assert!(result[1].message.contains("Multiple spaces"));
    }

    #[test]
    fn test_malformed_nested_quote() {
        // `>>text` is a valid nested blockquote in CommonMark, so nothing is reported
        let result = lint(">>This is a nested blockquote without space after markers");
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_malformed_deeply_nested() {
        // `>>>text` is a valid triple-nested blockquote in CommonMark
        let result = lint(">>>This is deeply nested without spaces after markers");
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_extra_quote_marker() {
        // `> >text` is already a blockquote line, so the malformed-pattern branch never runs
        let result = lint("> >This looks like nested but is actually single level with >This as content");
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_indented_missing_space() {
        // Three spaces of indentation: one short of the four that would start
        // an indented code block, so this is still a blockquote line.
        let result = lint("   >This has 3 spaces indent and no space after marker");
        // A missing space after the marker is not something MD027 reports
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_fix_multiple_spaces() {
        let fixed = apply_fix(">  Two spaces\n>   Three spaces");
        assert_eq!(fixed, "> Two spaces\n> Three spaces");
    }

    #[test]
    fn test_fix_malformed_quotes() {
        // Valid nested blockquotes: the fix must return them unchanged
        let content = ">>Nested without spaces\n>>>Deeply nested without spaces";
        assert_eq!(apply_fix(content), content);
    }

    #[test]
    fn test_fix_extra_marker() {
        // Already a blockquote line without surplus spaces: no fix needed
        let content = "> >Extra marker here";
        assert_eq!(apply_fix(content), content);
    }

    #[test]
    fn test_code_block_ignored() {
        let result = lint("```\n>  This is in a code block\n```");
        assert!(result.is_empty(), "Code blocks should be ignored");
    }

    #[test]
    fn test_short_content_not_flagged() {
        let result = lint(">>>\n>>");
        assert!(result.is_empty(), "Very short content should not be flagged");
    }

    #[test]
    fn test_non_prose_not_flagged() {
        let result = lint(">>#header\n>>[link]\n>>`code`\n>>http://example.com");
        assert!(result.is_empty(), "Non-prose content should not be flagged");
    }

    #[test]
    fn test_preserve_trailing_newline() {
        // With a trailing newline in the input
        assert_eq!(apply_fix(">  Two spaces\n"), "> Two spaces\n");

        // Without one
        assert_eq!(apply_fix(">  Two spaces"), "> Two spaces");
    }

    #[test]
    fn test_mixed_issues() {
        let result = lint(">  Multiple spaces here\n>>Normal nested quote\n> Normal quote");
        assert_eq!(result.len(), 1, "Should only flag the multiple spaces");
        assert_eq!(result[0].line, 1);
    }

    #[test]
    fn test_looks_like_blockquote_attempt() {
        let rule = MD027MultipleSpacesBlockquote;

        // Genuine attempt: long enough, alphabetic, several words
        assert!(rule.looks_like_blockquote_attempt(
            ">>This is a real blockquote attempt with text",
            "> > This is a real blockquote attempt with text"
        ));

        // Too short
        assert!(!rule.looks_like_blockquote_attempt(">>>", "> > >"));

        // No alphabetic content
        assert!(!rule.looks_like_blockquote_attempt(">>123", "> > 123"));

        // Markup-like content
        assert!(!rule.looks_like_blockquote_attempt(">>#header", "> > #header"));
    }

    #[test]
    fn test_extract_blockquote_fix() {
        let rule = MD027MultipleSpacesBlockquote;
        let regex = Regex::new(r"^(\s*)>>([^\s>].*|$)").unwrap();
        let cap = regex.captures(">>content").unwrap();

        let result = rule.extract_blockquote_fix_from_match(&cap, "missing spaces in nested blockquote", ">>content");
        assert!(result.is_some());
        let (fixed, desc) = result.unwrap();
        assert_eq!(fixed, "> > content");
        assert!(desc.contains("Missing spaces"));
    }

    #[test]
    fn test_empty_blockquote() {
        // Only the middle line has surplus whitespace after the marker
        let result = lint(">\n>  \n> content");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_fix_preserves_indentation() {
        let fixed = apply_fix("  >  Indented with multiple spaces");
        assert_eq!(fixed, "  > Indented with multiple spaces");
    }

    #[test]
    fn test_tabs_after_marker() {
        // A single tab after the marker is reported under the same message
        let result = lint(">\tTab after marker");
        assert_eq!(result.len(), 1, "Tab after marker should be flagged");
        assert_eq!(result[0].message, "Multiple spaces after quote marker (>)");

        // Tab followed by a space
        let result2 = lint(">\t Space then tab");
        assert_eq!(result2.len(), 1, "Tab and space should be flagged");

        // Two tabs
        let result3 = lint(">\t\tTwo tabs");
        assert_eq!(result3.len(), 1, "Two tabs should be flagged");
    }

    #[test]
    fn test_mixed_spaces_and_tabs() {
        // Space then tab
        let result = lint("> \tSpace then tab");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].column, 3); // Points to the tab

        // Tab then space
        let result2 = lint(">\t Tab then space");
        assert_eq!(result2.len(), 1);
        assert_eq!(result2[0].column, 3); // Points to the space after tab
    }

    #[test]
    fn test_fix_tabs() {
        // Two tabs collapse to a single space
        assert_eq!(apply_fix(">\t\tTwo tabs"), "> Two tabs");

        // Mixed spaces and tabs collapse to a single space
        assert_eq!(apply_fix("> \t Mixed"), "> Mixed");
    }
}