rumdl_lib/rules/
md027_multiple_spaces_blockquote.rs

1use crate::utils::range_utils::{LineIndex, calculate_match_range};
2
3use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
4use regex::Regex;
5use std::sync::LazyLock;
6
7// New patterns for detecting malformed blockquote attempts where user intent is clear
8static MALFORMED_BLOCKQUOTE_PATTERNS: LazyLock<Vec<(Regex, &'static str)>> = LazyLock::new(|| {
9    vec![
10        // Double > without space: >>text (looks like nested but missing spaces)
11        (
12            Regex::new(r"^(\s*)>>([^\s>].*|$)").unwrap(),
13            "missing spaces in nested blockquote",
14        ),
15        // Triple > without space: >>>text
16        (
17            Regex::new(r"^(\s*)>>>([^\s>].*|$)").unwrap(),
18            "missing spaces in deeply nested blockquote",
19        ),
20        // Space then > then text: > >text (extra > by mistake)
21        (
22            Regex::new(r"^(\s*)>\s+>([^\s>].*|$)").unwrap(),
23            "extra blockquote marker",
24        ),
25        // Multiple spaces then >: (spaces)>text (indented blockquote without space)
26        (
27            Regex::new(r"^(\s{4,})>([^\s].*|$)").unwrap(),
28            "indented blockquote missing space",
29        ),
30    ]
31});
32
/// Rule MD027: No multiple spaces after blockquote symbol
///
/// Reports blockquote lines whose marker (`>`) is followed by more than one
/// space (or by tabs), and lines that look like malformed blockquote attempts.
/// The rule carries no configuration, so it is a unit struct.
///
/// See [docs/md027.md](../../docs/md027.md) for full documentation, configuration, and examples.
#[derive(Debug, Default, Clone)]
pub struct MD027MultipleSpacesBlockquote;
39
40impl Rule for MD027MultipleSpacesBlockquote {
41    fn name(&self) -> &'static str {
42        "MD027"
43    }
44
45    fn description(&self) -> &'static str {
46        "Multiple spaces after quote marker (>)"
47    }
48
49    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
50        let mut warnings = Vec::new();
51
52        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
53            let line_num = line_idx + 1;
54
55            // Skip lines in code blocks
56            if line_info.in_code_block {
57                continue;
58            }
59
60            // Check if this line is a blockquote using cached info
61            if let Some(blockquote) = &line_info.blockquote {
62                // Part 1: Check for multiple spaces after the blockquote marker
63                if blockquote.has_multiple_spaces_after_marker {
64                    // Find where the extra spaces start in the line
65                    // We need to find the position after the markers and first space/tab
66                    let mut byte_pos = 0;
67                    let mut found_markers = 0;
68                    let mut found_first_space = false;
69
70                    for (i, ch) in line_info.content.char_indices() {
71                        if found_markers < blockquote.nesting_level {
72                            if ch == '>' {
73                                found_markers += 1;
74                            }
75                        } else if !found_first_space && (ch == ' ' || ch == '\t') {
76                            // This is the first space/tab after markers
77                            found_first_space = true;
78                        } else if found_first_space && (ch == ' ' || ch == '\t') {
79                            // This is where extra spaces start
80                            byte_pos = i;
81                            break;
82                        }
83                    }
84
85                    // Count how many extra spaces/tabs there are
86                    let extra_spaces_bytes = line_info.content[byte_pos..]
87                        .chars()
88                        .take_while(|&c| c == ' ' || c == '\t')
89                        .fold(0, |acc, ch| acc + ch.len_utf8());
90
91                    if extra_spaces_bytes > 0 {
92                        let (start_line, start_col, end_line, end_col) =
93                            calculate_match_range(line_num, &line_info.content, byte_pos, extra_spaces_bytes);
94
95                        warnings.push(LintWarning {
96                            rule_name: Some(self.name().to_string()),
97                            line: start_line,
98                            column: start_col,
99                            end_line,
100                            end_column: end_col,
101                            message: "Multiple spaces after quote marker (>)".to_string(),
102                            severity: Severity::Warning,
103                            fix: Some(Fix {
104                                range: {
105                                    let line_index = LineIndex::new(ctx.content.to_string());
106                                    let start_byte = line_index.line_col_to_byte_range(line_num, start_col).start;
107                                    let end_byte = line_index.line_col_to_byte_range(line_num, end_col).start;
108                                    start_byte..end_byte
109                                },
110                                replacement: "".to_string(), // Remove the extra spaces
111                            }),
112                        });
113                    }
114                }
115            } else {
116                // Part 2: Check for malformed blockquote attempts on non-blockquote lines
117                let malformed_attempts = self.detect_malformed_blockquote_attempts(&line_info.content);
118                for (start, len, fixed_line, description) in malformed_attempts {
119                    let (start_line, start_col, end_line, end_col) =
120                        calculate_match_range(line_num, &line_info.content, start, len);
121
122                    warnings.push(LintWarning {
123                        rule_name: Some(self.name().to_string()),
124                        line: start_line,
125                        column: start_col,
126                        end_line,
127                        end_column: end_col,
128                        message: format!("Malformed quote: {description}"),
129                        severity: Severity::Warning,
130                        fix: Some(Fix {
131                            range: {
132                                let line_index = LineIndex::new(ctx.content.to_string());
133                                line_index.line_col_to_byte_range(line_num, 1)
134                            },
135                            replacement: fixed_line,
136                        }),
137                    });
138                }
139            }
140        }
141
142        Ok(warnings)
143    }
144
145    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
146        let mut result = Vec::with_capacity(ctx.lines.len());
147
148        for line_info in &ctx.lines {
149            if let Some(blockquote) = &line_info.blockquote {
150                // Fix blockquotes with multiple spaces after the marker
151                if blockquote.has_multiple_spaces_after_marker {
152                    // Rebuild the line with exactly one space after the markers
153                    // But don't add a space if the content is empty to avoid MD009 conflicts
154                    let fixed_line = if blockquote.content.is_empty() {
155                        format!("{}{}", blockquote.indent, ">".repeat(blockquote.nesting_level))
156                    } else {
157                        format!(
158                            "{}{} {}",
159                            blockquote.indent,
160                            ">".repeat(blockquote.nesting_level),
161                            blockquote.content
162                        )
163                    };
164                    result.push(fixed_line);
165                } else {
166                    result.push(line_info.content.clone());
167                }
168            } else {
169                // Check for malformed blockquote attempts
170                let malformed_attempts = self.detect_malformed_blockquote_attempts(&line_info.content);
171                if !malformed_attempts.is_empty() {
172                    // Use the first fix (there should only be one per line)
173                    let (_, _, fixed_line, _) = &malformed_attempts[0];
174                    result.push(fixed_line.clone());
175                } else {
176                    result.push(line_info.content.clone());
177                }
178            }
179        }
180
181        // Preserve trailing newline if original content had one
182        Ok(result.join("\n") + if ctx.content.ends_with('\n') { "\n" } else { "" })
183    }
184
185    fn as_any(&self) -> &dyn std::any::Any {
186        self
187    }
188
189    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
190    where
191        Self: Sized,
192    {
193        Box::new(MD027MultipleSpacesBlockquote)
194    }
195
196    /// Check if this rule should be skipped
197    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
198        ctx.content.is_empty() || !ctx.likely_has_blockquotes()
199    }
200}
201
202impl MD027MultipleSpacesBlockquote {
203    /// Detect malformed blockquote attempts where user intent is clear
204    fn detect_malformed_blockquote_attempts(&self, line: &str) -> Vec<(usize, usize, String, String)> {
205        let mut results = Vec::new();
206
207        for (pattern, issue_type) in MALFORMED_BLOCKQUOTE_PATTERNS.iter() {
208            if let Some(cap) = pattern.captures(line) {
209                let match_obj = cap.get(0).unwrap();
210                let start = match_obj.start();
211                let len = match_obj.len();
212
213                // Extract potential blockquote components
214                if let Some((fixed_line, description)) = self.extract_blockquote_fix_from_match(&cap, issue_type, line)
215                {
216                    // Only proceed if this looks like a genuine blockquote attempt
217                    if self.looks_like_blockquote_attempt(line, &fixed_line) {
218                        results.push((start, len, fixed_line, description));
219                    }
220                }
221            }
222        }
223
224        results
225    }
226
227    /// Extract the proper blockquote format from a malformed match
228    fn extract_blockquote_fix_from_match(
229        &self,
230        cap: &regex::Captures,
231        issue_type: &str,
232        _original_line: &str,
233    ) -> Option<(String, String)> {
234        match issue_type {
235            "missing spaces in nested blockquote" => {
236                // >>text -> > > text
237                let indent = cap.get(1).map_or("", |m| m.as_str());
238                let content = cap.get(2).map_or("", |m| m.as_str());
239                Some((
240                    format!("{}> > {}", indent, content.trim()),
241                    "Missing spaces in nested blockquote".to_string(),
242                ))
243            }
244            "missing spaces in deeply nested blockquote" => {
245                // >>>text -> > > > text
246                let indent = cap.get(1).map_or("", |m| m.as_str());
247                let content = cap.get(2).map_or("", |m| m.as_str());
248                Some((
249                    format!("{}> > > {}", indent, content.trim()),
250                    "Missing spaces in deeply nested blockquote".to_string(),
251                ))
252            }
253            "extra blockquote marker" => {
254                // > >text -> > text
255                let indent = cap.get(1).map_or("", |m| m.as_str());
256                let content = cap.get(2).map_or("", |m| m.as_str());
257                Some((
258                    format!("{}> {}", indent, content.trim()),
259                    "Extra blockquote marker".to_string(),
260                ))
261            }
262            "indented blockquote missing space" => {
263                // (spaces)>text -> (spaces)> text
264                let indent = cap.get(1).map_or("", |m| m.as_str());
265                let content = cap.get(2).map_or("", |m| m.as_str());
266                Some((
267                    format!("{}> {}", indent, content.trim()),
268                    "Indented blockquote missing space".to_string(),
269                ))
270            }
271            _ => None,
272        }
273    }
274
275    /// Check if the pattern looks like a genuine blockquote attempt
276    fn looks_like_blockquote_attempt(&self, original: &str, fixed: &str) -> bool {
277        // Basic heuristics to avoid false positives
278
279        // 1. Content should not be too short (avoid flagging things like ">>>" alone)
280        let trimmed_original = original.trim();
281        if trimmed_original.len() < 5 {
282            // More restrictive
283            return false;
284        }
285
286        // 2. Should contain some text content after the markers
287        let content_after_markers = trimmed_original.trim_start_matches('>').trim_start_matches(' ');
288        if content_after_markers.is_empty() || content_after_markers.len() < 3 {
289            // More restrictive
290            return false;
291        }
292
293        // 3. Content should contain some alphabetic characters (not just symbols)
294        if !content_after_markers.chars().any(|c| c.is_alphabetic()) {
295            return false;
296        }
297
298        // 4. Fixed version should actually be a valid blockquote
299        // Check if it starts with optional whitespace followed by >
300        let blockquote_pattern = regex::Regex::new(r"^\s*>").unwrap();
301        if !blockquote_pattern.is_match(fixed) {
302            return false;
303        }
304
305        // 5. Avoid flagging things that might be code or special syntax
306        if content_after_markers.starts_with('#') // Headers
307            || content_after_markers.starts_with('[') // Links
308            || content_after_markers.starts_with('`') // Code
309            || content_after_markers.starts_with("http") // URLs
310            || content_after_markers.starts_with("www.") // URLs
311            || content_after_markers.starts_with("ftp")
312        // URLs
313        {
314            return false;
315        }
316
317        // 6. Content should look like prose, not code or markup
318        let word_count = content_after_markers.split_whitespace().count();
319        if word_count < 3 {
320            // Should be at least 3 words to look like prose
321            return false;
322        }
323
324        true
325    }
326}
327
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Run `check` on `content` using the standard Markdown flavor.
    fn lint(content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD027MultipleSpacesBlockquote.check(&ctx).unwrap()
    }

    /// Run `fix` on `content` using the standard Markdown flavor.
    fn autofix(content: &str) -> String {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD027MultipleSpacesBlockquote.fix(&ctx).unwrap()
    }

    #[test]
    fn test_valid_blockquote() {
        let warnings = lint("> This is a blockquote\n> > Nested quote");
        assert!(warnings.is_empty(), "Valid blockquotes should not be flagged");
    }

    #[test]
    fn test_multiple_spaces_after_marker() {
        let warnings = lint(">  This has two spaces\n>   This has three spaces");
        assert_eq!(warnings.len(), 2);

        let (first, second) = (&warnings[0], &warnings[1]);
        assert_eq!(first.line, 1);
        // Column 3 is the first surplus space (after `>` and the separator space)
        assert_eq!(first.column, 3);
        assert_eq!(first.message, "Multiple spaces after quote marker (>)");
        assert_eq!(second.line, 2);
        assert_eq!(second.column, 3);
    }

    #[test]
    fn test_nested_multiple_spaces() {
        // Both lines have two spaces after their last marker: `>` on line 1, `>>` on line 2
        let warnings = lint(">  Two spaces after marker\n>>  Two spaces in nested blockquote");
        assert_eq!(warnings.len(), 2);
        assert!(warnings[0].message.contains("Multiple spaces"));
        assert!(warnings[1].message.contains("Multiple spaces"));
    }

    #[test]
    fn test_malformed_nested_quote() {
        // `>>text` is a valid nested blockquote in CommonMark, so nothing is reported
        let warnings = lint(">>This is a nested blockquote without space after markers");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_malformed_deeply_nested() {
        // `>>>text` is a valid triple-nested blockquote, so nothing is reported
        let warnings = lint(">>>This is deeply nested without spaces after markers");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_extra_quote_marker() {
        // `> >text` is valid CommonMark and is not detected as malformed
        let warnings = lint("> >This looks like nested but is actually single level with >This as content");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_indented_missing_space() {
        // Three spaces of indentation, marker glued to the text: still a
        // blockquote without a space after the marker, which MD027 does not flag
        let warnings = lint("   >This has 3 spaces indent and no space after marker");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_fix_multiple_spaces() {
        let fixed = autofix(">  Two spaces\n>   Three spaces");
        assert_eq!(fixed, "> Two spaces\n> Three spaces");
    }

    #[test]
    fn test_fix_malformed_quotes() {
        // Valid nested blockquotes: the fixer must leave them untouched
        let content = ">>Nested without spaces\n>>>Deeply nested without spaces";
        assert_eq!(autofix(content), content);
    }

    #[test]
    fn test_fix_extra_marker() {
        // Valid blockquote: the fixer must leave it untouched
        let content = "> >Extra marker here";
        assert_eq!(autofix(content), content);
    }

    #[test]
    fn test_code_block_ignored() {
        let warnings = lint("```\n>  This is in a code block\n```");
        assert!(warnings.is_empty(), "Code blocks should be ignored");
    }

    #[test]
    fn test_short_content_not_flagged() {
        let warnings = lint(">>>\n>>");
        assert!(warnings.is_empty(), "Very short content should not be flagged");
    }

    #[test]
    fn test_non_prose_not_flagged() {
        let warnings = lint(">>#header\n>>[link]\n>>`code`\n>>http://example.com");
        assert!(warnings.is_empty(), "Non-prose content should not be flagged");
    }

    #[test]
    fn test_preserve_trailing_newline() {
        // With a trailing newline
        assert_eq!(autofix(">  Two spaces\n"), "> Two spaces\n");
        // Without a trailing newline
        assert_eq!(autofix(">  Two spaces"), "> Two spaces");
    }

    #[test]
    fn test_mixed_issues() {
        let warnings = lint(">  Multiple spaces here\n>>Normal nested quote\n> Normal quote");
        assert_eq!(warnings.len(), 1, "Should only flag the multiple spaces");
        assert_eq!(warnings[0].line, 1);
    }

    #[test]
    fn test_looks_like_blockquote_attempt() {
        let rule = MD027MultipleSpacesBlockquote;

        // Genuine attempt: prose after the markers
        let genuine = rule.looks_like_blockquote_attempt(
            ">>This is a real blockquote attempt with text",
            "> > This is a real blockquote attempt with text",
        );
        assert!(genuine);

        // Too short
        assert!(!rule.looks_like_blockquote_attempt(">>>", "> > >"));

        // No alphabetic content
        assert!(!rule.looks_like_blockquote_attempt(">>123", "> > 123"));

        // Code-like content
        assert!(!rule.looks_like_blockquote_attempt(">>#header", "> > #header"));
    }

    #[test]
    fn test_extract_blockquote_fix() {
        let rule = MD027MultipleSpacesBlockquote;
        let nested = Regex::new(r"^(\s*)>>([^\s>].*|$)").unwrap();
        let cap = nested.captures(">>content").unwrap();

        let extracted = rule.extract_blockquote_fix_from_match(&cap, "missing spaces in nested blockquote", ">>content");
        assert!(extracted.is_some());

        let (fixed_line, description) = extracted.unwrap();
        assert_eq!(fixed_line, "> > content");
        assert!(description.contains("Missing spaces"));
    }

    #[test]
    fn test_empty_blockquote() {
        // Only line 2 (an empty quote with two spaces after the marker) is flagged
        let warnings = lint(">\n>  \n> content");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
    }

    #[test]
    fn test_fix_preserves_indentation() {
        let fixed = autofix("  >  Indented with multiple spaces");
        assert_eq!(fixed, "  > Indented with multiple spaces");
    }

    #[test]
    fn test_tabs_after_marker() {
        // A single tab after the marker is reported as multiple spaces
        let single_tab = lint(">\tTab after marker");
        assert_eq!(single_tab.len(), 1, "Tab after marker should be flagged");
        assert_eq!(single_tab[0].message, "Multiple spaces after quote marker (>)");

        // A tab followed by a space after the marker
        let tab_and_space = lint(">\t Space then tab");
        assert_eq!(tab_and_space.len(), 1, "Tab and space should be flagged");

        // Two tabs after the marker
        let two_tabs = lint(">\t\tTwo tabs");
        assert_eq!(two_tabs.len(), 1, "Two tabs should be flagged");
    }

    #[test]
    fn test_mixed_spaces_and_tabs() {
        // Space then tab: the warning points at the tab
        let space_then_tab = lint("> \tSpace then tab");
        assert_eq!(space_then_tab.len(), 1);
        assert_eq!(space_then_tab[0].column, 3);

        // Tab then space: the warning points at the space after the tab
        let tab_then_space = lint(">\t Tab then space");
        assert_eq!(tab_then_space.len(), 1);
        assert_eq!(tab_then_space[0].column, 3);
    }

    #[test]
    fn test_fix_tabs() {
        // Two tabs collapse to a single space
        assert_eq!(autofix(">\t\tTwo tabs"), "> Two tabs");

        // A mixed run of spaces and tabs collapses to a single space
        assert_eq!(autofix("> \t Mixed"), "> Mixed");
    }
}
605}