rumdl_lib/rules/
md027_multiple_spaces_blockquote.rs

1use crate::utils::range_utils::calculate_match_range;
2
3use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
4use regex::Regex;
5use std::sync::LazyLock;
6
7// New patterns for detecting malformed blockquote attempts where user intent is clear
8static MALFORMED_BLOCKQUOTE_PATTERNS: LazyLock<Vec<(Regex, &'static str)>> = LazyLock::new(|| {
9    vec![
10        // Double > without space: >>text (looks like nested but missing spaces)
11        (
12            Regex::new(r"^(\s*)>>([^\s>].*|$)").unwrap(),
13            "missing spaces in nested blockquote",
14        ),
15        // Triple > without space: >>>text
16        (
17            Regex::new(r"^(\s*)>>>([^\s>].*|$)").unwrap(),
18            "missing spaces in deeply nested blockquote",
19        ),
20        // Space then > then text: > >text (extra > by mistake)
21        (
22            Regex::new(r"^(\s*)>\s+>([^\s>].*|$)").unwrap(),
23            "extra blockquote marker",
24        ),
25        // Multiple spaces then >: (spaces)>text (indented blockquote without space)
26        (
27            Regex::new(r"^(\s{4,})>([^\s].*|$)").unwrap(),
28            "indented blockquote missing space",
29        ),
30    ]
31});
32
33// Cached regex for blockquote validation
34static BLOCKQUOTE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*>").unwrap());
35
/// Rule MD027: No multiple spaces after blockquote symbol
///
/// The rule is a stateless unit struct; all behaviour lives in its `Rule` implementation.
///
/// See [docs/md027.md](../../docs/md027.md) for full documentation, configuration, and examples.
#[derive(Debug, Default, Clone)]
pub struct MD027MultipleSpacesBlockquote;
42
43impl Rule for MD027MultipleSpacesBlockquote {
44    fn name(&self) -> &'static str {
45        "MD027"
46    }
47
48    fn description(&self) -> &'static str {
49        "Multiple spaces after quote marker (>)"
50    }
51
52    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
53        let mut warnings = Vec::new();
54
55        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
56            let line_num = line_idx + 1;
57
58            // Skip lines in code blocks
59            if line_info.in_code_block {
60                continue;
61            }
62
63            // Check if this line is a blockquote using cached info
64            if let Some(blockquote) = &line_info.blockquote {
65                // Part 1: Check for multiple spaces after the blockquote marker
66                if blockquote.has_multiple_spaces_after_marker {
67                    // Find where the extra spaces start in the line
68                    // We need to find the position after the markers and first space/tab
69                    let mut byte_pos = 0;
70                    let mut found_markers = 0;
71                    let mut found_first_space = false;
72
73                    for (i, ch) in line_info.content(ctx.content).char_indices() {
74                        if found_markers < blockquote.nesting_level {
75                            if ch == '>' {
76                                found_markers += 1;
77                            }
78                        } else if !found_first_space && (ch == ' ' || ch == '\t') {
79                            // This is the first space/tab after markers
80                            found_first_space = true;
81                        } else if found_first_space && (ch == ' ' || ch == '\t') {
82                            // This is where extra spaces start
83                            byte_pos = i;
84                            break;
85                        }
86                    }
87
88                    // Count how many extra spaces/tabs there are
89                    let extra_spaces_bytes = line_info.content(ctx.content)[byte_pos..]
90                        .chars()
91                        .take_while(|&c| c == ' ' || c == '\t')
92                        .fold(0, |acc, ch| acc + ch.len_utf8());
93
94                    if extra_spaces_bytes > 0 {
95                        let (start_line, start_col, end_line, end_col) = calculate_match_range(
96                            line_num,
97                            line_info.content(ctx.content),
98                            byte_pos,
99                            extra_spaces_bytes,
100                        );
101
102                        warnings.push(LintWarning {
103                            rule_name: Some(self.name().to_string()),
104                            line: start_line,
105                            column: start_col,
106                            end_line,
107                            end_column: end_col,
108                            message: "Multiple spaces after quote marker (>)".to_string(),
109                            severity: Severity::Warning,
110                            fix: Some(Fix {
111                                range: {
112                                    let start_byte = ctx.line_index.line_col_to_byte_range(line_num, start_col).start;
113                                    let end_byte = ctx.line_index.line_col_to_byte_range(line_num, end_col).start;
114                                    start_byte..end_byte
115                                },
116                                replacement: "".to_string(), // Remove the extra spaces
117                            }),
118                        });
119                    }
120                }
121            } else {
122                // Part 2: Check for malformed blockquote attempts on non-blockquote lines
123                let malformed_attempts = self.detect_malformed_blockquote_attempts(line_info.content(ctx.content));
124                for (start, len, fixed_line, description) in malformed_attempts {
125                    let (start_line, start_col, end_line, end_col) =
126                        calculate_match_range(line_num, line_info.content(ctx.content), start, len);
127
128                    warnings.push(LintWarning {
129                        rule_name: Some(self.name().to_string()),
130                        line: start_line,
131                        column: start_col,
132                        end_line,
133                        end_column: end_col,
134                        message: format!("Malformed quote: {description}"),
135                        severity: Severity::Warning,
136                        fix: Some(Fix {
137                            range: ctx.line_index.line_col_to_byte_range(line_num, 1),
138                            replacement: fixed_line,
139                        }),
140                    });
141                }
142            }
143        }
144
145        Ok(warnings)
146    }
147
148    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
149        let mut result = Vec::with_capacity(ctx.lines.len());
150
151        for line_info in &ctx.lines {
152            if let Some(blockquote) = &line_info.blockquote {
153                // Fix blockquotes with multiple spaces after the marker
154                if blockquote.has_multiple_spaces_after_marker {
155                    // Rebuild the line with exactly one space after the markers
156                    // But don't add a space if the content is empty to avoid MD009 conflicts
157                    let fixed_line = if blockquote.content.is_empty() {
158                        format!("{}{}", blockquote.indent, ">".repeat(blockquote.nesting_level))
159                    } else {
160                        format!(
161                            "{}{} {}",
162                            blockquote.indent,
163                            ">".repeat(blockquote.nesting_level),
164                            blockquote.content
165                        )
166                    };
167                    result.push(fixed_line);
168                } else {
169                    result.push(line_info.content(ctx.content).to_string());
170                }
171            } else {
172                // Check for malformed blockquote attempts
173                let malformed_attempts = self.detect_malformed_blockquote_attempts(line_info.content(ctx.content));
174                if !malformed_attempts.is_empty() {
175                    // Use the first fix (there should only be one per line)
176                    let (_, _, fixed_line, _) = &malformed_attempts[0];
177                    result.push(fixed_line.clone());
178                } else {
179                    result.push(line_info.content(ctx.content).to_string());
180                }
181            }
182        }
183
184        // Preserve trailing newline if original content had one
185        Ok(result.join("\n") + if ctx.content.ends_with('\n') { "\n" } else { "" })
186    }
187
188    fn as_any(&self) -> &dyn std::any::Any {
189        self
190    }
191
192    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
193    where
194        Self: Sized,
195    {
196        Box::new(MD027MultipleSpacesBlockquote)
197    }
198
199    /// Check if this rule should be skipped
200    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
201        ctx.content.is_empty() || !ctx.likely_has_blockquotes()
202    }
203}
204
205impl MD027MultipleSpacesBlockquote {
206    /// Detect malformed blockquote attempts where user intent is clear
207    fn detect_malformed_blockquote_attempts(&self, line: &str) -> Vec<(usize, usize, String, String)> {
208        let mut results = Vec::new();
209
210        for (pattern, issue_type) in MALFORMED_BLOCKQUOTE_PATTERNS.iter() {
211            if let Some(cap) = pattern.captures(line) {
212                let match_obj = cap.get(0).unwrap();
213                let start = match_obj.start();
214                let len = match_obj.len();
215
216                // Extract potential blockquote components
217                if let Some((fixed_line, description)) = self.extract_blockquote_fix_from_match(&cap, issue_type, line)
218                {
219                    // Only proceed if this looks like a genuine blockquote attempt
220                    if self.looks_like_blockquote_attempt(line, &fixed_line) {
221                        results.push((start, len, fixed_line, description));
222                    }
223                }
224            }
225        }
226
227        results
228    }
229
230    /// Extract the proper blockquote format from a malformed match
231    fn extract_blockquote_fix_from_match(
232        &self,
233        cap: &regex::Captures,
234        issue_type: &str,
235        _original_line: &str,
236    ) -> Option<(String, String)> {
237        match issue_type {
238            "missing spaces in nested blockquote" => {
239                // >>text -> > > text
240                let indent = cap.get(1).map_or("", |m| m.as_str());
241                let content = cap.get(2).map_or("", |m| m.as_str());
242                Some((
243                    format!("{}> > {}", indent, content.trim()),
244                    "Missing spaces in nested blockquote".to_string(),
245                ))
246            }
247            "missing spaces in deeply nested blockquote" => {
248                // >>>text -> > > > text
249                let indent = cap.get(1).map_or("", |m| m.as_str());
250                let content = cap.get(2).map_or("", |m| m.as_str());
251                Some((
252                    format!("{}> > > {}", indent, content.trim()),
253                    "Missing spaces in deeply nested blockquote".to_string(),
254                ))
255            }
256            "extra blockquote marker" => {
257                // > >text -> > text
258                let indent = cap.get(1).map_or("", |m| m.as_str());
259                let content = cap.get(2).map_or("", |m| m.as_str());
260                Some((
261                    format!("{}> {}", indent, content.trim()),
262                    "Extra blockquote marker".to_string(),
263                ))
264            }
265            "indented blockquote missing space" => {
266                // (spaces)>text -> (spaces)> text
267                let indent = cap.get(1).map_or("", |m| m.as_str());
268                let content = cap.get(2).map_or("", |m| m.as_str());
269                Some((
270                    format!("{}> {}", indent, content.trim()),
271                    "Indented blockquote missing space".to_string(),
272                ))
273            }
274            _ => None,
275        }
276    }
277
278    /// Check if the pattern looks like a genuine blockquote attempt
279    fn looks_like_blockquote_attempt(&self, original: &str, fixed: &str) -> bool {
280        // Basic heuristics to avoid false positives
281
282        // 1. Content should not be too short (avoid flagging things like ">>>" alone)
283        let trimmed_original = original.trim();
284        if trimmed_original.len() < 5 {
285            // More restrictive
286            return false;
287        }
288
289        // 2. Should contain some text content after the markers
290        let content_after_markers = trimmed_original.trim_start_matches('>').trim_start_matches(' ');
291        if content_after_markers.is_empty() || content_after_markers.len() < 3 {
292            // More restrictive
293            return false;
294        }
295
296        // 3. Content should contain some alphabetic characters (not just symbols)
297        if !content_after_markers.chars().any(|c| c.is_alphabetic()) {
298            return false;
299        }
300
301        // 4. Fixed version should actually be a valid blockquote
302        // Check if it starts with optional whitespace followed by >
303        if !BLOCKQUOTE_PATTERN.is_match(fixed) {
304            return false;
305        }
306
307        // 5. Avoid flagging things that might be code or special syntax
308        if content_after_markers.starts_with('#') // Headers
309            || content_after_markers.starts_with('[') // Links
310            || content_after_markers.starts_with('`') // Code
311            || content_after_markers.starts_with("http") // URLs
312            || content_after_markers.starts_with("www.") // URLs
313            || content_after_markers.starts_with("ftp")
314        // URLs
315        {
316            return false;
317        }
318
319        // 6. Content should look like prose, not code or markup
320        let word_count = content_after_markers.split_whitespace().count();
321        if word_count < 3 {
322            // Should be at least 3 words to look like prose
323            return false;
324        }
325
326        true
327    }
328}
329
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Runs `check` on `content` using the standard Markdown flavor.
    fn lint(content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD027MultipleSpacesBlockquote.check(&ctx).unwrap()
    }

    /// Runs `fix` on `content` using the standard Markdown flavor.
    fn apply_fix(content: &str) -> String {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD027MultipleSpacesBlockquote.fix(&ctx).unwrap()
    }

    #[test]
    fn test_valid_blockquote() {
        let result = lint("> This is a blockquote\n> > Nested quote");
        assert!(result.is_empty(), "Valid blockquotes should not be flagged");
    }

    #[test]
    fn test_multiple_spaces_after_marker() {
        let result = lint(">  This has two spaces\n>   This has three spaces");
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 1);
        assert_eq!(result[0].column, 3); // Points to the extra space (after > and first space)
        assert_eq!(result[0].message, "Multiple spaces after quote marker (>)");
        assert_eq!(result[1].line, 2);
        assert_eq!(result[1].column, 3);
    }

    #[test]
    fn test_nested_multiple_spaces() {
        // Extra spaces after the last marker are reported for both a single `>`
        // and an unspaced nested `>>`
        let result = lint(">  Two spaces after marker\n>>  Two spaces in nested blockquote");
        assert_eq!(result.len(), 2);
        assert!(result[0].message.contains("Multiple spaces"));
        assert!(result[1].message.contains("Multiple spaces"));
    }

    #[test]
    fn test_malformed_nested_quote() {
        // >>text is valid CommonMark for a nested blockquote, so it is expected
        // to be handled as a regular blockquote line and not reported at all
        let result = lint(">>This is a nested blockquote without space after markers");
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_malformed_deeply_nested() {
        // >>>text is valid CommonMark for a triple-nested blockquote and is not reported
        let result = lint(">>>This is deeply nested without spaces after markers");
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_extra_quote_marker() {
        // "> >text" is a well-formed blockquote line with a single space after
        // the first marker, so nothing is reported
        let result = lint("> >This looks like nested but is actually single level with >This as content");
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_indented_missing_space() {
        // Three spaces of indentation still leave this a blockquote
        // (four or more would turn it into an indented code block).
        // A missing space after the marker is not something MD027 reports.
        let result = lint("   >This has 3 spaces indent and no space after marker");
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_fix_multiple_spaces() {
        let fixed = apply_fix(">  Two spaces\n>   Three spaces");
        assert_eq!(fixed, "> Two spaces\n> Three spaces");
    }

    #[test]
    fn test_fix_malformed_quotes() {
        // These are valid nested blockquotes, not malformed, so no fix is needed
        let content = ">>Nested without spaces\n>>>Deeply nested without spaces";
        assert_eq!(apply_fix(content), content);
    }

    #[test]
    fn test_fix_extra_marker() {
        // A well-formed blockquote line; no fix is needed
        let content = "> >Extra marker here";
        assert_eq!(apply_fix(content), content);
    }

    #[test]
    fn test_code_block_ignored() {
        let result = lint("```\n>  This is in a code block\n```");
        assert!(result.is_empty(), "Code blocks should be ignored");
    }

    #[test]
    fn test_short_content_not_flagged() {
        let result = lint(">>>\n>>");
        assert!(result.is_empty(), "Very short content should not be flagged");
    }

    #[test]
    fn test_non_prose_not_flagged() {
        let result = lint(">>#header\n>>[link]\n>>`code`\n>>http://example.com");
        assert!(result.is_empty(), "Non-prose content should not be flagged");
    }

    #[test]
    fn test_preserve_trailing_newline() {
        assert_eq!(apply_fix(">  Two spaces\n"), "> Two spaces\n");
        assert_eq!(apply_fix(">  Two spaces"), "> Two spaces");
    }

    #[test]
    fn test_mixed_issues() {
        let result = lint(">  Multiple spaces here\n>>Normal nested quote\n> Normal quote");
        assert_eq!(result.len(), 1, "Should only flag the multiple spaces");
        assert_eq!(result[0].line, 1);
    }

    #[test]
    fn test_looks_like_blockquote_attempt() {
        let rule = MD027MultipleSpacesBlockquote;

        // Should return true for genuine attempts
        assert!(rule.looks_like_blockquote_attempt(
            ">>This is a real blockquote attempt with text",
            "> > This is a real blockquote attempt with text"
        ));

        // Should return false for too short
        assert!(!rule.looks_like_blockquote_attempt(">>>", "> > >"));

        // Should return false for no alphabetic content
        assert!(!rule.looks_like_blockquote_attempt(">>123", "> > 123"));

        // Should return false for code-like content
        assert!(!rule.looks_like_blockquote_attempt(">>#header", "> > #header"));
    }

    #[test]
    fn test_extract_blockquote_fix() {
        let rule = MD027MultipleSpacesBlockquote;
        let regex = Regex::new(r"^(\s*)>>([^\s>].*|$)").unwrap();
        let cap = regex.captures(">>content").unwrap();

        let result = rule.extract_blockquote_fix_from_match(&cap, "missing spaces in nested blockquote", ">>content");
        assert!(result.is_some());
        let (fixed, desc) = result.unwrap();
        assert_eq!(fixed, "> > content");
        assert!(desc.contains("Missing spaces"));
    }

    #[test]
    fn test_empty_blockquote() {
        // Only the middle line (marker followed by two spaces and no content)
        // should be flagged
        let result = lint(">\n>  \n> content");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_fix_preserves_indentation() {
        let fixed = apply_fix("  >  Indented with multiple spaces");
        assert_eq!(fixed, "  > Indented with multiple spaces");
    }

    #[test]
    fn test_tabs_after_marker_not_flagged() {
        // MD027 only flags multiple SPACES, not tabs
        // Tabs after blockquote markers are handled by MD010 (no-hard-tabs)
        // This matches markdownlint reference behavior

        // Tab after marker - NOT flagged by MD027 (that's MD010's job)
        let result = lint(">\tTab after marker");
        assert_eq!(result.len(), 0, "Single tab should not be flagged by MD027");

        // Two tabs after marker - NOT flagged by MD027
        let result2 = lint(">\t\tTwo tabs");
        assert_eq!(result2.len(), 0, "Tabs should not be flagged by MD027");
    }

    #[test]
    fn test_mixed_spaces_and_tabs() {
        // NOTE: despite the test name, both inputs contain spaces only.
        // Two spaces after the marker: the second one is reported
        let result = lint(">  Space Space");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].column, 3); // Points to the extra space

        // Three spaces should be flagged (as a single warning)
        let result2 = lint(">   Three spaces");
        assert_eq!(result2.len(), 1);
    }

    #[test]
    fn test_fix_multiple_spaces_various() {
        // Fix should remove extra spaces
        assert_eq!(apply_fix(">   Three spaces"), "> Three spaces");

        // Any number of extra spaces collapses to one
        assert_eq!(apply_fix(">    Four spaces"), "> Four spaces");
    }
}
604}