rumdl_lib/rules/
blockquote_utils.rs

1use regex::Regex;
2use std::sync::LazyLock;
3
// Matches any blockquote line: optional leading whitespace, a `>` marker, at most one
// whitespace character, then the rest of the line.
// Group 1 = indentation, group 2 = content after the marker and that optional whitespace.
static BLOCKQUOTE_LINE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)>\s?(.*)$").unwrap());

// Matches an empty blockquote line: optional indentation followed by a single `>` and
// nothing else (no trailing space, no content). Group 1 = indentation.
static EMPTY_BLOCKQUOTE_LINE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)>$").unwrap());

// Matches a line made only of contiguous `>` markers (e.g. `>>`) with no space or content.
// Because the quantifier is `+`, this also matches a single `>`. Group 1 = indentation.
static NESTED_EMPTY_BLOCKQUOTE_LINE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)>+$").unwrap());

// Matches a blockquote line whose marker is immediately followed by a non-whitespace
// character. Group 1 = indentation, group 2 = everything after the first `>`.
static BLOCKQUOTE_NO_SPACE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)>([^\s].*)$").unwrap());

// Matches a blockquote line with two or more spaces after the `>` marker.
// Only literal spaces are matched (not tabs) to mirror markdownlint behaviour;
// tabs are handled by MD010 (no-hard-tabs).
// Group 1 = indentation, group 2 = the run of spaces, group 3 = the content.
static BLOCKQUOTE_MULTIPLE_SPACES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)>([ ]{2,})(.*)$").unwrap());

// Matches a nested blockquote: a first `>` followed by one or more further `>` markers,
// each optionally preceded by whitespace.
// Group 1 = indentation, group 2 = the additional markers, group 3 = the remaining text.
static NESTED_BLOCKQUOTE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)>((?:\s*>)+)(\s*.*)$").unwrap());
23
24/// Utility functions for detecting and handling blockquotes in Markdown documents
25pub struct BlockquoteUtils;
26
27impl BlockquoteUtils {
28    /// Check if a line is a blockquote
29    pub fn is_blockquote(line: &str) -> bool {
30        BLOCKQUOTE_LINE.is_match(line)
31    }
32
33    /// Check if a line is an empty blockquote (> with no content)
34    pub fn is_empty_blockquote(line: &str) -> bool {
35        // Check for simple empty blockquote (> with no space)
36        if EMPTY_BLOCKQUOTE_LINE.is_match(line) {
37            return true;
38        }
39
40        // Check for nested empty blockquote (>> with no space)
41        if NESTED_EMPTY_BLOCKQUOTE_LINE.is_match(line) {
42            return true;
43        }
44
45        // Check if it's a blockquote with only whitespace content
46        if BLOCKQUOTE_LINE.is_match(line) {
47            let content = Self::extract_content(line);
48            return content.trim().is_empty();
49        }
50
51        false
52    }
53
54    /// Check if an empty blockquote line needs fixing for MD028
55    /// This is more restrictive than is_empty_blockquote - only flags lines that actually need fixing
56    pub fn needs_md028_fix(line: &str) -> bool {
57        // Only flag blockquotes that have NO space after the > marker
58        // Lines with a single space ("> ") are already correct and don't need fixing
59        if EMPTY_BLOCKQUOTE_LINE.is_match(line) {
60            return true;
61        }
62
63        if NESTED_EMPTY_BLOCKQUOTE_LINE.is_match(line) {
64            return true;
65        }
66
67        false
68    }
69
70    /// Check if a blockquote line has no space after the > marker
71    pub fn has_no_space_after_marker(line: &str) -> bool {
72        BLOCKQUOTE_NO_SPACE.is_match(line)
73    }
74
75    /// Check if a blockquote line has multiple spaces after the > marker
76    pub fn has_multiple_spaces_after_marker(line: &str) -> bool {
77        BLOCKQUOTE_MULTIPLE_SPACES.is_match(line)
78    }
79
80    /// Check if a line is a nested blockquote
81    pub fn is_nested_blockquote(line: &str) -> bool {
82        NESTED_BLOCKQUOTE.is_match(line)
83    }
84
85    /// Get the nesting level of a blockquote line
86    pub fn get_nesting_level(line: &str) -> usize {
87        if !Self::is_blockquote(line) {
88            return 0;
89        }
90
91        // Count the number of '>' characters at the beginning of the line
92        let trimmed = line.trim_start();
93        let mut count = 0;
94
95        for c in trimmed.chars() {
96            if c == '>' {
97                count += 1;
98            } else {
99                break;
100            }
101        }
102
103        count
104    }
105
106    /// Extract the content of a blockquote line
107    pub fn extract_content(line: &str) -> String {
108        if let Some(captures) = BLOCKQUOTE_LINE.captures(line)
109            && let Some(content) = captures.get(2)
110        {
111            return content.as_str().to_string();
112        }
113
114        String::new()
115    }
116
117    /// Extract the indentation of a blockquote line
118    pub fn extract_indentation(line: &str) -> String {
119        if let Some(captures) = BLOCKQUOTE_LINE.captures(line)
120            && let Some(indent) = captures.get(1)
121        {
122            return indent.as_str().to_string();
123        }
124
125        String::new()
126    }
127
128    /// Fix a blockquote line to ensure it has exactly one space after the > marker
129    pub fn fix_blockquote_spacing(line: &str) -> String {
130        if !Self::is_blockquote(line) {
131            return line.to_string();
132        }
133
134        if Self::has_no_space_after_marker(line) {
135            if let Some(captures) = BLOCKQUOTE_NO_SPACE.captures(line) {
136                let indent = captures.get(1).map_or("", |m| m.as_str());
137                let content = captures.get(2).map_or("", |m| m.as_str());
138                return format!("{indent}> {content}");
139            }
140        } else if Self::has_multiple_spaces_after_marker(line)
141            && let Some(captures) = BLOCKQUOTE_MULTIPLE_SPACES.captures(line)
142        {
143            let indent = captures.get(1).map_or("", |m| m.as_str());
144            let content = captures.get(3).map_or("", |m| m.as_str());
145            return format!("{indent}> {content}");
146        }
147
148        line.to_string()
149    }
150
151    /// Fix nested blockquotes to ensure each level has exactly one space after the > marker
152    pub fn fix_nested_blockquote_spacing(line: &str) -> String {
153        if !Self::is_blockquote(line) {
154            return line.to_string();
155        }
156
157        let trimmed = line.trim_start();
158        let indent = &line[..line.len() - trimmed.len()];
159
160        // Parse through the blockquote markers
161        let mut remaining = trimmed;
162        let mut markers = Vec::new();
163
164        while remaining.starts_with('>') {
165            markers.push('>');
166            remaining = &remaining[1..];
167
168            // Skip any spaces between markers
169            remaining = remaining.trim_start();
170        }
171
172        // Build the result with proper spacing
173        let mut result = indent.to_string();
174        for (i, _) in markers.iter().enumerate() {
175            if i > 0 {
176                result.push(' ');
177            }
178            result.push('>');
179        }
180
181        // Add the content with a single space before it (if there's content)
182        if !remaining.is_empty() {
183            result.push(' ');
184            result.push_str(remaining);
185        }
186
187        result
188    }
189
190    /// Check if there are blank lines between blockquotes
191    pub fn has_blank_between_blockquotes(content: &str) -> Vec<usize> {
192        let lines: Vec<&str> = content.lines().collect();
193        let mut blank_line_numbers = Vec::new();
194
195        for i in 1..lines.len() {
196            let prev_line = lines[i - 1];
197            let current_line = lines[i];
198
199            if Self::is_blockquote(prev_line) && Self::is_blockquote(current_line) {
200                // Check if the current blockquote line is empty
201                if Self::is_empty_blockquote(current_line) {
202                    blank_line_numbers.push(i + 1); // 1-indexed line number
203                }
204            }
205        }
206
207        blank_line_numbers
208    }
209
210    /// Fix blank lines between blockquotes by removing them
211    pub fn fix_blank_between_blockquotes(content: &str) -> String {
212        let lines: Vec<&str> = content.lines().collect();
213        let mut result = Vec::new();
214        let mut skip_next = false;
215
216        for i in 0..lines.len() {
217            if skip_next {
218                skip_next = false;
219                continue;
220            }
221
222            let current_line = lines[i];
223
224            if i > 0 && i < lines.len() - 1 {
225                let prev_line = lines[i - 1];
226                let next_line = lines[i + 1];
227
228                if Self::is_blockquote(prev_line) && Self::is_blockquote(next_line) && current_line.trim().is_empty() {
229                    // Skip this blank line between blockquotes
230                    skip_next = false;
231                    continue;
232                }
233            }
234
235            result.push(current_line);
236        }
237
238        result.join("\n")
239    }
240
241    /// Get the starting column of the blockquote marker '>'
242    pub fn get_blockquote_start_col(line: &str) -> usize {
243        let indent_length = Self::extract_indentation(line).len();
244        indent_length + 1 // 1-indexed column for the '>' character
245    }
246
247    /// Get the content after the blockquote marker
248    pub fn get_blockquote_content(line: &str) -> String {
249        Self::extract_content(line)
250    }
251}
252
// Unit tests for `BlockquoteUtils`: one test per public helper, plus Unicode and
// edge-case coverage.
#[cfg(test)]
mod tests {
    use super::*;

    // `is_blockquote` accepts any line whose first non-whitespace character is `>`.
    #[test]
    fn test_is_blockquote() {
        // Valid blockquotes
        assert!(BlockquoteUtils::is_blockquote("> Quote"));
        assert!(BlockquoteUtils::is_blockquote(">Quote"));
        assert!(BlockquoteUtils::is_blockquote("  > Indented quote"));
        assert!(BlockquoteUtils::is_blockquote(">> Nested quote"));
        assert!(BlockquoteUtils::is_blockquote(">"));
        assert!(BlockquoteUtils::is_blockquote("> "));

        // Not blockquotes
        assert!(!BlockquoteUtils::is_blockquote(""));
        assert!(!BlockquoteUtils::is_blockquote("Plain text"));
        assert!(!BlockquoteUtils::is_blockquote("a > b"));
        assert!(!BlockquoteUtils::is_blockquote("# > Not a quote"));
    }

    // Marker-only lines, with or without trailing whitespace, count as empty.
    #[test]
    fn test_is_empty_blockquote() {
        // Empty blockquotes
        assert!(BlockquoteUtils::is_empty_blockquote(">"));
        assert!(BlockquoteUtils::is_empty_blockquote("> "));
        assert!(BlockquoteUtils::is_empty_blockquote(">   "));
        assert!(BlockquoteUtils::is_empty_blockquote(">>"));
        assert!(BlockquoteUtils::is_empty_blockquote("  >  "));

        // Not empty blockquotes
        assert!(!BlockquoteUtils::is_empty_blockquote("> Content"));
        assert!(!BlockquoteUtils::is_empty_blockquote(">Text"));
        assert!(!BlockquoteUtils::is_empty_blockquote(""));
        assert!(!BlockquoteUtils::is_empty_blockquote("Plain text"));
    }

    // Only bare markers without a trailing space are flagged for MD028.
    #[test]
    fn test_needs_md028_fix() {
        // Needs fixing (no space after >)
        assert!(BlockquoteUtils::needs_md028_fix(">"));
        assert!(BlockquoteUtils::needs_md028_fix(">>"));
        assert!(BlockquoteUtils::needs_md028_fix("  >"));

        // Does not need fixing
        assert!(!BlockquoteUtils::needs_md028_fix("> "));
        assert!(!BlockquoteUtils::needs_md028_fix("> Content"));
        assert!(!BlockquoteUtils::needs_md028_fix(""));
        assert!(!BlockquoteUtils::needs_md028_fix("Plain text"));
    }

    // A marker immediately followed by non-whitespace content is detected.
    #[test]
    fn test_has_no_space_after_marker() {
        assert!(BlockquoteUtils::has_no_space_after_marker(">Content"));
        assert!(BlockquoteUtils::has_no_space_after_marker("  >Text"));

        assert!(!BlockquoteUtils::has_no_space_after_marker("> Content"));
        assert!(!BlockquoteUtils::has_no_space_after_marker(">  Content"));
        assert!(!BlockquoteUtils::has_no_space_after_marker(">"));
        assert!(!BlockquoteUtils::has_no_space_after_marker(""));
    }

    // Two or more literal spaces after the marker are detected.
    #[test]
    fn test_has_multiple_spaces_after_marker() {
        assert!(BlockquoteUtils::has_multiple_spaces_after_marker(">  Content"));
        assert!(BlockquoteUtils::has_multiple_spaces_after_marker(">   Text"));
        assert!(BlockquoteUtils::has_multiple_spaces_after_marker("  >    Quote"));

        assert!(!BlockquoteUtils::has_multiple_spaces_after_marker("> Content"));
        assert!(!BlockquoteUtils::has_multiple_spaces_after_marker(">Content"));
        assert!(!BlockquoteUtils::has_multiple_spaces_after_marker(">"));
        assert!(!BlockquoteUtils::has_multiple_spaces_after_marker(""));
    }

    // Nested quotes are recognised whether or not the markers are space-separated.
    #[test]
    fn test_is_nested_blockquote() {
        assert!(BlockquoteUtils::is_nested_blockquote(">> Nested"));
        assert!(BlockquoteUtils::is_nested_blockquote(">>> Triple nested"));
        assert!(BlockquoteUtils::is_nested_blockquote("> > Spaced nested"));
        assert!(BlockquoteUtils::is_nested_blockquote("  > >> Indented nested"));

        assert!(!BlockquoteUtils::is_nested_blockquote("> Single level"));
        assert!(!BlockquoteUtils::is_nested_blockquote(">Single"));
        assert!(!BlockquoteUtils::is_nested_blockquote(""));
        assert!(!BlockquoteUtils::is_nested_blockquote("Plain text"));
    }

    // Nesting level is 0 for non-blockquotes and the marker count otherwise.
    #[test]
    fn test_get_nesting_level() {
        assert_eq!(BlockquoteUtils::get_nesting_level(""), 0);
        assert_eq!(BlockquoteUtils::get_nesting_level("Plain text"), 0);
        assert_eq!(BlockquoteUtils::get_nesting_level("> Quote"), 1);
        assert_eq!(BlockquoteUtils::get_nesting_level(">> Nested"), 2);
        assert_eq!(BlockquoteUtils::get_nesting_level(">>> Triple"), 3);
        assert_eq!(BlockquoteUtils::get_nesting_level("  > Indented"), 1);
        assert_eq!(BlockquoteUtils::get_nesting_level("  >> Indented nested"), 2);
        assert_eq!(BlockquoteUtils::get_nesting_level(">>>> Four levels"), 4);
    }

    // Content extraction drops the marker and at most one following whitespace character.
    #[test]
    fn test_extract_content() {
        assert_eq!(BlockquoteUtils::extract_content("> Content"), "Content");
        assert_eq!(BlockquoteUtils::extract_content(">Content"), "Content");
        assert_eq!(BlockquoteUtils::extract_content(">  Content"), " Content");
        assert_eq!(BlockquoteUtils::extract_content("> "), "");
        assert_eq!(BlockquoteUtils::extract_content(">"), "");
        assert_eq!(
            BlockquoteUtils::extract_content("  > Indented content"),
            "Indented content"
        );
        assert_eq!(BlockquoteUtils::extract_content(""), "");
        assert_eq!(BlockquoteUtils::extract_content("Plain text"), "");
    }

    // Indentation is the whitespace before the marker; empty for non-blockquotes.
    #[test]
    fn test_extract_indentation() {
        assert_eq!(BlockquoteUtils::extract_indentation("> Content"), "");
        assert_eq!(BlockquoteUtils::extract_indentation("  > Content"), "  ");
        assert_eq!(BlockquoteUtils::extract_indentation("    > Content"), "    ");
        assert_eq!(BlockquoteUtils::extract_indentation("\t> Content"), "\t");
        assert_eq!(BlockquoteUtils::extract_indentation(">Content"), "");
        assert_eq!(BlockquoteUtils::extract_indentation(""), "");
        assert_eq!(BlockquoteUtils::extract_indentation("Plain text"), "");
    }

    // Spacing after the first marker is normalised to exactly one space.
    #[test]
    fn test_fix_blockquote_spacing() {
        // Fix missing space
        assert_eq!(BlockquoteUtils::fix_blockquote_spacing(">Content"), "> Content");
        assert_eq!(BlockquoteUtils::fix_blockquote_spacing("  >Text"), "  > Text");

        // Fix multiple spaces
        assert_eq!(BlockquoteUtils::fix_blockquote_spacing(">  Content"), "> Content");
        assert_eq!(BlockquoteUtils::fix_blockquote_spacing(">   Text"), "> Text");

        // Already correct
        assert_eq!(BlockquoteUtils::fix_blockquote_spacing("> Content"), "> Content");
        assert_eq!(BlockquoteUtils::fix_blockquote_spacing("  > Text"), "  > Text");

        // Not blockquotes
        assert_eq!(BlockquoteUtils::fix_blockquote_spacing(""), "");
        assert_eq!(BlockquoteUtils::fix_blockquote_spacing("Plain text"), "Plain text");
    }

    // Every nesting level ends up as `>` followed by a single space.
    #[test]
    fn test_fix_nested_blockquote_spacing() {
        // Fix missing spaces between markers
        assert_eq!(
            BlockquoteUtils::fix_nested_blockquote_spacing(">>Content"),
            "> > Content"
        );
        assert_eq!(BlockquoteUtils::fix_nested_blockquote_spacing(">>>Text"), "> > > Text");

        // Fix inconsistent spacing
        assert_eq!(
            BlockquoteUtils::fix_nested_blockquote_spacing("> >Content"),
            "> > Content"
        );
        assert_eq!(
            BlockquoteUtils::fix_nested_blockquote_spacing(">  >Content"),
            "> > Content"
        );

        // Already correct
        assert_eq!(
            BlockquoteUtils::fix_nested_blockquote_spacing("> > Content"),
            "> > Content"
        );
        assert_eq!(
            BlockquoteUtils::fix_nested_blockquote_spacing("> > > Text"),
            "> > > Text"
        );

        // Single level
        assert_eq!(BlockquoteUtils::fix_nested_blockquote_spacing("> Content"), "> Content");
        assert_eq!(BlockquoteUtils::fix_nested_blockquote_spacing(">Content"), "> Content");

        // Empty blockquotes (no trailing space is appended when there is no content)
        assert_eq!(BlockquoteUtils::fix_nested_blockquote_spacing(">"), ">");
        assert_eq!(BlockquoteUtils::fix_nested_blockquote_spacing(">>"), "> >");
        assert_eq!(BlockquoteUtils::fix_nested_blockquote_spacing(">>>"), "> > >");

        // With indentation
        assert_eq!(
            BlockquoteUtils::fix_nested_blockquote_spacing("  >>Content"),
            "  > > Content"
        );
        assert_eq!(
            BlockquoteUtils::fix_nested_blockquote_spacing("\t> > Content"),
            "\t> > Content"
        );

        // Not blockquotes
        assert_eq!(BlockquoteUtils::fix_nested_blockquote_spacing(""), "");
        assert_eq!(
            BlockquoteUtils::fix_nested_blockquote_spacing("Plain text"),
            "Plain text"
        );
    }

    // Reported positions are 1-indexed line numbers of empty blockquote lines that
    // follow another blockquote line.
    #[test]
    fn test_has_blank_between_blockquotes() {
        let content1 = "> Quote 1\n> Quote 2";
        assert_eq!(
            BlockquoteUtils::has_blank_between_blockquotes(content1),
            Vec::<usize>::new()
        );

        let content2 = "> Quote 1\n>\n> Quote 2";
        assert_eq!(BlockquoteUtils::has_blank_between_blockquotes(content2), vec![2]);

        let content3 = "> Quote 1\n> \n> Quote 2";
        assert_eq!(BlockquoteUtils::has_blank_between_blockquotes(content3), vec![2]);

        let content4 = "> Line 1\n>\n>\n> Line 4";
        assert_eq!(BlockquoteUtils::has_blank_between_blockquotes(content4), vec![2, 3]);

        let content5 = "Plain text\n> Quote";
        assert_eq!(
            BlockquoteUtils::has_blank_between_blockquotes(content5),
            Vec::<usize>::new()
        );
    }

    // A blank line is removed only when both of its immediate neighbours are blockquotes.
    #[test]
    fn test_fix_blank_between_blockquotes() {
        let content1 = "> Quote 1\n> Quote 2";
        assert_eq!(
            BlockquoteUtils::fix_blank_between_blockquotes(content1),
            "> Quote 1\n> Quote 2"
        );

        let content2 = "> Quote 1\n\n> Quote 2";
        assert_eq!(
            BlockquoteUtils::fix_blank_between_blockquotes(content2),
            "> Quote 1\n> Quote 2"
        );

        // Consecutive blank lines are left untouched: neither blank line has a
        // blockquote on both sides, so the single-blank-line rule does not apply.
        let content3 = "> Quote 1\n\n\n> Quote 2";
        assert_eq!(
            BlockquoteUtils::fix_blank_between_blockquotes(content3),
            "> Quote 1\n\n\n> Quote 2"
        );

        let content4 = "Text\n\n> Quote";
        assert_eq!(
            BlockquoteUtils::fix_blank_between_blockquotes(content4),
            "Text\n\n> Quote"
        );
    }

    // Columns are 1-indexed and offset by the indentation length.
    #[test]
    fn test_get_blockquote_start_col() {
        assert_eq!(BlockquoteUtils::get_blockquote_start_col("> Content"), 1);
        assert_eq!(BlockquoteUtils::get_blockquote_start_col("  > Content"), 3);
        assert_eq!(BlockquoteUtils::get_blockquote_start_col("    > Content"), 5);
        assert_eq!(BlockquoteUtils::get_blockquote_start_col(">Content"), 1);
    }

    // `get_blockquote_content` mirrors `extract_content`.
    #[test]
    fn test_get_blockquote_content() {
        assert_eq!(BlockquoteUtils::get_blockquote_content("> Content"), "Content");
        assert_eq!(BlockquoteUtils::get_blockquote_content(">Content"), "Content");
        assert_eq!(BlockquoteUtils::get_blockquote_content("> "), "");
        assert_eq!(BlockquoteUtils::get_blockquote_content(""), "");
    }

    // Non-ASCII content must pass through the helpers unchanged.
    #[test]
    fn test_unicode_content() {
        assert!(BlockquoteUtils::is_blockquote("> 你好"));
        assert_eq!(BlockquoteUtils::extract_content("> émphasis"), "émphasis");
        assert_eq!(BlockquoteUtils::fix_blockquote_spacing(">🌟"), "> 🌟");
        assert_eq!(BlockquoteUtils::get_nesting_level(">> 日本語"), 2);
    }

    // Degenerate inputs: empty strings, mid-line `>`, and tab / mixed indentation.
    #[test]
    fn test_edge_cases() {
        // Empty string
        assert!(!BlockquoteUtils::is_blockquote(""));
        assert_eq!(BlockquoteUtils::extract_content(""), "");
        assert_eq!(BlockquoteUtils::get_nesting_level(""), 0);

        // Just ">" character in middle of line
        assert!(!BlockquoteUtils::is_blockquote("a > b"));

        // Tabs
        assert!(BlockquoteUtils::is_blockquote("\t> Tab indent"));
        assert_eq!(BlockquoteUtils::extract_indentation("\t> Content"), "\t");

        // Mixed indentation
        assert_eq!(BlockquoteUtils::fix_blockquote_spacing(" \t>Content"), " \t> Content");
    }
}