rumdl_lib/rules/
blockquote_utils.rs

1use regex::Regex;
2use std::sync::LazyLock;
3
4// Pattern to match blockquote lines
5static BLOCKQUOTE_LINE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)>\s?(.*)$").unwrap());
6
7// Pattern to match empty blockquote lines (> with no space or content)
8static EMPTY_BLOCKQUOTE_LINE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)>$").unwrap());
9
10// Pattern to match nested empty blockquote lines (>> with no space or content)
11static NESTED_EMPTY_BLOCKQUOTE_LINE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)>+$").unwrap());
12
13// Pattern to match blockquote lines with no space after >
14static BLOCKQUOTE_NO_SPACE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)>([^\s].*)$").unwrap());
15
16// Pattern to match blockquote lines with multiple spaces after >
17static BLOCKQUOTE_MULTIPLE_SPACES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)>(\s{2,})(.*)$").unwrap());
18
19// Pattern to match nested blockquotes
20static NESTED_BLOCKQUOTE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)>((?:\s*>)+)(\s*.*)$").unwrap());
21
22/// Utility functions for detecting and handling blockquotes in Markdown documents
23pub struct BlockquoteUtils;
24
25impl BlockquoteUtils {
26    /// Check if a line is a blockquote
27    pub fn is_blockquote(line: &str) -> bool {
28        BLOCKQUOTE_LINE.is_match(line)
29    }
30
31    /// Check if a line is an empty blockquote (> with no content)
32    pub fn is_empty_blockquote(line: &str) -> bool {
33        // Check for simple empty blockquote (> with no space)
34        if EMPTY_BLOCKQUOTE_LINE.is_match(line) {
35            return true;
36        }
37
38        // Check for nested empty blockquote (>> with no space)
39        if NESTED_EMPTY_BLOCKQUOTE_LINE.is_match(line) {
40            return true;
41        }
42
43        // Check if it's a blockquote with only whitespace content
44        if BLOCKQUOTE_LINE.is_match(line) {
45            let content = Self::extract_content(line);
46            return content.trim().is_empty();
47        }
48
49        false
50    }
51
52    /// Check if an empty blockquote line needs fixing for MD028
53    /// This is more restrictive than is_empty_blockquote - only flags lines that actually need fixing
54    pub fn needs_md028_fix(line: &str) -> bool {
55        // Only flag blockquotes that have NO space after the > marker
56        // Lines with a single space ("> ") are already correct and don't need fixing
57        if EMPTY_BLOCKQUOTE_LINE.is_match(line) {
58            return true;
59        }
60
61        if NESTED_EMPTY_BLOCKQUOTE_LINE.is_match(line) {
62            return true;
63        }
64
65        false
66    }
67
68    /// Check if a blockquote line has no space after the > marker
69    pub fn has_no_space_after_marker(line: &str) -> bool {
70        BLOCKQUOTE_NO_SPACE.is_match(line)
71    }
72
73    /// Check if a blockquote line has multiple spaces after the > marker
74    pub fn has_multiple_spaces_after_marker(line: &str) -> bool {
75        BLOCKQUOTE_MULTIPLE_SPACES.is_match(line)
76    }
77
78    /// Check if a line is a nested blockquote
79    pub fn is_nested_blockquote(line: &str) -> bool {
80        NESTED_BLOCKQUOTE.is_match(line)
81    }
82
83    /// Get the nesting level of a blockquote line
84    pub fn get_nesting_level(line: &str) -> usize {
85        if !Self::is_blockquote(line) {
86            return 0;
87        }
88
89        // Count the number of '>' characters at the beginning of the line
90        let trimmed = line.trim_start();
91        let mut count = 0;
92
93        for c in trimmed.chars() {
94            if c == '>' {
95                count += 1;
96            } else {
97                break;
98            }
99        }
100
101        count
102    }
103
104    /// Extract the content of a blockquote line
105    pub fn extract_content(line: &str) -> String {
106        if let Some(captures) = BLOCKQUOTE_LINE.captures(line)
107            && let Some(content) = captures.get(2)
108        {
109            return content.as_str().to_string();
110        }
111
112        String::new()
113    }
114
115    /// Extract the indentation of a blockquote line
116    pub fn extract_indentation(line: &str) -> String {
117        if let Some(captures) = BLOCKQUOTE_LINE.captures(line)
118            && let Some(indent) = captures.get(1)
119        {
120            return indent.as_str().to_string();
121        }
122
123        String::new()
124    }
125
126    /// Fix a blockquote line to ensure it has exactly one space after the > marker
127    pub fn fix_blockquote_spacing(line: &str) -> String {
128        if !Self::is_blockquote(line) {
129            return line.to_string();
130        }
131
132        if Self::has_no_space_after_marker(line) {
133            if let Some(captures) = BLOCKQUOTE_NO_SPACE.captures(line) {
134                let indent = captures.get(1).map_or("", |m| m.as_str());
135                let content = captures.get(2).map_or("", |m| m.as_str());
136                return format!("{indent}> {content}");
137            }
138        } else if Self::has_multiple_spaces_after_marker(line)
139            && let Some(captures) = BLOCKQUOTE_MULTIPLE_SPACES.captures(line)
140        {
141            let indent = captures.get(1).map_or("", |m| m.as_str());
142            let content = captures.get(3).map_or("", |m| m.as_str());
143            return format!("{indent}> {content}");
144        }
145
146        line.to_string()
147    }
148
149    /// Fix nested blockquotes to ensure each level has exactly one space after the > marker
150    pub fn fix_nested_blockquote_spacing(line: &str) -> String {
151        if !Self::is_blockquote(line) {
152            return line.to_string();
153        }
154
155        let trimmed = line.trim_start();
156        let indent = &line[..line.len() - trimmed.len()];
157
158        // Parse through the blockquote markers
159        let mut remaining = trimmed;
160        let mut markers = Vec::new();
161
162        while remaining.starts_with('>') {
163            markers.push('>');
164            remaining = &remaining[1..];
165
166            // Skip any spaces between markers
167            remaining = remaining.trim_start();
168        }
169
170        // Build the result with proper spacing
171        let mut result = indent.to_string();
172        for (i, _) in markers.iter().enumerate() {
173            if i > 0 {
174                result.push(' ');
175            }
176            result.push('>');
177        }
178
179        // Add the content with a single space before it (if there's content)
180        if !remaining.is_empty() {
181            result.push(' ');
182            result.push_str(remaining);
183        }
184
185        result
186    }
187
188    /// Check if there are blank lines between blockquotes
189    pub fn has_blank_between_blockquotes(content: &str) -> Vec<usize> {
190        let lines: Vec<&str> = content.lines().collect();
191        let mut blank_line_numbers = Vec::new();
192
193        for i in 1..lines.len() {
194            let prev_line = lines[i - 1];
195            let current_line = lines[i];
196
197            if Self::is_blockquote(prev_line) && Self::is_blockquote(current_line) {
198                // Check if the current blockquote line is empty
199                if Self::is_empty_blockquote(current_line) {
200                    blank_line_numbers.push(i + 1); // 1-indexed line number
201                }
202            }
203        }
204
205        blank_line_numbers
206    }
207
208    /// Fix blank lines between blockquotes by removing them
209    pub fn fix_blank_between_blockquotes(content: &str) -> String {
210        let lines: Vec<&str> = content.lines().collect();
211        let mut result = Vec::new();
212        let mut skip_next = false;
213
214        for i in 0..lines.len() {
215            if skip_next {
216                skip_next = false;
217                continue;
218            }
219
220            let current_line = lines[i];
221
222            if i > 0 && i < lines.len() - 1 {
223                let prev_line = lines[i - 1];
224                let next_line = lines[i + 1];
225
226                if Self::is_blockquote(prev_line) && Self::is_blockquote(next_line) && current_line.trim().is_empty() {
227                    // Skip this blank line between blockquotes
228                    skip_next = false;
229                    continue;
230                }
231            }
232
233            result.push(current_line);
234        }
235
236        result.join("\n")
237    }
238
239    /// Get the starting column of the blockquote marker '>'
240    pub fn get_blockquote_start_col(line: &str) -> usize {
241        let indent_length = Self::extract_indentation(line).len();
242        indent_length + 1 // 1-indexed column for the '>' character
243    }
244
245    /// Get the content after the blockquote marker
246    pub fn get_blockquote_content(line: &str) -> String {
247        Self::extract_content(line)
248    }
249}
250
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `pred` yields `expected` for every line in `lines`.
    fn check_all(pred: fn(&str) -> bool, expected: bool, lines: &[&str]) {
        for &line in lines {
            assert_eq!(pred(line), expected, "unexpected result for {line:?}");
        }
    }

    /// Assert that `transform` maps each input to its paired expected output.
    fn check_map(transform: fn(&str) -> String, cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(transform(input), expected, "input: {input:?}");
        }
    }

    /// Assert that `measure` returns the paired number for each input.
    fn check_count(measure: fn(&str) -> usize, cases: &[(&str, usize)]) {
        for &(input, expected) in cases {
            assert_eq!(measure(input), expected, "input: {input:?}");
        }
    }

    #[test]
    fn test_is_blockquote() {
        // Valid blockquotes
        check_all(
            BlockquoteUtils::is_blockquote,
            true,
            &["> Quote", ">Quote", "  > Indented quote", ">> Nested quote", ">", "> "],
        );

        // Not blockquotes
        check_all(
            BlockquoteUtils::is_blockquote,
            false,
            &["", "Plain text", "a > b", "# > Not a quote"],
        );
    }

    #[test]
    fn test_is_empty_blockquote() {
        // Empty blockquotes
        check_all(
            BlockquoteUtils::is_empty_blockquote,
            true,
            &[">", "> ", ">   ", ">>", "  >  "],
        );

        // Not empty blockquotes
        check_all(
            BlockquoteUtils::is_empty_blockquote,
            false,
            &["> Content", ">Text", "", "Plain text"],
        );
    }

    #[test]
    fn test_needs_md028_fix() {
        // Needs fixing (no space after >)
        check_all(BlockquoteUtils::needs_md028_fix, true, &[">", ">>", "  >"]);

        // Does not need fixing
        check_all(
            BlockquoteUtils::needs_md028_fix,
            false,
            &["> ", "> Content", "", "Plain text"],
        );
    }

    #[test]
    fn test_has_no_space_after_marker() {
        check_all(BlockquoteUtils::has_no_space_after_marker, true, &[">Content", "  >Text"]);

        check_all(
            BlockquoteUtils::has_no_space_after_marker,
            false,
            &["> Content", ">  Content", ">", ""],
        );
    }

    #[test]
    fn test_has_multiple_spaces_after_marker() {
        check_all(
            BlockquoteUtils::has_multiple_spaces_after_marker,
            true,
            &[">  Content", ">   Text", "  >    Quote"],
        );

        check_all(
            BlockquoteUtils::has_multiple_spaces_after_marker,
            false,
            &["> Content", ">Content", ">", ""],
        );
    }

    #[test]
    fn test_is_nested_blockquote() {
        check_all(
            BlockquoteUtils::is_nested_blockquote,
            true,
            &[
                ">> Nested",
                ">>> Triple nested",
                "> > Spaced nested",
                "  > >> Indented nested",
            ],
        );

        check_all(
            BlockquoteUtils::is_nested_blockquote,
            false,
            &["> Single level", ">Single", "", "Plain text"],
        );
    }

    #[test]
    fn test_get_nesting_level() {
        check_count(
            BlockquoteUtils::get_nesting_level,
            &[
                ("", 0),
                ("Plain text", 0),
                ("> Quote", 1),
                (">> Nested", 2),
                (">>> Triple", 3),
                ("  > Indented", 1),
                ("  >> Indented nested", 2),
                (">>>> Four levels", 4),
            ],
        );
    }

    #[test]
    fn test_extract_content() {
        check_map(
            BlockquoteUtils::extract_content,
            &[
                ("> Content", "Content"),
                (">Content", "Content"),
                (">  Content", " Content"),
                ("> ", ""),
                (">", ""),
                ("  > Indented content", "Indented content"),
                ("", ""),
                ("Plain text", ""),
            ],
        );
    }

    #[test]
    fn test_extract_indentation() {
        check_map(
            BlockquoteUtils::extract_indentation,
            &[
                ("> Content", ""),
                ("  > Content", "  "),
                ("    > Content", "    "),
                ("\t> Content", "\t"),
                (">Content", ""),
                ("", ""),
                ("Plain text", ""),
            ],
        );
    }

    #[test]
    fn test_fix_blockquote_spacing() {
        check_map(
            BlockquoteUtils::fix_blockquote_spacing,
            &[
                // Fix missing space
                (">Content", "> Content"),
                ("  >Text", "  > Text"),
                // Fix multiple spaces
                (">  Content", "> Content"),
                (">   Text", "> Text"),
                // Already correct
                ("> Content", "> Content"),
                ("  > Text", "  > Text"),
                // Not blockquotes
                ("", ""),
                ("Plain text", "Plain text"),
            ],
        );
    }

    #[test]
    fn test_fix_nested_blockquote_spacing() {
        check_map(
            BlockquoteUtils::fix_nested_blockquote_spacing,
            &[
                // Fix missing spaces between markers
                (">>Content", "> > Content"),
                (">>>Text", "> > > Text"),
                // Fix inconsistent spacing
                ("> >Content", "> > Content"),
                (">  >Content", "> > Content"),
                // Already correct
                ("> > Content", "> > Content"),
                ("> > > Text", "> > > Text"),
                // Single level
                ("> Content", "> Content"),
                (">Content", "> Content"),
                // Empty blockquotes
                (">", ">"),
                (">>", "> >"),
                (">>>", "> > >"),
                // With indentation
                ("  >>Content", "  > > Content"),
                ("\t> > Content", "\t> > Content"),
                // Not blockquotes
                ("", ""),
                ("Plain text", "Plain text"),
            ],
        );
    }

    #[test]
    fn test_has_blank_between_blockquotes() {
        let cases: [(&str, Vec<usize>); 5] = [
            ("> Quote 1\n> Quote 2", vec![]),
            ("> Quote 1\n>\n> Quote 2", vec![2]),
            ("> Quote 1\n> \n> Quote 2", vec![2]),
            ("> Line 1\n>\n>\n> Line 4", vec![2, 3]),
            ("Plain text\n> Quote", vec![]),
        ];

        for (content, expected) in cases {
            assert_eq!(
                BlockquoteUtils::has_blank_between_blockquotes(content),
                expected,
                "content: {content:?}"
            );
        }
    }

    #[test]
    fn test_fix_blank_between_blockquotes() {
        check_map(
            BlockquoteUtils::fix_blank_between_blockquotes,
            &[
                ("> Quote 1\n> Quote 2", "> Quote 1\n> Quote 2"),
                ("> Quote 1\n\n> Quote 2", "> Quote 1\n> Quote 2"),
                // Multiple blank lines - the function keeps them all except when between blockquotes
                ("> Quote 1\n\n\n> Quote 2", "> Quote 1\n\n\n> Quote 2"),
                ("Text\n\n> Quote", "Text\n\n> Quote"),
            ],
        );
    }

    #[test]
    fn test_get_blockquote_start_col() {
        check_count(
            BlockquoteUtils::get_blockquote_start_col,
            &[
                ("> Content", 1),
                ("  > Content", 3),
                ("    > Content", 5),
                (">Content", 1),
            ],
        );
    }

    #[test]
    fn test_get_blockquote_content() {
        check_map(
            BlockquoteUtils::get_blockquote_content,
            &[
                ("> Content", "Content"),
                (">Content", "Content"),
                ("> ", ""),
                ("", ""),
            ],
        );
    }

    #[test]
    fn test_unicode_content() {
        check_all(BlockquoteUtils::is_blockquote, true, &["> 你好"]);
        check_map(BlockquoteUtils::extract_content, &[("> émphasis", "émphasis")]);
        check_map(BlockquoteUtils::fix_blockquote_spacing, &[(">🌟", "> 🌟")]);
        check_count(BlockquoteUtils::get_nesting_level, &[(">> 日本語", 2)]);
    }

    #[test]
    fn test_edge_cases() {
        // Empty string
        check_all(BlockquoteUtils::is_blockquote, false, &[""]);
        check_map(BlockquoteUtils::extract_content, &[("", "")]);
        check_count(BlockquoteUtils::get_nesting_level, &[("", 0)]);

        // Just ">" character in middle of line
        check_all(BlockquoteUtils::is_blockquote, false, &["a > b"]);

        // Tabs
        check_all(BlockquoteUtils::is_blockquote, true, &["\t> Tab indent"]);
        check_map(BlockquoteUtils::extract_indentation, &[("\t> Content", "\t")]);

        // Mixed indentation
        check_map(BlockquoteUtils::fix_blockquote_spacing, &[(" \t>Content", " \t> Content")]);
    }
}