marco_core/parser/blocks/
cm_blockquote_parser.rs

1//! Blockquote parser - converts grammar output to AST nodes
2//!
3//! Handles conversion of blockquotes (> prefixed lines) from grammar layer to parser AST,
4//! including recursive block parsing and lazy continuation line handling.
5
6use super::shared::{opt_span, GrammarSpan};
7use crate::parser::ast::{Document, Node, NodeKind};
8
9/// Parse a blockquote into an AST node with recursive block parsing.
10///
11/// # Arguments
12/// * `content` - The blockquote content from grammar layer (includes > markers)
13/// * `depth` - Current recursion depth for safety
14/// * `parse_blocks_fn` - Function to recursively parse nested blocks
15///
16/// # Returns
17/// A Node with NodeKind::Blockquote containing parsed block children
18///
19/// # Processing
20/// The function:
21/// 1. Extracts content by removing > markers from each line
22/// 2. Handles lazy continuation lines (lines without > markers)
23/// 3. Prevents setext heading underlines in lazy continuation (CommonMark spec)
24/// 4. Recursively parses the cleaned content as block elements
25///
26/// # Example
27/// ```ignore
28/// let content = GrammarSpan::new("> Line 1\n> Line 2");
29/// let node = parse_blockquote(content, 0, parse_blocks_internal);
30/// assert!(matches!(node.kind, NodeKind::Blockquote));
31/// ```
32pub fn parse_blockquote<F>(
33    content: GrammarSpan,
34    depth: usize,
35    parse_blocks_fn: F,
36) -> Result<Node, Box<dyn std::error::Error>>
37where
38    F: FnOnce(&str, usize) -> Result<Document, Box<dyn std::error::Error>>,
39{
40    let span = opt_span(content);
41
42    // Extract the block quote content (remove leading > markers)
43    // CRITICAL: Per CommonMark spec, "The setext heading underline cannot be a lazy continuation line"
44    // So we need to track which lines had > markers and prevent setext matching on lazy lines
45    let content_str = content.fragment();
46    let mut cleaned_content = String::with_capacity(content_str.len());
47
48    for line in content_str.split_inclusive('\n') {
49        let line_trimmed_start = line.trim_start();
50        let has_marker = line_trimmed_start.starts_with('>');
51
52        if has_marker {
53            // Line has > marker - remove it and optional space
54            let after_marker = line_trimmed_start.strip_prefix('>').unwrap();
55            let cleaned = after_marker.strip_prefix(' ').unwrap_or(after_marker);
56            cleaned_content.push_str(cleaned);
57        } else {
58            // Lazy continuation line - no > marker
59            // Check if this looks like a setext underline (all === or all ---)
60            let line_content = line_trimmed_start.trim_end();
61            let line_sans_spaces = line_content.replace([' ', '\t'], "");
62
63            let is_underline = !line_sans_spaces.is_empty()
64                && (line_sans_spaces.chars().all(|c| c == '=')
65                    || line_sans_spaces.chars().all(|c| c == '-'));
66
67            if is_underline {
68                // This lazy continuation looks like setext underline
69                // Per CommonMark: "underline cannot be lazy continuation"
70                // Escape the first character to prevent setext parsing
71                if let Some(first_char) = line_content.chars().next() {
72                    if first_char == '=' || first_char == '-' {
73                        // Add backslash escape before first underline character
74                        cleaned_content.push('\\');
75                    }
76                }
77            }
78
79            // Add the line as-is (or with escape prepended)
80            cleaned_content.push_str(line);
81        }
82    }
83
84    // Recursively parse the block quote content
85    let inner_doc = parse_blocks_fn(&cleaned_content, depth + 1)?;
86
87    Ok(Node {
88        kind: NodeKind::Blockquote,
89        span,
90        children: inner_doc.children, // Use parsed children
91    })
92}
93
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::ast::NodeKind;

    /// Stand-in for the recursive block parser.
    ///
    /// Wraps any non-empty input in a single `NodeKind::Text` child and returns
    /// an empty document for empty input; the depth argument is ignored.
    fn mock_parse_blocks(
        input: &str,
        _depth: usize,
    ) -> Result<Document, Box<dyn std::error::Error>> {
        let mut doc = Document::new();
        if input.is_empty() {
            return Ok(doc);
        }

        let text_node = Node {
            kind: NodeKind::Text(input.to_string()),
            span: None,
            children: Vec::new(),
        };
        doc.children.push(text_node);
        Ok(doc)
    }

    /// Run `parse_blockquote` on `source` at depth 0 using the mock block parser,
    /// panicking if parsing fails.
    fn parse_with_mock(source: &str) -> Node {
        parse_blockquote(GrammarSpan::new(source), 0, mock_parse_blocks).unwrap()
    }

    #[test]
    fn smoke_test_parse_blockquote_basic() {
        let quote = parse_with_mock("> Line 1\n> Line 2");

        assert!(matches!(quote.kind, NodeKind::Blockquote));
        assert!(!quote.children.is_empty());
    }

    #[test]
    fn smoke_test_blockquote_lazy_continuation() {
        // Second line carries no `>` marker.
        let quote = parse_with_mock("> Line 1\nLine 2 (lazy)");

        assert!(matches!(quote.kind, NodeKind::Blockquote));
    }

    #[test]
    fn smoke_test_blockquote_span() {
        let quote = parse_with_mock("> Test");

        assert!(quote.span.is_some());
    }

    #[test]
    fn smoke_test_blockquote_empty() {
        // A bare marker with no content must still produce a blockquote node.
        let quote = parse_with_mock(">");

        assert!(matches!(quote.kind, NodeKind::Blockquote));
    }

    #[test]
    fn smoke_test_blockquote_nested_content() {
        let quote = parse_with_mock("> # Heading\n> Paragraph");

        assert!(matches!(quote.kind, NodeKind::Blockquote));
        assert!(!quote.children.is_empty());
    }
}
marco_core/parser/blocks/cm_blockquote_parser.rs

marco_core/parser/blocks/
cm_blockquote_parser.rs