Skip to main content

marco_core/parser/blocks/
cm_paragraph_parser.rs

1//! Paragraph parser - converts grammar output to AST nodes with inline parsing
2//!
3//! Handles conversion of paragraphs from grammar layer to parser AST,
4//! including recursive inline element parsing for emphasis, links, etc.
5
6use super::shared::{opt_span, GrammarSpan};
7use crate::parser::ast::{Node, NodeKind};
8
9use nom::Input;
10
11/// Parse a paragraph into an AST node with inline elements.
12///
13/// # Arguments
14/// * `content` - The paragraph content from grammar layer
15///
16/// # Returns
17/// A Node with NodeKind::Paragraph containing parsed inline children
18///
19/// # Processing
20/// The function:
21/// 1. Converts the grammar span to parser span
22/// 2. Recursively parses inline elements (emphasis, strong, links, etc.)
23/// 3. Falls back to plain text on inline parsing errors
24///
25/// # Example
26/// ```ignore
27/// let content = GrammarSpan::new("This is **bold** text.");
28/// let node = parse_paragraph(content);
29/// assert!(matches!(node.kind, NodeKind::Paragraph));
30/// assert!(!node.children.is_empty()); // Contains inline nodes
31/// ```
32pub fn parse_paragraph(content: GrammarSpan) -> Node {
33    let span = opt_span(content);
34
35    // Support task checkbox markers at the start of a paragraph *and* at the
36    // start of any subsequent line inside the same paragraph.
37    //
38    // This matters when the author uses hard breaks (two spaces + newline) to
39    // create a checklist-like block without list markers:
40    //   [ ] first
41    //   [ ] second
42    //
43    // Those lines are still a single paragraph in CommonMark; we still want to
44    // render the checkbox SVG on each line.
45    let mut inline_children: Vec<Node> = Vec::new();
46    let mut remaining = content;
47
48    while let Some((start, checked, consumed)) =
49        find_next_task_checkbox_marker(remaining.fragment())
50    {
51        // Emit any content before the marker using the inline parser.
52        if start > 0 {
53            let (rest, prefix) = remaining.take_split(start);
54            inline_children.extend(parse_inlines_or_fallback_text(prefix));
55            remaining = rest;
56        }
57
58        // `remaining` now begins at the marker.
59        let (after_marker, _marker_taken) = remaining.take_split(consumed);
60        inline_children.push(Node {
61            kind: NodeKind::TaskCheckboxInline { checked },
62            span: crate::parser::shared::opt_span_range(remaining, after_marker),
63            children: Vec::new(),
64        });
65        remaining = after_marker;
66    }
67
68    // Emit any trailing content after the last marker.
69    inline_children.extend(parse_inlines_or_fallback_text(remaining));
70
71    Node {
72        kind: NodeKind::Paragraph,
73        span,
74        children: inline_children,
75    }
76}
77
78fn parse_inlines_or_fallback_text(input: GrammarSpan) -> Vec<Node> {
79    if input.fragment().is_empty() {
80        return Vec::new();
81    }
82
83    match crate::parser::inlines::parse_inlines_from_span(input) {
84        Ok(children) => children,
85        Err(e) => {
86            log::warn!("Failed to parse inline elements: {}", e);
87            vec![Node {
88                kind: NodeKind::Text(input.fragment().to_string()),
89                span: opt_span(input),
90                children: Vec::new(),
91            }]
92        }
93    }
94}
95
96/// Find the next task checkbox marker that appears at a line start.
97///
98/// Returns (byte_offset_from_start, checked, consumed_bytes).
99fn find_next_task_checkbox_marker(input: &str) -> Option<(usize, bool, usize)> {
100    let mut line_start = 0usize;
101    loop {
102        if let Some((checked, consumed)) = parse_task_checkbox_prefix_len(&input[line_start..]) {
103            return Some((line_start, checked, consumed));
104        }
105
106        let rel = input[line_start..].find('\n')?;
107        line_start += rel + 1;
108        if line_start >= input.len() {
109            return None;
110        }
111    }
112}
113
/// Detect a task checkbox marker at the very start of `input`.
///
/// Up to three leading spaces are skipped, then one of the following must
/// appear, followed by a single space or tab:
/// - `[ ]` (unchecked)
/// - `[x]` / `[X]` (checked)
///
/// Returns `(checked, consumed_bytes)`, where `consumed_bytes` covers the
/// leading spaces, the three-byte marker and exactly one trailing whitespace
/// byte. Returns `None` when `input` does not start with such a marker.
fn parse_task_checkbox_prefix_len(input: &str) -> Option<(bool, usize)> {
    const MAX_INDENT: usize = 3;
    // "[ ]" / "[x]" / "[X]" are all three ASCII bytes.
    const MARKER_LEN: usize = 3;

    let bytes = input.as_bytes();
    let indent = bytes
        .iter()
        .take(MAX_INDENT)
        .take_while(|&&byte| byte == b' ')
        .count();

    let checked = match bytes.get(indent..indent + MARKER_LEN)? {
        b"[ ]" => false,
        b"[x]" | b"[X]" => true,
        _ => return None,
    };

    // The marker only counts when a space or tab follows it; that single
    // whitespace byte is included in the consumed length.
    match bytes.get(indent + MARKER_LEN) {
        Some(b' ') | Some(b'\t') => Some((checked, indent + MARKER_LEN + 1)),
        _ => None,
    }
}
155
#[cfg(test)]
mod tests {
    use super::*;

    /// Collect the `checked` flag of every task checkbox child, in order.
    fn checkbox_flags(node: &Node) -> Vec<bool> {
        node.children
            .iter()
            .filter_map(|child| match &child.kind {
                NodeKind::TaskCheckboxInline { checked } => Some(*checked),
                _ => None,
            })
            .collect()
    }

    #[test]
    fn smoke_test_parse_paragraph_plain_text() {
        let content = GrammarSpan::new("This is a simple paragraph.");
        let node = parse_paragraph(content);

        assert!(matches!(node.kind, NodeKind::Paragraph));
        assert!(!node.children.is_empty());
    }

    #[test]
    fn smoke_test_paragraph_with_inline_elements() {
        let content = GrammarSpan::new("This has **bold** and *italic*.");
        let node = parse_paragraph(content);

        assert!(matches!(node.kind, NodeKind::Paragraph));
        assert!(!node.children.is_empty());
    }

    #[test]
    fn smoke_test_paragraph_empty() {
        let content = GrammarSpan::new("");
        let node = parse_paragraph(content);

        assert!(matches!(node.kind, NodeKind::Paragraph));
        // No marker is found and the empty remainder is never handed to the
        // inline parser, so an empty paragraph has no children at all.
        assert!(node.children.is_empty());
    }

    #[test]
    fn smoke_test_paragraph_span() {
        let content = GrammarSpan::new("Test paragraph");
        let node = parse_paragraph(content);

        assert!(node.span.is_some());
        let span = node.span.unwrap();
        assert_eq!(span.start.line, 1);
        assert_eq!(span.start.column, 1);
    }

    #[test]
    fn smoke_test_paragraph_multiline() {
        let content = GrammarSpan::new("Line one\nLine two\nLine three");
        let node = parse_paragraph(content);

        assert!(matches!(node.kind, NodeKind::Paragraph));
        assert!(!node.children.is_empty());
    }

    #[test]
    fn smoke_test_paragraph_with_link() {
        let content = GrammarSpan::new("Check [this link](https://example.com) out.");
        let node = parse_paragraph(content);

        assert!(matches!(node.kind, NodeKind::Paragraph));
        assert!(!node.children.is_empty());
    }

    #[test]
    fn smoke_test_paragraph_leading_checkbox() {
        let content = GrammarSpan::new("[x] done");
        let node = parse_paragraph(content);

        assert!(matches!(node.kind, NodeKind::Paragraph));
        // The checkbox is emitted before any inline content of that line.
        assert!(matches!(
            node.children.first().map(|child| &child.kind),
            Some(NodeKind::TaskCheckboxInline { checked: true })
        ));
        assert_eq!(checkbox_flags(&node), vec![true]);
    }

    #[test]
    fn smoke_test_paragraph_checkbox_on_each_line() {
        let content = GrammarSpan::new("[ ] first\n[x] second");
        let node = parse_paragraph(content);

        assert!(matches!(node.kind, NodeKind::Paragraph));
        assert_eq!(checkbox_flags(&node), vec![false, true]);
    }

    #[test]
    fn smoke_test_paragraph_marker_requires_trailing_whitespace() {
        let content = GrammarSpan::new("[ ]no space after marker");
        let node = parse_paragraph(content);

        assert!(matches!(node.kind, NodeKind::Paragraph));
        assert!(checkbox_flags(&node).is_empty());
    }

    #[test]
    fn smoke_test_paragraph_mid_line_brackets_are_not_checkboxes() {
        let content = GrammarSpan::new("Use [ ] for an open item");
        let node = parse_paragraph(content);

        assert!(matches!(node.kind, NodeKind::Paragraph));
        assert!(checkbox_flags(&node).is_empty());
    }
}