Skip to main content

panache_parser/parser/utils/
text_buffer.rs

//! Text buffer for accumulating multi-line block content.
//!
//! Used during paragraph and plain text parsing to collect lines before
//! emitting them with inline parsing applied.
5
6use super::inline_emission;
7use crate::options::ParserOptions;
8use crate::syntax::{SyntaxKind, SyntaxNode, SyntaxToken};
9use rowan::{GreenNodeBuilder, NodeOrToken};
10
/// Buffer for accumulating text lines before emission.
///
/// Intended to be reused across multiple paragraph/plain blocks: call
/// [`TextBuffer::clear`] between uses rather than constructing a new buffer.
#[derive(Debug, Default, Clone)]
pub(crate) struct TextBuffer {
    /// Accumulated lines, stored exactly as pushed (WITH trailing newlines if
    /// they had them in source).
    lines: Vec<String>,
}

impl TextBuffer {
    /// Create a new empty text buffer.
    pub(crate) fn new() -> Self {
        Self::default()
    }

    /// Push a line of text to the buffer.
    ///
    /// The line should include its trailing newline if it had one in the
    /// source; no separator is inserted when the lines are later joined.
    pub(crate) fn push_line(&mut self, text: impl Into<String>) {
        self.lines.push(text.into());
    }

    /// Get the accumulated text by concatenating all lines in push order.
    ///
    /// Returns an empty string if the buffer is empty. Lines are concatenated
    /// as-is (they should carry their own newlines if needed).
    pub(crate) fn get_accumulated_text(&self) -> String {
        self.lines.concat()
    }

    /// Clear the buffer for reuse, dropping every stored line.
    pub(crate) fn clear(&mut self) {
        self.lines.clear();
    }

    /// Check if buffer is empty.
    ///
    /// This counts pushed lines, not characters: a buffer holding only an
    /// empty string is *not* empty.
    pub(crate) fn is_empty(&self) -> bool {
        self.lines.is_empty()
    }
}
52
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed buffer holds no lines and yields no text.
    #[test]
    fn test_new_buffer_is_empty() {
        let buffer = TextBuffer::new();
        assert!(buffer.is_empty());
        assert_eq!(buffer.get_accumulated_text(), "");
    }

    /// A single pushed line is returned unchanged.
    #[test]
    fn test_push_single_line() {
        let mut buffer = TextBuffer::new();
        buffer.push_line("Hello, world!");
        assert!(!buffer.is_empty());
        assert_eq!(buffer.get_accumulated_text(), "Hello, world!");
    }

    /// Lines are joined in push order with no separator added.
    #[test]
    fn test_push_multiple_lines() {
        let mut buffer = TextBuffer::new();
        buffer.push_line("Line 1\n");
        buffer.push_line("Line 2\n");
        buffer.push_line("Line 3");
        assert_eq!(buffer.get_accumulated_text(), "Line 1\nLine 2\nLine 3");
    }

    /// Clearing drops every previously pushed line.
    #[test]
    fn test_clear_buffer() {
        let mut buffer = TextBuffer::new();
        buffer.push_line("Line 1");
        buffer.push_line("Line 2");
        buffer.clear();
        assert!(buffer.is_empty());
        assert_eq!(buffer.get_accumulated_text(), "");
    }

    /// After `clear`, the buffer behaves like a new one.
    #[test]
    fn test_reuse_after_clear() {
        let mut buffer = TextBuffer::new();

        // First use
        buffer.push_line("First paragraph\n");
        buffer.push_line("continues here");
        assert_eq!(
            buffer.get_accumulated_text(),
            "First paragraph\ncontinues here"
        );

        // Clear and reuse
        buffer.clear();
        buffer.push_line("Second paragraph\n");
        buffer.push_line("also continues");
        assert_eq!(
            buffer.get_accumulated_text(),
            "Second paragraph\nalso continues"
        );
    }

    /// Blank and zero-length lines are stored like any other line.
    #[test]
    fn test_empty_lines() {
        let mut buffer = TextBuffer::new();
        buffer.push_line("\n");
        buffer.push_line("Non-empty\n");
        buffer.push_line("");
        assert!(!buffer.is_empty());
        assert_eq!(buffer.get_accumulated_text(), "\nNon-empty\n");
    }

    /// Leading/trailing whitespace and tabs are never trimmed.
    #[test]
    fn test_whitespace_preserved() {
        let mut buffer = TextBuffer::new();
        buffer.push_line("  Leading spaces\n");
        buffer.push_line("Trailing spaces  \n");
        buffer.push_line("\tTab at start");
        assert_eq!(
            buffer.get_accumulated_text(),
            "  Leading spaces\nTrailing spaces  \n\tTab at start"
        );
    }

    /// `Default` produces the same empty buffer as `new`.
    #[test]
    fn test_default_is_empty() {
        let buffer = TextBuffer::default();
        assert!(buffer.is_empty());
        assert_eq!(buffer.get_accumulated_text(), "");
    }
}
143
// ============================================================================
// ParagraphBuffer - Interleaved buffer for paragraphs with structural markers
// ============================================================================
147
/// A segment in the paragraph buffer - either text content or a structural marker.
#[derive(Debug, Clone)]
pub(crate) enum ParagraphSegment {
    /// Text content (may include newlines)
    Text(String),
    /// A blockquote marker with its whitespace info
    BlockquoteMarker {
        /// Number of space characters re-emitted before the `>` marker.
        leading_spaces: usize,
        /// Whether a single space is re-emitted after the `>` marker.
        has_trailing_space: bool,
    },
}
159
160/// Buffer for accumulating paragraph content with interleaved structural markers.
161///
162/// This enables proper inline parsing across line boundaries while preserving
163/// the position of BLOCK_QUOTE_MARKER tokens for lossless reconstruction.
164#[derive(Debug, Default, Clone)]
165pub(crate) struct ParagraphBuffer {
166    /// Interleaved segments of text and markers
167    segments: Vec<ParagraphSegment>,
168}
169
170impl ParagraphBuffer {
171    /// Create a new empty paragraph buffer.
172    pub(crate) fn new() -> Self {
173        Self {
174            segments: Vec::new(),
175        }
176    }
177
178    /// Push text content to the buffer.
179    ///
180    /// If the last segment is Text, appends to it. Otherwise creates a new Text segment.
181    pub(crate) fn push_text(&mut self, text: &str) {
182        if text.is_empty() {
183            return;
184        }
185        match self.segments.last_mut() {
186            Some(ParagraphSegment::Text(existing)) => {
187                existing.push_str(text);
188            }
189            _ => {
190                self.segments.push(ParagraphSegment::Text(text.to_string()));
191            }
192        }
193    }
194
195    /// Push a blockquote marker to the buffer.
196    pub(crate) fn push_marker(&mut self, leading_spaces: usize, has_trailing_space: bool) {
197        self.segments.push(ParagraphSegment::BlockquoteMarker {
198            leading_spaces,
199            has_trailing_space,
200        });
201    }
202
203    /// Get concatenated text for inline parsing (excludes markers).
204    pub(crate) fn get_text_for_parsing(&self) -> String {
205        let mut result = String::new();
206        for segment in &self.segments {
207            if let ParagraphSegment::Text(text) = segment {
208                result.push_str(text);
209            }
210        }
211        result
212    }
213
214    /// Get the byte positions where markers should be inserted in the concatenated text.
215    ///
216    /// Returns a list of (byte_offset, marker_info) pairs.
217    fn get_marker_positions(&self) -> Vec<(usize, usize, bool)> {
218        let mut positions = Vec::new();
219        let mut byte_offset = 0;
220
221        for segment in &self.segments {
222            match segment {
223                ParagraphSegment::Text(text) => {
224                    byte_offset += text.len();
225                }
226                ParagraphSegment::BlockquoteMarker {
227                    leading_spaces,
228                    has_trailing_space,
229                } => {
230                    positions.push((byte_offset, *leading_spaces, *has_trailing_space));
231                }
232            }
233        }
234        positions
235    }
236
237    /// Emit the buffered content with inline parsing, interspersing markers at correct positions.
238    pub(crate) fn emit_with_inlines(
239        &self,
240        builder: &mut GreenNodeBuilder<'static>,
241        config: &ParserOptions,
242    ) {
243        let text = self.get_text_for_parsing();
244        if text.is_empty() && self.segments.is_empty() {
245            return;
246        }
247
248        let marker_positions = self.get_marker_positions();
249
250        if marker_positions.is_empty() {
251            // No markers - simple case, just emit inlines
252            inline_emission::emit_inlines(builder, &text, config);
253        } else {
254            // Complex case: emit inlines with markers interspersed
255            self.emit_with_markers(builder, &text, &marker_positions, config);
256        }
257    }
258
259    /// Emit inline content with markers at specified byte positions.
260    ///
261    /// Important: we must parse the full text *once* so multiline inlines (like STRONG)
262    /// can span across blockquote marker boundaries.
263    fn emit_with_markers(
264        &self,
265        builder: &mut GreenNodeBuilder<'static>,
266        text: &str,
267        marker_positions: &[(usize, usize, bool)],
268        config: &ParserOptions,
269    ) {
270        // Parse inlines once into a temporary tree.
271        let mut temp_builder = GreenNodeBuilder::new();
272        temp_builder.start_node(SyntaxKind::HEADING_CONTENT.into());
273        inline_emission::emit_inlines(&mut temp_builder, text, config);
274        temp_builder.finish_node();
275        let inline_root = SyntaxNode::new_root(temp_builder.finish());
276
277        struct MarkerEmitter<'a> {
278            marker_positions: &'a [(usize, usize, bool)],
279            idx: usize,
280            offset: usize,
281        }
282
283        impl<'a> MarkerEmitter<'a> {
284            fn emit_markers_at_current(&mut self, builder: &mut GreenNodeBuilder<'static>) {
285                while let Some(&(byte_offset, leading_spaces, has_trailing_space)) =
286                    self.marker_positions.get(self.idx)
287                    && byte_offset == self.offset
288                {
289                    if leading_spaces > 0 {
290                        builder.token(SyntaxKind::WHITESPACE.into(), &" ".repeat(leading_spaces));
291                    }
292                    builder.token(SyntaxKind::BLOCK_QUOTE_MARKER.into(), ">");
293                    if has_trailing_space {
294                        builder.token(SyntaxKind::WHITESPACE.into(), " ");
295                    }
296                    self.idx += 1;
297                }
298            }
299
300            fn emit_token_with_markers(
301                &mut self,
302                builder: &mut GreenNodeBuilder<'static>,
303                token: &SyntaxToken,
304            ) {
305                let kind = token.kind();
306                let token_text = token.text();
307
308                let mut start = 0;
309                while start < token_text.len() {
310                    // Markers at the current offset must be emitted before emitting any bytes.
311                    self.emit_markers_at_current(builder);
312
313                    let remaining = token_text.len() - start;
314
315                    let next_marker_offset = self
316                        .marker_positions
317                        .get(self.idx)
318                        .map(|(byte_offset, _, _)| *byte_offset);
319
320                    if let Some(next) = next_marker_offset
321                        && next > self.offset
322                        && next < self.offset + remaining
323                    {
324                        let split_len = next - self.offset;
325                        let end = start + split_len;
326                        if end > start {
327                            builder.token(kind.into(), &token_text[start..end]);
328                            self.offset += split_len;
329                            start = end;
330                            continue;
331                        }
332                    }
333
334                    builder.token(kind.into(), &token_text[start..]);
335                    self.offset += remaining;
336                    break;
337                }
338            }
339
340            fn emit_element(
341                &mut self,
342                builder: &mut GreenNodeBuilder<'static>,
343                el: NodeOrToken<SyntaxNode, SyntaxToken>,
344            ) {
345                match el {
346                    NodeOrToken::Node(n) => {
347                        builder.start_node(n.kind().into());
348                        for child in n.children_with_tokens() {
349                            self.emit_element(builder, child);
350                        }
351                        builder.finish_node();
352                    }
353                    NodeOrToken::Token(t) => self.emit_token_with_markers(builder, &t),
354                }
355            }
356        }
357
358        let mut emitter = MarkerEmitter {
359            marker_positions,
360            idx: 0,
361            offset: 0,
362        };
363
364        // Emit the inline parse result, injecting markers at the recorded offsets.
365        for el in inline_root.children_with_tokens() {
366            emitter.emit_element(builder, el);
367        }
368
369        // Emit any markers at the end.
370        emitter.emit_markers_at_current(builder);
371    }
372
373    /// Check if buffer is empty.
374    pub(crate) fn is_empty(&self) -> bool {
375        self.segments.is_empty()
376    }
377}
378
#[cfg(test)]
mod paragraph_buffer_tests {
    use super::*;

    /// A freshly constructed buffer has no segments and no text.
    #[test]
    fn test_new_buffer_is_empty() {
        let buffer = ParagraphBuffer::new();
        assert!(buffer.is_empty());
        assert_eq!(buffer.get_text_for_parsing(), "");
    }

    /// A single text push is returned unchanged.
    #[test]
    fn test_push_text_single() {
        let mut buffer = ParagraphBuffer::new();
        buffer.push_text("Hello, world!");
        assert!(!buffer.is_empty());
        assert_eq!(buffer.get_text_for_parsing(), "Hello, world!");
    }

    /// Consecutive text pushes merge into one segment.
    #[test]
    fn test_push_text_concatenates() {
        let mut buffer = ParagraphBuffer::new();
        buffer.push_text("Hello");
        buffer.push_text(", ");
        buffer.push_text("world!");
        assert_eq!(buffer.get_text_for_parsing(), "Hello, world!");
        // Should be a single Text segment due to concatenation
        assert_eq!(buffer.segments.len(), 1);
    }

    /// A marker between two text pushes prevents them from merging.
    #[test]
    fn test_push_marker_separates_text() {
        let mut buffer = ParagraphBuffer::new();
        buffer.push_text("Line 1\n");
        buffer.push_marker(0, true);
        buffer.push_text("Line 2\n");
        // Should be: Text, Marker, Text
        assert_eq!(buffer.segments.len(), 3);
        assert_eq!(buffer.get_text_for_parsing(), "Line 1\nLine 2\n");
    }

    /// A marker's offset is the byte length of the text pushed before it.
    #[test]
    fn test_marker_positions() {
        let mut buffer = ParagraphBuffer::new();
        buffer.push_text("Line 1\n"); // 7 bytes
        buffer.push_marker(0, true);
        buffer.push_text("Line 2\n"); // 7 bytes

        let positions = buffer.get_marker_positions();
        assert_eq!(positions.len(), 1);
        assert_eq!(positions[0], (7, 0, true)); // marker at byte 7
    }

    /// Each marker records its own offset and whitespace info.
    #[test]
    fn test_multiple_markers() {
        let mut buffer = ParagraphBuffer::new();
        buffer.push_text("A\n"); // 2 bytes
        buffer.push_marker(0, true);
        buffer.push_text("B\n"); // 2 bytes
        buffer.push_marker(1, false);
        buffer.push_text("C");

        let positions = buffer.get_marker_positions();
        assert_eq!(positions.len(), 2);
        assert_eq!(positions[0], (2, 0, true)); // first marker at byte 2
        assert_eq!(positions[1], (4, 1, false)); // second marker at byte 4
    }

    /// Pushing an empty string adds no segment.
    #[test]
    fn test_empty_text_ignored() {
        let mut buffer = ParagraphBuffer::new();
        buffer.push_text("");
        assert!(buffer.is_empty());
    }
}