Skip to main content

panache_parser/parser/utils/
text_buffer.rs

1//! Text buffer for accumulating multi-line block content.
2//!
3//! Used during paragraph and plain text parsing to collect lines before
4//! emitting them with inline parsing applied.
5
6use super::inline_emission;
7use crate::options::ParserOptions;
8use crate::syntax::{SyntaxKind, SyntaxNode, SyntaxToken};
9use rowan::{GreenNodeBuilder, NodeOrToken};
10
/// Line-oriented scratch buffer used while a paragraph or plain-text block is
/// still being collected.
///
/// Every pushed line is stored verbatim as its own `String`; nothing is
/// trimmed or normalized. Call [`TextBuffer::clear`] between blocks to reuse
/// the same value instead of building a new one.
#[derive(Clone, Debug, Default)]
pub(crate) struct TextBuffer {
    /// Lines in push order. A line carries a trailing newline only if the
    /// caller supplied one.
    lines: Vec<String>,
}

impl TextBuffer {
    /// Build a buffer that holds no lines.
    pub(crate) fn new() -> Self {
        Self::default()
    }

    /// Append one line to the end of the buffer.
    ///
    /// No line ending is added here: pass the line together with its newline
    /// if the source had one.
    pub(crate) fn push_line(&mut self, text: impl Into<String>) {
        let owned: String = text.into();
        self.lines.push(owned);
    }

    /// Join every stored line, in push order, into a single `String`.
    ///
    /// No separator is inserted between lines. An empty buffer yields `""`.
    pub(crate) fn get_accumulated_text(&self) -> String {
        self.lines.iter().map(String::as_str).collect()
    }

    /// Drop all stored lines so the buffer can be filled again.
    pub(crate) fn clear(&mut self) {
        self.lines.clear();
    }

    /// Report whether no line has been pushed since creation or the last
    /// [`TextBuffer::clear`].
    ///
    /// A pushed empty string still counts as a line.
    pub(crate) fn is_empty(&self) -> bool {
        self.lines.is_empty()
    }
}
52
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a buffer holding `lines`, pushed in the given order.
    fn buffer_with(lines: &[&str]) -> TextBuffer {
        let mut buffer = TextBuffer::new();
        for &line in lines {
            buffer.push_line(line);
        }
        buffer
    }

    #[test]
    fn test_new_buffer_is_empty() {
        let fresh = TextBuffer::new();
        assert!(fresh.is_empty());
        assert_eq!(fresh.get_accumulated_text(), "");
    }

    #[test]
    fn test_push_single_line() {
        let buffer = buffer_with(&["Hello, world!"]);
        assert!(!buffer.is_empty());
        assert_eq!(buffer.get_accumulated_text(), "Hello, world!");
    }

    #[test]
    fn test_push_multiple_lines() {
        let buffer = buffer_with(&["Line 1\n", "Line 2\n", "Line 3"]);
        assert_eq!(buffer.get_accumulated_text(), "Line 1\nLine 2\nLine 3");
    }

    #[test]
    fn test_clear_buffer() {
        let mut buffer = buffer_with(&["Line 1", "Line 2"]);
        buffer.clear();
        assert!(buffer.is_empty());
        assert_eq!(buffer.get_accumulated_text(), "");
    }

    #[test]
    fn test_reuse_after_clear() {
        // First paragraph.
        let mut buffer = buffer_with(&["First paragraph\n", "continues here"]);
        assert_eq!(
            buffer.get_accumulated_text(),
            "First paragraph\ncontinues here"
        );

        // Same buffer, second paragraph: nothing from the first may leak through.
        buffer.clear();
        for line in &["Second paragraph\n", "also continues"] {
            buffer.push_line(*line);
        }
        assert_eq!(
            buffer.get_accumulated_text(),
            "Second paragraph\nalso continues"
        );
    }

    #[test]
    fn test_empty_lines() {
        // The trailing "" adds no text but is still stored as a line.
        let buffer = buffer_with(&["\n", "Non-empty\n", ""]);
        assert!(!buffer.is_empty());
        assert_eq!(buffer.get_accumulated_text(), "\nNon-empty\n");
    }

    #[test]
    fn test_whitespace_preserved() {
        let buffer = buffer_with(&[
            "  Leading spaces\n",
            "Trailing spaces  \n",
            "\tTab at start",
        ]);
        assert_eq!(
            buffer.get_accumulated_text(),
            "  Leading spaces\nTrailing spaces  \n\tTab at start"
        );
    }

    #[test]
    fn test_default_is_empty() {
        let from_default = TextBuffer::default();
        assert!(from_default.is_empty());
        assert_eq!(from_default.get_accumulated_text(), "");
    }
}
143
144// ============================================================================
145// ParagraphBuffer - Interleaved buffer for paragraphs with structural markers
146// ============================================================================
147
/// One entry in a [`ParagraphBuffer`]: either a run of text or a blockquote
/// marker recorded between runs.
#[derive(Clone, Debug)]
pub(crate) enum ParagraphSegment {
    /// A run of paragraph text, stored verbatim (newlines included).
    Text(String),
    /// A `>` blockquote marker together with the whitespace around it.
    BlockquoteMarker {
        /// Number of spaces that precede the marker.
        leading_spaces: usize,
        /// Whether one space follows the marker.
        has_trailing_space: bool,
    },
}
159
160/// Buffer for accumulating paragraph content with interleaved structural markers.
161///
162/// This enables proper inline parsing across line boundaries while preserving
163/// the position of BLOCK_QUOTE_MARKER tokens for lossless reconstruction.
164#[derive(Debug, Default, Clone)]
165pub(crate) struct ParagraphBuffer {
166    /// Interleaved segments of text and markers
167    segments: Vec<ParagraphSegment>,
168}
169
170impl ParagraphBuffer {
171    /// Create a new empty paragraph buffer.
172    pub(crate) fn new() -> Self {
173        Self {
174            segments: Vec::new(),
175        }
176    }
177
178    /// Push text content to the buffer.
179    ///
180    /// If the last segment is Text, appends to it. Otherwise creates a new Text segment.
181    pub(crate) fn push_text(&mut self, text: &str) {
182        if text.is_empty() {
183            return;
184        }
185        match self.segments.last_mut() {
186            Some(ParagraphSegment::Text(existing)) => {
187                existing.push_str(text);
188            }
189            _ => {
190                self.segments.push(ParagraphSegment::Text(text.to_string()));
191            }
192        }
193    }
194
195    /// Push a blockquote marker to the buffer.
196    pub(crate) fn push_marker(&mut self, leading_spaces: usize, has_trailing_space: bool) {
197        self.segments.push(ParagraphSegment::BlockquoteMarker {
198            leading_spaces,
199            has_trailing_space,
200        });
201    }
202
203    /// Get concatenated text for inline parsing (excludes markers).
204    pub(crate) fn get_text_for_parsing(&self) -> String {
205        let mut result = String::new();
206        for segment in &self.segments {
207            if let ParagraphSegment::Text(text) = segment {
208                result.push_str(text);
209            }
210        }
211        result
212    }
213
214    /// Get the byte positions where markers should be inserted in the concatenated text.
215    ///
216    /// Returns a list of (byte_offset, marker_info) pairs.
217    fn get_marker_positions(&self) -> Vec<(usize, usize, bool)> {
218        let mut positions = Vec::new();
219        let mut byte_offset = 0;
220
221        for segment in &self.segments {
222            match segment {
223                ParagraphSegment::Text(text) => {
224                    byte_offset += text.len();
225                }
226                ParagraphSegment::BlockquoteMarker {
227                    leading_spaces,
228                    has_trailing_space,
229                } => {
230                    positions.push((byte_offset, *leading_spaces, *has_trailing_space));
231                }
232            }
233        }
234        positions
235    }
236
237    /// Emit the buffered content with inline parsing, interspersing markers at correct positions.
238    ///
239    /// `suppress_footnote_refs` cascades down into the inline parser. Block
240    /// callers compute it from the container stack so paragraphs flushed
241    /// from inside a `FOOTNOTE_DEFINITION` body silently drop `[^id]` refs
242    /// (pandoc-native behavior).
243    pub(crate) fn emit_with_inlines(
244        &self,
245        builder: &mut GreenNodeBuilder<'static>,
246        config: &ParserOptions,
247        suppress_footnote_refs: bool,
248    ) {
249        let text = self.get_text_for_parsing();
250        if text.is_empty() && self.segments.is_empty() {
251            return;
252        }
253
254        let marker_positions = self.get_marker_positions();
255
256        if marker_positions.is_empty() {
257            // No markers - simple case, just emit inlines
258            inline_emission::emit_inlines(builder, &text, config, suppress_footnote_refs);
259        } else {
260            // Complex case: emit inlines with markers interspersed
261            self.emit_with_markers(
262                builder,
263                &text,
264                &marker_positions,
265                config,
266                suppress_footnote_refs,
267            );
268        }
269    }
270
271    /// Emit inline content with markers at specified byte positions.
272    ///
273    /// Important: we must parse the full text *once* so multiline inlines (like STRONG)
274    /// can span across blockquote marker boundaries.
275    fn emit_with_markers(
276        &self,
277        builder: &mut GreenNodeBuilder<'static>,
278        text: &str,
279        marker_positions: &[(usize, usize, bool)],
280        config: &ParserOptions,
281        suppress_footnote_refs: bool,
282    ) {
283        // Parse inlines once into a temporary tree.
284        let mut temp_builder = GreenNodeBuilder::new();
285        temp_builder.start_node(SyntaxKind::HEADING_CONTENT.into());
286        inline_emission::emit_inlines(&mut temp_builder, text, config, suppress_footnote_refs);
287        temp_builder.finish_node();
288        let inline_root = SyntaxNode::new_root(temp_builder.finish());
289
290        struct MarkerEmitter<'a> {
291            marker_positions: &'a [(usize, usize, bool)],
292            idx: usize,
293            offset: usize,
294        }
295
296        impl<'a> MarkerEmitter<'a> {
297            fn emit_markers_at_current(&mut self, builder: &mut GreenNodeBuilder<'static>) {
298                while let Some(&(byte_offset, leading_spaces, has_trailing_space)) =
299                    self.marker_positions.get(self.idx)
300                    && byte_offset == self.offset
301                {
302                    if leading_spaces > 0 {
303                        builder.token(SyntaxKind::WHITESPACE.into(), &" ".repeat(leading_spaces));
304                    }
305                    builder.token(SyntaxKind::BLOCK_QUOTE_MARKER.into(), ">");
306                    if has_trailing_space {
307                        builder.token(SyntaxKind::WHITESPACE.into(), " ");
308                    }
309                    self.idx += 1;
310                }
311            }
312
313            fn emit_token_with_markers(
314                &mut self,
315                builder: &mut GreenNodeBuilder<'static>,
316                token: &SyntaxToken,
317            ) {
318                let kind = token.kind();
319                let token_text = token.text();
320
321                let mut start = 0;
322                while start < token_text.len() {
323                    // Markers at the current offset must be emitted before emitting any bytes.
324                    self.emit_markers_at_current(builder);
325
326                    let remaining = token_text.len() - start;
327
328                    let next_marker_offset = self
329                        .marker_positions
330                        .get(self.idx)
331                        .map(|(byte_offset, _, _)| *byte_offset);
332
333                    if let Some(next) = next_marker_offset
334                        && next > self.offset
335                        && next < self.offset + remaining
336                    {
337                        let split_len = next - self.offset;
338                        let end = start + split_len;
339                        if end > start {
340                            builder.token(kind.into(), &token_text[start..end]);
341                            self.offset += split_len;
342                            start = end;
343                            continue;
344                        }
345                    }
346
347                    builder.token(kind.into(), &token_text[start..]);
348                    self.offset += remaining;
349                    break;
350                }
351            }
352
353            fn emit_element(
354                &mut self,
355                builder: &mut GreenNodeBuilder<'static>,
356                el: NodeOrToken<SyntaxNode, SyntaxToken>,
357            ) {
358                match el {
359                    NodeOrToken::Node(n) => {
360                        // Emit any markers at the current offset *outside* this
361                        // node — otherwise they end up nested inside (e.g. a
362                        // BLOCK_QUOTE_MARKER inside a SUBSCRIPT_MARKER), which
363                        // confuses lossless reconstruction during reformatting.
364                        self.emit_markers_at_current(builder);
365                        builder.start_node(n.kind().into());
366                        for child in n.children_with_tokens() {
367                            self.emit_element(builder, child);
368                        }
369                        builder.finish_node();
370                    }
371                    NodeOrToken::Token(t) => self.emit_token_with_markers(builder, &t),
372                }
373            }
374        }
375
376        let mut emitter = MarkerEmitter {
377            marker_positions,
378            idx: 0,
379            offset: 0,
380        };
381
382        // Emit the inline parse result, injecting markers at the recorded offsets.
383        for el in inline_root.children_with_tokens() {
384            emitter.emit_element(builder, el);
385        }
386
387        // Emit any markers at the end.
388        emitter.emit_markers_at_current(builder);
389    }
390
391    /// Check if buffer is empty.
392    pub(crate) fn is_empty(&self) -> bool {
393        self.segments.is_empty()
394    }
395}
396
#[cfg(test)]
mod paragraph_buffer_tests {
    use super::*;

    #[test]
    fn test_new_buffer_is_empty() {
        let fresh = ParagraphBuffer::new();
        assert!(fresh.is_empty());
        assert_eq!(fresh.get_text_for_parsing(), "");
    }

    #[test]
    fn test_push_text_single() {
        let mut paragraph = ParagraphBuffer::new();
        paragraph.push_text("Hello, world!");
        assert!(!paragraph.is_empty());
        assert_eq!(paragraph.get_text_for_parsing(), "Hello, world!");
    }

    #[test]
    fn test_push_text_concatenates() {
        let mut paragraph = ParagraphBuffer::new();
        for piece in ["Hello", ", ", "world!"].iter() {
            paragraph.push_text(piece);
        }
        assert_eq!(paragraph.get_text_for_parsing(), "Hello, world!");
        // Consecutive pushes merge, so only one Text segment exists.
        assert_eq!(paragraph.segments.len(), 1);
    }

    #[test]
    fn test_push_marker_separates_text() {
        let mut paragraph = ParagraphBuffer::new();
        paragraph.push_text("Line 1\n");
        paragraph.push_marker(0, true);
        paragraph.push_text("Line 2\n");
        // Expected layout: Text, Marker, Text.
        assert_eq!(paragraph.segments.len(), 3);
        assert_eq!(paragraph.get_text_for_parsing(), "Line 1\nLine 2\n");
    }

    #[test]
    fn test_marker_positions() {
        let mut paragraph = ParagraphBuffer::new();
        paragraph.push_text("Line 1\n"); // 7 bytes
        paragraph.push_marker(0, true);
        paragraph.push_text("Line 2\n"); // 7 bytes

        // Exactly one marker, sitting at byte 7.
        assert_eq!(paragraph.get_marker_positions(), vec![(7, 0, true)]);
    }

    #[test]
    fn test_multiple_markers() {
        let mut paragraph = ParagraphBuffer::new();
        paragraph.push_text("A\n"); // 2 bytes
        paragraph.push_marker(0, true);
        paragraph.push_text("B\n"); // 2 bytes
        paragraph.push_marker(1, false);
        paragraph.push_text("C");

        // First marker at byte 2, second at byte 4.
        assert_eq!(
            paragraph.get_marker_positions(),
            vec![(2, 0, true), (4, 1, false)]
        );
    }

    #[test]
    fn test_empty_text_ignored() {
        let mut paragraph = ParagraphBuffer::new();
        paragraph.push_text("");
        assert!(paragraph.is_empty());
    }
}
471}