Skip to main content

panache_parser/parser/inlines/
sink.rs

1//! Output sink abstraction for inline emission.
2//!
3//! The inline parser ([`super::core`]) emits its CST through exactly three
4//! operations: emit a leaf token, open a node, close a node. Abstracting those
5//! behind [`InlineSink`] lets the common path write straight into a
6//! [`GreenNodeBuilder`] (zero-cost, monomorphized) while a blockquote paragraph
7//! can swap in [`MarkerInjectingSink`], which splices `BLOCK_QUOTE_MARKER`
8//! tokens into the stream at recorded byte offsets during the *same* pass —
9//! no temporary tree built and replayed.
10//!
11//! The marker-injection logic mirrors the lossless reconstruction rules:
12//! a leaf token is split when a marker falls in its interior, and a marker
13//! whose offset coincides with a node boundary is emitted *outside* the node
14//! (before `start_node`) so it never nests inside e.g. an `EMPHASIS_MARKER`.
15
16use crate::syntax::SyntaxKind;
17use rowan::GreenNodeBuilder;
18
19/// The three CST-building operations the inline emitter relies on.
20///
21/// Implemented for [`GreenNodeBuilder`] as a direct passthrough (the hot
22/// path) and for [`MarkerInjectingSink`] for blockquote paragraphs.
23pub trait InlineSink {
24    fn token(&mut self, kind: rowan::SyntaxKind, text: &str);
25    fn start_node(&mut self, kind: rowan::SyntaxKind);
26    fn finish_node(&mut self);
27}
28
29impl InlineSink for GreenNodeBuilder<'_> {
30    #[inline]
31    fn token(&mut self, kind: rowan::SyntaxKind, text: &str) {
32        GreenNodeBuilder::token(self, kind, text);
33    }
34
35    #[inline]
36    fn start_node(&mut self, kind: rowan::SyntaxKind) {
37        GreenNodeBuilder::start_node(self, kind);
38    }
39
40    #[inline]
41    fn finish_node(&mut self) {
42        GreenNodeBuilder::finish_node(self);
43    }
44}
45
46/// An [`InlineSink`] that forwards into a real [`GreenNodeBuilder`] while
47/// splicing blockquote markers at recorded byte offsets.
48///
49/// `marker_positions` is a sorted list of `(byte_offset, leading_spaces,
50/// has_trailing_space)` tuples, where `byte_offset` is relative to the start of
51/// the text fed to the inline parser. `offset` tracks how many bytes of that
52/// text have been emitted so far; it advances *only* on [`token`](Self::token),
53/// since node boundaries carry zero bytes.
54pub(crate) struct MarkerInjectingSink<'a, 'b> {
55    inner: &'a mut GreenNodeBuilder<'static>,
56    marker_positions: &'b [(usize, usize, bool)],
57    /// Index of the next marker to emit.
58    idx: usize,
59    /// Bytes of source text emitted so far.
60    offset: usize,
61}
62
63impl<'a, 'b> MarkerInjectingSink<'a, 'b> {
64    pub(crate) fn new(
65        inner: &'a mut GreenNodeBuilder<'static>,
66        marker_positions: &'b [(usize, usize, bool)],
67    ) -> Self {
68        Self {
69            inner,
70            marker_positions,
71            idx: 0,
72            offset: 0,
73        }
74    }
75
76    /// Emit any markers whose offset equals the current byte position.
77    fn emit_markers_at_current(&mut self) {
78        while let Some(&(byte_offset, leading_spaces, has_trailing_space)) =
79            self.marker_positions.get(self.idx)
80            && byte_offset == self.offset
81        {
82            if leading_spaces > 0 {
83                self.inner
84                    .token(SyntaxKind::WHITESPACE.into(), &" ".repeat(leading_spaces));
85            }
86            self.inner.token(SyntaxKind::BLOCK_QUOTE_MARKER.into(), ">");
87            if has_trailing_space {
88                self.inner.token(SyntaxKind::WHITESPACE.into(), " ");
89            }
90            self.idx += 1;
91        }
92    }
93
94    /// Flush any markers at or past the end of the emitted text. Must be called
95    /// once after the inline parser finishes, to place trailing markers.
96    pub(crate) fn finish(mut self) {
97        self.emit_markers_at_current();
98    }
99}
100
101impl InlineSink for MarkerInjectingSink<'_, '_> {
102    fn token(&mut self, kind: rowan::SyntaxKind, text: &str) {
103        let mut start = 0;
104        while start < text.len() {
105            // Markers at the current offset must be emitted before any bytes.
106            self.emit_markers_at_current();
107
108            let remaining = text.len() - start;
109            let next_marker_offset = self
110                .marker_positions
111                .get(self.idx)
112                .map(|(byte_offset, _, _)| *byte_offset);
113
114            // If a marker falls strictly inside this token, split it there.
115            if let Some(next) = next_marker_offset
116                && next > self.offset
117                && next < self.offset + remaining
118            {
119                let split_len = next - self.offset;
120                let end = start + split_len;
121                if end > start {
122                    self.inner.token(kind, &text[start..end]);
123                    self.offset += split_len;
124                    start = end;
125                    continue;
126                }
127            }
128
129            self.inner.token(kind, &text[start..]);
130            self.offset += remaining;
131            break;
132        }
133    }
134
135    fn start_node(&mut self, kind: rowan::SyntaxKind) {
136        // Emit any markers at the current offset *outside* this node — otherwise
137        // they nest inside (e.g. a BLOCK_QUOTE_MARKER inside an EMPHASIS_MARKER),
138        // which breaks lossless reconstruction during reformatting.
139        self.emit_markers_at_current();
140        self.inner.start_node(kind);
141    }
142
143    fn finish_node(&mut self) {
144        self.inner.finish_node();
145    }
146}