facet_json/
scan_buffer.rs

1//! Buffer management for streaming JSON scanning.
2//!
3//! # Design: No Compaction Needed
4//!
5//! This buffer uses a grow-only strategy that never requires compacting
6//! (shifting data to the left). This is possible because:
7//!
8//! 1. When the scanner returns a complete token, we materialize it immediately.
9//!    The raw bytes are no longer needed.
10//!
11//! 2. When the scanner returns `Eof` (end of buffer, not real EOF), all tokens
12//!    in the buffer have been processed. We can **reset** and read fresh data.
13//!
14//! 3. When the scanner returns `NeedMore` (mid-token), we **grow** the buffer
15//!    and read more data into the new space. Indices remain valid.
16//!
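//! This avoids shifting data around inside the buffer, which is both simpler and more
//! efficient. (Growing may still reallocate the backing storage, but offsets into the
//! buffer are unaffected.)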
18
19use alloc::vec::Vec;
20
/// Default buffer capacity (8 KiB).
///
/// Used by [`ScanBuffer::new`], and by [`ScanBuffer::grow`] as the size a
/// zero-capacity buffer grows to.
pub const DEFAULT_CAPACITY: usize = 8 * 1024;

/// A refillable buffer for streaming JSON parsing.
///
/// Uses a grow-only strategy: we either reset (when all data processed)
/// or grow (when mid-token). Never compacts/shifts data, so byte offsets
/// into [`ScanBuffer::data`] stay valid until the next [`ScanBuffer::reset`].
#[derive(Debug)]
pub struct ScanBuffer {
    /// Backing storage. Its *length* is the usable size of the buffer;
    /// everything at `filled..` is scratch space for the next read.
    data: Vec<u8>,
    /// Number of leading bytes of `data` that hold real input.
    filled: usize,
    /// Set once end of input has been observed (or when built from a slice).
    eof: bool,
}

impl ScanBuffer {
    /// Create a new, empty buffer with [`DEFAULT_CAPACITY`] bytes of space.
    pub fn new() -> Self {
        Self::with_capacity(DEFAULT_CAPACITY)
    }

    /// Create a new, empty buffer with room for `capacity` bytes.
    ///
    /// The storage is allocated and zero-filled up front so the unfilled tail
    /// can be handed to a reader as a plain `&mut [u8]`.
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            data: vec![0u8; capacity],
            filled: 0,
            eof: false,
        }
    }

    /// Create a buffer from an existing slice (for slice-based parsing).
    ///
    /// The slice is copied, the buffer is completely filled, and it is marked
    /// as EOF because there is no reader to pull further data from.
    pub fn from_slice(slice: &[u8]) -> Self {
        Self {
            data: slice.to_vec(),
            filled: slice.len(),
            eof: true, // No more data to read
        }
    }

    /// Get the current buffer contents (only the filled prefix).
    #[inline]
    pub fn data(&self) -> &[u8] {
        &self.data[..self.filled]
    }

    /// Whether end of input has been recorded.
    ///
    /// This becomes `true` after a refill performs a zero-length read, and is
    /// `true` from the start for buffers created with [`ScanBuffer::from_slice`].
    #[inline]
    pub fn is_eof(&self) -> bool {
        self.eof
    }

    /// How many bytes are filled.
    #[inline]
    pub fn filled(&self) -> usize {
        self.filled
    }

    /// Get the buffer's total capacity (filled bytes plus free space).
    #[inline]
    pub fn capacity(&self) -> usize {
        self.data.len()
    }

    /// Reset the buffer for fresh data.
    ///
    /// Called when all data has been processed (scanner returned Eof but reader has more).
    /// This is NOT compaction - we're simply starting fresh because everything was consumed.
    /// The allocation and the EOF flag are both left untouched.
    pub fn reset(&mut self) {
        self.filled = 0;
        // Note: we don't reset eof here - that's determined by the reader
    }

    /// Grow the buffer to make room for more data.
    ///
    /// Called when mid-token (NeedMore) and buffer is full. The capacity is
    /// doubled; a zero-capacity buffer grows to [`DEFAULT_CAPACITY`] instead,
    /// because doubling zero would leave it unable to ever accept data.
    ///
    /// We grow rather than compact because:
    /// - Existing bytes keep their offsets (the backing storage may be
    ///   reallocated, but nothing is shifted within the buffer)
    /// - Scanner indices remain valid
    /// - Simpler logic
    pub fn grow(&mut self) {
        let new_capacity = match self.data.len() {
            0 => DEFAULT_CAPACITY,
            len => len.saturating_mul(2),
        };
        self.data.resize(new_capacity, 0);
    }

    /// Refill the buffer from a synchronous reader.
    ///
    /// Performs a single `read` into the unfilled portion of the buffer and
    /// returns the number of bytes read.
    ///
    /// `Ok(0)` is returned in three situations; use [`ScanBuffer::is_eof`] to
    /// tell them apart:
    /// - EOF was already recorded (the reader is not touched),
    /// - the buffer is full (the reader is not touched; call
    ///   [`ScanBuffer::grow`] or [`ScanBuffer::reset`] first),
    /// - the reader returned a zero-length read, which marks the buffer as EOF.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `reader.read`.
    #[cfg(feature = "std")]
    pub fn refill<R: std::io::Read>(&mut self, reader: &mut R) -> std::io::Result<usize> {
        if self.eof {
            return Ok(0);
        }

        let read_buf = &mut self.data[self.filled..];
        if read_buf.is_empty() {
            // Buffer is full - caller should grow() first if needed
            return Ok(0);
        }

        let n = reader.read(read_buf)?;
        self.filled += n;

        // A zero-length read into a non-empty slice is how readers signal EOF.
        if n == 0 {
            self.eof = true;
        }

        Ok(n)
    }

    /// Refill the buffer from an async reader (tokio).
    ///
    /// Same contract as the synchronous `refill`: one read into the unfilled
    /// tail, `Ok(0)` when already at EOF, when the buffer is full, or when the
    /// reader reports end of input (which sets the EOF flag).
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the reader.
    #[cfg(feature = "tokio")]
    pub async fn refill_tokio<R>(&mut self, reader: &mut R) -> std::io::Result<usize>
    where
        R: tokio::io::AsyncRead + Unpin,
    {
        use tokio::io::AsyncReadExt;

        if self.eof {
            return Ok(0);
        }

        let read_buf = &mut self.data[self.filled..];
        if read_buf.is_empty() {
            // Buffer is full - caller should grow() first if needed
            return Ok(0);
        }

        let n = reader.read(read_buf).await?;
        self.filled += n;

        if n == 0 {
            self.eof = true;
        }

        Ok(n)
    }

    /// Refill the buffer from an async reader (futures-io).
    ///
    /// Same contract as the synchronous `refill`: one read into the unfilled
    /// tail, `Ok(0)` when already at EOF, when the buffer is full, or when the
    /// reader reports end of input (which sets the EOF flag).
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the reader.
    #[cfg(feature = "futures-io")]
    pub async fn refill_futures<R>(&mut self, reader: &mut R) -> std::io::Result<usize>
    where
        R: futures_io::AsyncRead + Unpin,
    {
        use core::pin::Pin;
        use core::task::Context;

        if self.eof {
            return Ok(0);
        }

        let read_buf = &mut self.data[self.filled..];
        if read_buf.is_empty() {
            // Buffer is full - caller should grow() first if needed
            return Ok(0);
        }

        // Drive `poll_read` directly instead of relying on an extension trait.
        let n = core::future::poll_fn(|cx: &mut Context<'_>| {
            Pin::new(&mut *reader).poll_read(cx, read_buf)
        })
        .await?;
        self.filled += n;

        if n == 0 {
            self.eof = true;
        }

        Ok(n)
    }
}

impl Default for ScanBuffer {
    /// Equivalent to [`ScanBuffer::new`].
    fn default() -> Self {
        Self::new()
    }
}
199
#[cfg(test)]
mod tests {
    use super::*;

    /// A slice-backed buffer exposes the whole slice and starts out at EOF.
    #[test]
    fn test_from_slice() {
        let input: &[u8] = b"hello world";
        let buf = ScanBuffer::from_slice(input);

        assert_eq!(buf.data(), input);
        assert_eq!(buf.filled(), 11);
        assert!(buf.is_eof());
    }

    /// `reset` drops the filled count back to zero, leaving no visible data.
    #[test]
    fn test_reset() {
        let mut buf = ScanBuffer::from_slice(b"hello");
        assert_eq!(buf.filled(), 5);

        buf.reset();

        assert_eq!(buf.filled(), 0);
        assert!(buf.data().is_empty());
    }

    /// `grow` doubles a non-zero capacity.
    #[test]
    fn test_grow() {
        let mut buf = ScanBuffer::with_capacity(4);
        let before = buf.capacity();

        buf.grow();

        assert_eq!((before, buf.capacity()), (4, 8));
    }

    /// Walks an 11-byte input through an 8-byte buffer: fill, stall on a full
    /// buffer, grow, read the remainder, then observe EOF.
    #[cfg(feature = "std")]
    #[test]
    fn test_refill_from_reader() {
        let mut reader = std::io::Cursor::new(b"hello world");
        let mut buf = ScanBuffer::with_capacity(8);

        // The first refill fills the 8-byte buffer completely.
        assert_eq!(buf.refill(&mut reader).unwrap(), 8);
        assert_eq!(buf.data(), b"hello wo");

        // With no free space left, refill reads nothing.
        assert_eq!(buf.refill(&mut reader).unwrap(), 0);

        // After growing, the remaining 3 bytes fit.
        buf.grow();
        assert_eq!(buf.refill(&mut reader).unwrap(), 3);
        assert_eq!(buf.data(), b"hello world");
        // A non-empty read does not set the EOF flag.
        assert!(!buf.is_eof());

        // The next, zero-length read is what marks EOF.
        assert_eq!(buf.refill(&mut reader).unwrap(), 0);
        assert!(buf.is_eof());
    }
}