picojson/
push_parser.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! A SAX-style JSON push parser.
4//!
5//! Clean implementation based on handler_design pattern with proper HRTB lifetime management.
6
7use crate::event_processor::{ContentExtractor, EscapeTiming, ParserCore};
8use crate::push_content_builder::{PushContentBuilder, PushParserHandler};
9use crate::shared::{DataSource, State};
10use crate::stream_buffer::StreamBufferError;
11use crate::{ujson, BitStackConfig, Event, ParseError};
12
13/// A SAX-style JSON push parser.
14///
15/// Generic over BitStack storage type for configurable nesting depth. Parsing
16/// events are returned to the handler.
17///
18/// # Generic Parameters
19///
20/// * `'scratch` - Lifetime for the scratch buffer used for temporary storage
21/// * `H` - The event handler type that implements [`PushParserHandler`]
22/// * `C` - BitStack configuration type that implements [`BitStackConfig`]
23pub struct PushParser<'input, 'scratch, H, C>
24where
25    C: BitStackConfig,
26{
27    /// Content extractor that handles content extraction and event emission
28    extractor: PushContentBuilder<'input, 'scratch>,
29    /// The handler that receives events
30    handler: H,
31    /// Core parser logic shared with other parsers
32    core: ParserCore<C::Bucket, C::Counter>,
33}
34
35impl<'input, 'scratch, H, C> PushParser<'input, 'scratch, H, C>
36where
37    C: BitStackConfig,
38{
39    /// Creates a new `PushParser`.
40    pub fn new(handler: H, buffer: &'scratch mut [u8]) -> Self {
41        Self {
42            extractor: PushContentBuilder::new(buffer),
43            handler,
44            core: ParserCore::new_chunked(),
45        }
46    }
47
48    /// Processes a chunk of input data.
49    pub fn write<E>(&mut self, data: &'input [u8]) -> Result<(), PushParseError<E>>
50    where
51        H: for<'a, 'b> PushParserHandler<'a, 'b, E>,
52        E: From<ParseError>,
53    {
54        // Apply any queued buffer resets
55        self.extractor.apply_unescaped_reset_if_queued();
56
57        // Set the input slice for the extractor to iterate over
58        self.extractor.set_chunk(data);
59
60        // Use ParserCore to process all bytes in the chunk
61        loop {
62            match self.core.next_event_impl_with_flags(
63                &mut self.extractor,
64                EscapeTiming::OnEnd, // PushParser uses OnEnd timing like StreamParser
65                |extractor, byte| {
66                    // Selective accumulation: let PushContentBuilder decide based on its state
67                    // whether this byte should be accumulated or processed directly
68                    extractor.handle_byte_accumulation(byte)
69                },
70                true, // always_accumulate_during_escapes: ensure all hex digits reach the accumulator
71            ) {
72                Ok(Event::EndDocument) => {
73                    // EndDocument during write() means we've consumed all bytes in current chunk
74                    break;
75                }
76                Ok(event) => {
77                    // Handle all other events normally
78                    self.handler
79                        .handle_event(event)
80                        .map_err(PushParseError::Handler)?;
81
82                    // Apply any queued buffer resets after the event has been processed
83                    // This ensures that buffer content from previous tokens doesn't leak into subsequent ones
84                    self.extractor.apply_unescaped_reset_if_queued();
85                }
86                Err(ParseError::EndOfData) => {
87                    // No more events available from current chunk
88                    break;
89                }
90                Err(e) => {
91                    return Err(PushParseError::Parse(e));
92                }
93            }
94        }
95
96        // Check for chunk boundary condition - if still processing a token when chunk ends
97        let extractor_state = self.extractor.parser_state();
98
99        if matches!(
100            extractor_state,
101            State::String(_) | State::Key(_) | State::Number(_)
102        ) {
103            // If we haven't already started using the scratch buffer (e.g., due to escapes)
104            if !self.extractor.has_unescaped_content() {
105                // Copy the partial content from this chunk to scratch buffer before it's lost
106                self.extractor.copy_partial_content_to_scratch()?;
107            } else {
108                // Special case: For Numbers, check if the scratch buffer is actually empty
109                // This handles the byte-by-byte case where the flag is stale from previous Key processing
110                if matches!(extractor_state, State::Number(_)) {
111                    let buffer_slice = self.extractor.get_unescaped_slice().unwrap_or(&[]);
112                    let buffer_empty = buffer_slice.is_empty();
113
114                    if buffer_empty {
115                        self.extractor.copy_partial_content_to_scratch()?;
116                    }
117                }
118            }
119        }
120
121        // Reset input slice
122        self.extractor.reset_input();
123
124        // Update position offset for next call
125        self.extractor.add_position_offset(data.len());
126
127        Ok(())
128    }
129
130    /// Finishes parsing, flushes any remaining events, and returns the handler.
131    /// This method consumes the parser.
132    pub fn finish<E>(mut self) -> Result<H, PushParseError<E>>
133    where
134        H: for<'a, 'b> PushParserHandler<'a, 'b, E>,
135    {
136        // Check that the JSON document is complete (all containers closed)
137        // Use a no-op callback since we don't expect any more events
138        let mut no_op_callback = |_event: ujson::Event, _pos: usize| {};
139        let _bytes_processed = self.core.tokenizer.finish(&mut no_op_callback)?;
140
141        // Handle any remaining content in the buffer
142        if *self.extractor.parser_state() != State::None {
143            return Err(crate::push_parser::PushParseError::Parse(
144                ParseError::EndOfData,
145            ));
146        }
147
148        // Emit EndDocument event
149        self.handler
150            .handle_event(Event::EndDocument)
151            .map_err(PushParseError::Handler)?;
152
153        Ok(self.handler)
154    }
155}
156
157/// An error that can occur during push-based parsing.
158#[derive(Debug, PartialEq)]
159pub enum PushParseError<E> {
160    /// An error occurred within the parser itself.
161    Parse(ParseError),
162    /// An error was returned by the user's handler.
163    Handler(E),
164}
165
166impl<E> From<ujson::Error> for PushParseError<E> {
167    fn from(e: ujson::Error) -> Self {
168        PushParseError::Parse(e.into())
169    }
170}
171
172impl<E> From<ParseError> for PushParseError<E> {
173    fn from(e: ParseError) -> Self {
174        PushParseError::Parse(e)
175    }
176}
177
178impl<E> From<StreamBufferError> for PushParseError<E> {
179    fn from(e: StreamBufferError) -> Self {
180        PushParseError::Parse(e.into())
181    }
182}
183
184impl<E> From<core::str::Utf8Error> for PushParseError<E> {
185    fn from(e: core::str::Utf8Error) -> Self {
186        PushParseError::Parse(ParseError::InvalidUtf8(e))
187    }
188}