picojson/push_parser.rs
1// SPDX-License-Identifier: Apache-2.0
2
3//! A SAX-style JSON push parser.
4//!
5//! Clean implementation based on handler_design pattern with proper HRTB lifetime management.
6
7use crate::event_processor::{ContentExtractor, EscapeTiming, ParserCore};
8use crate::push_content_builder::{PushContentBuilder, PushParserHandler};
9use crate::shared::{DataSource, State};
10use crate::stream_buffer::StreamBufferError;
11use crate::{ujson, BitStackConfig, Event, ParseError};
12
13/// A SAX-style JSON push parser.
14///
15/// Generic over BitStack storage type for configurable nesting depth. Parsing
16/// events are returned to the handler.
17///
18/// # Generic Parameters
19///
20/// * `'scratch` - Lifetime for the scratch buffer used for temporary storage
21/// * `H` - The event handler type that implements [`PushParserHandler`]
22/// * `C` - BitStack configuration type that implements [`BitStackConfig`]
23pub struct PushParser<'input, 'scratch, H, C>
24where
25 C: BitStackConfig,
26{
27 /// Content extractor that handles content extraction and event emission
28 extractor: PushContentBuilder<'input, 'scratch>,
29 /// The handler that receives events
30 handler: H,
31 /// Core parser logic shared with other parsers
32 core: ParserCore<C::Bucket, C::Counter>,
33}
34
35impl<'input, 'scratch, H, C> PushParser<'input, 'scratch, H, C>
36where
37 C: BitStackConfig,
38{
39 /// Creates a new `PushParser`.
40 pub fn new(handler: H, buffer: &'scratch mut [u8]) -> Self {
41 Self {
42 extractor: PushContentBuilder::new(buffer),
43 handler,
44 core: ParserCore::new_chunked(),
45 }
46 }
47
48 /// Processes a chunk of input data.
49 pub fn write<E>(&mut self, data: &'input [u8]) -> Result<(), PushParseError<E>>
50 where
51 H: for<'a, 'b> PushParserHandler<'a, 'b, E>,
52 E: From<ParseError>,
53 {
54 // Apply any queued buffer resets
55 self.extractor.apply_unescaped_reset_if_queued();
56
57 // Set the input slice for the extractor to iterate over
58 self.extractor.set_chunk(data);
59
60 // Use ParserCore to process all bytes in the chunk
61 loop {
62 match self.core.next_event_impl_with_flags(
63 &mut self.extractor,
64 EscapeTiming::OnEnd, // PushParser uses OnEnd timing like StreamParser
65 |extractor, byte| {
66 // Selective accumulation: let PushContentBuilder decide based on its state
67 // whether this byte should be accumulated or processed directly
68 extractor.handle_byte_accumulation(byte)
69 },
70 true, // always_accumulate_during_escapes: ensure all hex digits reach the accumulator
71 ) {
72 Ok(Event::EndDocument) => {
73 // EndDocument during write() means we've consumed all bytes in current chunk
74 break;
75 }
76 Ok(event) => {
77 // Handle all other events normally
78 self.handler
79 .handle_event(event)
80 .map_err(PushParseError::Handler)?;
81
82 // Apply any queued buffer resets after the event has been processed
83 // This ensures that buffer content from previous tokens doesn't leak into subsequent ones
84 self.extractor.apply_unescaped_reset_if_queued();
85 }
86 Err(ParseError::EndOfData) => {
87 // No more events available from current chunk
88 break;
89 }
90 Err(e) => {
91 return Err(PushParseError::Parse(e));
92 }
93 }
94 }
95
96 // Check for chunk boundary condition - if still processing a token when chunk ends
97 let extractor_state = self.extractor.parser_state();
98
99 if matches!(
100 extractor_state,
101 State::String(_) | State::Key(_) | State::Number(_)
102 ) {
103 // If we haven't already started using the scratch buffer (e.g., due to escapes)
104 if !self.extractor.has_unescaped_content() {
105 // Copy the partial content from this chunk to scratch buffer before it's lost
106 self.extractor.copy_partial_content_to_scratch()?;
107 } else {
108 // Special case: For Numbers, check if the scratch buffer is actually empty
109 // This handles the byte-by-byte case where the flag is stale from previous Key processing
110 if matches!(extractor_state, State::Number(_)) {
111 let buffer_slice = self.extractor.get_unescaped_slice().unwrap_or(&[]);
112 let buffer_empty = buffer_slice.is_empty();
113
114 if buffer_empty {
115 self.extractor.copy_partial_content_to_scratch()?;
116 }
117 }
118 }
119 }
120
121 // Reset input slice
122 self.extractor.reset_input();
123
124 // Update position offset for next call
125 self.extractor.add_position_offset(data.len());
126
127 Ok(())
128 }
129
130 /// Finishes parsing, flushes any remaining events, and returns the handler.
131 /// This method consumes the parser.
132 pub fn finish<E>(mut self) -> Result<H, PushParseError<E>>
133 where
134 H: for<'a, 'b> PushParserHandler<'a, 'b, E>,
135 {
136 // Check that the JSON document is complete (all containers closed)
137 // Use a no-op callback since we don't expect any more events
138 let mut no_op_callback = |_event: ujson::Event, _pos: usize| {};
139 let _bytes_processed = self.core.tokenizer.finish(&mut no_op_callback)?;
140
141 // Handle any remaining content in the buffer
142 if *self.extractor.parser_state() != State::None {
143 return Err(crate::push_parser::PushParseError::Parse(
144 ParseError::EndOfData,
145 ));
146 }
147
148 // Emit EndDocument event
149 self.handler
150 .handle_event(Event::EndDocument)
151 .map_err(PushParseError::Handler)?;
152
153 Ok(self.handler)
154 }
155}
156
157/// An error that can occur during push-based parsing.
158#[derive(Debug, PartialEq)]
159pub enum PushParseError<E> {
160 /// An error occurred within the parser itself.
161 Parse(ParseError),
162 /// An error was returned by the user's handler.
163 Handler(E),
164}
165
166impl<E> From<ujson::Error> for PushParseError<E> {
167 fn from(e: ujson::Error) -> Self {
168 PushParseError::Parse(e.into())
169 }
170}
171
172impl<E> From<ParseError> for PushParseError<E> {
173 fn from(e: ParseError) -> Self {
174 PushParseError::Parse(e)
175 }
176}
177
178impl<E> From<StreamBufferError> for PushParseError<E> {
179 fn from(e: StreamBufferError) -> Self {
180 PushParseError::Parse(e.into())
181 }
182}
183
184impl<E> From<core::str::Utf8Error> for PushParseError<E> {
185 fn from(e: core::str::Utf8Error) -> Self {
186 PushParseError::Parse(ParseError::InvalidUtf8(e))
187 }
188}