json_streaming/shared/
read.rs

1use core::error::Error;
2use core::fmt::{Display, Formatter};
3use core::str::{FromStr, Utf8Error};
4use core::marker::PhantomData;
5
6/// [JsonReadToken] represents a single token read from a `JsonReader`. It does not own string 
7///  data, but references the reader's internal buffer.
8#[derive(Debug, PartialEq, Eq)]
9pub enum JsonReadToken<'a> {
10    StartObject,
11    EndObject,
12    StartArray,
13    EndArray,
14
15    Key(&'a str),
16    StringLiteral(&'a str),
17    NumberLiteral(JsonNumber<'a>),
18    BooleanLiteral(bool),
19    NullLiteral,
20
21    EndOfStream,
22}
23impl <'a> JsonReadToken<'a> {
24    pub fn kind(&self) -> &'static str {
25        match self {
26            JsonReadToken::StartObject => "{",
27            JsonReadToken::EndObject => "}",
28            JsonReadToken::StartArray => "[",
29            JsonReadToken::EndArray => "]",
30            JsonReadToken::Key(_) => "key",
31            JsonReadToken::StringLiteral(_) => "string",
32            JsonReadToken::NumberLiteral(_) => "number",
33            JsonReadToken::BooleanLiteral(_) => "boolean",
34            JsonReadToken::NullLiteral => "null",
35            JsonReadToken::EndOfStream => "<EOF>",
36        }
37    }
38}
39
40
/// A [JsonNumber] holds a number in its raw, textual form. A `JsonReader` has (more or less)
///  verified that the text is a valid JSON number, but it has not been converted into an actual
///  Rust numeric type yet.
///
/// Client code can either work with the wrapped string directly, or call [JsonNumber::parse] to
///  convert it into a numeric Rust type.
///
/// `JsonReader` has a higher-level API for parsing into a Rust number directly, which is usually
///  the more convenient and preferred way to parse numbers.
#[derive(Debug, PartialEq, Eq)]
pub struct JsonNumber<'a>(pub &'a str);

impl JsonNumber<'_> {
    /// Converts the wrapped text into the requested type `F` by delegating to `F`'s [FromStr]
    ///  implementation.
    ///
    /// # Errors
    /// Returns whatever error `F::from_str` reports for the wrapped text, e.g. when the number
    ///  has a fraction but `F` is an integer type, or when it does not fit into `F`.
    pub fn parse<F: FromStr>(&self) -> Result<F, F::Err> {
        F::from_str(self.0)
    }
}
58
/// A position in a parsed stream, expressed as a byte offset plus line and column. A
///  `JsonReader` maintains this position, and it is mostly useful to help pinpoint problems.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct Location {
    /// counted in bytes rather than characters, the same way Rust counts offsets in strings
    pub offset: usize,
    /// line number; [Location::start] begins counting at 1
    pub line: usize,
    /// counted in bytes rather than characters, the same way Rust counts offsets in strings;
    ///  [Location::start] begins counting at 1
    pub column: usize,
}

impl Display for Location {
    /// Renders the location as `line L, column C (offset O)`.
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        let Self { offset, line, column } = self;
        write!(f, "line {}, column {} (offset {})", line, column, offset)
    }
}

impl Location {
    /// The location before anything was consumed: offset 0 in line 1, column 1.
    pub fn start() -> Location {
        Self { offset: 0, line: 1, column: 1 }
    }

    /// Moves this location past one consumed byte. The offset always grows by one; a `\n` byte
    ///  starts the next line at column 1, any other byte advances the column by one.
    pub fn after_byte(&mut self, byte: u8) {
        self.offset += 1;
        match byte {
            b'\n' => {
                self.line += 1;
                self.column = 1;
            }
            _ => self.column += 1,
        }
    }
}
94
95
96/// A [JsonParseError] represents the range of things that can go wrong while reading a JSON
97///  stream: I/O error, byte sequences that are invalid UTF-8, violations of JSON tokenization
98///  or grammar, and tokens that exceed the `JsonReader`'s configured token buffer size.
99///
100/// Note that the representation of I/O errors depends on the reader implementation and is therefore
101///  a generic parameter of [JsonParseError]. 
102#[derive(Debug)]
103pub enum JsonParseError<E: Error> {
104    Io(E),
105    Utf8(Utf8Error),
106    Parse(&'static str, Location),
107    BufferOverflow(Location),
108}
109impl <E: Error> Display for JsonParseError<E> {
110    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
111        match self {
112            JsonParseError::Io(err) => write!(f, "I/O error: {}", err),
113            JsonParseError::Utf8(err) => write!(f, "Invalid UTF8: {}", err),
114            JsonParseError::Parse(msg, location) => write!(f, "parse error: {} @ {}", msg, location),
115            JsonParseError::BufferOverflow(location) => write!(f, "buffer overflow @ {}", location),
116        }
117    }
118}
119
120impl <E: Error> Error for JsonParseError<E> {
121}
122impl <E: Error> From<E> for JsonParseError<E> {
123    fn from(value: E) -> Self {
124        JsonParseError::Io(value)
125    }
126}
127
128
129/// A convenience type alias for a [Result] with a [JsonParseError] as its error type.
130pub type JsonParseResult<T, E> = Result<T, JsonParseError<E>>;
131
132
133/// Simple state tracking to handle those parts of the grammar that require only local context. That
134///  is essentially everything except the distinction between objects and arrays.
135#[derive(Debug, Clone, Copy, Eq, PartialEq)]
136pub(crate) enum ReaderState {
137    /// Immediately after a nested object or array starts. This needs separate handling from
138    ///  'BeforeEntry' to reject trailing commas in objects and arrays
139    Initial,
140    /// Ready to accept the current container's next entry, i.e. a value (for arrays) or a key/value
141    ///  pair (for objects)
142    BeforeEntry,
143    /// After a key, i.e. values are the only valid follow-up
144    AfterKey,
145    /// After a value, i.e. a comma or the closing bracket of the current container is expected
146    AfterValue,
147}
148
149pub(crate) struct ReaderInner<B: AsMut<[u8]>, E: Error> {
150    pub buf: B,
151    pub ind_end_buf: usize,
152    pub lenient_comma_handling: bool,
153    pub state: ReaderState,
154    pub parked_next: Option<u8>,
155    pub cur_location: Location,
156    pd: PhantomData<E>,
157}
158impl <B: AsMut<[u8]>, E: Error> ReaderInner<B, E> {
159    pub fn new(buf: B, lenient_comma_handling: bool) -> Self {
160        Self {
161            buf,
162            ind_end_buf: 0,
163            lenient_comma_handling,
164            state: ReaderState::Initial,
165            parked_next: None,
166            cur_location: Location::start(),
167            pd: PhantomData,
168        }
169    }
170
171    pub fn append_to_buf(&mut self, ch: u8) -> JsonParseResult<(), E> {
172        if self.ind_end_buf >= self.buf.as_mut().len() {
173            return self.buf_overflow();
174        }
175        self.buf.as_mut()[self.ind_end_buf] = ch;
176        self.ind_end_buf += 1;
177        Ok(())
178    }
179
180    /// see https://de.wikipedia.org/wiki/UTF-8
181    pub fn append_code_point(&mut self, cp: u16) -> JsonParseResult<(), E> {
182        match cp {
183            0x0000..=0x007F => {
184                self.append_to_buf(cp as u8)
185            }
186            0x0080..=0x07FF => {
187                self.append_to_buf(0xC0 | ((cp >> 6) as u8 & 0x1F))?;
188                self.append_to_buf(0x80 | ( cp       as u8 & 0x3F))
189            }
190            _ => { // 0x00800..0xffff
191                self.append_to_buf(0xE0 | ((cp >> 12) as u8 & 0x0F))?;
192                self.append_to_buf(0x80 | ((cp >>  6) as u8 & 0x3F))?;
193                self.append_to_buf(0x80 | ( cp        as u8 & 0x3F))
194            }
195        }
196    }
197
198    pub fn buf_as_str(&mut self) -> JsonParseResult<&str, E> {
199        // the reference is used only immutably, but all callers have a mutable refrence anyway
200        //  and calling as_mut() avoids the need for another type bound
201        core::str::from_utf8(
202            &self.buf.as_mut()[..self.ind_end_buf])
203            .map_err(|e| JsonParseError::Utf8(e))
204    }
205
206    pub fn ensure_accept_value(&mut self) -> JsonParseResult<(), E> {
207        match self.state {
208            ReaderState::Initial |
209            ReaderState::BeforeEntry |
210            ReaderState::AfterKey => {
211                Ok(())
212            }
213            ReaderState::AfterValue => {
214                if self.lenient_comma_handling {
215                    Ok(())
216                }
217                else {
218                    self.parse_err("missing comma")
219                }
220            }
221        }
222    }
223
224    pub fn ensure_accept_end_nested(&mut self) -> JsonParseResult<(), E> {
225        match self.state {
226            ReaderState::Initial |
227            ReaderState::AfterValue => {
228                Ok(())
229            }
230            ReaderState::BeforeEntry => {
231                self.parse_err("trailing comma")
232            }
233            ReaderState::AfterKey => {
234                self.parse_err("key without a value")
235            }
236        }
237    }
238
239    pub fn state_change_for_value(&mut self) -> JsonParseResult<(), E> {
240        match self.state {
241            ReaderState::Initial |
242            ReaderState::BeforeEntry |
243            ReaderState::AfterKey => {
244                self.state = ReaderState::AfterValue;
245                Ok(())
246            }
247            ReaderState::AfterValue => {
248                self.parse_err("missing comma")
249            }
250        }
251    }
252
253    pub fn on_comma(&mut self) -> JsonParseResult<(), E> {
254        match self.state {
255            ReaderState::AfterValue => {
256                self.state = ReaderState::BeforeEntry;
257                Ok(())
258            }
259            ReaderState::Initial |
260            ReaderState::BeforeEntry |
261            ReaderState::AfterKey => {
262                self.parse_err("unexpected comma")
263            }
264        }
265    }
266
267    pub fn parse_err<T>(&self, msg: &'static str) -> JsonParseResult<T, E> {
268        Err(JsonParseError::Parse(msg, self.cur_location))
269    }
270
271    pub fn buf_overflow<T>(&self) -> JsonParseResult<T, E> {
272        Err(JsonParseError::BufferOverflow(self.cur_location))
273    }
274}