json_streaming/shared/
read.rs

1use core::error::Error;
2use core::fmt::{Display, Formatter};
3use core::str::{FromStr, Utf8Error};
4use core::marker::PhantomData;
5
6/// [JsonReadToken] represents a single token read from a `JsonReader`. It does not own string 
7///  data, but references the reader's internal buffer.
8#[derive(Debug, PartialEq, Eq)]
9pub enum JsonReadToken<'a> {
10    StartObject,
11    EndObject,
12    StartArray,
13    EndArray,
14
15    Key(&'a str),
16    StringLiteral(&'a str),
17    NumberLiteral(JsonNumber<'a>),
18    BooleanLiteral(bool),
19    NullLiteral,
20
21    EndOfStream,
22}
23impl <'a> JsonReadToken<'a> {
24    pub fn kind(&self) -> &'static str {
25        match self {
26            JsonReadToken::StartObject => "{",
27            JsonReadToken::EndObject => "}",
28            JsonReadToken::StartArray => "[",
29            JsonReadToken::EndArray => "]",
30            JsonReadToken::Key(_) => "key",
31            JsonReadToken::StringLiteral(_) => "string",
32            JsonReadToken::NumberLiteral(_) => "number",
33            JsonReadToken::BooleanLiteral(_) => "boolean",
34            JsonReadToken::NullLiteral => "null",
35            JsonReadToken::EndOfStream => "<EOF>",
36        }
37    }
38}
39
40
/// The raw text of a JSON number.
///
/// A `JsonReader` has (more or less) checked that the text is a valid JSON number, but nothing
///  has been converted into a Rust numeric type yet. Callers can read the text through the
///  public field, or convert it with [JsonNumber::parse].
///
/// `JsonReader` also offers a higher-level API that yields Rust numbers directly, which is
///  usually the more convenient choice.
#[derive(Debug, PartialEq, Eq)]
pub struct JsonNumber<'a>(pub &'a str);
impl<'a> JsonNumber<'a> {
    /// Converts the wrapped text into the numeric type `F` using `F`'s [FromStr] implementation.
    ///
    /// # Errors
    /// Returns `F::Err` when `F::from_str` rejects the text, e.g. because the value does not
    ///  fit into `F`.
    pub fn parse<F: FromStr>(&self) -> Result<F, F::Err> {
        F::from_str(self.0)
    }
}
58
/// A position inside a parsed stream, given as byte offset plus line and column. A `JsonReader`
///  keeps one of these up to date; its main purpose is to help pinpoint problems.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct Location {
    /// Counted in bytes, not characters, the same way Rust counts offsets in strings.
    pub offset: usize,
    /// Line number; [Location::start] begins at 1.
    pub line: usize,
    /// Counted in bytes, not characters; [Location::start] begins at 1.
    pub column: usize,
}
impl Display for Location {
    /// Renders the location as `line <line>, column <column> (offset <offset>)`.
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        let Location { offset, line, column } = *self;
        write!(f, "line {}, column {} (offset {})", line, column, offset)
    }
}
impl Location {
    /// The location before any byte was consumed: offset 0, line 1, column 1.
    pub fn start() -> Location {
        Self { offset: 0, line: 1, column: 1 }
    }

    /// Moves this location past one consumed byte.
    ///
    /// The offset always grows by one. A `\n` continues at column 1 of the next line; every
    ///  other byte (`\r` included) advances the column by one.
    pub fn after_byte(&mut self, byte: u8) {
        self.offset += 1;
        match byte {
            b'\n' => {
                self.line += 1;
                self.column = 1;
            }
            _ => self.column += 1,
        }
    }
}
94
95
96/// A [JsonParseError] represents the range of things that can go wrong while reading a JSON
97///  stream: I/O error, byte sequences that are invalid UTF-8, violations of JSON tokenization
98///  or grammar, and tokens that exceed the `JsonReader`'s configured token buffer size.
99///
100/// Note that the representation of I/O errors depends on the reader implementation and is therefore
101///  a generic parameter of [JsonParseError]. 
102#[derive(Debug)]
103pub enum JsonParseError<E: Error> {
104    Io(E),
105    Utf8(Utf8Error),
106    Parse(&'static str, Location),
107    UnexpectedToken(&'static str, Location),
108    BufferOverflow(Location),
109}
110impl <E: Error> Display for JsonParseError<E> {
111    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
112        match self {
113            JsonParseError::Io(err) => write!(f, "I/O error: {}", err),
114            JsonParseError::Utf8(err) => write!(f, "Invalid UTF8: {}", err),
115            JsonParseError::Parse(msg, location) => write!(f, "parse error: {} @ {}", msg, location),
116            JsonParseError::UnexpectedToken(kind, location) => write!(f, "unexpected token '{}' @ {}", kind, location),
117            JsonParseError::BufferOverflow(location) => write!(f, "buffer overflow @ {}", location),
118        }
119    }
120}
121
122impl <E: Error> Error for JsonParseError<E> {
123}
124impl <E: Error> From<E> for JsonParseError<E> {
125    fn from(value: E) -> Self {
126        JsonParseError::Io(value)
127    }
128}
129
130
131/// A convenience type alias for a [Result] with a [JsonParseError] as its error type.
132pub type JsonParseResult<T, E> = Result<T, JsonParseError<E>>;
133
134
/// Minimal state for the parts of the grammar that can be checked with local context only,
///  which is essentially everything except telling objects and arrays apart.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum ReaderState {
    /// Right after a nested object or array was opened. Kept apart from `BeforeEntry` so that
    ///  trailing commas in objects and arrays can be rejected.
    Initial,
    /// The current container's next entry may follow: a value (in an array) or a key/value
    ///  pair (in an object).
    BeforeEntry,
    /// A key was read; only a value may follow.
    AfterKey,
    /// A value was read; a comma or the current container's closing bracket is expected.
    AfterValue,
}
150
151pub(crate) struct ReaderInner<B: AsMut<[u8]>, E: Error> {
152    pub buf: B,
153    pub ind_end_buf: usize,
154    pub lenient_comma_handling: bool,
155    pub state: ReaderState,
156    pub parked_next: Option<u8>,
157    pub cur_location: Location,
158    pd: PhantomData<E>,
159}
160impl <B: AsMut<[u8]>, E: Error> ReaderInner<B, E> {
161    pub fn new(buf: B, lenient_comma_handling: bool) -> Self {
162        Self {
163            buf,
164            ind_end_buf: 0,
165            lenient_comma_handling,
166            state: ReaderState::Initial,
167            parked_next: None,
168            cur_location: Location::start(),
169            pd: PhantomData,
170        }
171    }
172
173    pub fn append_to_buf(&mut self, ch: u8) -> JsonParseResult<(), E> {
174        if self.ind_end_buf >= self.buf.as_mut().len() {
175            return self.buf_overflow();
176        }
177        self.buf.as_mut()[self.ind_end_buf] = ch;
178        self.ind_end_buf += 1;
179        Ok(())
180    }
181
182    /// see https://de.wikipedia.org/wiki/UTF-8
183    pub fn append_code_point(&mut self, cp: u16) -> JsonParseResult<(), E> {
184        match cp {
185            0x0000..=0x007F => {
186                self.append_to_buf(cp as u8)
187            }
188            0x0080..=0x07FF => {
189                self.append_to_buf(0xC0 | ((cp >> 6) as u8 & 0x1F))?;
190                self.append_to_buf(0x80 | ( cp       as u8 & 0x3F))
191            }
192            _ => { // 0x00800..0xffff
193                self.append_to_buf(0xE0 | ((cp >> 12) as u8 & 0x0F))?;
194                self.append_to_buf(0x80 | ((cp >>  6) as u8 & 0x3F))?;
195                self.append_to_buf(0x80 | ( cp        as u8 & 0x3F))
196            }
197        }
198    }
199
200    pub fn buf_as_str(&mut self) -> JsonParseResult<&str, E> {
201        // the reference is used only immutably, but all callers have a mutable refrence anyway
202        //  and calling as_mut() avoids the need for another type bound
203        core::str::from_utf8(
204            &self.buf.as_mut()[..self.ind_end_buf])
205            .map_err(|e| JsonParseError::Utf8(e))
206    }
207
208    pub fn ensure_accept_value(&mut self) -> JsonParseResult<(), E> {
209        match self.state {
210            ReaderState::Initial |
211            ReaderState::BeforeEntry |
212            ReaderState::AfterKey => {
213                Ok(())
214            }
215            ReaderState::AfterValue => {
216                if self.lenient_comma_handling {
217                    Ok(())
218                }
219                else {
220                    self.parse_err("missing comma")
221                }
222            }
223        }
224    }
225
226    pub fn ensure_accept_end_nested(&mut self) -> JsonParseResult<(), E> {
227        match self.state {
228            ReaderState::Initial |
229            ReaderState::AfterValue => {
230                Ok(())
231            }
232            ReaderState::BeforeEntry => {
233                self.parse_err("trailing comma")
234            }
235            ReaderState::AfterKey => {
236                self.parse_err("key without a value")
237            }
238        }
239    }
240
241    pub fn state_change_for_value(&mut self) -> JsonParseResult<(), E> {
242        match self.state {
243            ReaderState::Initial |
244            ReaderState::BeforeEntry |
245            ReaderState::AfterKey => {
246                self.state = ReaderState::AfterValue;
247                Ok(())
248            }
249            ReaderState::AfterValue => {
250                self.parse_err("missing comma")
251            }
252        }
253    }
254
255    pub fn on_comma(&mut self) -> JsonParseResult<(), E> {
256        match self.state {
257            ReaderState::AfterValue => {
258                self.state = ReaderState::BeforeEntry;
259                Ok(())
260            }
261            ReaderState::Initial |
262            ReaderState::BeforeEntry |
263            ReaderState::AfterKey => {
264                self.parse_err("unexpected comma")
265            }
266        }
267    }
268
269    pub fn parse_err<T>(&self, msg: &'static str) -> JsonParseResult<T, E> {
270        Err(JsonParseError::Parse(msg, self.cur_location))
271    }
272
273    pub fn buf_overflow<T>(&self) -> JsonParseResult<T, E> {
274        Err(JsonParseError::BufferOverflow(self.cur_location))
275    }
276}