oxidized_json_checker/
lib.rs

//! `oxidized-json-checker` is a library that provides JSON validation without
//! keeping the stream of bytes in memory: it streams the bytes and validates them
//! on the fly using a pushdown automaton.
4//!
//! The original library has been retrieved from [json.org](http://www.json.org/JSON_checker/)
//! and improved to accept every valid JSON element as a valid JSON document.
7//!
8//! Therefore this library accepts a single string or single integer as a valid JSON document,
9//! this way we follow the [`serde_json`](https://docs.rs/serde_json) rules.
10//!
11//! # Example: validate some bytes
12//!
13//! This example shows how you can give the library a simple slice
14//! of bytes and validate that it is a valid JSON document.
15//!
16//! ```
17//! # fn fmain() -> Result<(), Box<dyn std::error::Error>> {
18//! let text = r#"["I", "am", "a", "valid", "JSON", "array"]"#;
19//! let bytes = text.as_bytes();
20//!
21//! oxidized_json_checker::validate(bytes)?;
22//! # Ok(()) }
23//! # fmain().unwrap()
24//! ```
25//!
26//! # Example: validate a stream of bytes
27//!
//! This example shows that you can pass any type that implements `io::Read`
//! to the `JsonChecker` and validate that it yields valid JSON.
30//!
31//! ```
32//! # const json_bytes: &[u8] = b"null";
33//! # fn streaming_from_the_web() -> std::io::Result<&'static [u8]> {
34//! #     Ok(json_bytes)
35//! # }
36//! # fn fmain() -> Result<(), Box<dyn std::error::Error>> {
37//! let stream = streaming_from_the_web()?;
38//!
39//! oxidized_json_checker::validate(stream)?;
40//! # Ok(()) }
41//! # fmain().unwrap()
42//! ```
43//!
44//! # Example: complex compositions
45//!
//! This example shows how you can use the `JsonChecker` type to check
//! a compressed stream of bytes.
48//!
49//! You can decompress the stream, check it using the `JsonChecker`, and compress it
50//! again to pipe it elsewhere. All of that without much memory impact.
51//!
52//! ```no_run
53//! # fn fmain() -> Result<(), Box<dyn std::error::Error>> {
54//! use std::io;
55//! use oxidized_json_checker::JsonChecker;
56//!
57//! let stdin = io::stdin();
58//! let stdout = io::stdout();
59//!
60//! // Wrap the stdin reader in a Snappy reader
61//! // then wrap it in a JsonChecker reader.
62//! let rdr = snap::read::FrameDecoder::new(stdin.lock());
63//! let mut rdr = JsonChecker::new(rdr);
64//!
65//! // Wrap the stdout writer in a Snappy writer.
66//! let mut wtr = snap::write::FrameEncoder::new(stdout.lock());
67//!
//! // The copy function returns any io error raised by the reader or the writer;
//! // the JsonChecker returns an error when invalid JSON is encountered.
70//! io::copy(&mut rdr, &mut wtr)?;
71//!
72//! // We must check that the final bytes were valid.
73//! rdr.finish()?;
74//! # Ok(()) }
75//! # fmain().unwrap()
76//! ```
77//!
78
79use std::{fmt, io};
80use crate::internals::{State, Class, Mode};
81use crate::internals::{STATE_TRANSITION_TABLE, ASCII_CLASS};
82
83#[cfg(test)]
84mod tests;
85mod internals;
86
/// The error type returned by the `JsonChecker` type.
///
/// Every variant is a plain marker without payload, which makes the type cheap
/// to copy and directly comparable (`==`, `assert_eq!`, `matches!`).
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Error {
    /// A byte that can never appear in a JSON document was read.
    InvalidCharacter,
    /// A `}` closing an empty object was read while no object key was expected.
    EmptyCurlyBraces,
    /// A `}` was read while the innermost open element was not an object.
    OrphanCurlyBrace,
    /// A `]` was read while the innermost open element was not an array.
    OrphanSquareBrace,
    /// Opening a new element would exceed the configured maximum depth.
    MaxDepthReached,
    /// A `"` was read at a position where no string can start or end.
    InvalidQuote,
    /// A `,` was read outside of an object or an array.
    InvalidComma,
    /// A `:` was read while no object key was being processed.
    InvalidColon,
    /// The automaton has no valid transition for the byte that was read.
    InvalidState,
    /// The input ended before the JSON element was complete.
    ///
    /// It is also the error kept by the checker once its inner reader failed.
    IncompleteElement,
}
101
102impl From<Error> for io::Error {
103    fn from(err: Error) -> io::Error {
104        io::Error::new(io::ErrorKind::Other, err)
105    }
106}
107
108impl std::error::Error for Error {}
109
110impl fmt::Display for Error {
111    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
112        match self {
113            Error::InvalidCharacter => f.write_str("invalid character"),
114            Error::EmptyCurlyBraces => f.write_str("empty curly braces"),
115            Error::OrphanCurlyBrace => f.write_str("orphan curly brace"),
116            Error::OrphanSquareBrace => f.write_str("orphan square brace"),
117            Error::MaxDepthReached => f.write_str("max depth reached"),
118            Error::InvalidQuote => f.write_str("invalid quote"),
119            Error::InvalidComma => f.write_str("invalid comma"),
120            Error::InvalidColon => f.write_str("invalid colon"),
121            Error::InvalidState => f.write_str("invalid state"),
122            Error::IncompleteElement => f.write_str("incomplete element"),
123        }
124    }
125}
126
/// The kind of a JSON value, as reported for the outermost element of a
/// validated document.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum JsonType {
    /// The `null` literal.
    Null,
    /// The `true` or `false` literal.
    Bool,
    /// A number.
    Number,
    /// A string.
    String,
    /// An array.
    Array,
    /// An object.
    Object,
}
137
138/// A convenient method to check and consume JSON from a stream of bytes.
139///
140/// # Example
141///
142/// ```
143/// # fn fmain() -> Result<(), Box<dyn std::error::Error>> {
144/// use oxidized_json_checker::{validate, JsonType};
145/// let text = r#""I am a simple string!""#;
146/// let bytes = text.as_bytes();
147///
148/// let json_type = validate(bytes)?;
149/// assert_eq!(json_type, JsonType::String);
150/// # Ok(()) }
151/// # fmain().unwrap()
152/// ```
153pub fn validate<R: io::Read>(reader: R) -> io::Result<JsonType> {
154    let mut checker = JsonChecker::new(reader);
155    io::copy(&mut checker, &mut io::sink())?;
156    let outer_type = checker.finish()?;
157    Ok(outer_type)
158}
159
160/// A convenient method to check and consume JSON from an `str`.
161pub fn validate_str(string: &str) -> Result<JsonType, Error> {
162    validate_bytes(string.as_bytes())
163}
164
165/// A convenient method to check and consume JSON from a bytes slice.
166pub fn validate_bytes(bytes: &[u8]) -> Result<JsonType, Error> {
167    let mut checker = JsonChecker::new(());
168    checker.next_bytes(bytes)?;
169    checker.finish()
170}
171
172/// The `JsonChecker` is a `io::Read` adapter, it can be used like a pipe,
173/// reading bytes, checkings those and output the same bytes.
174///
175/// If an error is encountered, a JSON syntax error or an `io::Error`
176/// it is returned by the `io::Read::read` method.
177///
178/// # Safety
179///
180/// An error encountered while reading bytes will invalidate the checker.
181///
182/// # Example: read from a slice
183///
184/// ```
185/// # fn fmain() -> Result<(), Box<dyn std::error::Error>> {
186/// use std::io;
187/// use oxidized_json_checker::JsonChecker;
188///
189/// let text = r#"{"I am": "an object"}"#;
190/// let bytes = text.as_bytes();
191///
192/// let mut checker = JsonChecker::new(bytes);
193/// io::copy(&mut checker, &mut io::sink())?;
194/// checker.finish()?;
195/// # Ok(()) }
196/// # fmain().unwrap()
197/// ```
198pub struct JsonChecker<R> {
199    state: State,
200    error: Option<Error>,
201    outer_type: Option<JsonType>,
202    max_depth: usize,
203    stack: Vec<Mode>,
204    reader: R,
205}
206
207impl<R> fmt::Debug for JsonChecker<R> {
208    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
209        f.debug_struct("JsonChecker").finish()
210    }
211}
212
213impl<R> JsonChecker<R> {
214    /// Construct a `JsonChecker. To continue the process, write to the `JsonChecker`
215    /// like a sink, and then call `JsonChecker::finish` to obtain the final result.
216    pub fn new(reader: R) -> JsonChecker<R> {
217        JsonChecker::with_max_depth(reader, usize::max_value())
218    }
219
220    /// Construct a `JsonChecker` and restrict the level of maximum nesting.
221    ///
222    /// For more information read the `JsonChecker::new` documentation.
223    pub fn with_max_depth(reader: R, max_depth: usize) -> JsonChecker<R> {
224        JsonChecker {
225            state: State::Go,
226            error: None,
227            outer_type: None,
228            max_depth,
229            stack: vec![Mode::Done],
230            reader,
231        }
232    }
233
234    #[inline]
235    fn next_bytes(&mut self, bytes: &[u8]) -> Result<(), Error> {
236        bytes.iter().try_for_each(|b| self.next_byte(*b))
237    }
238
239    #[inline]
240    fn next_byte(&mut self, next_byte: u8) -> Result<(), Error> {
241        if let Some(error) = self.error {
242            return Err(error);
243        }
244
245        // We can potentially use try_blocks in the future.
246        fn internal_next_byte<R>(jc: &mut JsonChecker<R>, next_byte: u8) -> Result<(), Error> {
247            // Determine the character's class.
248            let next_class = if next_byte >= 128 {
249                Class::CEtc
250            } else {
251                ASCII_CLASS[next_byte as usize]
252            };
253
254            if next_class == Class::Invalid {
255                return Err(Error::InvalidCharacter);
256            }
257
258            // Get the next state from the state transition table and
259            // perform one of the actions.
260            let next_state = STATE_TRANSITION_TABLE[jc.state as usize][next_class as usize];
261
262            // Save the type we met if not already saved.
263            if jc.outer_type.is_none() {
264                match next_state {
265                    State::N1 => jc.outer_type = Some(JsonType::Null),
266                    State::T1 | State::F1 => jc.outer_type = Some(JsonType::Bool),
267                    State::In => jc.outer_type = Some(JsonType::Number),
268                    State::Wq => jc.outer_type = Some(JsonType::String),
269                    State::Wos => jc.outer_type = Some(JsonType::Array),
270                    State::Woc => jc.outer_type = Some(JsonType::Object),
271                    _ => (),
272                }
273            }
274
275            match next_state {
276                State::Wec => { // Empty }
277                    if !jc.pop(Mode::Key) {
278                        return Err(Error::EmptyCurlyBraces);
279                    }
280                    jc.state = State::Ok;
281                },
282                State::Wcu => { // }
283                    if !jc.pop(Mode::Object) {
284                        return Err(Error::OrphanCurlyBrace);
285                    }
286                    jc.state = State::Ok;
287                },
288                State::Ws => { // ]
289                    if !jc.pop(Mode::Array) {
290                        return Err(Error::OrphanSquareBrace);
291                    }
292                    jc.state = State::Ok;
293                },
294                State::Woc => { // {
295                    if !jc.push(Mode::Key) {
296                        return Err(Error::MaxDepthReached);
297                    }
298                    jc.state = State::Ob;
299                },
300                State::Wos => { // [
301                    if !jc.push(Mode::Array) {
302                        return Err(Error::MaxDepthReached);
303                    }
304                    jc.state = State::Ar;
305                }
306                State::Wq => { // "
307                    match jc.stack.last() {
308                        Some(Mode::Done) => {
309                            if !jc.push(Mode::String) {
310                                return Err(Error::MaxDepthReached);
311                            }
312                            jc.state = State::St;
313                        },
314                        Some(Mode::String) => {
315                            jc.pop(Mode::String);
316                            jc.state = State::Ok;
317                        },
318                        Some(Mode::Key) => jc.state = State::Co,
319                        Some(Mode::Array) |
320                        Some(Mode::Object) => jc.state = State::Ok,
321                        _ => return Err(Error::InvalidQuote),
322                    }
323                },
324                State::Wcm => { // ,
325                    match jc.stack.last() {
326                        Some(Mode::Object) => {
327                            // A comma causes a flip from object mode to key mode.
328                            if !jc.pop(Mode::Object) || !jc.push(Mode::Key) {
329                                return Err(Error::InvalidComma);
330                            }
331                            jc.state = State::Ke;
332                        }
333                        Some(Mode::Array) => jc.state = State::Va,
334                        _ => return Err(Error::InvalidComma),
335                    }
336                },
337                State::Wcl => { // :
338                    // A colon causes a flip from key mode to object mode.
339                    if !jc.pop(Mode::Key) || !jc.push(Mode::Object) {
340                        return Err(Error::InvalidColon);
341                    }
342                    jc.state = State::Va;
343                },
344                State::Invalid => {
345                    return Err(Error::InvalidState)
346                },
347
348                // Or change the state.
349                state => jc.state = state,
350            }
351
352            Ok(())
353        }
354
355        // By catching returned errors when this `JsonChecker` is used we *fuse*
356        // the checker and ensure the user don't use a checker in an invalid state.
357        if let Err(error) = internal_next_byte(self, next_byte) {
358            self.error = Some(error);
359            return Err(error);
360        }
361
362        Ok(())
363    }
364
365    /// The `JsonChecker::finish` method must be called after all of the characters
366    /// have been processed.
367    ///
368    /// This function consumes the `JsonChecker` and returns `Ok(JsonType)` if the
369    /// JSON text was accepted and the JSON type guessed.
370    pub fn finish(self) -> Result<JsonType, Error> {
371        self.into_inner().map(|(_, t)| t)
372    }
373
374    /// The `JsonChecker::into_inner` does the same as the `JsonChecker::finish`
375    /// method but returns the internal reader along with the JSON type guessed.
376    pub fn into_inner(mut self) -> Result<(R, JsonType), Error> {
377        let is_state_valid = match self.state {
378            State::Ok | State::In | State::Fr | State::Fs | State::E3 => true,
379            _ => false,
380        };
381
382        if is_state_valid && self.pop(Mode::Done) {
383            let outer_type = self.outer_type.expect("BUG: the outer type must have been guessed");
384            return Ok((self.reader, outer_type))
385        }
386
387        // We do not need to catch this error to *fuse* the checker because this method
388        // consumes the checker, it cannot be reused after an error has been thrown.
389        Err(Error::IncompleteElement)
390    }
391
392    /// Push a mode onto the stack. Returns false if max depth is reached.
393    fn push(&mut self, mode: Mode) -> bool {
394        if self.stack.len() + 1 >= self.max_depth {
395            return false;
396        }
397        self.stack.push(mode);
398        return true;
399    }
400
401    /// Pop the stack, assuring that the current mode matches the expectation.
402    /// Return false if the stack is empty or if the modes mismatch.
403    fn pop(&mut self, mode: Mode) -> bool {
404        self.stack.pop() == Some(mode)
405    }
406}
407
408impl<R: io::Read> io::Read for JsonChecker<R> {
409    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
410        // If an error have already been encountered we return it,
411        // this *fuses* the JsonChecker.
412        if let Some(error) = self.error {
413            return Err(error.into());
414        }
415
416        let len = match self.reader.read(buf) {
417            Err(error) => {
418                // We do not store the io::Error in the JsonChecker Error
419                // type instead we use the IncompleteElement error.
420                self.error = Some(Error::IncompleteElement);
421                return Err(error);
422            },
423            Ok(len) => len,
424        };
425
426        self.next_bytes(&buf[..len])?;
427
428        Ok(len)
429    }
430}