1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
//! `oxidized-json-checker` is a library that provides JSON validation without
//! keeping the stream of bytes in memory: it streams the bytes and validates them
//! on the fly using a pushdown automaton.
//!
//! The original library has been retrieved from [json.org](http://www.json.org/JSON_checker/)
//! and improved to accept every valid JSON element as a valid JSON document.
//!
//! Therefore this library accepts a single string or single integer as a valid JSON document,
//! this way we follow the [`serde_json`](https://docs.rs/serde_json) rules.
//!
//! # Example: validate some bytes
//!
//! This example shows how you can give the library a simple slice
//! of bytes and validate that it is a valid JSON document.
//!
//! ```
//! # fn fmain() -> Result<(), Box<dyn std::error::Error>> {
//! let text = r#"["I", "am", "a", "valid", "JSON", "array"]"#;
//! let bytes = text.as_bytes();
//!
//! oxidized_json_checker::validate(bytes)?;
//! # Ok(()) }
//! # fmain().unwrap()
//! ```
//!
//! # Example: validate a stream of bytes
//!
//! This example shows that you can pass any type that implements `io::Read`
//! to the checker and validate that the bytes it yields are valid JSON.
//!
//! ```
//! # const json_bytes: &[u8] = b"null";
//! # fn streaming_from_the_web() -> std::io::Result<&'static [u8]> {
//! #     Ok(json_bytes)
//! # }
//! # fn fmain() -> Result<(), Box<dyn std::error::Error>> {
//! let stream = streaming_from_the_web()?;
//!
//! oxidized_json_checker::validate(stream)?;
//! # Ok(()) }
//! # fmain().unwrap()
//! ```
//!
//! # Example: complex compositions
//!
//! This example shows how you can use the `JsonChecker` type to check
//! a compressed stream of bytes.
//!
//! You can decompress the stream, check it using the `JsonChecker`, and compress it
//! again to pipe it elsewhere. All of that without much memory impact.
//!
//! ```no_run
//! # fn fmain() -> Result<(), Box<dyn std::error::Error>> {
//! use std::io;
//! use oxidized_json_checker::JsonChecker;
//!
//! let stdin = io::stdin();
//! let stdout = io::stdout();
//!
//! // Wrap the stdin reader in a Snappy reader
//! // then wrap it in a JsonChecker reader.
//! let rdr = snap::read::FrameDecoder::new(stdin.lock());
//! let mut rdr = JsonChecker::new(rdr);
//!
//! // Wrap the stdout writer in a Snappy writer.
//! let mut wtr = snap::write::FrameEncoder::new(stdout.lock());
//!
//! // The copy function returns any io error raised by either reader;
//! // the JsonChecker raises an error when invalid JSON is encountered.
//! io::copy(&mut rdr, &mut wtr)?;
//!
//! // We must check that the final bytes were valid.
//! rdr.finish()?;
//! # Ok(()) }
//! # fmain().unwrap()
//! ```
//!

use std::{fmt, io};
use crate::internals::{State, Class, Mode};
use crate::internals::{STATE_TRANSITION_TABLE, ASCII_CLASS};

#[cfg(test)]
mod tests;
mod internals;

/// The error type returned by the `JsonChecker` type.
///
/// The type is `Copy` and comparable (`PartialEq`/`Eq`), so a returned error
/// can be matched or asserted on directly.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Error {
    /// The byte belongs to the `Invalid` character class.
    InvalidCharacter,
    /// A `}` closing an empty object was met while the checker was not
    /// expecting an object key.
    EmptyCurlyBraces,
    /// A `}` was met while the checker was not inside an object value.
    OrphanCurlyBrace,
    /// A `]` was met while the checker was not inside an array.
    OrphanSquareBrace,
    /// Opening one more nesting level would exceed the configured maximum depth.
    MaxDepthReached,
    /// A `"` was met in a context where no string can start or end.
    InvalidQuote,
    /// A `,` was met outside of an array or an object value.
    InvalidComma,
    /// A `:` was met while the checker was not expecting one after an object key.
    InvalidColon,
    /// The transition table has no valid transition for the current state
    /// and the class of the byte that was read.
    InvalidState,
    /// The input ended before a complete JSON element was read; also recorded
    /// by the `io::Read` implementation when the wrapped reader fails.
    IncompleteElement,
}

impl From<Error> for io::Error {
    fn from(err: Error) -> io::Error {
        io::Error::new(io::ErrorKind::Other, err)
    }
}

// `Error` uses the default `std::error::Error` methods; the required `Debug`
// and `Display` implementations are provided elsewhere in this file.
impl std::error::Error for Error {}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Error::InvalidCharacter => f.write_str("invalid character"),
            Error::EmptyCurlyBraces => f.write_str("empty curly braces"),
            Error::OrphanCurlyBrace => f.write_str("orphan curly brace"),
            Error::OrphanSquareBrace => f.write_str("orphan square brace"),
            Error::MaxDepthReached => f.write_str("max depth reached"),
            Error::InvalidQuote => f.write_str("invalid quote"),
            Error::InvalidComma => f.write_str("invalid comma"),
            Error::InvalidColon => f.write_str("invalid colon"),
            Error::InvalidState => f.write_str("invalid state"),
            Error::IncompleteElement => f.write_str("incomplete element"),
        }
    }
}

/// The kind of the outermost element of a JSON document.
///
/// Variants compare in declaration order: `Null` is the smallest and
/// `Object` the greatest.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum JsonType {
    /// The `null` literal.
    Null,
    /// The `true` or `false` literal.
    Bool,
    /// A number.
    Number,
    /// A string.
    String,
    /// An array.
    Array,
    /// An object.
    Object,
}

/// Validates the JSON read from `reader`, discarding the bytes once checked.
///
/// The whole stream is consumed. On success the type of the outermost JSON
/// element is returned. A JSON syntax error is reported as an `io::Error`,
/// and so is any error raised by `reader` itself.
///
/// # Example
///
/// ```
/// # fn fmain() -> Result<(), Box<dyn std::error::Error>> {
/// use oxidized_json_checker::{validate, JsonType};
/// let text = r#""I am a simple string!""#;
/// let bytes = text.as_bytes();
///
/// let json_type = validate(bytes)?;
/// assert_eq!(json_type, JsonType::String);
/// # Ok(()) }
/// # fmain().unwrap()
/// ```
pub fn validate<R: io::Read>(reader: R) -> io::Result<JsonType> {
    let mut checker = JsonChecker::new(reader);

    // Drive the checker to the end of the stream; the bytes themselves are dropped.
    let mut discard = io::sink();
    io::copy(&mut checker, &mut discard)?;

    // The last bytes must also leave the checker in a final state.
    checker.finish().map_err(io::Error::from)
}

/// Validates the JSON contained in a `str`.
///
/// This forwards the bytes of `string` to `validate_bytes` and returns its result.
pub fn validate_str(string: &str) -> Result<JsonType, Error> {
    let bytes = string.as_bytes();
    validate_bytes(bytes)
}

/// Validates the JSON contained in a bytes slice.
///
/// The bytes are fed directly to a checker that wraps no reader (`()`), then the
/// checker is finished. The first error met, if any, is returned.
pub fn validate_bytes(bytes: &[u8]) -> Result<JsonType, Error> {
    let mut automaton = JsonChecker::new(());

    match automaton.next_bytes(bytes) {
        Ok(()) => automaton.finish(),
        Err(error) => Err(error),
    }
}

/// The `JsonChecker` is an `io::Read` adapter that can be used like a pipe:
/// it reads bytes, checks them and outputs the same bytes.
///
/// If an error is encountered, either a JSON syntax error or an `io::Error`,
/// it is returned by the `io::Read::read` method.
///
/// # Safety
///
/// An error encountered while reading bytes will invalidate the checker.
///
/// # Example: read from a slice
///
/// ```
/// # fn fmain() -> Result<(), Box<dyn std::error::Error>> {
/// use std::io;
/// use oxidized_json_checker::JsonChecker;
///
/// let text = r#"{"I am": "an object"}"#;
/// let bytes = text.as_bytes();
///
/// let mut checker = JsonChecker::new(bytes);
/// io::copy(&mut checker, &mut io::sink())?;
/// checker.finish()?;
/// # Ok(()) }
/// # fmain().unwrap()
/// ```
pub struct JsonChecker<R> {
    /// Current state of the automaton; selects the row of the transition table.
    state: State,
    /// Error recorded by the checker, if any; while set, further bytes are refused.
    error: Option<Error>,
    /// Type of the outermost JSON element, recorded the first time it is recognized.
    outer_type: Option<JsonType>,
    /// Bound on the size of `stack`, enforced when a mode is pushed.
    max_depth: usize,
    /// Stack of modes; it is created holding a single `Mode::Done` entry.
    stack: Vec<Mode>,
    /// The wrapped value, read from by the `io::Read` implementation.
    reader: R,
}

impl<R> fmt::Debug for JsonChecker<R> {
    /// Prints only the type name; no field is printed, so `R` does not have
    /// to implement `Debug`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut builder = f.debug_struct("JsonChecker");
        builder.finish()
    }
}

impl<R> JsonChecker<R> {
    /// Construct a `JsonChecker` around `reader` with the largest possible
    /// maximum depth (`usize::max_value()`).
    ///
    /// To run the check, read the `JsonChecker` until the end of the stream (it
    /// implements `io::Read` whenever `R` does), then call `JsonChecker::finish`
    /// to obtain the final result.
    pub fn new(reader: R) -> JsonChecker<R> {
        JsonChecker::with_max_depth(reader, usize::max_value())
    }

    /// Construct a `JsonChecker` and restrict the level of maximum nesting.
    ///
    /// The internal mode stack always holds one sentinel entry and is never allowed
    /// to grow to `max_depth` entries, so at most `max_depth.saturating_sub(2)`
    /// levels can be open at the same time. Opening a deeper object, array or
    /// top-level string fails with `Error::MaxDepthReached`.
    ///
    /// For more information read the `JsonChecker::new` documentation.
    pub fn with_max_depth(reader: R, max_depth: usize) -> JsonChecker<R> {
        JsonChecker {
            state: State::Go,
            error: None,
            outer_type: None,
            max_depth,
            // `Mode::Done` is the sentinel popped by `into_inner`.
            stack: vec![Mode::Done],
            reader,
        }
    }

    /// Feed every byte of `bytes` to the automaton, stopping at the first error.
    #[inline]
    fn next_bytes(&mut self, bytes: &[u8]) -> Result<(), Error> {
        bytes.iter().try_for_each(|b| self.next_byte(*b))
    }

    /// Feed a single byte to the automaton.
    ///
    /// The first error met is stored in the checker and returned again for every
    /// following byte: the checker is *fused* once it has failed.
    #[inline]
    fn next_byte(&mut self, next_byte: u8) -> Result<(), Error> {
        if let Some(error) = self.error {
            return Err(error);
        }

        // We can potentially use try_blocks in the future.
        fn internal_next_byte<R>(jc: &mut JsonChecker<R>, next_byte: u8) -> Result<(), Error> {
            // Determine the character's class; every non-ASCII byte shares one class.
            let next_class = if next_byte >= 128 {
                Class::CEtc
            } else {
                ASCII_CLASS[next_byte as usize]
            };

            if next_class == Class::Invalid {
                return Err(Error::InvalidCharacter);
            }

            // Get the next state from the state transition table and
            // perform one of the actions.
            let next_state = STATE_TRANSITION_TABLE[jc.state as usize][next_class as usize];

            // Save the type we met if not already saved.
            if jc.outer_type.is_none() {
                match next_state {
                    State::N1 => jc.outer_type = Some(JsonType::Null),
                    State::T1 | State::F1 => jc.outer_type = Some(JsonType::Bool),
                    State::In => jc.outer_type = Some(JsonType::Number),
                    State::Wq => jc.outer_type = Some(JsonType::String),
                    State::Wos => jc.outer_type = Some(JsonType::Array),
                    State::Woc => jc.outer_type = Some(JsonType::Object),
                    _ => (),
                }
            }

            match next_state {
                State::Wec => { // Empty }
                    if !jc.pop(Mode::Key) {
                        return Err(Error::EmptyCurlyBraces);
                    }
                    jc.state = State::Ok;
                },
                State::Wcu => { // }
                    if !jc.pop(Mode::Object) {
                        return Err(Error::OrphanCurlyBrace);
                    }
                    jc.state = State::Ok;
                },
                State::Ws => { // ]
                    if !jc.pop(Mode::Array) {
                        return Err(Error::OrphanSquareBrace);
                    }
                    jc.state = State::Ok;
                },
                State::Woc => { // {
                    if !jc.push(Mode::Key) {
                        return Err(Error::MaxDepthReached);
                    }
                    jc.state = State::Ob;
                },
                State::Wos => { // [
                    if !jc.push(Mode::Array) {
                        return Err(Error::MaxDepthReached);
                    }
                    jc.state = State::Ar;
                }
                State::Wq => { // "
                    match jc.stack.last() {
                        // Opening quote of a top-level string.
                        Some(Mode::Done) => {
                            if !jc.push(Mode::String) {
                                return Err(Error::MaxDepthReached);
                            }
                            jc.state = State::St;
                        },
                        // Closing quote of a top-level string.
                        Some(Mode::String) => {
                            jc.pop(Mode::String);
                            jc.state = State::Ok;
                        },
                        // A quote met in key mode is followed by a colon.
                        Some(Mode::Key) => jc.state = State::Co,
                        Some(Mode::Array) |
                        Some(Mode::Object) => jc.state = State::Ok,
                        _ => return Err(Error::InvalidQuote),
                    }
                },
                State::Wcm => { // ,
                    match jc.stack.last() {
                        Some(Mode::Object) => {
                            // A comma causes a flip from object mode to key mode.
                            if !jc.pop(Mode::Object) || !jc.push(Mode::Key) {
                                return Err(Error::InvalidComma);
                            }
                            jc.state = State::Ke;
                        }
                        Some(Mode::Array) => jc.state = State::Va,
                        _ => return Err(Error::InvalidComma),
                    }
                },
                State::Wcl => { // :
                    // A colon causes a flip from key mode to object mode.
                    if !jc.pop(Mode::Key) || !jc.push(Mode::Object) {
                        return Err(Error::InvalidColon);
                    }
                    jc.state = State::Va;
                },
                State::Invalid => {
                    return Err(Error::InvalidState)
                },

                // Or change the state.
                state => jc.state = state,
            }

            Ok(())
        }

        // By catching returned errors when this `JsonChecker` is used we *fuse*
        // the checker and ensure the user don't use a checker in an invalid state.
        if let Err(error) = internal_next_byte(self, next_byte) {
            self.error = Some(error);
            return Err(error);
        }

        Ok(())
    }

    /// The `JsonChecker::finish` method must be called after all of the characters
    /// have been processed.
    ///
    /// This function consumes the `JsonChecker` and returns `Ok(JsonType)` if the
    /// JSON text was accepted and the JSON type guessed.
    ///
    /// # Errors
    ///
    /// Returns the error already recorded by the checker, if any, and
    /// `Error::IncompleteElement` when the input stopped in the middle of an element.
    pub fn finish(self) -> Result<JsonType, Error> {
        self.into_inner().map(|(_, t)| t)
    }

    /// The `JsonChecker::into_inner` does the same as the `JsonChecker::finish`
    /// method but returns the internal reader along with the JSON type guessed.
    ///
    /// # Errors
    ///
    /// Returns the error already recorded by the checker, if any, and
    /// `Error::IncompleteElement` when the input stopped in the middle of an element.
    ///
    /// # Panics
    ///
    /// Panics if the automaton ends in a final state without having recorded the
    /// type of the outermost element; this is treated as an internal bug.
    pub fn into_inner(mut self) -> Result<(R, JsonType), Error> {
        // A failed checker stops updating its state, so the state alone could
        // still look final (e.g. after `[1]x`): report the recorded error instead.
        if let Some(error) = self.error {
            return Err(error);
        }

        let is_state_valid = matches!(
            self.state,
            State::Ok | State::In | State::Fr | State::Fs | State::E3
        );

        // Only the `Mode::Done` sentinel may remain on the stack.
        if is_state_valid && self.pop(Mode::Done) {
            let outer_type = self.outer_type.expect("BUG: the outer type must have been guessed");
            return Ok((self.reader, outer_type))
        }

        // We do not need to catch this error to *fuse* the checker because this method
        // consumes the checker, it cannot be reused after an error has been thrown.
        Err(Error::IncompleteElement)
    }

    /// Push a mode onto the stack. Returns false if max depth is reached.
    ///
    /// The push is refused when it would make the stack hold `max_depth` entries.
    fn push(&mut self, mode: Mode) -> bool {
        if self.stack.len() + 1 >= self.max_depth {
            return false;
        }
        self.stack.push(mode);
        true
    }

    /// Pop the stack, assuring that the current mode matches the expectation.
    /// Return false if the stack is empty or if the modes mismatch.
    ///
    /// The top entry is removed even when it does not match `mode`.
    fn pop(&mut self, mode: Mode) -> bool {
        self.stack.pop() == Some(mode)
    }
}

impl<R: io::Read> io::Read for JsonChecker<R> {
    /// Reads bytes from the wrapped reader into `buf` and checks them before
    /// handing them to the caller.
    ///
    /// # Errors
    ///
    /// Returns the error raised by the wrapped reader, or a JSON syntax error
    /// converted into an `io::Error`. Once such an error has been recorded every
    /// later call fails too. The one exception is `io::ErrorKind::Interrupted`,
    /// which is passed through without invalidating the checker so that the
    /// read can be retried.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // If an error have already been encountered we return it,
        // this *fuses* the JsonChecker.
        if let Some(error) = self.error {
            return Err(error.into());
        }

        let len = match self.reader.read(buf) {
            Ok(len) => len,
            Err(error) => {
                // `Interrupted` is transient: callers such as `io::copy` retry the
                // read, and no byte reached the automaton, so the checker stays
                // usable. Any other failure fuses the checker. We do not store the
                // io::Error in the JsonChecker Error type, instead we use the
                // IncompleteElement error.
                if error.kind() != io::ErrorKind::Interrupted {
                    self.error = Some(Error::IncompleteElement);
                }
                return Err(error);
            },
        };

        // Only the bytes actually written by the wrapped reader are checked.
        self.next_bytes(&buf[..len])?;

        Ok(len)
    }
}