oxidized_json_checker/lib.rs
//! `oxidized-json-checker` is a library that provides JSON validation without
//! keeping the stream of bytes in memory: it streams the bytes and validates them
//! on the fly using a pushdown automaton.
4//!
5//! The original library has been retrieved from [json.org](http://www.json.org/JSON_checker/)
//! and improved to accept every valid JSON element as a valid JSON document.
7//!
8//! Therefore this library accepts a single string or single integer as a valid JSON document,
9//! this way we follow the [`serde_json`](https://docs.rs/serde_json) rules.
10//!
11//! # Example: validate some bytes
12//!
13//! This example shows how you can give the library a simple slice
14//! of bytes and validate that it is a valid JSON document.
15//!
16//! ```
17//! # fn fmain() -> Result<(), Box<dyn std::error::Error>> {
18//! let text = r#"["I", "am", "a", "valid", "JSON", "array"]"#;
19//! let bytes = text.as_bytes();
20//!
21//! oxidized_json_checker::validate(bytes)?;
22//! # Ok(()) }
23//! # fmain().unwrap()
24//! ```
25//!
26//! # Example: validate a stream of bytes
27//!
//! This example shows that you can pass any type that implements `io::Read`
//! to the `JsonChecker` and validate that it is valid JSON.
30//!
31//! ```
32//! # const json_bytes: &[u8] = b"null";
33//! # fn streaming_from_the_web() -> std::io::Result<&'static [u8]> {
34//! # Ok(json_bytes)
35//! # }
36//! # fn fmain() -> Result<(), Box<dyn std::error::Error>> {
37//! let stream = streaming_from_the_web()?;
38//!
39//! oxidized_json_checker::validate(stream)?;
40//! # Ok(()) }
41//! # fmain().unwrap()
42//! ```
43//!
44//! # Example: complex compositions
45//!
//! This example shows how you can use the `JsonChecker` type to check
47//! a compressed stream of bytes.
48//!
49//! You can decompress the stream, check it using the `JsonChecker`, and compress it
50//! again to pipe it elsewhere. All of that without much memory impact.
51//!
52//! ```no_run
53//! # fn fmain() -> Result<(), Box<dyn std::error::Error>> {
54//! use std::io;
55//! use oxidized_json_checker::JsonChecker;
56//!
57//! let stdin = io::stdin();
58//! let stdout = io::stdout();
59//!
60//! // Wrap the stdin reader in a Snappy reader
61//! // then wrap it in a JsonChecker reader.
62//! let rdr = snap::read::FrameDecoder::new(stdin.lock());
63//! let mut rdr = JsonChecker::new(rdr);
64//!
65//! // Wrap the stdout writer in a Snappy writer.
66//! let mut wtr = snap::write::FrameEncoder::new(stdout.lock());
67//!
//! // The copy function will return any io error thrown by any of the readers,
//! // the JsonChecker throws errors when invalid JSON is encountered.
70//! io::copy(&mut rdr, &mut wtr)?;
71//!
72//! // We must check that the final bytes were valid.
73//! rdr.finish()?;
74//! # Ok(()) }
75//! # fmain().unwrap()
76//! ```
77//!
78
79use std::{fmt, io};
80use crate::internals::{State, Class, Mode};
81use crate::internals::{STATE_TRANSITION_TABLE, ASCII_CLASS};
82
83#[cfg(test)]
84mod tests;
85mod internals;
86
/// The error type returned by the `JsonChecker` type.
///
/// Every variant is a field-less marker, so the type is cheap to copy and can be
/// compared directly (for instance with `assert_eq!`).
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Error {
    /// A byte whose character class is invalid was encountered.
    InvalidCharacter,
    /// A `}` was met for an empty object while the checker was not expecting a key.
    EmptyCurlyBraces,
    /// A `}` was met while the checker was not inside an object value.
    OrphanCurlyBrace,
    /// A `]` was met while the checker was not inside an array.
    OrphanSquareBrace,
    /// Opening one more nested element would exceed the configured maximum depth.
    MaxDepthReached,
    /// A `"` was met in a place where it is not accepted.
    InvalidQuote,
    /// A `,` was met outside of an object or an array.
    InvalidComma,
    /// A `:` was met while the checker was not expecting the end of a key.
    InvalidColon,
    /// The state transition table has no valid transition for the byte that was read.
    InvalidState,
    /// The checker was finished before a complete JSON element was read; it is also
    /// the error stored in the checker when the underlying reader fails.
    IncompleteElement,
}
101
102impl From<Error> for io::Error {
103 fn from(err: Error) -> io::Error {
104 io::Error::new(io::ErrorKind::Other, err)
105 }
106}
107
// The empty body relies on the default methods of `std::error::Error`; it is what
// allows an `Error` to be boxed or wrapped into an `io::Error`.
impl std::error::Error for Error {}
109
110impl fmt::Display for Error {
111 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
112 match self {
113 Error::InvalidCharacter => f.write_str("invalid character"),
114 Error::EmptyCurlyBraces => f.write_str("empty curly braces"),
115 Error::OrphanCurlyBrace => f.write_str("orphan curly brace"),
116 Error::OrphanSquareBrace => f.write_str("orphan square brace"),
117 Error::MaxDepthReached => f.write_str("max depth reached"),
118 Error::InvalidQuote => f.write_str("invalid quote"),
119 Error::InvalidComma => f.write_str("invalid comma"),
120 Error::InvalidColon => f.write_str("invalid colon"),
121 Error::InvalidState => f.write_str("invalid state"),
122 Error::IncompleteElement => f.write_str("incomplete element"),
123 }
124 }
125}
126
/// Represents any valid JSON type.
///
/// This is the type of the outermost element of a checked document, as returned by
/// `validate`, `JsonChecker::finish` and `JsonChecker::into_inner`.
#[derive(Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub enum JsonType {
    /// The `null` literal.
    Null,
    /// A boolean, either `true` or `false`.
    Bool,
    /// A number.
    Number,
    /// A string, delimited by double quotes.
    String,
    /// An array, delimited by square brackets.
    Array,
    /// An object, delimited by curly braces.
    Object,
}
137
138/// A convenient method to check and consume JSON from a stream of bytes.
139///
140/// # Example
141///
142/// ```
143/// # fn fmain() -> Result<(), Box<dyn std::error::Error>> {
144/// use oxidized_json_checker::{validate, JsonType};
145/// let text = r#""I am a simple string!""#;
146/// let bytes = text.as_bytes();
147///
148/// let json_type = validate(bytes)?;
149/// assert_eq!(json_type, JsonType::String);
150/// # Ok(()) }
151/// # fmain().unwrap()
152/// ```
153pub fn validate<R: io::Read>(reader: R) -> io::Result<JsonType> {
154 let mut checker = JsonChecker::new(reader);
155 io::copy(&mut checker, &mut io::sink())?;
156 let outer_type = checker.finish()?;
157 Ok(outer_type)
158}
159
160/// A convenient method to check and consume JSON from an `str`.
161pub fn validate_str(string: &str) -> Result<JsonType, Error> {
162 validate_bytes(string.as_bytes())
163}
164
165/// A convenient method to check and consume JSON from a bytes slice.
166pub fn validate_bytes(bytes: &[u8]) -> Result<JsonType, Error> {
167 let mut checker = JsonChecker::new(());
168 checker.next_bytes(bytes)?;
169 checker.finish()
170}
171
/// The `JsonChecker` is an `io::Read` adapter, it can be used like a pipe:
/// it reads bytes, checks them and outputs the same bytes.
///
/// If an error is encountered, either a JSON syntax error or an `io::Error`,
/// it is returned by the `io::Read::read` method.
///
/// # Invalidation on error
///
/// An error encountered while reading bytes will invalidate the checker:
/// the error is stored and every later read returns an error too.
///
/// # Example: read from a slice
///
/// ```
/// # fn fmain() -> Result<(), Box<dyn std::error::Error>> {
/// use std::io;
/// use oxidized_json_checker::JsonChecker;
///
/// let text = r#"{"I am": "an object"}"#;
/// let bytes = text.as_bytes();
///
/// let mut checker = JsonChecker::new(bytes);
/// io::copy(&mut checker, &mut io::sink())?;
/// checker.finish()?;
/// # Ok(()) }
/// # fmain().unwrap()
/// ```
pub struct JsonChecker<R> {
    // Current state of the automaton; a new checker starts in `State::Go`.
    state: State,
    // First error encountered, if any; once set, the checker keeps returning an error.
    error: Option<Error>,
    // Type of the outermost JSON element, recorded the first time it can be guessed.
    outer_type: Option<JsonType>,
    // Nesting limit enforced when a new mode is pushed onto `stack`.
    max_depth: usize,
    // Stack of the modes currently open; a new checker starts with a single `Mode::Done`.
    stack: Vec<Mode>,
    // The wrapped value the bytes are read from when the checker is used as an `io::Read`.
    reader: R,
}
206
207impl<R> fmt::Debug for JsonChecker<R> {
208 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
209 f.debug_struct("JsonChecker").finish()
210 }
211}
212
213impl<R> JsonChecker<R> {
214 /// Construct a `JsonChecker. To continue the process, write to the `JsonChecker`
215 /// like a sink, and then call `JsonChecker::finish` to obtain the final result.
216 pub fn new(reader: R) -> JsonChecker<R> {
217 JsonChecker::with_max_depth(reader, usize::max_value())
218 }
219
220 /// Construct a `JsonChecker` and restrict the level of maximum nesting.
221 ///
222 /// For more information read the `JsonChecker::new` documentation.
223 pub fn with_max_depth(reader: R, max_depth: usize) -> JsonChecker<R> {
224 JsonChecker {
225 state: State::Go,
226 error: None,
227 outer_type: None,
228 max_depth,
229 stack: vec![Mode::Done],
230 reader,
231 }
232 }
233
234 #[inline]
235 fn next_bytes(&mut self, bytes: &[u8]) -> Result<(), Error> {
236 bytes.iter().try_for_each(|b| self.next_byte(*b))
237 }
238
239 #[inline]
240 fn next_byte(&mut self, next_byte: u8) -> Result<(), Error> {
241 if let Some(error) = self.error {
242 return Err(error);
243 }
244
245 // We can potentially use try_blocks in the future.
246 fn internal_next_byte<R>(jc: &mut JsonChecker<R>, next_byte: u8) -> Result<(), Error> {
247 // Determine the character's class.
248 let next_class = if next_byte >= 128 {
249 Class::CEtc
250 } else {
251 ASCII_CLASS[next_byte as usize]
252 };
253
254 if next_class == Class::Invalid {
255 return Err(Error::InvalidCharacter);
256 }
257
258 // Get the next state from the state transition table and
259 // perform one of the actions.
260 let next_state = STATE_TRANSITION_TABLE[jc.state as usize][next_class as usize];
261
262 // Save the type we met if not already saved.
263 if jc.outer_type.is_none() {
264 match next_state {
265 State::N1 => jc.outer_type = Some(JsonType::Null),
266 State::T1 | State::F1 => jc.outer_type = Some(JsonType::Bool),
267 State::In => jc.outer_type = Some(JsonType::Number),
268 State::Wq => jc.outer_type = Some(JsonType::String),
269 State::Wos => jc.outer_type = Some(JsonType::Array),
270 State::Woc => jc.outer_type = Some(JsonType::Object),
271 _ => (),
272 }
273 }
274
275 match next_state {
276 State::Wec => { // Empty }
277 if !jc.pop(Mode::Key) {
278 return Err(Error::EmptyCurlyBraces);
279 }
280 jc.state = State::Ok;
281 },
282 State::Wcu => { // }
283 if !jc.pop(Mode::Object) {
284 return Err(Error::OrphanCurlyBrace);
285 }
286 jc.state = State::Ok;
287 },
288 State::Ws => { // ]
289 if !jc.pop(Mode::Array) {
290 return Err(Error::OrphanSquareBrace);
291 }
292 jc.state = State::Ok;
293 },
294 State::Woc => { // {
295 if !jc.push(Mode::Key) {
296 return Err(Error::MaxDepthReached);
297 }
298 jc.state = State::Ob;
299 },
300 State::Wos => { // [
301 if !jc.push(Mode::Array) {
302 return Err(Error::MaxDepthReached);
303 }
304 jc.state = State::Ar;
305 }
306 State::Wq => { // "
307 match jc.stack.last() {
308 Some(Mode::Done) => {
309 if !jc.push(Mode::String) {
310 return Err(Error::MaxDepthReached);
311 }
312 jc.state = State::St;
313 },
314 Some(Mode::String) => {
315 jc.pop(Mode::String);
316 jc.state = State::Ok;
317 },
318 Some(Mode::Key) => jc.state = State::Co,
319 Some(Mode::Array) |
320 Some(Mode::Object) => jc.state = State::Ok,
321 _ => return Err(Error::InvalidQuote),
322 }
323 },
324 State::Wcm => { // ,
325 match jc.stack.last() {
326 Some(Mode::Object) => {
327 // A comma causes a flip from object mode to key mode.
328 if !jc.pop(Mode::Object) || !jc.push(Mode::Key) {
329 return Err(Error::InvalidComma);
330 }
331 jc.state = State::Ke;
332 }
333 Some(Mode::Array) => jc.state = State::Va,
334 _ => return Err(Error::InvalidComma),
335 }
336 },
337 State::Wcl => { // :
338 // A colon causes a flip from key mode to object mode.
339 if !jc.pop(Mode::Key) || !jc.push(Mode::Object) {
340 return Err(Error::InvalidColon);
341 }
342 jc.state = State::Va;
343 },
344 State::Invalid => {
345 return Err(Error::InvalidState)
346 },
347
348 // Or change the state.
349 state => jc.state = state,
350 }
351
352 Ok(())
353 }
354
355 // By catching returned errors when this `JsonChecker` is used we *fuse*
356 // the checker and ensure the user don't use a checker in an invalid state.
357 if let Err(error) = internal_next_byte(self, next_byte) {
358 self.error = Some(error);
359 return Err(error);
360 }
361
362 Ok(())
363 }
364
365 /// The `JsonChecker::finish` method must be called after all of the characters
366 /// have been processed.
367 ///
368 /// This function consumes the `JsonChecker` and returns `Ok(JsonType)` if the
369 /// JSON text was accepted and the JSON type guessed.
370 pub fn finish(self) -> Result<JsonType, Error> {
371 self.into_inner().map(|(_, t)| t)
372 }
373
374 /// The `JsonChecker::into_inner` does the same as the `JsonChecker::finish`
375 /// method but returns the internal reader along with the JSON type guessed.
376 pub fn into_inner(mut self) -> Result<(R, JsonType), Error> {
377 let is_state_valid = match self.state {
378 State::Ok | State::In | State::Fr | State::Fs | State::E3 => true,
379 _ => false,
380 };
381
382 if is_state_valid && self.pop(Mode::Done) {
383 let outer_type = self.outer_type.expect("BUG: the outer type must have been guessed");
384 return Ok((self.reader, outer_type))
385 }
386
387 // We do not need to catch this error to *fuse* the checker because this method
388 // consumes the checker, it cannot be reused after an error has been thrown.
389 Err(Error::IncompleteElement)
390 }
391
392 /// Push a mode onto the stack. Returns false if max depth is reached.
393 fn push(&mut self, mode: Mode) -> bool {
394 if self.stack.len() + 1 >= self.max_depth {
395 return false;
396 }
397 self.stack.push(mode);
398 return true;
399 }
400
401 /// Pop the stack, assuring that the current mode matches the expectation.
402 /// Return false if the stack is empty or if the modes mismatch.
403 fn pop(&mut self, mode: Mode) -> bool {
404 self.stack.pop() == Some(mode)
405 }
406}
407
408impl<R: io::Read> io::Read for JsonChecker<R> {
409 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
410 // If an error have already been encountered we return it,
411 // this *fuses* the JsonChecker.
412 if let Some(error) = self.error {
413 return Err(error.into());
414 }
415
416 let len = match self.reader.read(buf) {
417 Err(error) => {
418 // We do not store the io::Error in the JsonChecker Error
419 // type instead we use the IncompleteElement error.
420 self.error = Some(Error::IncompleteElement);
421 return Err(error);
422 },
423 Ok(len) => len,
424 };
425
426 self.next_bytes(&buf[..len])?;
427
428 Ok(len)
429 }
430}