json_forensics/
lib.rs

1//! This crate implements a `Read` adapter that converts the invalid JSON
2//! tokens `NaN` and `Infinity` into other tokens without otherwise distorting
//! the stream.  It achieves this by converting `NaN` and `Infinity` into `0`
//! followed by padding spaces, so the length of the stream does not change.
4//!
5//! This is useful because the Python JSON library traditionally emits invalid
6//! JSON if `NaN` and `Infinity` values are encountered.  If you have to support
7//! clients like this, this wrapper can be used to still deserialize such a
8//! JSON document.
9//!
//! The replacement is only meant to make the document parse: `0` is a valid
//! JSON value that is short enough to fit into the space of either token, so
//! none of the byte positions in the stream change.
13//!
14//! # Example Conversion
15//!
16//! The following JSON document:
17//!
18//! ```ignore
19//! {"nan":NaN,"inf":Infinity,"-inf":-Infinity}
20//! ```
21//!
22//! is thus converted to:
23//!
24//! ```ignore
25//! {"nan":0  ,"inf":0       ,"-inf":-0       }
26//! ```
27//!
28//! # serde support
29//!
30//! If the `serde` feature is enabled then the crate provides some basic
31//! wrappers around `serde_json` to deserialize quickly and also by running
32//! the conversions.
33
34use std::str;
35
36#[cfg(feature = "serde")]
37mod serde_impl;
38#[cfg(feature = "serde")]
39pub use self::serde_impl::*;
40
/// Scanner state of the byte-wise JSON rewriter.
///
/// The numbered `NaN*` / `Infinity*` variants record how many bytes of the
/// respective keyword have been matched so far (`NaN0` = `N` seen,
/// `Infinity6` = `Infinit` seen).
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum State {
    /// Outside of any string literal, number or partially matched keyword.
    Initial,
    /// Inside a string literal; nothing in here is ever rewritten.
    Quoted,
    /// Directly after a backslash inside a string literal.
    QuotedEscape,
    /// `N` matched.
    NaN0,
    /// `Na` matched.
    NaN1,
    /// Inside a run of ASCII digits whose first digit sits at index `start`.
    Number { start: usize },
    /// Inside the digits that follow the decimal point of a number.  These
    /// digits belong to a float and are never rewritten.
    Fraction,
    /// `I` matched.
    Infinity0,
    /// `In` matched.
    Infinity1,
    /// `Inf` matched.
    Infinity2,
    /// `Infi` matched.
    Infinity3,
    /// `Infin` matched.
    Infinity4,
    /// `Infini` matched.
    Infinity5,
    /// `Infinit` matched.
    Infinity6,
}

/// Advances the state machine by one byte.
///
/// `bytes` is the buffer being rewritten, `state` the state before the byte,
/// `i` the index of the byte and `c` its value.  Returns the follow-up state
/// and the byte that the caller should store at index `i`.  Bytes *before*
/// `i` are overwritten directly when a complete `NaN`, `Infinity` or
/// oversized integer has been recognised; the replacement is always `0`
/// followed by spaces so that the buffer length never changes.
///
/// `i` may be equal to `bytes.len()` when the caller feeds an end-of-input
/// sentinel; only indices below `i` are written in that case.
///
/// The back-references (`i - 2`, `i - 7`) assume the matched keyword lies
/// completely inside `bytes`, which holds whenever scanning starts at index 0
/// in `State::Initial`.
#[inline]
fn transition(bytes: &mut [u8], state: State, i: usize, c: u8) -> (State, u8) {
    match (state, c) {
        // String literals: track quotes and escapes, copy everything through.
        (State::Initial, b'"') => (State::Quoted, c),
        (State::Quoted, b'\\') => (State::QuotedEscape, c),
        (State::Quoted, b'"') => (State::Initial, c),
        (State::Quoted, _) => (State::Quoted, c),
        (State::QuotedEscape, _) => (State::Quoted, c),

        // `NaN` -> `0  `
        (State::Initial, b'N') => (State::NaN0, c),
        (State::NaN0, b'a') => (State::NaN1, c),
        (State::NaN1, b'N') => {
            bytes[i - 2] = b'0';
            bytes[i - 1] = b' ';
            (State::Initial, b' ')
        }

        // `Infinity` -> `0       `
        (State::Initial, b'I') => (State::Infinity0, c),
        (State::Infinity0, b'n') => (State::Infinity1, c),
        (State::Infinity1, b'f') => (State::Infinity2, c),
        (State::Infinity2, b'i') => (State::Infinity3, c),
        (State::Infinity3, b'n') => (State::Infinity4, c),
        (State::Infinity4, b'i') => (State::Infinity5, c),
        (State::Infinity5, b't') => (State::Infinity6, c),
        (State::Infinity6, b'y') => {
            bytes[i - 7] = b'0';
            for pad in &mut bytes[i - 6..i] {
                *pad = b' ';
            }
            (State::Initial, b' ')
        }

        // Integers: remember where the digit run started.
        (State::Initial, _) if c.is_ascii_digit() => (State::Number { start: i }, c),
        (State::Number { .. }, _) if c.is_ascii_digit() => (state, c),
        // A decimal point turns the number into a float: the integer part is
        // left alone and so are the fraction digits that follow.
        (State::Number { .. }, b'.') => (State::Fraction, c),
        // An exponent marker also means float; exponent digits are scanned
        // like any other digit run, exactly as before.
        (State::Number { .. }, b'e') | (State::Number { .. }, b'E') => (State::Initial, c),
        (State::Number { start }, _) => {
            // The run consists of ASCII digits only, so a failed `u64` parse
            // means overflow (and therefore no `i64` fit either).
            let overflows = str::from_utf8(&bytes[start..i])
                .map(|digits| digits.parse::<u64>().is_err())
                .unwrap_or(false);
            if overflows {
                bytes[start] = b'0';
                for pad in &mut bytes[start + 1..i] {
                    *pad = b' ';
                }
            }
            // The terminating byte may itself start a token (e.g. a quote).
            transition(bytes, State::Initial, i, c)
        }

        // Fraction digits are skipped without ever being rewritten.
        (State::Fraction, _) if c.is_ascii_digit() => (State::Fraction, c),
        (State::Fraction, b'e') | (State::Fraction, b'E') => (State::Initial, c),
        (State::Fraction, _) => transition(bytes, State::Initial, i, c),

        // Any other byte outside of a token is copied through.
        (State::Initial, _) => (State::Initial, c),

        // A keyword that does not continue as expected is abandoned; the
        // offending byte is re-examined from the initial state so that the
        // back-references above only ever touch a contiguous keyword.
        (State::NaN0, _)
        | (State::NaN1, _)
        | (State::Infinity0, _)
        | (State::Infinity1, _)
        | (State::Infinity2, _)
        | (State::Infinity3, _)
        | (State::Infinity4, _)
        | (State::Infinity5, _)
        | (State::Infinity6, _) => transition(bytes, State::Initial, i, c),
    }
}
105
106fn translate_slice_impl(bytes: &mut [u8], mut state: State) -> State {
107    for i in 0..bytes.len() {
108        let (new_state, new_char) = transition(bytes, state, i, bytes[i]);
109        state = new_state;
110        bytes[i] = new_char;
111    }
112    transition(bytes, state, bytes.len(), b'\0');
113    state
114}
115
116/// Translates a slice in place.
117///
118/// This works the same as the `JsonCompatRead` struct but instead converts a
119/// slice in place.  This is useful when working with JSON in slices.
120pub fn translate_slice(bytes: &mut [u8]) {
121    translate_slice_impl(bytes, State::Initial);
122}
123
/// Mirrors the conversion example from the crate documentation: `NaN`,
/// `Infinity` and `-Infinity` outside of strings each become `0` padded with
/// spaces to the length of the original token.
#[test]
fn test_reader_simple() {
    // The input must contain a literal `NaN`; otherwise the NaN rewrite is
    // not exercised by this test at all.
    let mut json = br#"{"nan":NaN,"inf":Infinity,"-inf":-Infinity}"#.to_vec();
    translate_slice(&mut json[..]);
    assert_eq!(
        str::from_utf8(&json[..]),
        str::from_utf8(&br#"{"nan":0  ,"inf":0       ,"-inf":-0       }"#[..])
    );
}
133
/// Keywords inside string literals (keys and values) stay untouched while a
/// bare `NaN` in the same document is still rewritten.
#[test]
fn test_reader_string() {
    let input = br#"{"nan":"nan","Infinity":"-Infinity","other":NaN}"#;
    let expected = br#"{"nan":"nan","Infinity":"-Infinity","other":0  }"#;

    let mut buffer = input.to_vec();
    translate_slice(&mut buffer);

    assert_eq!(buffer.as_slice(), &expected[..]);
}
143
/// An escaped quote does not end the string, so every `NaN` in this literal
/// is still inside the string and must survive.
#[test]
fn test_reader_string_escaping() {
    let input = br#""NaN\"NaN\"NaN""#;

    let mut buffer = input.to_vec();
    translate_slice(&mut buffer);

    assert_eq!(buffer.as_slice(), &input[..]);
}
150
/// A word that merely shares a prefix with `Infinity` must not be modified.
#[test]
fn test_no_greedy_write() {
    let input = br#"Inferior"#;

    let mut buffer = input.to_vec();
    translate_slice(&mut buffer);

    assert_eq!(buffer.as_slice(), &b"Inferior"[..]);
}
157
/// An integer that does not fit into 64 bits is replaced by `0` and padded
/// with spaces up to its original length.
#[test]
fn test_too_large_int() {
    let mut json = br#"999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999"#.to_vec();

    // Derive the expectation from the input length instead of hand-counting
    // padding: a single `0` followed by spaces, same length as the input.
    let mut expected = vec![b' '; json.len()];
    expected[0] = b'0';

    translate_slice(&mut json[..]);
    assert_eq!(str::from_utf8(&json[..]), str::from_utf8(&expected[..]));
}
165
/// A float with an oversized integer part but a decimal point is left as is.
#[test]
fn test_leaves_floats() {
    let original = br#"9999999999999999999999999999.99999"#.to_vec();

    let mut translated = original.clone();
    translate_slice(&mut translated);

    assert_eq!(str::from_utf8(&translated), str::from_utf8(&original));
}
173
/// A number with an upper-case exponent marker is left as is.
#[test]
fn test_leaves_floats2() {
    let original = br#"999999999E10"#.to_vec();

    let mut translated = original.clone();
    translate_slice(&mut translated);

    assert_eq!(str::from_utf8(&translated), str::from_utf8(&original));
}
181
/// A number with an upper-case marker and a negative exponent is left as is.
#[test]
fn test_leaves_floats3() {
    let original = br#"999999999E-10"#.to_vec();

    let mut translated = original.clone();
    translate_slice(&mut translated);

    assert_eq!(str::from_utf8(&translated), str::from_utf8(&original));
}
189
/// A number with a lower-case marker and a negative exponent is left as is.
#[test]
fn test_leaves_floats4() {
    let original = br#"999999999e-10"#.to_vec();

    let mut translated = original.clone();
    translate_slice(&mut translated);

    assert_eq!(str::from_utf8(&translated), str::from_utf8(&original));
}