facet_json/
lib.rs

1#![no_std]
2#![warn(missing_docs)]
3#![warn(clippy::std_instead_of_core)]
4#![warn(clippy::std_instead_of_alloc)]
5#![forbid(unsafe_code)]
6#![doc = include_str!("../README.md")]
7
8extern crate alloc;
9
10use alloc::vec::Vec;
11pub use facet_deserialize::{DeserError, DeserErrorKind, DeserErrorMessage};
12
13mod deserialize;
14pub use deserialize::*;
15
16mod serialize;
17pub use serialize::*;
18
19mod tokenizer;
20
/// Zero-sized marker type standing for the JSON format.
///
/// NOTE(review): nothing in this file refers to it; presumably the `deserialize` and/or
/// `serialize` modules attach the format-specific behaviour to it — confirm there.
struct Json;
23
/// `no_std` compatible Write trait used by the json serializer.
///
/// Neither method returns a value, so from the trait's point of view writing cannot fail.
pub trait JsonWrite {
    /// Write all these bytes to the writer.
    fn write(&mut self, buf: &[u8]);

    /// If the writer supports it, reserve space for `additional` more bytes.
    ///
    /// This is purely a capacity hint, so the default implementation does nothing. Writers
    /// backed by a growable buffer should override it.
    fn reserve(&mut self, additional: usize) {
        // Deliberately ignored: a writer without a notion of capacity has nothing to do.
        let _ = additional;
    }
}
32
33impl JsonWrite for &mut Vec<u8> {
34    fn write(&mut self, buf: &[u8]) {
35        self.extend(buf);
36    }
37
38    fn reserve(&mut self, additional: usize) {
39        Vec::reserve(self, additional)
40    }
41}
42
43impl JsonWrite for Vec<u8> {
44    fn write(&mut self, buf: &[u8]) {
45        self.extend(buf);
46    }
47
48    fn reserve(&mut self, additional: usize) {
49        Vec::reserve(self, additional)
50    }
51}
52
53/// Properly escapes and writes a JSON string
54#[inline]
55fn write_json_string<W: JsonWrite>(writer: &mut W, s: &str) {
56    // Just a little bit of text on how it works. There are two main steps:
57    // 1. Check if the string is completely ASCII and doesn't contain any quotes or backslashes or
58    //    control characters. This is the fast path, because it means that the bytes can be written
59    //    as they are, without any escaping needed. In this case we go over the string in windows
60    //    of 16 bytes (which is completely arbitrary, maybe find some real world data to tune this
61    //    with? I don't know and you don't have to do this dear reader.) and we just feed them into
62    //    the writer.
63    // 2. If the string is not completely ASCII or contains quotes or backslashes or control
64    //    characters, we need to escape them. This is the slow path, because it means that we need
65    //    to write the bytes one by one, and we need to figure out where to put the escapes. So we
66    //    just call `write_json_escaped_char` for each character.
67
68    const STEP_SIZE: usize = Window::BITS as usize / 8;
69    type Window = u128;
70    type Chunk = [u8; STEP_SIZE];
71
72    writer.write(b"\"");
73
74    let mut s = s;
75    while let Some(Ok(chunk)) = s.as_bytes().get(..STEP_SIZE).map(Chunk::try_from) {
76        let window = Window::from_ne_bytes(chunk);
77        // Our window is a concatenation of u8 values. For each value, we need to make sure that:
78        // 1. It is ASCII (i.e. the first bit of the u8 is 0, so u8 & 0x80 == 0)
79        // 2. It does not contain quotes (i.e. 0x22)
80        // 3. It does not contain backslashes (i.e. 0x5c)
81        // 4. It does not contain control characters (i.e. characters below 32, including 0)
82        //    This means the bit above the 1st, 2nd or 3rd bit must be set, so u8 & 0xe0 != 0
83        let completely_ascii = window & 0x80808080808080808080808080808080 == 0;
84        let quote_free = !contains_0x22(window);
85        let backslash_free = !contains_0x5c(window);
86        let control_char_free = top_three_bits_set(window);
87        if completely_ascii && quote_free && backslash_free && control_char_free {
88            // Yay! Whack it into the writer!
89            writer.write(&chunk);
90            s = &s[STEP_SIZE..];
91        } else {
92            // Ahw one of the conditions not met. Let's take our time and artisanally handle each
93            // character.
94            let mut chars = s.chars();
95            let mut count = STEP_SIZE;
96            for c in &mut chars {
97                write_json_escaped_char(writer, c);
98                count = count.saturating_sub(c.len_utf8());
99                if count == 0 {
100                    // Done with our chunk
101                    break;
102                }
103            }
104            s = chars.as_str();
105        }
106    }
107
108    // In our loop we checked that we were able to consume at least `STEP_SIZE` bytes every
109    // iteration. That means there might be a small remnant at the end that we can handle in the
110    // slow method.
111    for c in s.chars() {
112        write_json_escaped_char(writer, c);
113    }
114
115    writer.write(b"\"")
116}
117
118/// Writes a single JSON escaped character
119#[inline]
120fn write_json_escaped_char<W: JsonWrite>(writer: &mut W, c: char) {
121    match c {
122        '"' => writer.write(b"\\\""),
123        '\\' => writer.write(b"\\\\"),
124        '\n' => writer.write(b"\\n"),
125        '\r' => writer.write(b"\\r"),
126        '\t' => writer.write(b"\\t"),
127        '\u{08}' => writer.write(b"\\b"),
128        '\u{0C}' => writer.write(b"\\f"),
129        c if c.is_ascii_control() => {
130            let bytes = (c as u32).to_be_bytes();
131            // A radix 16 number (famously) fits in a u8, so unwrap here is safe.
132            let to_hex = |d: u8| char::from_digit(d as u32, 16).unwrap() as u8;
133            let buf = [
134                b'\\',
135                b'u',
136                to_hex(bytes[0]),
137                to_hex(bytes[1]),
138                to_hex(bytes[2]),
139                to_hex(bytes[3]),
140            ];
141            writer.write(&buf);
142        }
143        c if c.is_ascii() => {
144            writer.write(&[c as u8]);
145        }
146        c => {
147            let mut buf = [0; 4];
148            let len = c.encode_utf8(&mut buf).len();
149            writer.write(&buf[..len])
150        }
151    }
152}
153
/// Reports whether any of the 16 bytes packed into `val` equals `0x22` (ASCII `"`).
///
/// XOR-ing with `0x22` repeated in every byte turns each matching byte into zero. The
/// subtract / and-not / mask sequence that follows is non-zero exactly when at least one byte
/// of that XOR result is zero.
#[inline]
fn contains_0x22(val: u128) -> bool {
    const QUOTES: u128 = u128::from_ne_bytes([0x22; 16]);
    const ONES: u128 = u128::from_ne_bytes([0x01; 16]);
    const HIGH_BITS: u128 = u128::from_ne_bytes([0x80; 16]);

    let diff = val ^ QUOTES;
    diff.wrapping_sub(ONES) & !diff & HIGH_BITS != 0
}
162
/// Reports whether any of the 16 bytes packed into `val` equals `0x5c` (ASCII `\`).
///
/// XOR-ing with `0x5c` repeated in every byte turns each matching byte into zero. The
/// subtract / and-not / mask sequence that follows is non-zero exactly when at least one byte
/// of that XOR result is zero.
#[inline]
fn contains_0x5c(val: u128) -> bool {
    const BACKSLASHES: u128 = u128::from_ne_bytes([0x5c; 16]);
    const ONES: u128 = u128::from_ne_bytes([0x01; 16]);
    const HIGH_BITS: u128 = u128::from_ne_bytes([0x80; 16]);

    let diff = val ^ BACKSLASHES;
    diff.wrapping_sub(ONES) & !diff & HIGH_BITS != 0
}
171
/// Reports whether every one of the 16 bytes packed into `value` has at least one of its top
/// three bits set, i.e. whether no byte is below `0x20`.
///
/// Each byte is first masked down to its top three bits (`0xe0`). A byte whose masked value is
/// zero had none of them set, so the check reduces to "the masked word contains no zero byte".
#[inline]
fn top_three_bits_set(value: u128) -> bool {
    const TOP_THREE: u128 = u128::from_ne_bytes([0xe0; 16]);
    const ONES: u128 = u128::from_ne_bytes([0x01; 16]);
    const HIGH_BITS: u128 = u128::from_ne_bytes([0x80; 16]);

    let upper = value & TOP_THREE;
    // Non-zero exactly when some byte of `upper` is zero.
    let zero_lanes = upper.wrapping_sub(ONES) & !upper & HIGH_BITS;
    zero_lanes == 0
}