facet_json/
lib.rs

1#![cfg_attr(not(feature = "std"), no_std)]
2#![warn(missing_docs)]
3#![warn(clippy::std_instead_of_core)]
4#![warn(clippy::std_instead_of_alloc)]
5#![forbid(unsafe_code)]
6#![doc = include_str!("../README.md")]
7
8#[cfg(feature = "std")]
9use std::io::{self, Write};
10
11pub use facet_deserialize::{DeserError, DeserErrorKind, DeserErrorMessage};
12
13extern crate alloc;
14
15mod deserialize;
16pub use deserialize::*;
17
18#[cfg(feature = "std")]
19mod serialize;
20#[cfg(feature = "std")]
21pub use serialize::*;
22
23mod tokenizer;
24
/// The JSON format.
///
/// A field-less (zero-sized) marker type: it carries no data and only names the format.
// NOTE(review): presumably the format selector used by the `deserialize` module — confirm there.
struct Json;
27
/// Properly escapes and writes a JSON string.
///
/// Emits an opening quote, the escaped contents of `s` and a closing quote. The contents are
/// processed in two modes:
///
/// 1. Fast path: the input is inspected in windows of `STEP_SIZE` (16) bytes. A window whose
///    bytes are all ASCII and contain no quote, backslash or control character (including DEL)
///    needs no escaping and is handed to the writer verbatim.
/// 2. Slow path: any other window, and the tail that is shorter than a full window, is written
///    character by character through `write_json_escaped_char`.
///
/// # Errors
///
/// Returns the first error reported by `writer`.
#[cfg(feature = "std")]
#[inline]
fn write_json_string<W: Write>(writer: &mut W, s: &str) -> io::Result<()> {
    // The window size is arbitrary; it has not been tuned against real-world data.
    const STEP_SIZE: usize = 16;
    type Window = u128;
    type Chunk = [u8; STEP_SIZE];

    const HIGH_BITS: Window = 0x80808080808080808080808080808080;
    const LOW_BITS: Window = 0x01010101010101010101010101010101;

    let bytes = s.as_bytes();
    writer.write_all(b"\"")?;

    // Invariant: `idx` always sits on a UTF-8 character boundary. The fast path only ever skips
    // ASCII bytes and the slow path only ever skips whole characters.
    let mut idx = 0;
    while idx + STEP_SIZE <= bytes.len() {
        let slice = &bytes[idx..idx + STEP_SIZE];
        // Cannot fail: the slice is exactly `STEP_SIZE` bytes long by construction.
        let chunk = Chunk::try_from(slice).expect("slice is exactly STEP_SIZE bytes long");
        let window = Window::from_ne_bytes(chunk);

        // The window is a concatenation of u8 values. Every one of them must:
        // 1. be ASCII (high bit clear),
        // 2. not be a quote (0x22),
        // 3. not be a backslash (0x5c),
        // 4. not be a control character below 0x20 (so at least one of the top three bits is set),
        // 5. not be DEL (0x7f), which the slow path escapes as `\u007f`; letting it through here
        //    would make the output depend on where in the string the character happens to sit.
        let completely_ascii = (window & HIGH_BITS) == 0;
        let quote_free = !contains_0x22(window);
        let backslash_free = !contains_0x5c(window);
        let control_char_free = top_three_bits_set(window);
        // Only meaningful when `completely_ascii` holds: then every byte is at most 0x7f, adding
        // one cannot carry into the neighbouring byte, and the high bit ends up set exactly for
        // 0x7f. When the window is not ASCII the fast path is rejected anyway.
        let delete_free = (window.wrapping_add(LOW_BITS) & HIGH_BITS) == 0;

        if completely_ascii && quote_free && backslash_free && control_char_free && delete_free {
            // Nothing to escape: write the bytes as they are.
            writer.write_all(slice)?;
            idx += STEP_SIZE;
        } else {
            // At least one byte needs attention: escape up to `STEP_SIZE` characters one by one,
            // then advance by however many bytes those characters occupied.
            let rest = &s[idx..];
            let mut chars = rest.chars();
            for c in chars.by_ref().take(STEP_SIZE) {
                write_json_escaped_char(writer, c)?;
            }
            idx += rest.len() - chars.as_str().len();
        }
    }

    // Fewer than `STEP_SIZE` bytes remain; handle them on the slow path.
    for c in s[idx..].chars() {
        write_json_escaped_char(writer, c)?;
    }

    writer.write_all(b"\"")
}
92
/// Writes a single JSON escaped character.
///
/// * `"` and `\` are written with a leading backslash.
/// * Newline, carriage return, tab, backspace and form feed use their two-character short
///   escapes (`\n`, `\r`, `\t`, `\b`, `\f`).
/// * Every other ASCII control character (which includes DEL, `0x7f`) is written as `\u00XX`
///   with lowercase hexadecimal digits.
/// * Anything else is written unchanged as its UTF-8 encoding.
///
/// # Errors
///
/// Returns any error reported by `writer`.
#[cfg(feature = "std")]
#[inline]
fn write_json_escaped_char<W: Write>(writer: &mut W, c: char) -> io::Result<()> {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    match c {
        '"' => writer.write_all(b"\\\""),
        '\\' => writer.write_all(b"\\\\"),
        '\n' => writer.write_all(b"\\n"),
        '\r' => writer.write_all(b"\\r"),
        '\t' => writer.write_all(b"\\t"),
        '\u{08}' => writer.write_all(b"\\b"),
        '\u{0C}' => writer.write_all(b"\\f"),
        c if c.is_ascii_control() => {
            // ASCII control characters are at most 0x7f, so the code point fits in a byte and the
            // two leading hex digits are always zero. Building the escape from a lookup table
            // avoids allocating a temporary `String` for every escaped character.
            let code = c as u8;
            let escape = [
                b'\\',
                b'u',
                b'0',
                b'0',
                HEX_DIGITS[usize::from(code >> 4)],
                HEX_DIGITS[usize::from(code & 0x0f)],
            ];
            writer.write_all(&escape)
        }
        c => {
            // Covers printable ASCII and non-ASCII alike: `encode_utf8` yields one to four bytes.
            let mut buf = [0; 4];
            writer.write_all(c.encode_utf8(&mut buf).as_bytes())
        }
    }
}
127
/// Reports whether any of the 16 bytes packed into `val` equals `0x22` (the ASCII double quote).
///
/// XOR-ing with a word made of repeated `0x22` turns every matching byte into zero; the rest is
/// the branch-free "does this word contain a zero byte" test. Borrow propagation can flag lanes
/// above a genuine zero byte spuriously, but the overall yes/no answer is exact.
fn contains_0x22(val: u128) -> bool {
    const QUOTES: u128 = 0x22222222222222222222222222222222;
    const LOW_BITS: u128 = 0x01010101010101010101010101010101;
    const HIGH_BITS: u128 = 0x80808080808080808080808080808080;

    // A lane of `lanes` is zero exactly where `val` holds a quote.
    let lanes = val ^ QUOTES;
    (lanes.wrapping_sub(LOW_BITS) & !lanes & HIGH_BITS) != 0
}
135
/// Reports whether any of the 16 bytes packed into `val` equals `0x5c` (the ASCII backslash).
///
/// XOR-ing with a word made of repeated `0x5c` turns every matching byte into zero; the rest is
/// the branch-free "does this word contain a zero byte" test. Borrow propagation can flag lanes
/// above a genuine zero byte spuriously, but the overall yes/no answer is exact.
fn contains_0x5c(val: u128) -> bool {
    const BACKSLASHES: u128 = 0x5c5c5c5c5c5c5c5c5c5c5c5c5c5c5c5c;
    const LOW_BITS: u128 = 0x01010101010101010101010101010101;
    const HIGH_BITS: u128 = 0x80808080808080808080808080808080;

    // A lane of `lanes` is zero exactly where `val` holds a backslash.
    let lanes = val ^ BACKSLASHES;
    (lanes.wrapping_sub(LOW_BITS) & !lanes & HIGH_BITS) != 0
}
143
/// For each of the 16 u8s that make up a u128, check that at least one of the top three bits is
/// set.
///
/// Returns `true` only when every byte has some bit of `0xe0` set, i.e. every byte is `0x20` or
/// larger. A single byte below `0x20` makes the result `false`.
fn top_three_bits_set(value: u128) -> bool {
    const TOP_THREE: u128 = 0xe0e0e0e0e0e0e0e0e0e0e0e0e0e0e0e0;
    const LOW_BITS: u128 = 0x01010101010101010101010101010101;
    const HIGH_BITS: u128 = 0x80808080808080808080808080808080;

    // Keep only the top three bits of each lane: a lane becomes zero exactly when its byte is
    // below 0x20. The rest is the branch-free "does this word contain a zero byte" test.
    let lanes = value & TOP_THREE;
    (lanes.wrapping_sub(LOW_BITS) & !lanes & HIGH_BITS) == 0
}