json_threat_protection/read/
mod.rs1mod io;
4mod slice;
5mod str;
6mod utils;
7pub use io::IoRead;
8pub use slice::SliceRead;
9pub use str::StrRead;
10use thiserror::Error;
11
12use utils::{decode_hex_sequence, IS_HEX, NEED_ESCAPE};
13
// Consumes one JSON number from the reader `$self` without building its value.
//
// The expansion uses `return`, so it must be invoked inside a function that
// returns `Result<(), ReadError>`. The caller is expected to have peeked a `-`
// or an ASCII digit before invoking it.
//
// Outcomes:
// * `Ok(())` once the integer part has been consumed (a following fraction or
//   exponent is delegated to `parse_float!` / `parse_exponent!`);
// * `ReadError::UnexpectedEndOfInput` if the input ends before any digit;
// * `ReadError::NonNumericalCharacter` if a `-` is followed by a non-digit;
// * `ReadError::LeadingZerosInNumber` if a leading `0` is followed by a digit;
// * `ReadError::Bug` if the caller's precondition on the first byte is violated.
macro_rules! parse_number {
    ($self:ident) => {{
        // Optional leading minus sign. Remember whether it was present: once the
        // sign is consumed, the following byte has not been vetted by anyone.
        let negative = match $self.peek()? {
            Some(b'-') => {
                $self.discard();
                true
            }
            Some(b'0'..=b'9') => false,
            Some(_) => return Err(ReadError::Bug{
                msg: "macro_rules! parse_number: assume the first character is a number or a minus sign".to_string(),
                position: $self.position(),
            }),
            None => return Err(ReadError::UnexpectedEndOfInput($self.position())),
        };

        let first = match $self.next()? {
            Some(n @ b'0'..=b'9') => n,
            // After a sign the byte comes straight from the input, so a
            // non-digit (or end of input) is malformed JSON, not an internal bug.
            Some(_) if negative => {
                return Err(ReadError::NonNumericalCharacter($self.position()))
            }
            None if negative => {
                return Err(ReadError::UnexpectedEndOfInput($self.position()))
            }
            // Without a sign the byte was already peeked as a digit above.
            _ => return Err(ReadError::Bug {
                msg: "macro_rules! parse_number: assume the first character is a number".to_string(),
                position: $self.position(),
            }),
        };

        // A single digit at the very end of the input is a complete number.
        let second = $self.peek()?;
        if second.is_none() {
            return Ok(());
        }

        // JSON forbids leading zeros such as `01`.
        if first == b'0' && matches!(second, Some(b'0'..=b'9')) {
            return Err(ReadError::LeadingZerosInNumber($self.position()));
        }

        // Remaining integer digits; hand over on `.` or an exponent marker.
        loop {
            match $self.peek()? {
                Some(b'0'..=b'9') => $self.discard(),
                Some(b'.') => return parse_float!($self),
                Some(b'e') | Some(b'E') => return parse_exponent!($self),
                _ => break,
            }
        }

        Ok(())
    }};
}
55
// Consumes the fractional part of a number: a `.`, at least one digit, then any
// further digits. An `e`/`E` after the digits is delegated to `parse_exponent!`.
//
// The expansion uses `return`, so it must be invoked inside a function that
// returns `Result<(), ReadError>`, with `$self` positioned on the `.`.
macro_rules! parse_float {
    ($self:ident) => {{
        // The caller must have stopped right on the decimal point.
        match $self.next()? {
            Some(b'.') => {}
            _ => {
                return Err(ReadError::Bug {
                    msg: "macro_rules! parse_float: assume the first character is a period".to_string(),
                    position: $self.position(),
                });
            }
        }

        // At least one digit is mandatory after the point.
        let after_point = $self.peek()?;
        match after_point {
            None => return Err(ReadError::UnexpectedEndOfInput($self.position())),
            Some(b'0'..=b'9') => $self.discard(),
            Some(_) => return Err(ReadError::NoNumberCharactersAfterFraction($self.position())),
        }

        // Remaining fraction digits, optionally followed by an exponent.
        loop {
            let upcoming = $self.peek()?;
            if matches!(upcoming, Some(b'e') | Some(b'E')) {
                return parse_exponent!($self);
            }
            if !matches!(upcoming, Some(b'0'..=b'9')) {
                break;
            }
            $self.discard();
        }

        Ok(())
    }};
}
82
// Consumes the exponent part of a number: `e`/`E`, an optional `+`/`-` sign,
// then one or more digits.
//
// The expansion uses `return`, so it must be invoked inside a function that
// returns `Result<(), ReadError>`, with `$self` positioned on the `e`/`E`.
macro_rules! parse_exponent {
    ($self:ident) => {{
        // The caller must have stopped right on the exponent marker.
        match $self.next()? {
            Some(b'e') | Some(b'E') => {}
            _ => {
                return Err(ReadError::Bug {
                    msg: "macro_rules! parse_exponent: assume the first character is an exponent"
                        .to_string(),
                    position: $self.position(),
                });
            }
        }

        // Optional sign; a digit is left in place for the check below.
        match $self.peek()? {
            None => return Err(ReadError::UnexpectedEndOfInput($self.position())),
            Some(b'+') | Some(b'-') => $self.discard(),
            Some(b'0'..=b'9') => {}
            Some(_) => return Err(ReadError::NoNumberCharactersAfterExponent($self.position())),
        }

        // Whether or not a sign was consumed, a digit must come next.
        match $self.peek()? {
            None => return Err(ReadError::UnexpectedEndOfInput($self.position())),
            Some(b'0'..=b'9') => {}
            Some(_) => return Err(ReadError::NoNumberCharactersAfterExponent($self.position())),
        }

        // Swallow every exponent digit.
        while let Some(b'0'..=b'9') = $self.peek()? {
            $self.discard();
        }

        Ok(())
    }};
}
116
// Reads the next four bytes from `$self` and evaluates to them as a `[u8; 4]`,
// requiring each one to be flagged as a hex digit by the `IS_HEX` table.
//
// The expansion uses `return` to bail out with
// `ReadError::UnexpectedEndOfInput` (input ended early) or
// `ReadError::NonHexCharacterInUnicodeEscape` (byte not flagged by `IS_HEX`),
// so it must be invoked inside a function returning `Result<_, ReadError>`.
macro_rules! next4_hex {
    ($self:ident) => {{
        let mut digits = [0u8; 4];
        for slot in digits.iter_mut() {
            *slot = match $self.next()? {
                Some(byte) if IS_HEX[byte as usize] => byte,
                Some(_) => {
                    return Err(ReadError::NonHexCharacterInUnicodeEscape($self.position()));
                }
                None => return Err(ReadError::UnexpectedEndOfInput($self.position())),
            };
        }
        digits
    }};
}
137
138pub use utils::Position;
139
140#[derive(Debug, Error)]
141pub enum ReadError {
143 #[error("unexpected end of input ({0})")]
145 UnexpectedEndOfInput(Position),
146
147 #[error("I/O Error ({0})")]
149 IoError(std::io::Error, Position),
150
151 #[error("non numirical character ({0})")]
153 NonNumericalCharacter(Position),
154
155 #[error("unclosed string ({0})")]
157 UnclosedString(Position),
158
159 #[error("invalid escape sequence ({0})")]
161 InvalidEscapeSequence(Position),
162
163 #[error("control character in string ({0})")]
165 ControlCharacterInString(Position),
166
167 #[error("non hex character in unicode escape sequence ({0})")]
169 NonHexCharacterInUnicodeEscape(Position),
170
171 #[error("leading zeros in number ({0})")]
173 LeadingZerosInNumber(Position),
174
175 #[error("no number characters after fraction ({0})")]
177 NoNumberCharactersAfterFraction(Position),
178
179 #[error("no number characters after exponent ({0})")]
181 NoNumberCharactersAfterExponent(Position),
182
183 #[error("running into unexpected state, please report this issue to the maintainer, ({msg}) ({position})")]
185 Bug {
186 msg: String,
188
189 position: Position,
191 },
192}
193
194pub trait Read {
204 fn position(&self) -> Position;
206
207 fn peek(&mut self) -> Result<Option<u8>, ReadError>;
209
210 fn next(&mut self) -> Result<Option<u8>, ReadError>;
212
213 fn discard(&mut self) {
219 self.next().unwrap();
220 }
221
222 fn next4(&mut self) -> Result<[u8; 4], ReadError> {
224 let mut buf = [0; 4];
225 for i in 0..4 {
226 match self.next()? {
227 Some(ch) => buf[i] = ch,
228 None => return Err(ReadError::UnexpectedEndOfInput(self.position())),
229 }
230 }
231 Ok(buf)
232 }
233
234 fn next5(&mut self) -> Result<[u8; 5], ReadError> {
236 let mut buf = [0; 5];
237 for i in 0..5 {
238 match self.next()? {
239 Some(ch) => buf[i] = ch,
240 None => return Err(ReadError::UnexpectedEndOfInput(self.position())),
241 }
242 }
243 Ok(buf)
244 }
245
246 fn skip_whitespace(&mut self) -> Result<(), ReadError> {
248 loop {
249 match self.peek()? {
250 Some(b' ') | Some(b'\t') | Some(b'\n') | Some(b'\r') => {
251 self.next()?;
252 }
253 _ => break,
254 }
255 }
256 Ok(())
257 }
258
259 fn next_number(&mut self) -> Result<(), ReadError> {
261 parse_number!(self)
262 }
263
264 fn next_likely_string(&mut self, buf: &mut Vec<u8>) -> Result<(), ReadError> {
266 if self.next()? != Some(b'"') {
267 return Err(ReadError::Bug {
268 msg: "Read.next_likely_string: assume the first character is a double quote"
269 .to_string(),
270 position: self.position(),
271 });
272 }
273
274 while let Some(byte) = self.next()? {
275 if !NEED_ESCAPE[byte as usize] {
276 buf.push(byte);
277 continue;
278 }
279
280 match byte {
281 b'"' => return Ok(()),
282 b'\\' => {
283 let mut simple_escape = true;
284
285 match self.next()? {
286 Some(b'"') => buf.push(b'"'),
287 Some(b'\\') => buf.push(b'\\'),
288 Some(b'/') => buf.push(b'/'),
289 Some(b'b') => buf.push(b'\x08'),
290 Some(b'f') => buf.push(b'\x0C'),
291 Some(b'n') => buf.push(b'\n'),
292 Some(b'r') => buf.push(b'\r'),
293 Some(b't') => buf.push(b'\t'),
294 Some(b'u') => simple_escape = false,
295 Some(_) => return Err(ReadError::InvalidEscapeSequence(self.position())),
296 None => return Err(ReadError::UnexpectedEndOfInput(self.position())),
297 };
298
299 if simple_escape {
300 continue;
301 }
302
303 let hex = decode_hex_sequence(&next4_hex!(self));
304 let ch = match hex {
305 _n @ 0xDC00..=0xDFFF => {
306 return Err(ReadError::InvalidEscapeSequence(self.position()));
307 }
308 n @ 0xD800..=0xDBFF => {
309 let high = n;
310 if self.next()? != Some(b'\\') {
311 return Err(ReadError::InvalidEscapeSequence(self.position()));
312 }
313 if self.next()? != Some(b'u') {
314 return Err(ReadError::InvalidEscapeSequence(self.position()));
315 }
316 let low = decode_hex_sequence(&next4_hex!(self));
317 if !matches!(low, 0xDC00..=0xDFFF) {
318 return Err(ReadError::InvalidEscapeSequence(self.position()));
319 }
320
321 let high = ((high & 0x03FF) << 10) as u32;
322 let low = (low & 0x03FF) as u32;
323 let codepoint = 0x10000u32 + high + low;
324
325 match std::char::from_u32(codepoint) {
326 Some(ch) => ch,
327 None => {
328 return Err(ReadError::Bug {
329 msg:
330 "Read.next_likely_string: assume the codepoint is valid"
331 .to_string(),
332 position: self.position(),
333 })
334 }
335 }
336 }
337 n => match std::char::from_u32(n as u32) {
338 Some(ch) => ch,
339 None => {
340 return Err(ReadError::Bug {
341 msg: "Read.next_likely_string: assume the codepoint is valid"
342 .to_string(),
343 position: self.position(),
344 });
345 }
346 },
347 };
348
349 buf.extend_from_slice(ch.encode_utf8(&mut [0u8; 4]).as_bytes());
350 }
351 _ => return Err(ReadError::ControlCharacterInString(self.position())),
352 }
353 }
354
355 Err(ReadError::UnclosedString(self.position()))
356 }
357}