1use crate::httpx::error;
20use std::str;
21
/// Event code produced by the scanner for every byte it is fed.
///
/// The discriminants are spelled out explicitly so the numeric value of each
/// event stays fixed regardless of declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) enum ScanState {
    /// The byte was consumed inside a literal; nothing structural happened.
    Continue = 0,
    /// First byte of a string, number, `true`, `false` or `null`.
    BeginLiteral = 1,
    /// An opening `{` was read.
    BeginObject = 2,
    /// The `:` that terminates an object key was read.
    ObjectKey = 3,
    /// The `,` that follows an object value was read.
    ObjectValue = 4,
    /// A closing `}` was read.
    EndObject = 5,
    /// An opening `[` was read.
    BeginArray = 6,
    /// The `,` that follows an array element was read.
    ArrayValue = 7,
    /// A closing `]` was read.
    EndArray = 8,
    /// The byte was insignificant whitespace.
    SkipSpace = 9,

    /// The top-level value is complete.
    End = 10,
    /// A syntax error was hit; the scanner keeps reporting this afterwards.
    Error = 11,
}
39
/// What the scanner is currently in the middle of inside an enclosing
/// container; one entry is kept per open object or array.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) enum ParseState {
    /// Inside an object, reading (or about to read) a key.
    ObjectKey = 0,
    /// Inside an object, reading the value that follows a `:`.
    ObjectValue = 1,
    /// Inside an array, reading an element.
    ArrayValue = 2,
}
46
47pub(crate) struct Scanner {
48 step: fn(&mut Scanner, u8) -> ScanState,
49 end_top: bool,
50 parse_state: Vec<ParseState>,
51 err: Option<error::Error>,
52 bytes: usize,
53}
54
55impl Scanner {
56 pub fn new() -> Self {
57 Scanner {
58 step: Scanner::state_begin_value,
59 end_top: false,
60 parse_state: Vec::new(),
61 err: None,
62 bytes: 0,
63 }
64 }
65
66 pub fn step(&mut self, step: u8) -> ScanState {
67 (self.step)(self, step)
68 }
69
70 pub fn incr_bytes(&mut self, incr: isize) {
71 self.bytes = (self.bytes as isize + incr) as usize;
72 }
73
74 pub fn err(&self) -> Option<&error::Error> {
75 self.err.as_ref()
76 }
77
78 pub fn reset(&mut self) {
79 self.step = Scanner::state_begin_value;
80 self.parse_state.clear();
81 self.err = None;
82 self.end_top = false;
83 }
84
85 fn eof(&mut self) -> ScanState {
86 if self.err.is_some() {
87 return ScanState::Error;
88 }
89 if self.end_top {
90 return ScanState::End;
91 }
92 (self.step)(self, b' ')
93 }
94
95 fn state_begin_value(s: &mut Scanner, c: u8) -> ScanState {
96 if c.is_ascii_whitespace() {
97 return ScanState::SkipSpace;
98 }
99 match c {
100 b'{' => {
101 s.step = Scanner::state_begin_string_or_empty;
102 s.push_parse_state(ParseState::ObjectKey, ScanState::BeginObject)
103 }
104 b'[' => {
105 s.step = Scanner::state_begin_value_or_empty;
106 s.push_parse_state(ParseState::ArrayValue, ScanState::BeginArray)
107 }
108 b'"' => {
109 s.step = Scanner::state_in_string;
110 ScanState::BeginLiteral
111 }
112 b'-' => {
113 s.step = Scanner::state_neg;
114 ScanState::BeginLiteral
115 }
116 b'0' => {
117 s.step = Scanner::state0;
118 ScanState::BeginLiteral
119 }
120 b't' => {
121 s.step = Scanner::state_t;
122 ScanState::BeginLiteral
123 }
124 b'f' => {
125 s.step = Scanner::state_f;
126 ScanState::BeginLiteral
127 }
128 b'n' => {
129 s.step = Scanner::state_n;
130 ScanState::BeginLiteral
131 }
132 _ if c.is_ascii_digit() => {
133 s.step = Scanner::state1;
134 ScanState::BeginLiteral
135 }
136 _ => s.error(c, "looking for beginning of value"),
137 }
138 }
139
140 fn state_begin_value_or_empty(s: &mut Scanner, c: u8) -> ScanState {
141 if c.is_ascii_whitespace() {
142 return ScanState::SkipSpace;
143 }
144 if c == b']' {
145 return Scanner::state_end_value(s, c);
146 }
147 Scanner::state_begin_value(s, c)
148 }
149
150 fn state_begin_string_or_empty(s: &mut Scanner, c: u8) -> ScanState {
151 if c.is_ascii_whitespace() {
152 return ScanState::SkipSpace;
153 }
154 if c == b'}' {
155 let n = s.parse_state.len();
156 s.parse_state[n - 1] = ParseState::ObjectValue;
157 return Scanner::state_end_value(s, c);
158 }
159 Scanner::state_begin_string(s, c)
160 }
161
162 fn state_begin_string(s: &mut Scanner, c: u8) -> ScanState {
163 if c.is_ascii_whitespace() {
164 return ScanState::SkipSpace;
165 }
166 if c == b'"' {
167 s.step = Scanner::state_in_string;
168 return ScanState::BeginLiteral;
169 }
170 s.error(c, "looking for beginning of object key string")
171 }
172
173 fn state_end_value(s: &mut Scanner, c: u8) -> ScanState {
174 let n = s.parse_state.len();
175 if n == 0 {
176 s.step = Scanner::state_end_top;
177 s.end_top = true;
178 return Scanner::state_end_top(s, c);
179 }
180 if c.is_ascii_whitespace() {
181 s.step = Scanner::state_end_value;
182 return ScanState::SkipSpace;
183 }
184 let ps = s.parse_state[n - 1];
185 match ps {
186 ParseState::ObjectKey => {
187 if c == b':' {
188 s.parse_state[n - 1] = ParseState::ObjectValue;
189 s.step = Scanner::state_begin_value;
190 return ScanState::ObjectKey;
191 }
192 s.error(c, "after object key")
193 }
194 ParseState::ObjectValue => {
195 if c == b',' {
196 s.parse_state[n - 1] = ParseState::ObjectKey;
197 s.step = Scanner::state_begin_string;
198 return ScanState::ObjectValue;
199 }
200 if c == b'}' {
201 s.pop_parse_state();
202 return ScanState::EndObject;
203 }
204 s.error(c, "after object key:value pair")
205 }
206 ParseState::ArrayValue => {
207 if c == b',' {
208 s.step = Scanner::state_begin_value;
209 return ScanState::ArrayValue;
210 }
211 if c == b']' {
212 s.pop_parse_state();
213 return ScanState::EndArray;
214 }
215 s.error(c, "after array element")
216 }
217 }
218 }
219
220 fn state_end_top(s: &mut Scanner, c: u8) -> ScanState {
221 if !c.is_ascii_whitespace() {
222 s.error(c, "after top-level value");
223 }
224
225 ScanState::End
226 }
227
228 fn state_in_string(s: &mut Scanner, c: u8) -> ScanState {
229 if c == b'"' {
230 s.step = Scanner::state_end_value;
231 return ScanState::Continue;
232 }
233 if c == b'\\' {
234 s.step = Scanner::state_in_string_esc;
235 return ScanState::Continue;
236 }
237 if c < 0x20 {
238 return s.error(c, "in string literal");
239 }
240 ScanState::Continue
241 }
242
243 fn state_in_string_esc(s: &mut Scanner, c: u8) -> ScanState {
244 match c {
245 b'b' | b'f' | b'n' | b'r' | b't' | b'\\' | b'/' | b'"' => {
246 s.step = Scanner::state_in_string;
247 ScanState::Continue
248 }
249 b'u' => {
250 s.step = Scanner::state_in_string_esc_u;
251 ScanState::Continue
252 }
253 _ => s.error(c, "in string escape code"),
254 }
255 }
256
257 fn state_in_string_esc_u(s: &mut Scanner, c: u8) -> ScanState {
258 if c.is_ascii_hexdigit() {
259 s.step = Scanner::state_in_string_esc_u1;
260 ScanState::Continue
261 } else {
262 s.error(c, "in \\u hexadecimal character escape")
263 }
264 }
265
266 fn state_in_string_esc_u1(s: &mut Scanner, c: u8) -> ScanState {
267 if c.is_ascii_hexdigit() {
268 s.step = Scanner::state_in_string_esc_u12;
269 ScanState::Continue
270 } else {
271 s.error(c, "in \\u hexadecimal character escape")
272 }
273 }
274
275 fn state_in_string_esc_u12(s: &mut Scanner, c: u8) -> ScanState {
276 if c.is_ascii_hexdigit() {
277 s.step = Scanner::state_in_string_esc_u123;
278 ScanState::Continue
279 } else {
280 s.error(c, "in \\u hexadecimal character escape")
281 }
282 }
283
284 fn state_in_string_esc_u123(s: &mut Scanner, c: u8) -> ScanState {
285 if c.is_ascii_hexdigit() {
286 s.step = Scanner::state_in_string;
287 ScanState::Continue
288 } else {
289 s.error(c, "in \\u hexadecimal character escape")
290 }
291 }
292
293 fn state_neg(s: &mut Scanner, c: u8) -> ScanState {
294 if c == b'0' {
295 s.step = Scanner::state0;
296 ScanState::Continue
297 } else if c.is_ascii_digit() {
298 s.step = Scanner::state1;
299 ScanState::Continue
300 } else {
301 s.error(c, "in numeric literal")
302 }
303 }
304
305 fn state1(s: &mut Scanner, c: u8) -> ScanState {
306 if c.is_ascii_digit() {
307 s.step = Scanner::state1;
308 ScanState::Continue
309 } else {
310 Scanner::state0(s, c)
311 }
312 }
313
314 fn state0(s: &mut Scanner, c: u8) -> ScanState {
315 if c == b'.' {
316 s.step = Scanner::state_dot;
317 ScanState::Continue
318 } else if c == b'e' || c == b'E' {
319 s.step = Scanner::state_e;
320 ScanState::Continue
321 } else {
322 Scanner::state_end_value(s, c)
323 }
324 }
325
326 fn state_dot(s: &mut Scanner, c: u8) -> ScanState {
327 if c.is_ascii_digit() {
328 s.step = Scanner::state_dot0;
329 ScanState::Continue
330 } else {
331 s.error(c, "after decimal point in numeric literal")
332 }
333 }
334
335 fn state_dot0(s: &mut Scanner, c: u8) -> ScanState {
336 if c.is_ascii_digit() {
337 ScanState::Continue
338 } else if c == b'e' || c == b'E' {
339 s.step = Scanner::state_e;
340 ScanState::Continue
341 } else {
342 Scanner::state_end_value(s, c)
343 }
344 }
345
346 fn state_e(s: &mut Scanner, c: u8) -> ScanState {
347 if c == b'+' || c == b'-' {
348 s.step = Scanner::state_e_sign;
349 ScanState::Continue
350 } else {
351 Scanner::state_e_sign(s, c)
352 }
353 }
354
355 fn state_e_sign(s: &mut Scanner, c: u8) -> ScanState {
356 if c.is_ascii_digit() {
357 s.step = Scanner::state_e0;
358 ScanState::Continue
359 } else {
360 s.error(c, "in exponent of numeric literal")
361 }
362 }
363
364 fn state_e0(s: &mut Scanner, c: u8) -> ScanState {
365 if c.is_ascii_digit() {
366 ScanState::Continue
367 } else {
368 Scanner::state_end_value(s, c)
369 }
370 }
371
372 fn state_t(s: &mut Scanner, c: u8) -> ScanState {
373 if c == b'r' {
374 s.step = Scanner::state_tr;
375 ScanState::Continue
376 } else {
377 s.error(c, "in literal true (expecting 'r')")
378 }
379 }
380
381 fn state_tr(s: &mut Scanner, c: u8) -> ScanState {
382 if c == b'u' {
383 s.step = Scanner::state_tru;
384 ScanState::Continue
385 } else {
386 s.error(c, "in literal true (expecting 'u')")
387 }
388 }
389
390 fn state_tru(s: &mut Scanner, c: u8) -> ScanState {
391 if c == b'e' {
392 s.step = Scanner::state_end_value;
393 ScanState::Continue
394 } else {
395 s.error(c, "in literal true (expecting 'e')")
396 }
397 }
398
399 fn state_f(s: &mut Scanner, c: u8) -> ScanState {
400 if c == b'a' {
401 s.step = Scanner::state_fa;
402 ScanState::Continue
403 } else {
404 s.error(c, "in literal false (expecting 'a')")
405 }
406 }
407
408 fn state_fa(s: &mut Scanner, c: u8) -> ScanState {
409 if c == b'l' {
410 s.step = Scanner::state_fal;
411 ScanState::Continue
412 } else {
413 s.error(c, "in literal false (expecting 'l')")
414 }
415 }
416
417 fn state_fal(s: &mut Scanner, c: u8) -> ScanState {
418 if c == b's' {
419 s.step = Scanner::state_fals;
420 ScanState::Continue
421 } else {
422 s.error(c, "in literal false (expecting 's')")
423 }
424 }
425
426 fn state_fals(s: &mut Scanner, c: u8) -> ScanState {
427 if c == b'e' {
428 s.step = Scanner::state_end_value;
429 ScanState::Continue
430 } else {
431 s.error(c, "in literal false (expecting 'e')")
432 }
433 }
434
435 fn state_n(s: &mut Scanner, c: u8) -> ScanState {
436 if c == b'u' {
437 s.step = Scanner::state_nu;
438 ScanState::Continue
439 } else {
440 s.error(c, "in literal null (expecting 'u')")
441 }
442 }
443
444 fn state_nu(s: &mut Scanner, c: u8) -> ScanState {
445 if c == b'l' {
446 s.step = Scanner::state_nul;
447 ScanState::Continue
448 } else {
449 s.error(c, "in literal null (expecting 'l')")
450 }
451 }
452
453 fn state_nul(s: &mut Scanner, c: u8) -> ScanState {
454 if c == b'l' {
455 s.step = Scanner::state_end_value;
456 ScanState::Continue
457 } else {
458 s.error(c, "in literal null (expecting 'l')")
459 }
460 }
461
462 fn error(&mut self, c: u8, context: &str) -> ScanState {
463 self.step = Scanner::state_error;
464 self.err = Some(error::Error::new_message_error(format!(
465 "invalid character {} {}",
466 Scanner::quote_char(c),
467 context
468 )));
469 ScanState::Error
470 }
471
472 fn state_error(_s: &mut Scanner, _c: u8) -> ScanState {
473 ScanState::Error
474 }
475
476 pub fn quote_char(c: u8) -> String {
477 match c {
478 b'\'' => "'\\''".to_string(),
479 b'"' => "'\"'".to_string(),
480 _ => format!("'{}'", c as char),
481 }
482 }
483
484 fn push_parse_state(
485 &mut self,
486 new_parse_state: ParseState,
487 success_state: ScanState,
488 ) -> ScanState {
489 self.parse_state.push(new_parse_state);
490 success_state
491 }
492
493 fn pop_parse_state(&mut self) {
494 self.parse_state.pop();
495 if self.parse_state.is_empty() {
496 self.step = Scanner::state_end_top;
497 self.end_top = true;
498 } else {
499 self.step = Scanner::state_end_value;
500 }
501 }
502}
503
504fn valid(data: &[u8]) -> bool {
505 let mut scan = Scanner::new();
506 for &c in data {
507 scan.bytes += 1;
508 if (scan.step)(&mut scan, c) == ScanState::Error {
509 return false;
510 }
511 }
512 scan.eof() != ScanState::Error
513}
514
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven check of `valid` against well-formed and malformed inputs.
    #[test]
    fn test_valid() {
        // (input, expected verdict)
        let cases: [(&str, bool); 13] = [
            ("foo", false),
            ("}{", false),
            ("{]", false),
            ("{}", true),
            ("[]", true),
            ("[1,2,3]", true),
            ("[1,2,3,]", false),
            (r#"{"foo":"bar"}"#, true),
            (r#"{"foo": "bar",}"#, false),
            (r#"{"foo": "bar", "baz":}"#, false),
            (r#"{"foo": "bar", "baz": 123,}"#, false),
            (r#"{"foo":"bar","bar":{"baz":["qux"]}}"#, true),
            ("{\"foo\": \"bar\", \"baz\": 123, \"qux\":}", false),
        ];

        for &(data, expected) in cases.iter() {
            assert_eq!(valid(data.as_bytes()), expected, "data: {data}");
        }
    }
}
541}