1#![doc = include_str!("../README.md")]
2
3use std::collections::HashMap;
4use std::sync::LazyLock;
5
6use serde_json::{Number, Value};
7
/// Lookup table for the single-character JSON escape sequences.
///
/// The key is the character that follows the backslash in the source text and
/// the value is the text the escape decodes to. `\uXXXX` escapes are not part
/// of this table; they are decoded separately by the parser.
static ESCAPED_CHARS: LazyLock<HashMap<char, &'static str>> = LazyLock::new(|| {
    HashMap::from([
        // Rust has no `\b` / `\f` string escapes, so the backspace and form
        // feed control characters are spelled out as code points. (A raw
        // string such as r"\b" would decode to a literal backslash + 'b'.)
        ('b', "\u{0008}"),
        ('f', "\u{000C}"),
        ('n', "\n"),
        ('r', "\r"),
        ('t', "\t"),
        ('"', "\""),
        ('/', "/"),
        ('\\', "\\"),
    ])
});
20
21#[derive(Debug, thiserror::Error, Clone)]
23#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
24pub enum Error {
25 #[error("Unexpected end of JSON input")]
26 UnexpectedEof,
27 #[error("Unexpected token: {0} in JSON at position {1}")]
28 UnexpectedToken(char, usize),
29 #[error("Convert to unicode codepoint failed")]
30 Int,
31 #[error("Invalid unicode codepoint: {0} at position {1}")]
32 InvalidUnicodeCodePoint(u32, usize),
33}
34
/// A position inside the parsed source text.
///
/// All three counters start at zero for the first character of the input.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct Location {
    /// Zero-based line number.
    pub line: usize,
    /// Zero-based column within the line.
    pub column: usize,
    /// Zero-based index of the character within the whole source.
    pub pos: usize,
}
42
/// Settings accepted by the parser.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct Options {
    /// Big-integer handling switch; defaults to `false`.
    ///
    /// NOTE(review): nothing visible in this file reads this flag yet —
    /// confirm the intended semantics before relying on it.
    pub bigint: bool,
}
50
/// Identifies which boundary of a JSON pointer's entry a [`Location`] marks.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub enum Prop {
    /// Start of an object member's key (its opening quote).
    Key,
    /// Position just past an object member's key (after its closing quote).
    KeyEnd,
    /// Start of the value.
    Value,
    /// Position just past the value.
    ValueEnd,
}
59
60struct Parser {
61 chars: Vec<char>,
62 #[allow(dead_code)]
63 options: Options,
64
65 line: usize,
66 column: usize,
67 pos: usize,
68
69 pointers: HashMap<String, LocationMap>,
71}
72
73#[derive(Debug, Clone)]
74pub struct ParseResult {
75 pub value: Value,
76 pub pointers: HashMap<String, LocationMap>,
77}
78
79impl ParseResult {
80 pub fn get_location(&self, ptr: &str) -> Option<&LocationMap> {
82 self.pointers.get(ptr)
83 }
84}
85
86#[derive(Debug, Clone)]
88#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
89pub struct LocationMap(HashMap<Prop, Location>);
90
91impl LocationMap {
92 pub fn get(&self, prop: Prop) -> Option<Location> {
94 self.0.get(&prop).cloned()
95 }
96
97 fn insert(&mut self, prop: Prop, loc: Location) {
98 self.0.insert(prop, loc);
99 }
100
101 pub fn value(&self) -> Location {
103 self.get(Prop::Value).unwrap()
104 }
105
106 pub fn key(&self) -> Location {
108 self.get(Prop::Key).unwrap()
109 }
110
111 pub fn value_end(&self) -> Location {
113 self.get(Prop::ValueEnd).unwrap()
114 }
115
116 pub fn key_end(&self) -> Location {
118 self.get(Prop::KeyEnd).unwrap()
119 }
120}
121
122impl Parser {
123 fn new(source: &str, options: Options) -> Self {
124 let chars = source.chars().collect();
125 Parser {
126 chars,
127 options,
128 line: 0,
129 column: 0,
130 pos: 0,
131 pointers: HashMap::new(),
132 }
133 }
134
135 fn parse(&mut self, ptr: &str, top_level: bool) -> Result<Value, Error> {
136 self.whitespace();
137 self.map(ptr, Prop::Value);
138 let c = self.get_char()?;
139 let data = match c {
140 't' => {
141 self.expect("rue")?;
142 Value::Bool(true)
143 }
144 'f' => {
145 self.expect("alse")?;
146 Value::Bool(false)
147 }
148 'n' => {
149 self.expect("ull")?;
150 Value::Null
151 }
152 '"' => Value::String(self.parse_string()?),
153 '[' => Value::Array(self.parse_array(ptr)?),
154 '{' => self.parse_object(ptr)?,
155 '-' | '0'..='9' => Value::Number(self.parse_number()?),
156 _ => return Err(Error::UnexpectedToken(c, self.pos)),
157 };
158 self.map(ptr, Prop::ValueEnd);
159 self.whitespace();
161 if top_level && self.pos < self.len() {
163 return Err(self.unexpected_token());
164 }
165
166 Ok(data)
167 }
168
169 #[inline]
170 fn len(&self) -> usize {
171 self.chars.len()
172 }
173
174 fn whitespace(&mut self) {
175 'outer: {
176 while self.pos < self.len() {
177 match self.chars.get(self.pos) {
178 Some(' ') => self.column += 1,
179 Some('\t') => self.column += 4,
180 Some('\r') => self.column = 0,
181 Some('\n') => {
182 self.line += 1;
183 self.column = 0;
184 }
185 _ => break 'outer,
186 }
187 self.pos += 1;
188 }
189 }
191 }
192
193 fn parse_string(&mut self) -> Result<String, Error> {
194 let mut s = String::new();
195 loop {
196 match self.get_char()? {
197 '"' => break,
198 '\\' => {
199 let c = self.get_char()?;
200 if let Some(escaped) = ESCAPED_CHARS.get(&c) {
201 s.push_str(escaped);
202 } else if c == 'u' {
203 s.push(self.get_char_code()?);
204 } else {
205 return Err(self.was_unexpected_token());
206 }
207 }
208 c => {
209 s.push(c);
210 }
211 }
212 }
214 Ok(s)
215 }
216
217 fn parse_number(&mut self) -> Result<serde_json::value::Number, Error> {
218 self.back_char();
219
220 let mut num_str = String::new();
221 if self.next() == '-' {
223 num_str.push(self.get_char()?);
224 }
225
226 let next = if self.next() == '0' {
227 self.get_char()?.to_string()
228 } else {
229 self.get_digits()?
230 };
231 num_str = num_str + &next;
232
233 if self.next() == '.' {
234 num_str.push(self.get_char()?);
236 num_str = num_str + &self.get_digits()?;
237 }
238
239 if self.next() == 'e' || self.next() == 'E' {
240 num_str.push(self.get_char()?);
242 if self.next() == '-' || self.next() == '+' {
243 num_str.push(self.get_char()?);
244 }
245 num_str = num_str + &self.get_digits()?;
246 }
247
248 Ok(Number::from_string_unchecked(num_str))
257 }
258
259 fn parse_array(&mut self, ptr: &str) -> Result<Vec<Value>, Error> {
260 self.whitespace();
261 let mut array = Vec::new();
262 let c = self.get_char()?; if c == ']' {
264 return Ok(array);
265 }
266 self.back_char();
267
268 loop {
269 let item_ptr = format!("{}/{}", ptr, array.len());
270 array.push(self.parse(&item_ptr, false)?);
271 self.whitespace();
272 let c = self.get_char()?;
273 if c == ']' {
274 break;
275 } else if c != ',' {
276 return Err(self.unexpected_token());
277 }
278 self.whitespace();
279 }
281
282 Ok(array)
283 }
284
285 fn parse_object(&mut self, ptr: &str) -> Result<Value, Error> {
286 self.whitespace();
287 let mut object = serde_json::Map::new();
288 if self.get_char()? == '}' {
289 return Ok(object.into());
290 }
291
292 self.back_char();
293
294 loop {
295 let loc = self.get_location();
296 if self.get_char()? != '"' {
297 return Err(self.was_unexpected_token());
298 }
299 let key = self.parse_string()?;
300 let prop_ptr = format!("{}/{}", ptr, Self::escape_json_pointer(&key));
301 self.map_location(&prop_ptr, Prop::Key, loc);
302 self.map(&prop_ptr, Prop::KeyEnd);
303 self.whitespace();
304 if self.get_char()? != ':' {
305 return Err(self.was_unexpected_token());
306 }
307 self.whitespace();
308 let value = self.parse(&prop_ptr, false)?;
309 object.insert(key, value);
310 self.whitespace();
311
312 match self.get_char()? {
313 '}' => break,
314 ',' => {}
315 _ => return Err(self.was_unexpected_token()),
316 }
317
318 self.whitespace();
319 }
320 Ok(object.into())
321 }
322
323 fn expect(&mut self, s: &str) -> Result<(), Error> {
324 for c in s.chars() {
325 if self.get_char()? != c {
326 return Err(self.was_unexpected_token());
327 }
328 }
329 Ok(())
330 }
331
332 #[inline]
333 fn get_char(&mut self) -> Result<char, Error> {
334 self.check_unexpected_eof()?;
335 let c = self.next();
336 self.pos += 1;
337 self.column += 1;
338 Ok(c)
339 }
340
341 #[inline]
342 fn next(&self) -> char {
343 *self
344 .chars
345 .get(self.pos)
346 .unwrap_or_else(|| panic!("Unexpected EOF, pos: {}", self.pos))
347 }
348
349 fn back_char(&mut self) {
351 self.pos -= 1;
352 self.column -= 1;
353 }
354
355 fn get_char_code(&mut self) -> Result<char, Error> {
356 let count = 4;
357 let mut code = String::new();
358 for _ in 0..count {
359 let c = self.get_char()?;
360 if !c.is_ascii_hexdigit() {
361 return Err(Error::UnexpectedToken(c, self.pos));
362 }
363 code.push(c);
364 }
365
366 let unicode = u32::from_str_radix(&code, 16).map_err(|_| Error::Int)?;
367 char::from_u32(unicode).ok_or(Error::InvalidUnicodeCodePoint(unicode, self.pos))
368 }
369
370 fn get_digits(&mut self) -> Result<String, Error> {
371 let mut digits = String::new();
372 loop {
373 let c = self.next();
374 if c.is_ascii_digit() {
375 digits.push(self.get_char()?);
376 } else {
377 break;
378 }
379 }
381 Ok(digits)
382 }
383
384 fn map(&mut self, ptr: impl ToString, pro: Prop) {
385 self.map_location(ptr, pro, self.get_location());
386 }
387
388 fn map_location(&mut self, ptr: impl ToString, prop: Prop, loc: Location) {
389 self.pointers
390 .entry(ptr.to_string())
391 .or_insert_with(|| LocationMap(HashMap::new()))
392 .insert(prop, loc);
393 }
394
395 fn get_location(&self) -> Location {
396 Location {
397 line: self.line,
398 column: self.column,
399 pos: self.pos,
400 }
401 }
402
403 fn unexpected_token(&self) -> Error {
404 Error::UnexpectedToken(self.next(), self.pos)
405 }
406
407 fn was_unexpected_token(&mut self) -> Error {
408 self.back_char();
409 self.unexpected_token()
410 }
411
412 fn check_unexpected_eof(&self) -> Result<(), Error> {
413 if self.pos >= self.len() {
414 return Err(Error::UnexpectedEof);
415 }
416
417 Ok(())
418 }
419
420 fn escape_json_pointer(s: &str) -> String {
421 s.replace("~", "~0").replace("/", "~1")
422 }
423}
424
425pub fn parse(source: &str, options: Options) -> Result<ParseResult, Error> {
426 let mut parser = Parser::new(source, options);
427 let value = parser.parse("", true)?;
428 Ok(ParseResult {
429 value,
430 pointers: parser.pointers,
431 })
432}
433
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the expected [`Location`] in an assertion.
    fn loc(line: usize, column: usize, pos: usize) -> Location {
        Location { line, column, pos }
    }

    /// Parses `source` with default options and checks the root is an object.
    fn parse_object(source: &str) -> ParseResult {
        let res = parse(source, Options::default()).unwrap();
        assert!(res.value.is_object());
        res
    }

    /// Checks that the decoded value agrees with `serde_json`'s own parser.
    fn assert_same_as_serde(res: &ParseResult, source: &str) {
        assert_eq!(
            res.value,
            serde_json::from_str::<serde_json::Value>(source).unwrap()
        );
    }

    #[test]
    fn test_parse() {
        // NOTE: the whitespace inside the raw-string fixtures is significant;
        // the asserted columns and positions depend on the exact indentation.
        let source = r#"{
            "name": "John",
            "age": 30,
            "cars": [
                "Ford",
                "BMW",
                "Fiat"
            ]
        }"#;
        let res = parse_object(source);
        assert_eq!(res.pointers["/name"].key(), loc(1, 12, 14));
        assert_eq!(res.pointers["/name"].key_end(), loc(1, 18, 20));
        assert_same_as_serde(&res, source);

        let source = r#"{
  "foo": "bar"
}"#;
        let res = parse_object(source);
        assert_eq!(res.pointers[""].value(), loc(0, 0, 0));
        assert_eq!(res.pointers[""].value_end(), loc(2, 1, 18));
        assert_eq!(res.pointers["/foo"].key(), loc(1, 2, 4));
        assert_eq!(res.pointers["/foo"].key_end(), loc(1, 7, 9));
        assert_eq!(res.pointers["/foo"].value(), loc(1, 9, 11));
        assert_eq!(res.pointers["/foo"].value_end(), loc(1, 14, 16));
        assert_same_as_serde(&res, source);

        let source = r#"{
            "name": "John",
            "age": 30.0
        }"#;
        let res = parse_object(source);
        assert_eq!(res.pointers["/age"].value(), loc(2, 19, 49));
        assert_eq!(res.pointers["/age"].value_end(), loc(2, 23, 53));
        assert_same_as_serde(&res, source);

        let source = r#"{"number":1.23e+10000}"#;
        let res = parse_object(source);
        assert_eq!(res.pointers["/number"].value(), loc(0, 10, 10));
        assert_eq!(res.pointers["/number"].value_end(), loc(0, 21, 21));

        let source = r#"{"number":-1.23e-10000}"#;
        let res = parse_object(source);
        assert_eq!(res.pointers["/number"].value(), loc(0, 10, 10));
        assert_eq!(res.pointers["/number"].value_end(), loc(0, 22, 22));

        let source = r#"{"number":-0.0}"#;
        let res = parse_object(source);
        assert_eq!(res.pointers["/number"].value(), loc(0, 10, 10));
        assert_eq!(res.pointers["/number"].value_end(), loc(0, 14, 14));
        assert_same_as_serde(&res, source);

        let source = r#"{"code":"\u0020"}"#;
        let res = parse_object(source);
        assert_eq!(res.pointers["/code"].value(), loc(0, 8, 8));
        assert_eq!(res.pointers["/code"].value_end(), loc(0, 16, 16));
        assert_same_as_serde(&res, source);

        // Positions count characters, not bytes, so multi-byte text advances
        // the cursor by one per character.
        let source = r#"{"chinese":"你好"}"#;
        let res = parse_object(source);
        assert_eq!(res.pointers["/chinese"].value(), loc(0, 11, 11));
        assert_eq!(res.pointers["/chinese"].value_end(), loc(0, 15, 15));
    }
}