json/json.rs
1//! A JSON value parser.
2//!
3//! You can test it by running
4//!
5//! cargo run --example json -- foo.json
6//!
7//! to let it parse some file `foo.json`, or
8//!
9//! cargo run --example json --
10//!
11//! to let it parse from standard input (terminate with CTRL-D or similar).
12use parcours::str::{matches, take_while};
13use parcours::{any, lazy, Combinator, Parser};
14
/// A JSON value, generic over the string type `S`.
///
/// `S` is used for everything that is kept as text: object keys,
/// string contents, and numbers.
#[derive(Clone, Debug)]
enum JsonVal<S> {
    /// An array of values, in source order.
    Arr(Vec<JsonVal<S>>),
    /// An object, stored as a list of key-value pairs in source order.
    Map(Vec<(S, JsonVal<S>)>),
    /// A number, kept in its textual form.
    Num(S),
    /// A string.
    Str(S),
    /// The literal `true`.
    True,
    /// The literal `false`.
    False,
    /// The literal `null`.
    Null,
}
26
27/// Parse whitespace and return it.
28fn space<'a, S>() -> impl Parser<&'a str, S, O = &'a str> + Clone {
29 take_while(|c, _| c.is_ascii_whitespace())
30}
31
32/// Parse a JSON number and return its string representation.
33///
34/// This is implemented as state machine to achieve better performance,
35/// exploiting that `take_while` accepts mutable closures.
36fn num<'a, S>() -> impl Parser<&'a str, S, O = &'a str> + Clone {
37 // are we reading the first character?
38 let mut first = true;
39 // have we encountered no dot so far?
40 let mut no_dot = true;
41 // have we encountered no exponent character ('e' or 'E') so far?
42 let mut no_exp = true;
43 take_while(move |c, _| match c {
44 b'0'..=b'9' => {
45 first = false;
46 true
47 }
48 b'-' if first => core::mem::replace(&mut first, false),
49 b'.' if !first && no_dot && no_exp => core::mem::replace(&mut no_dot, false),
50 b'e' | b'E' if !first && no_exp => core::mem::replace(&mut no_exp, false),
51 _ => false,
52 })
53 // the last character of a number must always be a digit
54 .filter(|s| match s.bytes().last() {
55 Some(c) => c.is_ascii_digit(),
56 _ => false,
57 })
58}
59
60/// Parse a string and return it.
61///
62/// Like `num()`, this is implemented as a state machine.
63///
64/// This parser does minimal work to handle escaping; in particular,
65/// it does not specially handle escaping sequences such as "\n" or "\t",
66/// because we can directly use newlines and tabulators in JSON strings,
67/// however, this parser handles escaped quotes,
68/// otherwise we could not parse any JSON string containing quotes.
69fn str<'a, S>() -> impl Parser<&'a str, S, O = &'a str> + Clone {
70 // was the previous character an escaping backslash ('\')?
71 let mut escaped = false;
72 take_while(move |c, _| match c {
73 b'\\' if !escaped => {
74 escaped = true;
75 true
76 }
77 b'"' if !escaped => false,
78 // anything preceded by an escaped backslash is ignored
79 _ if escaped => core::mem::replace(&mut escaped, false),
80 _ => true,
81 })
82}
83
84/// Parse a JSON value, possibly followed by some space.
85///
86/// Here, we use `lazy!` to construct a recursive parser.
87fn json<'a>() -> impl Parser<&'a str, O = JsonVal<&'a str>> {
88 let str = str().delimited_by(matches("\""), matches("\""));
89
90 let token = |s: &'a str| matches(s).then_ignore(space());
91 let arr = lazy!(json).separated_by(token(","));
92 let arr = arr.delimited_by(token("["), matches("]"));
93 let map = str.clone().then_ignore(token(":")).then(lazy!(json));
94 let map = map.separated_by(token(","));
95 let map = map.delimited_by(token("{"), matches("}"));
96
97 any((
98 arr.map(JsonVal::Arr),
99 map.map(JsonVal::Map),
100 str.map(JsonVal::Str),
101 num().map(JsonVal::Num),
102 matches("true").map(|_| JsonVal::True),
103 matches("false").map(|_| JsonVal::False),
104 matches("null").map(|_| JsonVal::Null),
105 ))
106 .then_ignore(space())
107}
108
109/*
110/// Create a JSON array that contains `n` repetitions of `s`.
111fn many(s: &str, n: usize) -> String {
112 let mut json = "[".to_string();
113 json.push_str(s);
114 for _ in 1..n {
115 json.push(',');
116 json.push_str(s);
117 }
118 json.push(']');
119 json
120}
121*/
122
123fn main() -> std::io::Result<()> {
124 /*
125 let bla = "y̆es";
126 let input = r#"[[1,true , "bla" , 1 , [] ]] []2"#;
127 */
128
129 // read from file if one is provided as argument, else from standard input
130 let mut args = std::env::args();
131 args.next();
132 let input = match args.next() {
133 Some(arg) => std::fs::read_to_string(arg)?,
134 None => std::io::read_to_string(std::io::stdin())?,
135 };
136
137 //let input = many(r#"{"key": 12345}"#, 10_000_000);
138 //println!("{}", input.len());
139
140 // we first have to strip away any space, because that's what the parser expects
141 let out = space().ignore_then(json()).parse(&input, &mut ());
142 println!("Parsed JSON: {:?}", out);
143 Ok(())
144}