json/
json.rs

1//! A JSON value parser.
2//!
3//! You can test it by running
4//!
5//!     cargo run --example json -- foo.json
6//!
7//! to let it parse some file `foo.json`, or
8//!
9//!     cargo run --example json --
10//!
11//! to let it parse from standard input (terminate with CTRL-D or similar).
12use parcours::str::{matches, take_while};
13use parcours::{any, lazy, Combinator, Parser};
14
/// A parsed JSON value, generic over the string type `S`.
///
/// `S` is used for everything that is kept as text:
/// string contents, object keys, and the textual form of numbers.
#[derive(Debug, Clone)]
enum JsonVal<S> {
    /// An array, holding its elements in order.
    Arr(Vec<JsonVal<S>>),
    /// An object, holding its key-value pairs in order.
    Map(Vec<(S, JsonVal<S>)>),
    /// A number, stored as text.
    Num(S),
    /// A string.
    Str(S),
    /// The literal `true`.
    True,
    /// The literal `false`.
    False,
    /// The literal `null`.
    Null,
}
26
27/// Parse whitespace and return it.
28fn space<'a, S>() -> impl Parser<&'a str, S, O = &'a str> + Clone {
29    take_while(|c, _| c.is_ascii_whitespace())
30}
31
32/// Parse a JSON number and return its string representation.
33///
34/// This is implemented as state machine to achieve better performance,
35/// exploiting that `take_while` accepts mutable closures.
36fn num<'a, S>() -> impl Parser<&'a str, S, O = &'a str> + Clone {
37    // are we reading the first character?
38    let mut first = true;
39    // have we encountered no dot so far?
40    let mut no_dot = true;
41    // have we encountered no exponent character ('e' or 'E') so far?
42    let mut no_exp = true;
43    take_while(move |c, _| match c {
44        b'0'..=b'9' => {
45            first = false;
46            true
47        }
48        b'-' if first => core::mem::replace(&mut first, false),
49        b'.' if !first && no_dot && no_exp => core::mem::replace(&mut no_dot, false),
50        b'e' | b'E' if !first && no_exp => core::mem::replace(&mut no_exp, false),
51        _ => false,
52    })
53    // the last character of a number must always be a digit
54    .filter(|s| match s.bytes().last() {
55        Some(c) => c.is_ascii_digit(),
56        _ => false,
57    })
58}
59
60/// Parse a string and return it.
61///
62/// Like `num()`, this is implemented as a state machine.
63///
64/// This parser does minimal work to handle escaping; in particular,
65/// it does not specially handle escaping sequences such as "\n" or "\t",
66/// because we can directly use newlines and tabulators in JSON strings,
67/// however, this parser handles escaped quotes,
68/// otherwise we could not parse any JSON string containing quotes.
69fn str<'a, S>() -> impl Parser<&'a str, S, O = &'a str> + Clone {
70    // was the previous character an escaping backslash ('\')?
71    let mut escaped = false;
72    take_while(move |c, _| match c {
73        b'\\' if !escaped => {
74            escaped = true;
75            true
76        }
77        b'"' if !escaped => false,
78        // anything preceded by an escaped backslash is ignored
79        _ if escaped => core::mem::replace(&mut escaped, false),
80        _ => true,
81    })
82}
83
84/// Parse a JSON value, possibly followed by some space.
85///
86/// Here, we use `lazy!` to construct a recursive parser.
87fn json<'a>() -> impl Parser<&'a str, O = JsonVal<&'a str>> {
88    let str = str().delimited_by(matches("\""), matches("\""));
89
90    let token = |s: &'a str| matches(s).then_ignore(space());
91    let arr = lazy!(json).separated_by(token(","));
92    let arr = arr.delimited_by(token("["), matches("]"));
93    let map = str.clone().then_ignore(token(":")).then(lazy!(json));
94    let map = map.separated_by(token(","));
95    let map = map.delimited_by(token("{"), matches("}"));
96
97    any((
98        arr.map(JsonVal::Arr),
99        map.map(JsonVal::Map),
100        str.map(JsonVal::Str),
101        num().map(JsonVal::Num),
102        matches("true").map(|_| JsonVal::True),
103        matches("false").map(|_| JsonVal::False),
104        matches("null").map(|_| JsonVal::Null),
105    ))
106    .then_ignore(space())
107}
108
109/*
110/// Create a JSON array that contains `n` repetitions of `s`.
111fn many(s: &str, n: usize) -> String {
112    let mut json = "[".to_string();
113    json.push_str(s);
114    for _ in 1..n {
115        json.push(',');
116        json.push_str(s);
117    }
118    json.push(']');
119    json
120}
121*/
122
123fn main() -> std::io::Result<()> {
124    /*
125    let bla = "y̆es";
126    let input = r#"[[1,true  ,   "bla" , 1  , []  ]] []2"#;
127    */
128
129    // read from file if one is provided as argument, else from standard input
130    let mut args = std::env::args();
131    args.next();
132    let input = match args.next() {
133        Some(arg) => std::fs::read_to_string(arg)?,
134        None => std::io::read_to_string(std::io::stdin())?,
135    };
136
137    //let input = many(r#"{"key": 12345}"#, 10_000_000);
138    //println!("{}", input.len());
139
140    // we first have to strip away any space, because that's what the parser expects
141    let out = space().ignore_then(json()).parse(&input, &mut ());
142    println!("Parsed JSON: {:?}", out);
143    Ok(())
144}