// json.rs — JSON parser example (from the Logos crate examples).
//! JSON parser written in Rust, using Logos.
//!
//! If the file is a valid JSON value, it will be printed
//! to the terminal using the debug format.
//!
//! Otherwise, an error will be printed with its location.
//!
//! Usage:
//!     cargo run --example json <path/to/file>
//!
//! Example:
//!     cargo run --example json examples/example.json
14/* ANCHOR: all */
15use logos::{Lexer, Logos, Span};
16
17use std::collections::HashMap;
18use std::env;
19use std::fs;
20
21type Error = (String, Span);
22
23type Result<T> = std::result::Result<T, Error>;
24
25/* ANCHOR: tokens */
26/// All meaningful JSON tokens.
27///
28/// > NOTE: regexes for [`Token::Number`] and [`Token::String`] may not
29/// > catch all possible values, especially for strings. If you find
30/// > errors, please report them so that we can improve the regex.
31#[derive(Debug, Logos)]
32#[logos(skip r"[ \t\r\n\f]+")]
33enum Token {
34    #[token("false", |_| false)]
35    #[token("true", |_| true)]
36    Bool(bool),
37
38    #[token("{")]
39    BraceOpen,
40
41    #[token("}")]
42    BraceClose,
43
44    #[token("[")]
45    BracketOpen,
46
47    #[token("]")]
48    BracketClose,
49
50    #[token(":")]
51    Colon,
52
53    #[token(",")]
54    Comma,
55
56    #[token("null")]
57    Null,
58
59    #[regex(r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?", |lex| lex.slice().parse::<f64>().unwrap())]
60    Number(f64),
61
62    #[regex(r#""([^"\\\x00-\x1F]|\\(["\\bnfrt/]|u[a-fA-F0-9]{4}))*""#, |lex| lex.slice().to_owned())]
63    String(String),
64}
65/* ANCHOR_END: tokens */
66
/* ANCHOR: values */
/// Represent any valid JSON value.
#[allow(unused)]
#[derive(Debug)]
enum Value {
    /// null.
    Null,
    /// true or false.
    Bool(bool),
    /// Any floating point number.
    Number(f64),
    /// Any quoted string.
    String(String),
    /// An array of values.
    Array(Vec<Value>),
    /// A dictionary mapping keys and values.
    Object(HashMap<String, Value>),
}
/* ANCHOR_END: values */
86
87/* ANCHOR: value */
88/// Parse a token stream into a JSON value.
89fn parse_value(lexer: &mut Lexer<'_, Token>) -> Result<Value> {
90    if let Some(token) = lexer.next() {
91        match token {
92            Ok(Token::Bool(b)) => Ok(Value::Bool(b)),
93            Ok(Token::BraceOpen) => parse_object(lexer),
94            Ok(Token::BracketOpen) => parse_array(lexer),
95            Ok(Token::Null) => Ok(Value::Null),
96            Ok(Token::Number(n)) => Ok(Value::Number(n)),
97            Ok(Token::String(s)) => Ok(Value::String(s)),
98            _ => Err((
99                "unexpected token here (context: value)".to_owned(),
100                lexer.span(),
101            )),
102        }
103    } else {
104        Err(("empty values are not allowed".to_owned(), lexer.span()))
105    }
106}
107/* ANCHOR_END: value */
108
109/* ANCHOR: array */
110/// Parse a token stream into an array and return when
111/// a valid terminator is found.
112///
113/// > NOTE: we assume '[' was consumed.
114fn parse_array(lexer: &mut Lexer<'_, Token>) -> Result<Value> {
115    let mut array = Vec::new();
116    let span = lexer.span();
117    let mut awaits_comma = false;
118    let mut awaits_value = false;
119
120    while let Some(token) = lexer.next() {
121        match token {
122            Ok(Token::Bool(b)) if !awaits_comma => {
123                array.push(Value::Bool(b));
124                awaits_value = false;
125            }
126            Ok(Token::BraceOpen) if !awaits_comma => {
127                let object = parse_object(lexer)?;
128                array.push(object);
129                awaits_value = false;
130            }
131            Ok(Token::BracketOpen) if !awaits_comma => {
132                let sub_array = parse_array(lexer)?;
133                array.push(sub_array);
134                awaits_value = false;
135            }
136            Ok(Token::BracketClose) if !awaits_value => return Ok(Value::Array(array)),
137            Ok(Token::Comma) if awaits_comma => awaits_value = true,
138            Ok(Token::Null) if !awaits_comma => {
139                array.push(Value::Null);
140                awaits_value = false
141            }
142            Ok(Token::Number(n)) if !awaits_comma => {
143                array.push(Value::Number(n));
144                awaits_value = false;
145            }
146            Ok(Token::String(s)) if !awaits_comma => {
147                array.push(Value::String(s));
148                awaits_value = false;
149            }
150            _ => {
151                return Err((
152                    "unexpected token here (context: array)".to_owned(),
153                    lexer.span(),
154                ))
155            }
156        }
157        awaits_comma = !awaits_value;
158    }
159    Err(("unmatched opening bracket defined here".to_owned(), span))
160}
161/* ANCHOR_END: array */
162
163/* ANCHOR: object */
164/// Parse a token stream into an object and return when
165/// a valid terminator is found.
166///
167/// > NOTE: we assume '{' was consumed.
168fn parse_object(lexer: &mut Lexer<'_, Token>) -> Result<Value> {
169    let mut map = HashMap::new();
170    let span = lexer.span();
171    let mut awaits_comma = false;
172    let mut awaits_key = false;
173
174    while let Some(token) = lexer.next() {
175        match token {
176            Ok(Token::BraceClose) if !awaits_key => return Ok(Value::Object(map)),
177            Ok(Token::Comma) if awaits_comma => awaits_key = true,
178            Ok(Token::String(key)) if !awaits_comma => {
179                match lexer.next() {
180                    Some(Ok(Token::Colon)) => (),
181                    _ => {
182                        return Err((
183                            "unexpected token here, expecting ':'".to_owned(),
184                            lexer.span(),
185                        ))
186                    }
187                }
188                let value = parse_value(lexer)?;
189                map.insert(key, value);
190                awaits_key = false;
191            }
192            _ => {
193                return Err((
194                    "unexpected token here (context: object)".to_owned(),
195                    lexer.span(),
196                ))
197            }
198        }
199        awaits_comma = !awaits_key;
200    }
201    Err(("unmatched opening brace defined here".to_owned(), span))
202}
203/* ANCHOR_END: object */
204
205fn main() {
206    let filename = env::args().nth(1).expect("Expected file argument");
207    let src = fs::read_to_string(&filename).expect("Failed to read file");
208
209    let mut lexer = Token::lexer(src.as_str());
210
211    match parse_value(&mut lexer) {
212        Ok(value) => println!("{:#?}", value),
213        Err((msg, span)) => {
214            use ariadne::{ColorGenerator, Label, Report, ReportKind, Source};
215
216            let mut colors = ColorGenerator::new();
217
218            let a = colors.next();
219
220            Report::build(ReportKind::Error, &filename, 12)
221                .with_message("Invalid JSON".to_string())
222                .with_label(
223                    Label::new((&filename, span))
224                        .with_message(msg)
225                        .with_color(a),
226                )
227                .finish()
228                .eprint((&filename, Source::from(src)))
229                .unwrap();
230        }
231    }
232}
233/* ANCHOR_END: all */