json_borrowed/
json_borrowed.rs

1//! Variant of JSON parser example, but
2//! using borrowed string slices to avoid copies.
3//!
4//! Usage:
5//!     cargo run --example json-borrowed <path/to/file>
6//!
7//! Example:
8//!     cargo run --example json-borrowed examples/example.json
9
10/* ANCHOR: all */
11use logos::{Lexer, Logos, Span};
12
13use std::collections::HashMap;
14use std::env;
15use std::fs;
16
/// Parse error: a human-readable message paired with the source span it refers to.
type Error = (String, Span);

/// Result type returned by the parsing functions in this file.
type Result<T> = std::result::Result<T, Error>;
20
21/* ANCHOR: tokens */
/// All meaningful JSON tokens.
///
/// Runs of whitespace (space, tab, carriage return, newline, form feed) are
/// skipped by the lexer and never produce a token.
///
/// > NOTE: regexes for [`Token::Number`] and [`Token::String`] may not
/// > catch all possible values, especially for strings. If you find
/// > errors, please report them so that we can improve the regex.
#[derive(Debug, Logos)]
#[logos(skip r"[ \t\r\n\f]+")]
enum Token<'source> {
    /// The literal `false` or `true`, carried as the corresponding `bool`.
    #[token("false", |_| false)]
    #[token("true", |_| true)]
    Bool(bool),

    /// `{` — opens an object.
    #[token("{")]
    BraceOpen,

    /// `}` — closes an object.
    #[token("}")]
    BraceClose,

    /// `[` — opens an array.
    #[token("[")]
    BracketOpen,

    /// `]` — closes an array.
    #[token("]")]
    BracketClose,

    /// `:` — separates a key from its value inside an object.
    #[token(":")]
    Colon,

    /// `,` — separates array elements and object members.
    #[token(",")]
    Comma,

    /// The literal `null`.
    #[token("null")]
    Null,

    /// A number literal, converted to `f64` from the matched text.
    // NOTE(review): the `unwrap` presumably cannot fail because the regex only
    // matches text in a format that `f64`'s parser accepts — confirm.
    #[regex(r"-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?", |lex| lex.slice().parse::<f64>().unwrap())]
    Number(f64),

    /// A double-quoted string literal, borrowed from the input instead of copied.
    // NOTE(review): the callback stores `lex.slice()` as-is, so the slice
    // presumably still includes the surrounding quotes and leaves escape
    // sequences undecoded — confirm against the logos `Lexer::slice` docs.
    #[regex(r#""([^"\\\x00-\x1F]|\\(["\\bnfrt/]|u[a-fA-F0-9]{4}))*""#, |lex| lex.slice())]
    String(&'source str),
}
61/* ANCHOR_END: tokens */
62
63/* ANCHOR: values */
/// Represent any valid JSON value.
///
/// String data (string values and object keys) is borrowed with the
/// `'source` lifetime rather than stored as owned `String`s.
#[derive(Debug)]
enum Value<'source> {
    /// null.
    Null,
    /// true or false.
    Bool(bool),
    /// Any floating point number.
    Number(f64),
    /// Any quoted string.
    String(&'source str),
    /// An array of values.
    Array(Vec<Value<'source>>),
    /// A dictionary mapping keys to values.
    Object(HashMap<&'source str, Value<'source>>),
}
80/* ANCHOR_END: values */
81
82/* ANCHOR: value */
83/// Parse a token stream into a JSON value.
84fn parse_value<'source>(lexer: &mut Lexer<'source, Token<'source>>) -> Result<Value<'source>> {
85    if let Some(token) = lexer.next() {
86        match token {
87            Ok(Token::Bool(b)) => Ok(Value::Bool(b)),
88            Ok(Token::BraceOpen) => parse_object(lexer),
89            Ok(Token::BracketOpen) => parse_array(lexer),
90            Ok(Token::Null) => Ok(Value::Null),
91            Ok(Token::Number(n)) => Ok(Value::Number(n)),
92            Ok(Token::String(s)) => Ok(Value::String(s)),
93            _ => Err((
94                "unexpected token here (context: value)".to_owned(),
95                lexer.span(),
96            )),
97        }
98    } else {
99        Err(("empty values are not allowed".to_owned(), lexer.span()))
100    }
101}
102/* ANCHOR_END: value */
103
104/* ANCHOR: array */
105/// Parse a token stream into an array and return when
106/// a valid terminator is found.
107///
108/// > NOTE: we assume '[' was consumed.
109fn parse_array<'source>(lexer: &mut Lexer<'source, Token<'source>>) -> Result<Value<'source>> {
110    let mut array = Vec::new();
111    let span = lexer.span();
112    let mut awaits_comma = false;
113    let mut awaits_value = false;
114
115    while let Some(token) = lexer.next() {
116        match token {
117            Ok(Token::Bool(b)) if !awaits_comma => {
118                array.push(Value::Bool(b));
119                awaits_value = false;
120            }
121            Ok(Token::BraceOpen) if !awaits_comma => {
122                let object = parse_object(lexer)?;
123                array.push(object);
124                awaits_value = false;
125            }
126            Ok(Token::BracketOpen) if !awaits_comma => {
127                let sub_array = parse_array(lexer)?;
128                array.push(sub_array);
129                awaits_value = false;
130            }
131            Ok(Token::BracketClose) if !awaits_value => return Ok(Value::Array(array)),
132            Ok(Token::Comma) if awaits_comma => awaits_value = true,
133            Ok(Token::Null) if !awaits_comma => {
134                array.push(Value::Null);
135                awaits_value = false
136            }
137            Ok(Token::Number(n)) if !awaits_comma => {
138                array.push(Value::Number(n));
139                awaits_value = false;
140            }
141            Ok(Token::String(s)) if !awaits_comma => {
142                array.push(Value::String(s));
143                awaits_value = false;
144            }
145            _ => {
146                return Err((
147                    "unexpected token here (context: array)".to_owned(),
148                    lexer.span(),
149                ))
150            }
151        }
152        awaits_comma = !awaits_value;
153    }
154    Err(("unmatched opening bracket defined here".to_owned(), span))
155}
156/* ANCHOR_END: array */
157
158/* ANCHOR: object */
159/// Parse a token stream into an object and return when
160/// a valid terminator is found.
161///
162/// > NOTE: we assume '{' was consumed.
163fn parse_object<'source>(lexer: &mut Lexer<'source, Token<'source>>) -> Result<Value<'source>> {
164    let mut map = HashMap::new();
165    let span = lexer.span();
166    let mut awaits_comma = false;
167    let mut awaits_key = false;
168
169    while let Some(token) = lexer.next() {
170        match token {
171            Ok(Token::BraceClose) if !awaits_key => return Ok(Value::Object(map)),
172            Ok(Token::Comma) if awaits_comma => awaits_key = true,
173            Ok(Token::String(key)) if !awaits_comma => {
174                match lexer.next() {
175                    Some(Ok(Token::Colon)) => (),
176                    _ => {
177                        return Err((
178                            "unexpected token here, expecting ':'".to_owned(),
179                            lexer.span(),
180                        ))
181                    }
182                }
183                let value = parse_value(lexer)?;
184                map.insert(key, value);
185                awaits_key = false;
186            }
187            _ => {
188                return Err((
189                    "unexpected token here (context: object)".to_owned(),
190                    lexer.span(),
191                ))
192            }
193        }
194        awaits_comma = !awaits_key;
195    }
196    Err(("unmatched opening brace defined here".to_owned(), span))
197}
198/* ANCHOR_END: object */
199
200fn main() {
201    let filename = env::args().nth(1).expect("Expected file argument");
202    let src = fs::read_to_string(&filename).expect("Failed to read file");
203
204    let mut lexer = Token::lexer(src.as_str());
205
206    match parse_value(&mut lexer) {
207        Ok(value) => println!("{:#?}", value),
208        Err((msg, span)) => {
209            use ariadne::{ColorGenerator, Label, Report, ReportKind, Source};
210
211            let mut colors = ColorGenerator::new();
212
213            let a = colors.next();
214
215            Report::build(ReportKind::Error, &filename, 12)
216                .with_message("Invalid JSON".to_string())
217                .with_label(
218                    Label::new((&filename, span))
219                        .with_message(msg)
220                        .with_color(a),
221                )
222                .finish()
223                .eprint((&filename, Source::from(src)))
224                .unwrap();
225        }
226    }
227}
228/* ANCHOR_END: all */