bms_rs/bmson/
parse.rs

1//! This is a parser for JSON.
2
3use ariadne::{Color, Report, ReportKind};
4use chumsky::{error::RichReason, prelude::*};
5use serde_json::Value;
6
7use crate::diagnostics::{SimpleSource, ToAriadne, build_report};
8
9/// This is a parser for JSON.
10///
11/// Parsing from str, returning [`Value`]. Chumsky emits `Rich<char>` internally,
12/// which we later classify into `Warning` (custom diagnostics) and `Recovered`
13/// (grammar errors recovered by the parser).
14#[must_use]
15pub fn parser<'a>() -> impl Parser<'a, &'a str, Value, extra::Err<Rich<'a, char>>> {
16    recursive(|value| {
17        let digits = text::digits(10).to_slice();
18
19        let frac = just('.').then(digits);
20
21        let exp = just('e')
22            .or(just('E'))
23            .then(one_of("+-").or_not())
24            .then(digits);
25
26        let number = just('-')
27            .or_not()
28            .then(text::int(10))
29            .then(frac.or_not())
30            .then(exp.or_not())
31            .to_slice()
32            .map(|s: &str| {
33                // Try to parse as integer first, then as float
34                s.parse::<i64>()
35                    .map(|i| Value::Number(serde_json::Number::from(i)))
36                    .or_else(|_| {
37                        s.parse::<f64>().map(|f| {
38                            Value::Number(
39                                serde_json::Number::from_f64(f)
40                                    .unwrap_or_else(|| serde_json::Number::from(0)),
41                            )
42                        })
43                    })
44                    .unwrap_or_else(|_| Value::Number(serde_json::Number::from(0)))
45            })
46            .boxed();
47
48        let escape = just('\\')
49            .then(choice((
50                just('\\'),
51                just('/'),
52                just('"'),
53                just('b').to('\x08'),
54                just('f').to('\x0C'),
55                just('n').to('\n'),
56                just('r').to('\r'),
57                just('t').to('\t'),
58                just('u').ignore_then(text::digits(16).exactly(4).to_slice().validate(
59                    |digits, e, emitter| {
60                        char::from_u32(u32::from_str_radix(digits, 16).unwrap()).unwrap_or_else(
61                            || {
62                                emitter.emit(Rich::custom(e.span(), "invalid unicode character"));
63                                '\u{FFFD}' // unicode replacement character
64                            },
65                        )
66                    },
67                )),
68            )))
69            .ignored()
70            .boxed();
71
72        let string = none_of("\\\"")
73            .ignored()
74            .or(escape)
75            .repeated()
76            .to_slice()
77            .map(ToString::to_string)
78            .delimited_by(just('"'), just('"'))
79            .boxed();
80
81        let array = value
82            .clone()
83            .separated_by(just(',').padded().recover_with(skip_then_retry_until(
84                any().ignored(),
85                one_of(",]").ignored(),
86            )))
87            .allow_trailing()
88            .collect()
89            .padded()
90            .delimited_by(
91                just('['),
92                just(']')
93                    .ignored()
94                    .recover_with(via_parser(end()))
95                    .recover_with(skip_then_retry_until(any().ignored(), end())),
96            )
97            .boxed();
98
99        let member = string
100            .clone()
101            .then_ignore(just(':').padded())
102            .then(value.clone());
103
104        // Support objects with:
105        // - normal commas
106        // - missing commas between members (emit an error but continue)
107        // - a trailing comma before the closing '}'
108        let subsequent_member = choice((
109            // Normal: comma then member
110            just(',').padded().ignore_then(member.clone()).map(Some),
111            // Missing comma: directly another member. Emit an error and continue.
112            member
113                .clone()
114                .validate(|m, e, emitter| {
115                    emitter.emit(Rich::custom(
116                        e.span(),
117                        "expected ',' between object members",
118                    ));
119                    m
120                })
121                .map(Some),
122            // Trailing comma: consume it and yield no item
123            just(',').padded().to::<Option<(String, Value)>>(None),
124        ));
125
126        let members = member
127            .clone()
128            .or_not()
129            .then(subsequent_member.repeated().collect::<Vec<_>>())
130            .map(|(first_opt, rest)| {
131                let mut pairs: Vec<(String, Value)> = Vec::new();
132                if let Some(first) = first_opt {
133                    pairs.push(first);
134                }
135                for item in rest.into_iter().flatten() {
136                    pairs.push(item);
137                }
138                pairs
139            });
140
141        let object = members
142            .map(|pairs| {
143                let mut map = serde_json::Map::new();
144                for (key, value) in pairs {
145                    map.insert(key, value);
146                }
147                Value::Object(map)
148            })
149            .padded()
150            .delimited_by(
151                just('{'),
152                just('}')
153                    .ignored()
154                    .recover_with(via_parser(end()))
155                    .recover_with(skip_then_retry_until(any().ignored(), end())),
156            )
157            .boxed();
158
159        choice((
160            just("null").to(Value::Null),
161            just("true").to(Value::Bool(true)),
162            just("false").to(Value::Bool(false)),
163            number,
164            string.map(Value::String),
165            array.map(Value::Array),
166            object,
167        ))
168        .recover_with(via_parser(nested_delimiters(
169            '{',
170            '}',
171            [('[', ']')],
172            |_| Value::Null,
173        )))
174        .recover_with(via_parser(nested_delimiters(
175            '[',
176            ']',
177            [('{', '}')],
178            |_| Value::Null,
179        )))
180        .recover_with(skip_then_retry_until(
181            any().ignored(),
182            one_of(",]}").ignored(),
183        ))
184        .padded()
185    })
186}
187
188/// Error recovered by the JSON parser. These originated from grammar mismatches
189/// that were recovered via `recover_with` or similar mechanisms.
190#[derive(Debug, Clone)]
191pub struct Recovered<'a>(pub Rich<'a, char>);
192
193/// Diagnostic warning intentionally emitted by the JSON parser using `Rich::custom`.
194#[derive(Debug, Clone)]
195pub struct Warning<'a>(pub Rich<'a, char>);
196
197/// Unrecoverable JSON parsing error (no output value was produced).
198#[derive(Debug, Clone)]
199pub struct Error<'a>(pub Rich<'a, char>);
200
201impl<'a> ToAriadne for Recovered<'a> {
202    fn to_report<'b>(
203        &self,
204        src: &SimpleSource<'b>,
205    ) -> Report<'b, (String, std::ops::Range<usize>)> {
206        let span = self.0.span();
207        let message = self.0.to_string();
208        build_report(
209            src,
210            ReportKind::Advice,
211            span.start..span.end,
212            "JSON recovered parsing issue",
213            message,
214            Color::Blue,
215        )
216    }
217}
218
219impl<'a> ToAriadne for Warning<'a> {
220    fn to_report<'b>(
221        &self,
222        src: &SimpleSource<'b>,
223    ) -> Report<'b, (String, std::ops::Range<usize>)> {
224        let span = self.0.span();
225        let message = self.0.to_string();
226        build_report(
227            src,
228            ReportKind::Warning,
229            span.start..span.end,
230            "JSON parsing warning",
231            message,
232            Color::Yellow,
233        )
234    }
235}
236
237impl<'a> ToAriadne for Error<'a> {
238    fn to_report<'b>(
239        &self,
240        src: &SimpleSource<'b>,
241    ) -> Report<'b, (String, std::ops::Range<usize>)> {
242        let span = self.0.span();
243        let message = self.0.to_string();
244        build_report(
245            src,
246            ReportKind::Error,
247            span.start..span.end,
248            "JSON parsing error",
249            message,
250            Color::Red,
251        )
252    }
253}
254
255/// Split chumsky `Rich<char>` errors into `Warning`, `Recovered`, and `Error` buckets.
256#[must_use]
257pub fn split_chumsky_errors<'a>(
258    errors: impl IntoIterator<Item = Rich<'a, char>>,
259    had_output: bool,
260) -> (Vec<Warning<'a>>, Vec<Recovered<'a>>, Vec<Error<'a>>) {
261    let mut warnings = Vec::new();
262    let mut recovered = Vec::new();
263    let mut fatal = Vec::new();
264    for err in errors {
265        match err.reason() {
266            // Custom reasons are produced via `Rich::custom(...)` in this module,
267            // which we treat as non-fatal parser diagnostics.
268            RichReason::Custom(_) => warnings.push(Warning(err)),
269            // All other errors: recovered if we produced an output value, otherwise fatal.
270            _ if had_output => recovered.push(Recovered(err)),
271            _ => fatal.push(Error(err)),
272        }
273    }
274    (warnings, recovered, fatal)
275}