Skip to main content

reliakit_json/
parse.rs

//! Strict, bounded JSON parser.
2
3use alloc::collections::BTreeSet;
4use alloc::string::{String, ToString};
5use alloc::vec::Vec;
6
7use crate::error::{JsonError, JsonErrorKind, JsonLimitKind, JsonPath, JsonPathSegment};
8use crate::limits::JsonLimits;
9use crate::number::{is_valid_json_number, JsonNumber};
10use crate::value::{JsonObject, JsonValue};
11
12/// Parses a JSON value from UTF-8 bytes using the default
13/// [`JsonLimits`].
14pub fn parse(input: &[u8]) -> Result<JsonValue, JsonError> {
15    parse_with_limits(input, JsonLimits::new())
16}
17
18/// Parses a JSON value from a `&str` using the default [`JsonLimits`].
19pub fn parse_str(input: &str) -> Result<JsonValue, JsonError> {
20    parse_with_limits(input.as_bytes(), JsonLimits::new())
21}
22
23/// Parses a JSON value from UTF-8 bytes with explicit [`JsonLimits`].
24pub fn parse_with_limits(input: &[u8], limits: JsonLimits) -> Result<JsonValue, JsonError> {
25    if input.len() > limits.max_input_bytes {
26        return Err(JsonError::new(
27            JsonErrorKind::LimitExceeded(JsonLimitKind::InputBytes),
28            0,
29            1,
30            1,
31        )
32        .with_path(JsonPath::default()));
33    }
34
35    if let Err(e) = core::str::from_utf8(input) {
36        let offset = e.valid_up_to();
37        let (line, column) = line_column(input, offset);
38        return Err(
39            JsonError::new(JsonErrorKind::InvalidUtf8, offset, line, column)
40                .with_path(JsonPath::default()),
41        );
42    }
43
44    // Reject a leading UTF-8 byte-order mark (valid UTF-8, but not valid JSON).
45    if input.starts_with(&[0xEF, 0xBB, 0xBF]) {
46        return Err(
47            JsonError::new(JsonErrorKind::InvalidUtf8, 0, 1, 1).with_path(JsonPath::default())
48        );
49    }
50
51    let mut parser = Parser::new(input, limits);
52    parser.skip_ws();
53    let value = parser.parse_value(0)?;
54    parser.skip_ws();
55    if parser.pos != parser.input.len() {
56        return Err(parser.error(JsonErrorKind::TrailingData));
57    }
58    Ok(value)
59}
60
/// Computes the 1-based `(line, column)` of byte `offset` within `input`.
///
/// Every `\n` before `offset` starts a new line and resets the column to 1;
/// every other byte advances the column by one, so columns count bytes rather
/// than characters. An `offset` past the end of `input` is clamped to the
/// input length.
fn line_column(input: &[u8], offset: usize) -> (usize, usize) {
    let end = offset.min(input.len());
    input[..end].iter().fold((1, 1), |(line, column), &byte| {
        if byte == b'\n' {
            (line + 1, 1)
        } else {
            (line, column + 1)
        }
    })
}
74
75struct Parser<'a> {
76    input: &'a [u8],
77    pos: usize,
78    line: usize,
79    column: usize,
80    limits: JsonLimits,
81    nodes: usize,
82    decoded_string_bytes: usize,
83    path: Vec<JsonPathSegment>,
84}
85
86impl<'a> Parser<'a> {
87    fn new(input: &'a [u8], limits: JsonLimits) -> Self {
88        Self {
89            input,
90            pos: 0,
91            line: 1,
92            column: 1,
93            limits,
94            nodes: 0,
95            decoded_string_bytes: 0,
96            path: Vec::new(),
97        }
98    }
99
100    fn peek(&self) -> Option<u8> {
101        self.input.get(self.pos).copied()
102    }
103
104    fn bump(&mut self) -> u8 {
105        let b = self.input[self.pos];
106        self.pos += 1;
107        if b == b'\n' {
108            self.line += 1;
109            self.column = 1;
110        } else {
111            self.column += 1;
112        }
113        b
114    }
115
116    fn skip_ws(&mut self) {
117        while let Some(b) = self.peek() {
118            if matches!(b, b' ' | b'\t' | b'\n' | b'\r') {
119                self.bump();
120            } else {
121                break;
122            }
123        }
124    }
125
126    fn error(&self, kind: JsonErrorKind) -> JsonError {
127        JsonError::new(kind, self.pos, self.line, self.column)
128            .with_path(JsonPath::from_segments(self.path.clone()))
129    }
130
131    fn error_at(&self, kind: JsonErrorKind, pos: usize, line: usize, column: usize) -> JsonError {
132        JsonError::new(kind, pos, line, column)
133            .with_path(JsonPath::from_segments(self.path.clone()))
134    }
135
136    fn limit(&self, kind: JsonLimitKind) -> JsonError {
137        self.error(JsonErrorKind::LimitExceeded(kind))
138    }
139
140    fn parse_value(&mut self, depth: usize) -> Result<JsonValue, JsonError> {
141        self.nodes += 1;
142        if self.nodes > self.limits.max_total_nodes {
143            return Err(self.limit(JsonLimitKind::TotalNodes));
144        }
145        match self.peek() {
146            Some(b'{') => {
147                let d = depth + 1;
148                if d > self.limits.max_depth {
149                    return Err(self.limit(JsonLimitKind::Depth));
150                }
151                self.parse_object(d)
152            }
153            Some(b'[') => {
154                let d = depth + 1;
155                if d > self.limits.max_depth {
156                    return Err(self.limit(JsonLimitKind::Depth));
157                }
158                self.parse_array(d)
159            }
160            Some(b'"') => {
161                let s =
162                    self.parse_string(self.limits.max_string_bytes, JsonLimitKind::StringBytes)?;
163                Ok(JsonValue::String(s))
164            }
165            Some(b't') => self.parse_literal(b"true", JsonValue::Bool(true)),
166            Some(b'f') => self.parse_literal(b"false", JsonValue::Bool(false)),
167            Some(b'n') => self.parse_literal(b"null", JsonValue::Null),
168            Some(b'-') | Some(b'0'..=b'9') => self.parse_number(),
169            Some(_) => Err(self.error(JsonErrorKind::UnexpectedByte)),
170            None => Err(self.error(JsonErrorKind::UnexpectedEof)),
171        }
172    }
173
174    fn parse_literal(&mut self, word: &[u8], value: JsonValue) -> Result<JsonValue, JsonError> {
175        for &expected in word {
176            match self.peek() {
177                Some(b) if b == expected => {
178                    self.bump();
179                }
180                Some(_) => return Err(self.error(JsonErrorKind::UnexpectedByte)),
181                None => return Err(self.error(JsonErrorKind::UnexpectedEof)),
182            }
183        }
184        Ok(value)
185    }
186
187    fn parse_object(&mut self, depth: usize) -> Result<JsonValue, JsonError> {
188        self.bump(); // consume '{'
189        let mut object = JsonObject::new();
190        let mut seen: BTreeSet<String> = BTreeSet::new();
191
192        self.skip_ws();
193        if self.peek() == Some(b'}') {
194            self.bump();
195            return Ok(JsonValue::Object(object));
196        }
197
198        loop {
199            self.skip_ws();
200            // A key must come next.
201            if self.peek() != Some(b'"') {
202                return match self.peek() {
203                    None => Err(self.error(JsonErrorKind::UnexpectedEof)),
204                    _ => Err(self.error(JsonErrorKind::UnexpectedByte)),
205                };
206            }
207
208            let key_pos = self.pos;
209            let key_line = self.line;
210            let key_column = self.column;
211            let key = self.parse_string(self.limits.max_key_bytes, JsonLimitKind::KeyBytes)?;
212
213            if !seen.insert(key.clone()) {
214                return Err(self.error_at(
215                    JsonErrorKind::DuplicateKey,
216                    key_pos,
217                    key_line,
218                    key_column,
219                ));
220            }
221            if seen.len() > self.limits.max_object_members {
222                return Err(self.limit(JsonLimitKind::ObjectMembers));
223            }
224
225            self.skip_ws();
226            if self.peek() != Some(b':') {
227                return match self.peek() {
228                    None => Err(self.error(JsonErrorKind::UnexpectedEof)),
229                    _ => Err(self.error(JsonErrorKind::UnexpectedByte)),
230                };
231            }
232            self.bump(); // ':'
233            self.skip_ws();
234
235            self.path.push(JsonPathSegment::Key(key.clone()));
236            let value = self.parse_value(depth)?;
237            self.path.pop();
238            object.push_unique(key, value);
239
240            self.skip_ws();
241            match self.peek() {
242                Some(b',') => {
243                    self.bump();
244                }
245                Some(b'}') => {
246                    self.bump();
247                    return Ok(JsonValue::Object(object));
248                }
249                None => return Err(self.error(JsonErrorKind::UnexpectedEof)),
250                _ => return Err(self.error(JsonErrorKind::UnexpectedByte)),
251            }
252        }
253    }
254
255    fn parse_array(&mut self, depth: usize) -> Result<JsonValue, JsonError> {
256        self.bump(); // consume '['
257        let mut items: Vec<JsonValue> = Vec::new();
258
259        self.skip_ws();
260        if self.peek() == Some(b']') {
261            self.bump();
262            return Ok(JsonValue::Array(items));
263        }
264
265        loop {
266            self.skip_ws();
267            if items.len() >= self.limits.max_array_items {
268                return Err(self.limit(JsonLimitKind::ArrayItems));
269            }
270
271            self.path.push(JsonPathSegment::Index(items.len()));
272            let value = self.parse_value(depth)?;
273            self.path.pop();
274            items.push(value);
275
276            self.skip_ws();
277            match self.peek() {
278                Some(b',') => {
279                    self.bump();
280                }
281                Some(b']') => {
282                    self.bump();
283                    return Ok(JsonValue::Array(items));
284                }
285                None => return Err(self.error(JsonErrorKind::UnexpectedEof)),
286                _ => return Err(self.error(JsonErrorKind::UnexpectedByte)),
287            }
288        }
289    }
290
291    fn parse_string(
292        &mut self,
293        max_bytes: usize,
294        limit_kind: JsonLimitKind,
295    ) -> Result<String, JsonError> {
296        self.bump(); // consume opening '"'
297        let mut out = String::new();
298
299        loop {
300            match self.peek() {
301                None => return Err(self.error(JsonErrorKind::UnexpectedEof)),
302                Some(b'"') => {
303                    self.bump();
304                    self.decoded_string_bytes = self.decoded_string_bytes.saturating_add(out.len());
305                    if self.decoded_string_bytes > self.limits.max_total_decoded_string_bytes {
306                        return Err(self.limit(JsonLimitKind::TotalDecodedStringBytes));
307                    }
308                    return Ok(out);
309                }
310                Some(b'\\') => {
311                    self.bump();
312                    self.parse_escape(&mut out)?;
313                }
314                Some(b) if b < 0x20 => {
315                    return Err(self.error(JsonErrorKind::UnescapedControlCharacter));
316                }
317                Some(b) => {
318                    let len = utf8_len(b);
319                    // Input is validated UTF-8, so this slice is a whole scalar.
320                    let scalar = &self.input[self.pos..self.pos + len];
321                    out.push_str(core::str::from_utf8(scalar).expect("validated UTF-8"));
322                    for _ in 0..len {
323                        self.bump();
324                    }
325                }
326            }
327
328            if out.len() > max_bytes {
329                return Err(self.limit(limit_kind));
330            }
331        }
332    }
333
334    fn parse_escape(&mut self, out: &mut String) -> Result<(), JsonError> {
335        match self.peek() {
336            None => Err(self.error(JsonErrorKind::UnexpectedEof)),
337            Some(b'"') => {
338                out.push('"');
339                self.bump();
340                Ok(())
341            }
342            Some(b'\\') => {
343                out.push('\\');
344                self.bump();
345                Ok(())
346            }
347            Some(b'/') => {
348                out.push('/');
349                self.bump();
350                Ok(())
351            }
352            Some(b'b') => {
353                out.push('\u{08}');
354                self.bump();
355                Ok(())
356            }
357            Some(b'f') => {
358                out.push('\u{0C}');
359                self.bump();
360                Ok(())
361            }
362            Some(b'n') => {
363                out.push('\n');
364                self.bump();
365                Ok(())
366            }
367            Some(b'r') => {
368                out.push('\r');
369                self.bump();
370                Ok(())
371            }
372            Some(b't') => {
373                out.push('\t');
374                self.bump();
375                Ok(())
376            }
377            Some(b'u') => {
378                self.bump();
379                let hi = self.parse_hex4()?;
380                if (0xD800..=0xDBFF).contains(&hi) {
381                    // Expect a following low-surrogate escape.
382                    if self.peek() != Some(b'\\') {
383                        return Err(self.error(JsonErrorKind::LoneSurrogate));
384                    }
385                    self.bump();
386                    if self.peek() != Some(b'u') {
387                        return Err(self.error(JsonErrorKind::LoneSurrogate));
388                    }
389                    self.bump();
390                    let lo = self.parse_hex4()?;
391                    if !(0xDC00..=0xDFFF).contains(&lo) {
392                        return Err(self.error(JsonErrorKind::LoneSurrogate));
393                    }
394                    let scalar = 0x10000 + (((hi as u32) - 0xD800) << 10) + ((lo as u32) - 0xDC00);
395                    out.push(char::from_u32(scalar).expect("valid scalar from surrogate pair"));
396                    Ok(())
397                } else if (0xDC00..=0xDFFF).contains(&hi) {
398                    Err(self.error(JsonErrorKind::LoneSurrogate))
399                } else {
400                    out.push(char::from_u32(hi as u32).expect("non-surrogate is a valid scalar"));
401                    Ok(())
402                }
403            }
404            Some(_) => Err(self.error(JsonErrorKind::InvalidEscape)),
405        }
406    }
407
408    fn parse_hex4(&mut self) -> Result<u16, JsonError> {
409        let mut value: u16 = 0;
410        for _ in 0..4 {
411            match self.peek() {
412                None => return Err(self.error(JsonErrorKind::UnexpectedEof)),
413                Some(b) => match hex_value(b) {
414                    Some(digit) => {
415                        value = (value << 4) | digit;
416                        self.bump();
417                    }
418                    None => return Err(self.error(JsonErrorKind::InvalidUnicodeEscape)),
419                },
420            }
421        }
422        Ok(value)
423    }
424
425    fn parse_number(&mut self) -> Result<JsonValue, JsonError> {
426        let start = self.pos;
427        let start_line = self.line;
428        let start_column = self.column;
429        while let Some(b) = self.peek() {
430            if matches!(b, b'-' | b'+' | b'.' | b'e' | b'E' | b'0'..=b'9') {
431                self.bump();
432            } else {
433                break;
434            }
435        }
436        let token = &self.input[start..self.pos];
437        if token.len() > self.limits.max_number_bytes {
438            return Err(self.error_at(
439                JsonErrorKind::LimitExceeded(JsonLimitKind::NumberBytes),
440                start,
441                start_line,
442                start_column,
443            ));
444        }
445        let text = core::str::from_utf8(token).expect("validated UTF-8");
446        if !is_valid_json_number(text) {
447            return Err(self.error_at(
448                JsonErrorKind::InvalidNumber,
449                start,
450                start_line,
451                start_column,
452            ));
453        }
454        Ok(JsonValue::Number(JsonNumber::from_validated(
455            text.to_string(),
456        )))
457    }
458}
459
/// Returns the length in bytes of the UTF-8 sequence that begins with `lead`.
///
/// The answer comes purely from the byte's value range, so it is only
/// meaningful when `lead` really is the first byte of a scalar in valid
/// UTF-8; a continuation byte (`0x80..=0xBF`) is reported as 2.
fn utf8_len(lead: u8) -> usize {
    match lead {
        0x00..=0x7F => 1,
        0x80..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xFF => 4,
    }
}
471
/// Returns the numeric value (0–15) of an ASCII hexadecimal digit, accepting
/// both letter cases, or `None` if `b` is not a hex digit.
fn hex_value(b: u8) -> Option<u16> {
    // `to_digit(16)` yields values below 16, so narrowing to `u16` is lossless.
    (b as char).to_digit(16).map(|digit| digit as u16)
}