Skip to main content

jaq_json/
read.rs

1//! JSON support.
2use crate::{Map, Num, Val};
3use alloc::{string::ToString, vec::Vec};
4use core::fmt::{self, Formatter};
5use hifijson::token::{Expect, Lex};
6use hifijson::{LexAlloc, SliceLexer};
7#[cfg(feature = "std")]
8use std::io;
9
10/// Eat whitespace/comments, then peek at next character.
11fn ws_tk<L: Lex>(lexer: &mut L) -> Option<u8> {
12    loop {
13        lexer.eat_whitespace();
14        match lexer.peek_next() {
15            Some(b'#') => lexer.skip_until(|c| c == b'\n'),
16            next => return next,
17        }
18    }
19}
20
21/// Parse error.
22#[derive(Debug)]
23pub struct Error(usize, hifijson::Error);
24
25impl core::fmt::Display for Error {
26    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
27        write!(f, "byte offset {}: {}", self.0, self.1)
28    }
29}
30
31#[cfg(feature = "std")]
32impl std::error::Error for Error {}
33
34/// Parse exactly one JSON value.
35pub fn parse_single(slice: &[u8]) -> Result<Val, Error> {
36    let offset = |rest: &[u8]| rest.as_ptr() as usize - slice.as_ptr() as usize;
37    let mut lexer = SliceLexer::new(slice);
38    lexer
39        .exactly_one(ws_tk, parse)
40        .map_err(|e| Error(offset(lexer.as_slice()), e))
41}
42
43/// Parse a sequence of JSON values.
44pub fn parse_many(slice: &[u8]) -> impl Iterator<Item = Result<Val, Error>> + '_ {
45    let offset = |rest: &[u8]| rest.as_ptr() as usize - slice.as_ptr() as usize;
46    let mut lexer = SliceLexer::new(slice);
47    core::iter::from_fn(move || {
48        Some(parse(ws_tk(&mut lexer)?, &mut lexer).map_err(|e| Error(offset(lexer.as_slice()), e)))
49    })
50}
51
#[cfg(feature = "std")]
/// Read a sequence of JSON values from a buffered reader, lazily.
///
/// Parse errors are reported as [`io::Error`]s of kind
/// [`io::ErrorKind::InvalidData`].
/// If the lexer recorded an I/O error while reading,
/// that error is yielded instead of the parse outcome.
pub fn read_many<'a>(read: impl io::BufRead + 'a) -> impl Iterator<Item = io::Result<Val>> + 'a {
    let to_io_error = |cause| io::Error::new(io::ErrorKind::InvalidData, cause);
    let mut lexer = hifijson::IterLexer::new(read.bytes());
    core::iter::from_fn(move || {
        // Parse first; the lexer may record an I/O error while doing so.
        let parsed = match ws_tk(&mut lexer) {
            Some(first) => Some(parse(first, &mut lexer).map_err(to_io_error)),
            None => None,
        };
        // An I/O error always wins over whatever the parse produced,
        // because the parse result may merely be a symptom of it.
        match lexer.error.take() {
            Some(io_err) => Some(Err(io_err)),
            None => parsed,
        }
    })
}
63
64/// Parse a single number.
65pub fn parse_single_num(slice: &[u8]) -> Option<Num> {
66    Some(match slice {
67        b"Infinity" => Num::Float(f64::INFINITY),
68        b"NaN" => Num::Float(f64::NAN),
69        _ => {
70            let mut lexer = SliceLexer::new(slice);
71            parse_num(&mut lexer)
72                .ok()
73                .filter(|_| lexer.as_slice().is_empty())?
74        }
75    })
76}
77
78/// Parse a JSON string as byte or text string, preserving invalid UTF-8 as-is.
79fn parse_string<L: LexAlloc>(lexer: &mut L, bytes: bool) -> Result<Vec<u8>, hifijson::Error> {
80    let on_string = |bytes: &mut L::Bytes, out: &mut Vec<u8>| {
81        out.extend(bytes.as_ref());
82        Ok(())
83    };
84    let s = lexer.str_fold(Vec::new(), on_string, |lexer, out| {
85        use hifijson::escape::Error;
86        match lexer.take_next().ok_or(Error::Eof)? {
87            b'u' if bytes => Err(Error::InvalidKind(b'u'))?,
88            b'x' if bytes => out.push(lexer.hex()?),
89            c => out.extend(lexer.escape(c)?.encode_utf8(&mut [0; 4]).as_bytes()),
90        }
91        Ok(())
92    });
93    s.map_err(hifijson::Error::Str)
94}
95
96fn parse_num<L: LexAlloc>(lexer: &mut L) -> Result<Num, hifijson::Error> {
97    let num = hifijson::num::Num::signed_digits();
98    let (num, parts) = lexer.num_string_with(num).unvalidated();
99    let num = num.as_ref();
100    Ok(match num {
101        "+" if lexer.strip_prefix(b"Infinity") => Num::Float(f64::INFINITY),
102        "-" if lexer.strip_prefix(b"Infinity") => Num::Float(f64::NEG_INFINITY),
103        _ if num.ends_with(|c: char| c.is_ascii_digit()) => {
104            if parts.is_int() {
105                Num::from_str_radix(num, 10).unwrap()
106            } else {
107                Num::Dec(num.to_string().into())
108            }
109        }
110        _ => Err(hifijson::num::Error::ExpectedDigit)?,
111    })
112}
113
114/// Parse a JSON value, given an initial non-whitespace character and a lexer.
115///
116/// If the underlying lexer reads input fallibly (for example [`hifijson::IterLexer`]),
117/// the error returned by this function might be misleading.
118/// In that case, always check whether the lexer contains an error.
119fn parse<L: LexAlloc>(next: u8, lexer: &mut L) -> Result<Val, hifijson::Error> {
120    Ok(match next {
121        b'n' if lexer.strip_prefix(b"null") => Val::Null,
122        b't' if lexer.strip_prefix(b"true") => Val::Bool(true),
123        b'f' if lexer.strip_prefix(b"false") => Val::Bool(false),
124        b'b' if lexer.strip_prefix(b"b\"") => Val::byte_str(parse_string(lexer, true)?),
125        b'N' if lexer.strip_prefix(b"NaN") => Val::Num(Num::Float(f64::NAN)),
126        b'I' if lexer.strip_prefix(b"Infinity") => Val::Num(Num::Float(f64::INFINITY)),
127        b'0'..=b'9' | b'+' | b'-' => Val::Num(parse_num(lexer)?),
128        b'"' => Val::utf8_str(parse_string(lexer.discarded(), false)?),
129        b'[' => Val::Arr({
130            let mut arr = Vec::new();
131            lexer.discarded().seq(b']', ws_tk, |next, lexer| {
132                arr.push(parse(next, lexer)?);
133                Ok::<_, hifijson::Error>(())
134            })?;
135            arr.into()
136        }),
137        b'{' => Val::obj({
138            let mut obj = Map::default();
139            lexer.discarded().seq(b'}', ws_tk, |next, lexer| {
140                let key = parse(next, lexer)?;
141                lexer.expect(ws_tk, b':').ok_or(Expect::Colon)?;
142                let value = parse(ws_tk(lexer).ok_or(Expect::Value)?, lexer)?;
143                obj.insert(key, value);
144                Ok::<_, hifijson::Error>(())
145            })?;
146            obj
147        }),
148        _ => Err(Expect::Value)?,
149    })
150}