sj 0.18.5

Some JSON implementation
Documentation
// License: see LICENSE file at root directory of `master` branch

//! # Parser

#![cfg(feature="std")]

mod number_parser;
mod unicode_char;

use {
    alloc::{
        string::String,
        vec::Vec,
    },
    core::{
        convert::TryFrom,
        fmt::{self, Display, Formatter},
    },
    std::io::{Bytes, Read},

    crate::{
        Error, Number, Object, Result, Value,
        bytes,
    },

    self::{
        number_parser::NumberParser,
        unicode_char::UnicodeChar,
    },
};

/// Initial capacity, in bytes, of the buffer that collects the content of a JSON string (see `parse_one_value` and `parse_string`).
const STRING_CAPACITY: usize = 512;

/// # Ignored byte
///
/// A structural byte which a parsing function had to consume in order to find the end of the item it was parsing. The byte is handed back to
/// the caller, which decides whether it is valid at that position.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum IgnoredByte {
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `]`
    EndOfArray,
    /// `}`
    EndOfObject,
}

/// Formats an ignored byte as the single character it stands for.
impl Display for IgnoredByte {

    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let symbol = match *self {
            IgnoredByte::Colon => ":",
            IgnoredByte::Comma => ",",
            IgnoredByte::EndOfArray => "]",
            IgnoredByte::EndOfObject => "}",
        };
        f.write_str(symbol)
    }

}

impl TryFrom<&u8> for IgnoredByte {

    type Error = Error;

    fn try_from(b: &u8) -> core::result::Result<Self, Self::Error> {
        match b {
            b':' => Ok(IgnoredByte::Colon),
            b',' => Ok(IgnoredByte::Comma),
            b']' => Ok(IgnoredByte::EndOfArray),
            b'}' => Ok(IgnoredByte::EndOfObject),
            _ => Err(Error::from(__!("Expected one of ':,]}}', got: {:?}", char::from(*b)))),
        }
    }

}

/// # Parses a stream
///
/// ## Notes
///
/// - The stream is used as-is. For better performance, you _should_ wrap your stream inside a [`BufRead`][std::io/BufRead].
/// - You can also use implementation of `TryFrom<Vec<u8>>` for [`Value`][::Value].
/// - On machines whose pointer width is one of `8`/`16`/`32`/`64`, numbers will be parsed by either `i64`/`u64`/`f64`. On other machines,
///   numbers will be parsed by `isize`/`usize`/`f64`. So, even though you can convert `i128`/`u128` into [`Number`][::Number] and format them
///   into JSON string, _but_: parsing `i128`/`u128` is _not_ supported on some machines.
///
/// [::Number]: struct.Number.html
/// [::Value]: enum.Value.html
///
/// [std::io/BufRead]: https://doc.rust-lang.org/std/io/trait.BufRead.html
pub fn parse<R>(src: &mut R) -> Result<Value> where R: Read {
    let mut bytes = src.bytes();
    let mut result = None;
    let mut number_parser = NumberParser::new();
    loop {
        let b = match bytes.next() {
            Some(b) => b.map_err(|e| Error::from(__!("{}", e)))?,
            None => break,
        };
        match b {
            b' ' | b'\r' | b'\n' | b'\t' => continue,
            b'[' => {
                let mut array = vec![];
                parse_array_content(&mut bytes, &mut number_parser, |v| array.push(v))?;
                result = Some(Value::Array(array));
                break;
            },
            b'{' => {
                let mut object = Object::new();
                parse_object_content(&mut bytes, &mut number_parser, |k, v| drop(object.insert(k, v)))?;
                result = Some(Value::Object(object));
                break;
            },
            other => return Err(Error::from(__!("Expected either '{{' or '[', got: {:?}", char::from(other)))),
        };
    }

    ensure_white_spaces(&mut bytes)?;
    result.ok_or_else(|| Error::from(__!("Found no JSON document")))
}

/// # Parses bytes
///
/// Convenience wrapper around [`parse()`]: the bytes of `bytes` are used as the input stream.
///
/// ## Examples
///
/// ```
/// use core::convert::TryFrom;
///
/// let value = sj::parse_bytes("[true]")?;
/// assert!(bool::try_from(value.at(&[0])?)?);
///
/// # Ok::<_, sj::Error>(())
/// ```
///
/// [`parse()`]: fn.parse.html
pub fn parse_bytes<B>(bytes: B) -> Result<Value> where B: AsRef<[u8]> {
    // `&[u8]` implements `Read`, so a mutable binding of the slice is a valid stream
    let mut remaining: &[u8] = bytes.as_ref();
    parse(&mut remaining)
}

/// # Parses object content
fn parse_object_content<R, F>(bytes: &mut Bytes<R>, number_parser: &mut NumberParser, mut handler: F) -> Result<()>
where R: Read, F: FnMut(String, Value) {
    loop {
        match parse_string(bytes, None)? {
            (Some(key), ignored) => match ignored {
                Some(IgnoredByte::Colon) => match parse_one_value(bytes, number_parser)? {
                    (Some(value), ignored) => match ignored {
                        Some(IgnoredByte::Comma) => handler(key, value),
                        Some(IgnoredByte::EndOfObject) => {
                            handler(key, value);
                            return Ok(());
                        },
                        Some(ignored) => return Err(Error::from(__!("Expected either ',' or '}}', got: {:?}", ignored))),
                        None => return Err(Error::from(__!("Invalid object"))),
                    },
                    (None, _) => return Err(Error::from(__!("Missing value"))),
                },
                _ => return Err(Error::from(__!("Expected ':', got: {:?}", ignored))),
            },
            (None, ignored) => return match ignored {
                Some(IgnoredByte::EndOfObject) => Ok(()),
                Some(ignored) => Err(Error::from(__!("Expected '}}', got: {:?}", ignored))),
                None => ensure_char(bytes, b'}'),
            },
        };
    }
}

/// # Parses array content
fn parse_array_content<R, F>(bytes: &mut Bytes<R>, number_parser: &mut NumberParser, mut handler: F) -> Result<()>
where R: Read, F: FnMut(Value) {
    loop {
        match parse_one_value(bytes, number_parser)? {
            (Some(value), ignored) => match ignored {
                Some(IgnoredByte::Comma) => handler(value),
                Some(IgnoredByte::EndOfArray) => {
                    handler(value);
                    return Ok(());
                },
                _ => return Err(Error::from(__!("Expected either ',' or ']', got: {:?}", ignored))),
            },
            (None, ignored) => match ignored {
                Some(IgnoredByte::EndOfArray) => return Ok(()),
                _ => return Err(Error::from(__!("Expected ']', got: {:?}", ignored))),
            },
        };
    }
}

/// # Parses one single value, returns optionally ignored byte
fn parse_one_value<R>(bytes: &mut Bytes<R>, number_parser: &mut NumberParser) -> Result<(Option<Value>, Option<IgnoredByte>)> where R: Read {
    let mut value = None;
    loop {
        let b = match bytes.next() {
            Some(b) => b.map_err(|e| Error::from(__!("{}", e)))?,
            None => break,
        };
        match b {
            b' ' | b'\r' | b'\n' | b'\t' => continue,
            b't' => match (bytes.next(), bytes.next(), bytes.next()) {
                (Some(Ok(b'r')), Some(Ok(b'u')), Some(Ok(b'e'))) => {
                    value = Some(Value::Boolean(true));
                    break;
                },
                _ => return Err(Error::from(__!("Expected 'true', got other"))),
            },
            b'f' => match (bytes.next(), bytes.next(), bytes.next(), bytes.next()) {
                (Some(Ok(b'a')), Some(Ok(b'l')), Some(Ok(b's')), Some(Ok(b'e'))) => {
                    value = Some(Value::Boolean(false));
                    break;
                },
                _ => return Err(Error::from(__!("Expected 'false', got other"))),
            },
            b'n' => match (bytes.next(), bytes.next(), bytes.next()) {
                (Some(Ok(b'u')), Some(Ok(b'l')), Some(Ok(b'l'))) => {
                    value = Some(Value::Null);
                    break;
                },
                _ => return Err(Error::from(__!("Expected 'null', got other"))),
            },
            b'[' => {
                let mut array = vec![];
                parse_array_content(bytes, number_parser, |v| array.push(v))?;
                value = Some(Value::Array(array));
                break;
            },
            b'{' => {
                let mut object = Object::new();
                parse_object_content(bytes, number_parser, |k, v| drop(object.insert(k, v)))?;
                value = Some(Value::Object(object));
                break;
            },
            b']' => return Ok((None, Some(IgnoredByte::EndOfArray))),
            b'}' => return Ok((None, Some(IgnoredByte::EndOfObject))),
            b'"' => return match parse_string(bytes, Some(Vec::with_capacity(STRING_CAPACITY)))? {
                (Some(s), ignored) => Ok((Some(Value::String(s)), ignored)),
                (None, _) => Err(Error::from(__!("Invalid string value"))),
            },
            b'+' | b'-' | b'0'..=b'9' => {
                number_parser.add(&b)?;
                let (number, ignored) = parse_number(bytes, number_parser)?;
                match ignored {
                    Some(_) => return Ok((Some(Value::Number(number)), ignored)),
                    None => {
                        value = Some(Value::Number(number));
                        break;
                    },
                };
            },
            other => return Err(Error::from(__!("Invalid character: {:?}", char::from(other)))),
        };
    }

    for b in bytes {
        match b.map_err(|e| Error::from(__!("{}", e)))? {
            b' ' | b'\r' | b'\n' | b'\t' => continue,
            b',' => return Ok((value, Some(IgnoredByte::Comma))),
            b']' => return Ok((value, Some(IgnoredByte::EndOfArray))),
            b'}' => return Ok((value, Some(IgnoredByte::EndOfObject))),
            other => return Err(Error::from(__!("Expected one of ',]}}', got: {:?}", char::from(other)))),
        };
    }

    Ok((value, None))
}

/// # Parse string
///
/// If `start` is provided, the function treats it as start of string content (the opening `"` is considered already consumed).
///
/// If `start` is **not** provided, the function first looks for `"` (skipping white spaces), then the string content.
///
/// After the closing `"`, the function keeps reading until the first byte which is not a white space. That byte has been consumed from the
/// stream, so it is returned along with the string.
///
/// ## Returns
///
/// - `(Some(string), Some(byte))`: a string, followed by a structural byte.
/// - `(Some(string), None)`: a string, followed by nothing but white spaces until the end of the stream.
/// - `(None, Some(byte))`: a structural byte was found before any `"`.
/// - `(None, None)`: the stream ended before any `"` was found.
///
/// ## Errors
///
/// An error is returned if reading from the stream fails, if the stream ends inside the string, if an escape sequence is not supported, if
/// the content is not valid UTF-8, or if a byte found outside of the string is neither a white space nor one of `:`, `,`, `]`, `}`.
fn parse_string<R>(bytes: &mut Bytes<R>, start: Option<Vec<u8>>) -> Result<(Option<String>, Option<IgnoredByte>)> where R: Read {
    /// Maps the byte following a `\` to the byte it stands for
    fn escape(b: &u8) -> Result<u8> {
        match b {
            // These stand for themselves
            b'"' => Ok(*b),
            b'\\' => Ok(*b),
            b'/' => Ok(*b),
            b'b' => Ok(bytes::BACKSPACE),
            b'f' => Ok(bytes::FORM_FEED),
            b'n' => Ok(bytes::LINE_FEED),
            b'r' => Ok(bytes::CARRIAGE_RETURN),
            b't' => Ok(bytes::HORIZONTAL_TAB),
            _ => Err(Error::from(__!("Escape character not supported: {:?}", char::from(*b)))),
        }
    }

    // `None`: opening quote not found yet; `Some(_)`: raw bytes of string content collected so far
    let mut result: Option<Vec<u8>> = start;
    // `true` from a `\` until its escape sequence is completed
    let mut escaping = false;
    // `Some(_)` while collecting the hex digits of a `\u` escape
    let mut unicode_char: Option<UnicodeChar> = None;
    loop {
        let b = match bytes.next() {
            Some(b) => b.map_err(|e| Error::from(__!("{}", e)))?,
            None => match result {
                // Stream ended inside the string
                Some(_) => return Err(Error::from(__!("Missing end of string: \""))),
                None => break,
            },
        };
        match result.as_mut() {
            // Inside the string
            Some(result) => {
                if escaping {
                    if let Some(mut uc) = unicode_char.take() {
                        // NOTE(review): presumably `is_full()` turns `true` once all hex digits of the escape have been added -- confirm
                        // in `unicode_char` module
                        match uc.is_full() {
                            false => {
                                // Current byte belongs to the `\u` escape
                                uc.add_hex(&b)?;
                                unicode_char = Some(uc);
                                continue;
                            },
                            true => {
                                // The escape is only flushed when the byte following it arrives. Current byte is _not_ part of the
                                // escape, it is handled below as normal content.
                                uc.encode_as_utf8_bytes(result)?;
                                escaping = false;
                            },
                        };
                    }
                }
                match escaping {
                    false => match b {
                        b'\\' => escaping = true,
                        // Closing quote
                        b'"' => break,
                        _ => result.push(b),
                    },
                    // Current byte directly follows a `\`
                    true => match b {
                        // `escaping` stays `true` while the hex digits are being collected
                        b'u' => unicode_char = Some(UnicodeChar::new()),
                        _ => {
                            result.push(escape(&b)?);
                            escaping = false;
                        },
                    },
                };
            },
            // Still looking for the opening quote
            None => match b {
                b' ' | b'\r' | b'\n' | b'\t' => continue,
                b'"' => result = Some(Vec::with_capacity(STRING_CAPACITY)),
                // No string here: hand the byte back to the caller, if it is a structural one
                _ => return Ok((None, Some(IgnoredByte::try_from(&b)?))),
            },
        };
    }

    let result = match result {
        Some(result) => Some(String::from_utf8(result).map_err(|e| Error::from(__!("{}", e)))?),
        None => None,
    };

    // Find the byte following the string
    for b in bytes {
        match b.map_err(|e| Error::from(__!("{}", e)))? {
            b' ' | b'\r' | b'\n' | b'\t' => continue,
            other => return Ok((result, Some(IgnoredByte::try_from(&other)?))),
        };
    }

    Ok((result, None))
}

/// Tests `parse_string()` with: empty streams, simple escape sequences, and `\uXXXX` escape sequences.
#[test]
fn test_parse_string() -> Result<()> {
    use alloc::string::ToString;

    /// Makes a JSON escape sequence (`\uXXXX`) of given character.
    ///
    /// `char::escape_unicode()` produces `\u{X...}`: braces are removed, then the hex digits are left-padded with zeros, up to 4 digits. The
    /// character must not need more than 4 hex digits.
    fn escape_unicode(c: &char) -> Vec<u8> {
        let mut result = c.escape_unicode().to_string().replace(|c: char| c == '{' || c == '}', "");
        // Index 2 is right after `\u`
        (0..(6 - result.len())).for_each(|_| result.insert(2, '0'));
        result.into_bytes()
    }

    // Empty stream, opening quote not found: no string, no error
    let (s, ignored) = parse_string(&mut b"".bytes(), None)?;
    assert!(s.is_none() && ignored.is_none());

    // Empty stream, opening quote already consumed: the string is not closed
    parse_string(&mut b"".bytes(), Some(vec![])).unwrap_err();

    for (mut raw, expected) in vec![
        (vec![], vec![]),
        (br#"some-\t\r\n--\\/\/\""#.to_vec(), "some-\t\r\n--\\//\"".as_bytes().to_vec()),
    ] {
        raw.insert(0, b'"');
        raw.push(b'"');

        let (parsed, ignored) = parse_string(&mut raw.bytes(), None)?;
        assert!(ignored.is_none());
        assert_eq!(parsed.unwrap().into_bytes(), expected);
    }

    for chr in &['\u{1d2d}', '\u{D55C}', '\u{a2}', '\u{0024}'] {
        const SRC: &[u8] = b"some";

        let mut raw = SRC.to_vec();
        raw.insert(0, b'"');
        raw.extend(escape_unicode(chr));
        raw.push(b'"');

        let mut expected = SRC.to_vec();
        expected.extend(chr.to_string().into_bytes());

        let (parsed, ignored) = parse_string(&mut raw.bytes(), None)?;
        assert!(ignored.is_none());
        assert_eq!(parsed.unwrap().into_bytes(), expected);
    }

    Ok(())
}

/// # Parses one expected syntax character
fn ensure_char<R>(bytes: &mut Bytes<R>, expected: u8) -> Result<()> where R: Read {
    for b in bytes {
        match b.map_err(|e| Error::from(__!("{}", e)))? {
            b' ' | b'\r' | b'\n' | b'\t' => continue,
            other => match other == expected {
                true => return Ok(()),
                false => return Err(Error::from(__!(
                    "Expected {expected:?}, got: {other:?}", expected=char::from(expected), other=char::from(other),
                ))),
            },
        };
    }

    Err(Error::from(__!("Missing {:?}", char::from(expected))))
}

/// # Ensures syntax white spaces
fn ensure_white_spaces<R>(bytes: &mut Bytes<R>) -> Result<()> where R: Read {
    for b in bytes {
        match b.map_err(|e| Error::from(__!("{}", e)))? {
            b' ' | b'\r' | b'\n' | b'\t' => continue,
            other => return Err(Error::from(__!("Expected white spaces, got: {other:?}", other=char::from(other)))),
        };
    }

    Ok(())
}

/// # Parse number
fn parse_number<R>(bytes: &mut Bytes<R>, number_parser: &mut NumberParser) -> Result<(Number, Option<IgnoredByte>)> where R: Read {
    let mut ignored = None;
    for b in bytes {
        let b = b.map_err(|e| Error::from(__!("{}", e)))?;
        match b {
            b' ' | b'\r' | b'\n' | b'\t' => break,
            b'0'..=b'9' | b'-' | b'+' | b'e' | b'E' | b'.' => number_parser.add(&b)?,
            _ => {
                ignored = Some(IgnoredByte::try_from(&b)?);
                break;
            },
        };
    }

    Ok((number_parser.parse()?, ignored))
}