// nanojson 0.4.1 - Docs.rs

use core::str::FromStr;

use crate::error::{ParseError, ParseErrorKind};

/// Kind of lexical token produced by the tokenizer.
///
/// The variants carry no payload; they only classify what was seen.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
enum Token {
    /// No valid token is available.
    Invalid,
    /// The input is exhausted.
    Eof,
    /// `{`
    OpenCurly,
    /// `}`
    CloseCurly,
    /// `[`
    OpenBracket,
    /// `]`
    CloseBracket,
    /// `,`
    Comma,
    /// `:`
    Colon,
    /// The literal `true`.
    True,
    /// The literal `false`.
    False,
    /// The literal `null`.
    Null,
    /// A double-quoted string.
    String,
    /// A numeric literal.
    Number,
}

impl Token {
    /// Short human-readable label for this token kind, used in
    /// "expected X, got Y" diagnostics.
    fn name(self) -> &'static str {
        match self {
            // Structural punctuation is reported as the character itself.
            Self::OpenCurly => "{",
            Self::CloseCurly => "}",
            Self::OpenBracket => "[",
            Self::CloseBracket => "]",
            Self::Comma => ",",
            Self::Colon => ":",
            // Literals and value classes.
            Self::True => "true",
            Self::False => "false",
            Self::Null => "null",
            Self::String => "string",
            Self::Number => "number",
            // Pseudo-tokens.
            Self::Eof => "end of input",
            Self::Invalid => "invalid",
        }
    }
}

/// Immediate-mode JSON parser. Borrows the source (`'src`).
///
/// A scratch buffer (`'buf`) is supplied at construction and reused by every
/// `string()` call. `member()` borrows directly from the source and
/// does not touch the buffer.
///
/// # Example
/// ```
/// use nanojson::Parser;
/// let src = b"[1, 2, 3]";
/// let mut p = Parser::new(src, &mut []);
/// p.array_begin().unwrap();
/// let mut sum = 0i64;
/// while p.array_item().unwrap() {
///     sum += p.integer::<i64>().unwrap();
/// }
/// p.array_end().unwrap();
/// assert_eq!(sum, 6);
/// ```
pub struct Parser<'src, 'buf> {
    /// The JSON input being parsed.
    src: &'src [u8],
    /// Scratch buffer for decoding escape sequences in string values.
    /// Supplied once at construction; reused for every `string()` call.
    str_buf: &'buf mut [u8],
    /// Byte offset in `src` of the next unread byte.
    pos: usize,
    /// Byte offset in `src` where the most recently attempted token begins
    /// (recorded after leading whitespace has been skipped).
    token_start: usize,
    /// Start of the most recently parsed object member key (the opening `"`).
    /// Used by [`Parser::unknown_field`] to point at the key, not the colon.
    key_start: usize,

    /// Decoded length in bytes of the last string token. The bytes themselves
    /// are only present in `str_buf` when the token was decoded into it.
    str_len: usize,
    /// Byte offset in `src` of the first character after the opening `"`.
    str_start_in_src: usize,
    /// Byte offset in `src` of the closing `"` (exclusive end of content).
    str_end_in_src: usize,
    /// True when the last parsed string token contained any `\` escapes.
    str_has_escapes: bool,

    /// Kind of the most recently recognised token.
    token: Token,
    /// Source span of the last NUMBER token (points into `src`).
    number_start: usize,
    /// Exclusive end of the last NUMBER token's span in `src`.
    number_end: usize,
}

impl<'src, 'buf> Parser<'src, 'buf> {
    /// Build a parser positioned at the start of `src`.
    ///
    /// `str_buf` is the scratch buffer into which [`Parser::string`] decodes
    /// string values, so its length caps the longest string that can be
    /// decoded. Pass `&mut []` if you will not call `string()`.
    pub fn new(src: &'src [u8], str_buf: &'buf mut [u8]) -> Self {
        Self {
            // Input and scratch storage.
            src,
            str_buf,
            // Cursor: nothing consumed and no token recognised yet.
            pos: 0,
            token: Token::Invalid,
            token_start: 0,
            key_start: 0,
            // Bookkeeping for the last string token.
            str_len: 0,
            str_has_escapes: false,
            str_start_in_src: 0,
            str_end_in_src: 0,
            // Span of the last number token.
            number_start: 0,
            number_end: 0,
        }
    }

    /// Byte offset of the start of the most recently attempted token.
    /// Use this in your own diagnostics to compute line/column.
    ///
    /// The offset is relative to the start of the source slice and is
    /// recorded after any leading whitespace has been skipped.
    pub fn error_offset(&self) -> usize {
        self.token_start
    }

    // ---- tokenizer ----

    /// Advance `pos` past any run of JSON whitespace (space, tab, LF, CR),
    /// stopping at the first other byte or at the end of the input.
    fn skip_whitespace(&mut self) {
        while let Some(b' ' | b'\t' | b'\n' | b'\r') = self.src.get(self.pos).copied() {
            self.pos += 1;
        }
    }

    /// Tokenize the next token. When `WRITE_OUT_TOKEN` is true and the token is
    /// a string, decoded bytes are written into `self.str_buf`; for all other
    /// tokens and when `WRITE_OUT_TOKEN` is false the buffer is not used.
    ///
    /// On success `self.token` holds the token kind and `self.pos` points just
    /// past it. `self.token_start` is set to the first non-whitespace byte
    /// before anything else is examined, so it is valid on failure as well.
    ///
    /// Side data recorded per token kind:
    /// * Number — `number_start..number_end` is the literal's span in `src`.
    /// * String — `str_start_in_src..str_end_in_src` is the raw content span,
    ///   `str_has_escapes` says whether a `\` was seen, and `str_len` is the
    ///   decoded length (counted even when nothing is written out).
    ///
    /// # Errors
    /// * `UnexpectedEof` — input ends inside a string or escape sequence.
    /// * `InvalidEscape` — unknown escape letter, bad hex digits, or a
    ///   malformed / unpaired surrogate.
    /// * `StringBufferOverflow` — `WRITE_OUT_TOKEN` is true and the decoded
    ///   string does not fit in `str_buf`.
    /// * `UnexpectedToken` — the first byte cannot start any token.
    fn get_token<const WRITE_OUT_TOKEN: bool>(&mut self) -> Result<(), ParseError> {
        self.skip_whitespace();
        self.token_start = self.pos;

        if self.pos >= self.src.len() {
            self.token = Token::Eof;
            return Ok(());
        }

        let ch = self.src[self.pos];

        // Single-char punctuation
        let punct = match ch {
            b'{' => Some(Token::OpenCurly),
            b'}' => Some(Token::CloseCurly),
            b'[' => Some(Token::OpenBracket),
            b']' => Some(Token::CloseBracket),
            b',' => Some(Token::Comma),
            b':' => Some(Token::Colon),
            _ => None,
        };
        if let Some(t) = punct {
            self.token = t;
            self.pos += 1;
            return Ok(());
        }

        // Keywords: true / false / null
        // NOTE(review): this is a prefix match only. The byte after the keyword
        // is not inspected, so input such as `truex` yields `True` and leaves
        // `x` for the next token.
        let keywords: [(&[u8], Token); 3] = [
            (b"true",  Token::True),
            (b"false", Token::False),
            (b"null",  Token::Null),
        ];
        for (keyword, tok) in keywords {
            if self.src[self.pos..].starts_with(keyword) {
                self.token = tok;
                self.pos += keyword.len();
                return Ok(());
            }
        }

        // Number: optional '-', digits, optional '.digits', optional 'e/E±digits'
        // NOTE(review): the scan is lenient. Every digit run may be empty, so a
        // lone `-`, `1.` or `1e` is captured as a Number, and leading zeros
        // are accepted. No numeric validation happens in this function.
        if ch == b'-' || ch.is_ascii_digit() {
            let start = self.pos;
            if ch == b'-' { self.pos += 1; }
            while self.pos < self.src.len() && self.src[self.pos].is_ascii_digit() {
                self.pos += 1;
            }
            if self.pos < self.src.len() && self.src[self.pos] == b'.' {
                self.pos += 1;
                while self.pos < self.src.len() && self.src[self.pos].is_ascii_digit() {
                    self.pos += 1;
                }
            }
            if self.pos < self.src.len() && matches!(self.src[self.pos], b'e' | b'E') {
                self.pos += 1;
                if self.pos < self.src.len() && matches!(self.src[self.pos], b'+' | b'-') {
                    self.pos += 1;
                }
                while self.pos < self.src.len() && self.src[self.pos].is_ascii_digit() {
                    self.pos += 1;
                }
            }
            self.number_start = start;
            self.number_end = self.pos;
            self.token = Token::Number;
            return Ok(());
        }

        // String
        if ch == b'"' {
            // Step over the opening quote and reset the per-string bookkeeping.
            self.pos += 1;
            self.str_len = 0;
            self.str_start_in_src = self.pos;
            self.str_has_escapes = false;

            loop {
                // Running off the end before the closing quote: unterminated string.
                if self.pos >= self.src.len() {
                    self.token = Token::Invalid;
                    return Err(ParseError::at(
                        self.token_start,
                        ParseErrorKind::UnexpectedEof,
                    ));
                }
                match self.src[self.pos] {
                    b'"' => {
                        // Closing quote: record where the content ends, then consume it.
                        self.str_end_in_src = self.pos;
                        self.pos += 1;
                        self.token = Token::String;
                        return Ok(());
                    }
                    b'\\' => {
                        self.str_has_escapes = true;
                        self.pos += 1;
                        if self.pos >= self.src.len() {
                            self.token = Token::Invalid;
                            return Err(ParseError::at(
                                self.pos,
                                ParseErrorKind::UnexpectedEof,
                            ));
                        }
                        let esc = self.src[self.pos];
                        self.pos += 1;
                        if esc == b'u' {
                            // \uXXXX — parse 4 hex digits then encode as UTF-8.
                            // NOTE(review): the early returns in this branch leave
                            // `self.token` at its previous value, unlike the EOF and
                            // unknown-escape paths, which set `Token::Invalid`.
                            if self.pos + 4 > self.src.len() {
                                return Err(ParseError::at(self.pos, ParseErrorKind::UnexpectedEof));
                            }
                            let h = parse_hex4(&self.src[self.pos..])
                                .ok_or_else(|| ParseError::at(self.pos, ParseErrorKind::InvalidEscape(b'u')))?;
                            self.pos += 4;

                            let cp: u32 = if (0xD800..=0xDBFF).contains(&h) {
                                // High surrogate — must be followed by \uDC00..=\uDFFF.
                                // The `+ 6` covers the `\u` prefix plus four hex digits.
                                if self.pos + 6 > self.src.len()
                                    || self.src[self.pos]     != b'\\'
                                    || self.src[self.pos + 1] != b'u'
                                {
                                    return Err(ParseError::at(self.pos, ParseErrorKind::InvalidEscape(b'u')));
                                }
                                self.pos += 2;
                                let low = parse_hex4(&self.src[self.pos..])
                                    .ok_or_else(|| ParseError::at(self.pos, ParseErrorKind::InvalidEscape(b'u')))?;
                                if !(0xDC00..=0xDFFF).contains(&low) {
                                    return Err(ParseError::at(self.pos, ParseErrorKind::InvalidEscape(b'u')));
                                }
                                self.pos += 4;
                                // Combine the two 10-bit halves into a supplementary-plane code point.
                                0x10000 + ((h as u32 - 0xD800) << 10) + (low as u32 - 0xDC00)
                            } else if (0xDC00..=0xDFFF).contains(&h) {
                                // Lone low surrogate.
                                return Err(ParseError::at(self.pos - 4, ParseErrorKind::InvalidEscape(b'u')));
                            } else {
                                h as u32
                            };

                            // `str_len` advances even when nothing is written, so the
                            // decoded length is tracked in both modes.
                            let (bytes, len) = encode_utf8_cp(cp);
                            for &byte in &bytes[..len] {
                                if WRITE_OUT_TOKEN {
                                    if self.str_len >= self.str_buf.len() {
                                        return Err(ParseError::at(
                                            self.token_start,
                                            ParseErrorKind::StringBufferOverflow,
                                        ));
                                    }
                                    self.str_buf[self.str_len] = byte;
                                }
                                self.str_len += 1;
                            }
                        } else {
                            // Single-character escapes.
                            // NOTE(review): `\v` is accepted here although it is not
                            // one of the escapes defined by the JSON grammar.
                            let decoded = match esc {
                                b'"'  => b'"',
                                b'\\' => b'\\',
                                b'/'  => b'/',
                                b'b'  => b'\x08',
                                b't'  => b'\t',
                                b'n'  => b'\n',
                                b'v'  => b'\x0B',
                                b'f'  => b'\x0C',
                                b'r'  => b'\r',
                                other => {
                                    self.token = Token::Invalid;
                                    // `pos` was already advanced past the escape letter,
                                    // so `pos - 1` points at it.
                                    return Err(ParseError::at(
                                        self.pos - 1,
                                        ParseErrorKind::InvalidEscape(other),
                                    ));
                                }
                            };
                            if WRITE_OUT_TOKEN {
                                if self.str_len >= self.str_buf.len() {
                                    return Err(ParseError::at(
                                        self.token_start,
                                        ParseErrorKind::StringBufferOverflow,
                                    ));
                                }
                                self.str_buf[self.str_len] = decoded;
                            }
                            self.str_len += 1;
                        }
                    }
                    _ => {
                        // Any other byte is copied through unchanged, including raw
                        // control characters and non-ASCII bytes. UTF-8 validity is
                        // not checked here.
                        let b = self.src[self.pos];
                        self.pos += 1;
                        if WRITE_OUT_TOKEN {
                            if self.str_len < self.str_buf.len() {
                                self.str_buf[self.str_len] = b;
                            } else {
                                return Err(ParseError::at(
                                    self.token_start,
                                    ParseErrorKind::StringBufferOverflow,
                                ));
                            }
                        }
                        self.str_len += 1;
                    }
                }
            }
        }

        // No token can start with this byte.
        self.token = Token::Invalid;
        Err(ParseError::at(
            self.token_start,
            ParseErrorKind::UnexpectedToken { expected: "value", got: "invalid character" },
        ))
    }

    /// Check that the most recently tokenized token is `expected`.
    ///
    /// Does not consume input. On mismatch returns `UnexpectedToken` located
    /// at the start of the current token, naming both token kinds.
    fn expect_token(&mut self, expected: Token) -> Result<(), ParseError> {
        if self.token == expected {
            return Ok(());
        }
        let mismatch = ParseErrorKind::UnexpectedToken {
            expected: expected.name(),
            got: self.token.name(),
        };
        Err(ParseError::at(self.token_start, mismatch))
    }

    /// Read the next token without writing to the scratch buffer, then
    /// require it to be `expected`.
    fn get_and_expect(&mut self, expected: Token) -> Result<(), ParseError> {
        self.get_token::<false>()
            .and_then(|()| self.expect_token(expected))
    }

    /// View the bytes decoded for the last String token as a `&str`.
    ///
    /// Only meaningful after the token was decoded into the scratch buffer.
    /// The result borrows from `self`. Returns `InvalidUtf8` when the decoded
    /// bytes are not valid UTF-8.
    fn current_string(&self) -> Result<&str, ParseError> {
        match core::str::from_utf8(&self.str_buf[..self.str_len]) {
            Ok(text) => Ok(text),
            Err(_) => Err(ParseError::at(self.token_start, ParseErrorKind::InvalidUtf8)),
        }
    }

    /// View the last String token as a `&'src str` taken straight from the
    /// source, without decoding.
    ///
    /// Fails with `KeyHasEscapes` when the token contained a backslash escape
    /// (the raw bytes would not equal the decoded text), and with
    /// `InvalidUtf8` when the raw bytes are not valid UTF-8.
    fn current_string_src(&self) -> Result<&'src str, ParseError> {
        let failure = if self.str_has_escapes {
            ParseErrorKind::KeyHasEscapes
        } else {
            let raw = &self.src[self.str_start_in_src..self.str_end_in_src];
            match core::str::from_utf8(raw) {
                Ok(text) => return Ok(text),
                Err(_) => ParseErrorKind::InvalidUtf8,
            }
        };
        Err(ParseError::at(self.token_start, failure))
    }

    // ---- public API ----

    /// Consume the next token and require it to be `{`.
    pub fn object_begin(&mut self) -> Result<(), ParseError> {
        self.get_token::<false>()?;
        self.expect_token(Token::OpenCurly)
    }

    /// Shared structural prologue for object-member parsing.
    ///
    /// Skips whitespace and inspects the next byte:
    /// * `}` — returns `false` without consuming it, so `object_end` can.
    /// * `,` — consumed as the separator before a subsequent member; returns
    ///   `true`.
    /// * anything else — returns `true`; the caller then calls `get_token` to
    ///   read the key string.
    ///
    /// Separator placement is validated using the previously consumed token:
    /// a `,` directly after `{` is rejected, and once a member value has been
    /// parsed the next byte must be `,` or `}`. Without this, inputs such as
    /// `{,"a":1}` and `{"a":1 "b":2}` would be accepted.
    ///
    /// # Errors
    /// `UnexpectedEof` at end of input; `UnexpectedToken` for a misplaced or
    /// missing separator.
    fn object_next_member(&mut self) -> Result<bool, ParseError> {
        self.skip_whitespace();
        let next = match self.src.get(self.pos) {
            Some(&byte) => byte,
            None => return Err(ParseError::at(self.pos, ParseErrorKind::UnexpectedEof)),
        };

        // `object_begin` leaves `{` as the last token; every complete value
        // ends with one of the tokens listed below. Any other state keeps the
        // permissive behaviour and defers to the key tokenizer.
        let at_object_start = self.token == Token::OpenCurly;
        let after_value = matches!(
            self.token,
            Token::True
                | Token::False
                | Token::Null
                | Token::String
                | Token::Number
                | Token::CloseBracket
                | Token::CloseCurly
        );

        match next {
            b'}' => Ok(false),
            b',' if at_object_start => {
                // `{,` — a separator with no member before it.
                self.token_start = self.pos;
                Err(ParseError::at(
                    self.pos,
                    ParseErrorKind::UnexpectedToken { expected: "string", got: "," },
                ))
            }
            b',' => {
                self.token_start = self.pos;
                self.token = Token::Comma;
                self.pos += 1;
                Ok(true)
            }
            _ if after_value => {
                // A member value was just parsed but no separator follows it.
                self.token_start = self.pos;
                Err(ParseError::at(
                    self.pos,
                    ParseErrorKind::UnexpectedToken {
                        expected: ", or }",
                        got: self.peek_token().name(),
                    },
                ))
            }
            _ => Ok(true), // first member; bad chars caught by get_token + expect_token
        }
    }

    /// Read the next member key of the current object.
    ///
    /// Returns `Ok(None)` once `}` is next (the brace itself is left for
    /// `object_end`). Otherwise consumes the key and the following `:` and
    /// returns the key as a `&'src str` borrowed directly from the JSON
    /// source. Only plain keys are supported: a key containing a backslash
    /// escape yields `Err(ParseErrorKind::KeyHasEscapes)`.
    pub fn member(&mut self) -> Result<Option<&'src str>, ParseError> {
        let has_member = self.object_next_member()?;
        if !has_member {
            return Ok(None);
        }

        // The key is tokenized without decoding because the result borrows
        // from the source rather than from the scratch buffer.
        self.get_and_expect(Token::String)?;
        self.key_start = self.token_start;
        self.get_and_expect(Token::Colon)?;

        self.current_string_src().map(Some)
    }

    /// Variant of `member` that decodes the key into `self.str_buf`, so keys
    /// with escape sequences are supported. The returned `&str` borrows from
    /// the parser. Used internally by map deserializers.
    fn member_decoded(&mut self) -> Result<Option<&str>, ParseError> {
        let has_member = self.object_next_member()?;
        if !has_member {
            return Ok(None);
        }

        self.get_token::<true>()?;
        self.expect_token(Token::String)?;
        self.key_start = self.token_start;
        self.get_and_expect(Token::Colon)?;

        self.current_string().map(Some)
    }

    /// Consume the next token and require it to be `}`.
    pub fn object_end(&mut self) -> Result<(), ParseError> {
        self.get_token::<false>()?;
        self.expect_token(Token::CloseCurly)
    }

    /// Returns an `UnknownField` error at the current position.
    /// Call this inside the `_` arm of your `member` match.
    pub fn unknown_field(&self) -> ParseError {
        ParseError::at(self.key_start, ParseErrorKind::UnknownField { type_name: "", expected_fields: &[] })
    }

    /// Build an `UnknownField` error located at the most recently parsed
    /// member key, carrying the type name and its valid field names.
    /// Used by derive-generated code to produce more helpful diagnostics.
    pub fn unknown_field_in(&self, type_name: &'static str, expected_fields: &'static [&'static str]) -> ParseError {
        let kind = ParseErrorKind::UnknownField { type_name, expected_fields };
        ParseError::at(self.key_start, kind)
    }

    /// Consume the next token and require it to be `[`.
    pub fn array_begin(&mut self) -> Result<(), ParseError> {
        self.get_token::<false>()?;
        self.expect_token(Token::OpenBracket)
    }

    /// Check whether there is another item in the array.
    /// Returns `true` if so (consuming a `,` separator if present),
    /// `false` when `]` is reached.
    ///
    /// Uses fast first-character inspection so no scratch buffer is needed.
    pub fn array_item(&mut self) -> Result<bool, ParseError> {
        match self.peek_token() {
            Token::Comma => {
                // Consume the comma
                self.skip_whitespace();
                self.token_start = self.pos;
                self.pos += 1;
                self.token = Token::Comma;
                Ok(true)
            }
            Token::CloseBracket => Ok(false),
            // First item, or EOF/invalid — let the item deserializer produce the error.
            _ => Ok(true),
        }
    }

    /// Consume the next token and require it to be `]`.
    pub fn array_end(&mut self) -> Result<(), ParseError> {
        self.get_token::<false>()?;
        self.expect_token(Token::CloseBracket)
    }

    /// Consume the next token and require it to be `null`.
    pub fn null(&mut self) -> Result<(), ParseError> {
        self.get_token::<false>()?;
        self.expect_token(Token::Null)
    }

    /// Consume the next token, which must be `true` or `false`, and return
    /// the corresponding `bool`.
    ///
    /// Any other token yields `UnexpectedToken` with `expected: "boolean"`.
    pub fn boolean(&mut self) -> Result<bool, ParseError> {
        self.get_token::<false>()?;
        let value = match self.token {
            Token::True => true,
            Token::False => false,
            other => {
                let kind = ParseErrorKind::UnexpectedToken {
                    expected: "boolean",
                    got: other.name(),
                };
                return Err(ParseError::at(self.token_start, kind));
            }
        };
        Ok(value)
    }

    /// Consume a JSON string and return its decoded text.
    ///
    /// Escape sequences are decoded into the scratch buffer supplied at
    /// construction, and the returned `&str` borrows from the parser. It
    /// stays valid only until the next call that overwrites the buffer, so
    /// copy it (e.g. `.to_owned()`) before calling `string()` again.
    pub fn string(&mut self) -> Result<&str, ParseError> {
        self.get_token::<true>()
            .and_then(|()| self.expect_token(Token::String))?;
        self.current_string()
    }

    /// Consume a JSON number and return its literal text as a `&'src str`
    /// borrowed from the source.
    ///
    /// No numeric conversion or validation is performed. Parse the value
    /// yourself with e.g. `s.parse::<f64>()` (requires std) or a dedicated
    /// crate.
    pub fn number_str(&mut self) -> Result<&'src str, ParseError> {
        self.get_and_expect(Token::Number)?;
        let span = self.number_start..self.number_end;
        match core::str::from_utf8(&self.src[span]) {
            Ok(text) => Ok(text),
            Err(_) => Err(ParseError::at(self.token_start, ParseErrorKind::InvalidUtf8)),
        }
    }

    /// Consume a JSON number and convert its literal text with
    /// `Num::from_str` (intended for `f32` or `f64`).
    ///
    /// A conversion failure is reported as `UnexpectedToken` with
    /// `got: "invalid float"`, located at the start of the number.
    pub fn float<Num: FromStr>(&mut self) -> Result<Num, ParseError> {
        let literal = self.number_str()?;
        match literal.parse::<Num>() {
            Ok(value) => Ok(value),
            Err(_) => Err(ParseError::at(
                self.error_offset(),
                ParseErrorKind::UnexpectedToken { expected: "number", got: "invalid float" },
            )),
        }
    }

    /// Consume a JSON number and convert its literal text with
    /// `Num::from_str` (intended for the primitive integer types).
    ///
    /// Every conversion failure is reported as `UnexpectedToken` with
    /// `got: "int out of range"`, located at the start of the number. This
    /// includes literals with a fraction or exponent, not only overflow.
    pub fn integer<Num: FromStr>(&mut self) -> Result<Num, ParseError> {
        let literal = self.number_str()?;
        match literal.parse::<Num>() {
            Ok(value) => Ok(value),
            Err(_) => Err(ParseError::at(
                self.error_offset(),
                ParseErrorKind::UnexpectedToken { expected: "number", got: "int out of range" },
            )),
        }
    }

    // ---- lookahead ----

    /// Classify the upcoming token without advancing the parser.
    ///
    /// Only the first non-whitespace byte is examined, so no scratch buffer
    /// is needed. The result is therefore a guess for multi-byte tokens: any
    /// token starting with `t`, `f` or `n` is reported as `True`, `False` or
    /// `Null` respectively.
    fn peek_token(&self) -> Token {
        let first = self
            .src
            .iter()
            .skip(self.pos)
            .copied()
            .find(|&byte| !matches!(byte, b' ' | b'\t' | b'\n' | b'\r'));

        match first {
            None => Token::Eof,
            Some(b'{') => Token::OpenCurly,
            Some(b'}') => Token::CloseCurly,
            Some(b'[') => Token::OpenBracket,
            Some(b']') => Token::CloseBracket,
            Some(b',') => Token::Comma,
            Some(b':') => Token::Colon,
            Some(b'"') => Token::String,
            Some(b't') => Token::True,
            Some(b'f') => Token::False,
            Some(b'n') => Token::Null,
            Some(b'-' | b'0'..=b'9') => Token::Number,
            Some(_) => Token::Invalid,
        }
    }

    /// True when the next non-whitespace byte is `n` (the start of `null`).
    pub fn is_null_ahead(&self) -> bool {
        matches!(self.peek_token(), Token::Null)
    }

    /// True when the next non-whitespace byte is `t` or `f` (the start of a boolean).
    pub fn is_bool_ahead(&self) -> bool {
        let next = self.peek_token();
        next == Token::True || next == Token::False
    }

    /// True when the next non-whitespace byte is `-` or an ASCII digit.
    pub fn is_number_ahead(&self) -> bool {
        matches!(self.peek_token(), Token::Number)
    }

    /// True when the next non-whitespace byte is `"`.
    pub fn is_string_ahead(&self) -> bool {
        matches!(self.peek_token(), Token::String)
    }

    /// True when the next non-whitespace byte is `[`.
    pub fn is_array_ahead(&self) -> bool {
        matches!(self.peek_token(), Token::OpenBracket)
    }

    /// True when the next non-whitespace byte is `{`.
    pub fn is_object_ahead(&self) -> bool {
        matches!(self.peek_token(), Token::OpenCurly)
    }
}

/// Trait for types that can deserialize themselves from JSON using a [`Parser`].
///
/// `'src` is the lifetime of the JSON source bytes. The scratch buffer
/// lifetime is on the method (`'buf`), not the trait, so owned types implement
/// `for<'s> Deserialize<'s>` without needing a second lifetime parameter.
pub trait Deserialize<'src>: Sized {
    /// Read one value of this type from `parser`, starting at its current
    /// position, and return it or the first parse error encountered.
    fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError>;
}

impl<'src> Deserialize<'src> for bool {
    /// Deserializes a JSON `true` or `false` via [`Parser::boolean`].
    fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        parser.boolean()
    }
}

impl<'src> Deserialize<'src> for &'src str {
    /// Deserializes a JSON string by borrowing its text directly from the
    /// source, which only works when the string contains no backslash
    /// escapes. Escaped strings yield `Err(KeyHasEscapes)`.
    fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        parser.get_and_expect(Token::String)?;
        parser.current_string_src()
    }
}

#[cfg(feature = "alloc")]
impl<'src> Deserialize<'src> for alloc::string::String {
    /// Deserializes a JSON string (escapes decoded via the scratch buffer)
    /// and copies it into an owned `String`.
    fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        parser.string().map(alloc::string::String::from)
    }
}

// Implements `Deserialize` for each listed floating-point type by delegating
// to `Parser::float`.
macro_rules! impl_float {
    ($($t:ty),*) => {$(
        impl<'src> Deserialize<'src> for $t {
            fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
                parser.float::<$t>()
            }
        }
    )*};
}
impl_float!(f32, f64);

// Implements `Deserialize` for each listed integer type by delegating to
// `Parser::integer`.
macro_rules! impl_integer {
    ($($t:ty),*) => {$(
        impl<'src> Deserialize<'src> for $t {
            fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
                parser.integer::<$t>()
            }
        }
    )*};
}
impl_integer!(i8, i16, i32, i64, i128, isize, u8, u16, u32, u64, u128, usize);

impl<'src, T: Deserialize<'src>> Deserialize<'src> for Option<T> {
    /// Maps JSON `null` to `None`; anything else is deserialized as `T` and
    /// wrapped in `Some`.
    fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        if !parser.is_null_ahead() {
            return T::deserialize(parser).map(Some);
        }
        parser.null().map(|()| None)
    }
}

impl<'src, T, const N: usize> Deserialize<'src> for [T; N]
where
    T: Deserialize<'src>,
{
    /// Deserializes a JSON array of exactly `N` items. Arrays that are
    /// shorter or longer than `N` are rejected with `UnexpectedToken`.
    fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        parser.array_begin()?;

        // Filled slot by slot; `Option` stands in for uninitialised storage.
        let mut slots: [Option<T>; N] = [(); N].map(|()| None);

        for slot in slots.iter_mut() {
            let has_item = parser.array_item()?;
            if !has_item {
                // The array closed before all N slots were filled.
                return Err(ParseError::at(
                    parser.error_offset(),
                    ParseErrorKind::UnexpectedToken { expected: "array item", got: "]" },
                ));
            }
            *slot = Some(T::deserialize(parser)?);
        }

        // A further item after the Nth means the array is too long.
        let has_extra = parser.array_item()?;
        if has_extra {
            return Err(ParseError::at(
                parser.error_offset(),
                ParseErrorKind::UnexpectedToken { expected: "]", got: "array item" },
            ));
        }
        parser.array_end()?;

        // The loop above either returned early or stored `Some` in every
        // slot, so this unwrap cannot fail. (`array::try_from_fn` would avoid
        // the intermediate `Option`s but is nightly-only.)
        Ok(slots.map(|filled| filled.unwrap()))
    }
}

#[cfg(feature = "arrayvec")]
impl<'src, T, const N: usize> Deserialize<'src> for arrayvec::ArrayVec<T, N>
where
    T: Deserialize<'src>,
{
    /// Deserializes a JSON array into a fixed-capacity vector. An array with
    /// more items than the vector can hold is reported as
    /// `StringBufferOverflow`.
    fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        let mut items = arrayvec::ArrayVec::new();
        parser.array_begin()?;
        while parser.array_item()? {
            let item = T::deserialize(parser)?;
            if items.try_push(item).is_err() {
                return Err(ParseError::at(
                    parser.error_offset(),
                    ParseErrorKind::StringBufferOverflow,
                ));
            }
        }
        parser.array_end()?;
        Ok(items)
    }
}

#[cfg(feature = "arrayvec")]
impl<'src, const N: usize> Deserialize<'src> for arrayvec::ArrayString<N> {
    /// Deserializes a JSON string into a fixed-capacity string.
    ///
    /// # Errors
    /// Propagates any error from [`Parser::string`]. When the decoded text
    /// does not fit in `N` bytes, returns `StringBufferOverflow` located at
    /// the start of the string token, the same location the parser uses when
    /// its own scratch buffer overflows.
    fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        // `.ok()` discards the capacity error, which borrows the decoded text.
        // That releases the borrow of `parser`, so the offset can be read
        // *after* the string token was parsed. Reading it beforehand would
        // give the start of the previous token instead.
        let copied = arrayvec::ArrayString::<N>::try_from(parser.string()?).ok();
        copied.ok_or_else(|| {
            ParseError::at(parser.error_offset(), ParseErrorKind::StringBufferOverflow)
        })
    }
}

#[cfg(feature = "alloc")]
impl<'src, T> Deserialize<'src> for alloc::vec::Vec<T>
where
    T: Deserialize<'src>,
{
    /// Deserializes a JSON array of any length, pushing each item in order.
    fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        let mut items = alloc::vec::Vec::new();
        parser.array_begin()?;
        loop {
            if !parser.array_item()? {
                break;
            }
            let item = T::deserialize(parser)?;
            items.push(item);
        }
        parser.array_end()?;
        Ok(items)
    }
}

#[cfg(feature = "alloc")]
impl<'src, T: Deserialize<'src>> Deserialize<'src> for alloc::boxed::Box<T> {
    /// Deserializes a `T` and moves it onto the heap.
    fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
        let inner = T::deserialize(parser)?;
        Ok(alloc::boxed::Box::new(inner))
    }
}

// Implements `Deserialize` for a string-keyed map type.
//
// `$map_ty` is the map type (generic over the value type `V`) and `$new` is
// an expression producing an empty map. Keys are decoded through the scratch
// buffer, so escaped keys are supported.
macro_rules! impl_deserialize_map {
    ($map_ty:ty, $new:expr) => {
        impl<'src, V> Deserialize<'src> for $map_ty
        where
            V: Deserialize<'src>,
        {
            fn deserialize<'buf>(parser: &mut Parser<'src, 'buf>) -> Result<Self, ParseError> {
                let mut map = $new;
                parser.object_begin()?;
                loop {
                    // The key is copied into an owned `String` inside this
                    // statement, so the borrow of `parser` held by the
                    // decoded key ends before the value is deserialized.
                    let key = match parser.member_decoded()? {
                        Some(decoded) => alloc::string::String::from(decoded),
                        None => break,
                    };
                    map.insert(key, V::deserialize(parser)?);
                }
                parser.object_end()?;
                Ok(map)
            }
        }
    };
}

#[cfg(feature = "alloc")]
impl_deserialize_map!(
    alloc::collections::BTreeMap<alloc::string::String, V>,
    alloc::collections::BTreeMap::new()
);

#[cfg(feature = "std")]
impl_deserialize_map!(
    std::collections::HashMap<std::string::String, V>,
    std::collections::HashMap::new()
);

// ---- Convenience free functions ----

/// Run a hand-written parsing closure over `src` with a caller-supplied
/// scratch buffer.
///
/// `buf` is used by `string()` calls inside the closure, so its size limits
/// the longest decodable string value. Pass `&mut []` if you will not call
/// `string()`. The closure's result is returned as-is; this function does
/// not check whether the closure consumed all of `src`.
///
/// # Example
/// ```
/// let (x, y) = nanojson::parse_sized_as(&mut [0u8; 16], b"{\"x\":3,\"y\":4}", |p| {
///     p.object_begin()?;
///     let mut x = 0i64; let mut y = 0i64;
///     while let Some(k) = p.member()? {
///         match k {
///             "x" => x = p.integer()?,
///             "y" => y = p.integer()?,
///             _ => return Err(p.unknown_field()),
///         }
///     }
///     p.object_end()?;
///     Ok((x, y))
/// }).unwrap();
/// assert_eq!((x, y), (3, 4));
/// ```
pub fn parse_sized_as<T>(
    buf: &mut [u8],
    src: impl AsRef<[u8]>,
    f: impl for<'a, 'b> FnOnce(&mut Parser<'a, 'b>) -> Result<T, ParseError>,
) -> Result<T, ParseError> {
    f(&mut Parser::new(src.as_ref(), buf))
}

/// Deserialize one `T: Deserialize` value from `src` with a caller-supplied
/// scratch buffer.
///
/// `buf` is used for string decoding, so its size limits the longest
/// decodable string value. Pass `&mut []` for types that contain no strings.
/// This function does not check whether any input remains after the value.
///
/// # Example
/// ```
/// let n: i64 = nanojson::parse_sized(&mut [0; 0], b"42").unwrap();
/// assert_eq!(n, 42);
/// ```
#[inline]
pub fn parse_sized<T: for<'s> Deserialize<'s>>(
    buf: &mut [u8],
    src: impl AsRef<[u8]>
) -> Result<T, ParseError> {
    let mut parser = Parser::new(src.as_ref(), buf);
    T::deserialize(&mut parser)
}

/// Deserialize a fully-owned type from raw bytes or `&str`, allocating the
/// scratch buffer automatically.
///
/// The buffer is `src.len()` bytes (at least 1), a safe upper bound for
/// string decoding because a decoded string is never longer than its
/// escaped form. This function does not check whether any input remains
/// after the value.
///
/// # Example
/// ```
/// let n: i64 = nanojson::parse(b"42").unwrap();
/// assert_eq!(n, 42);
///
/// let n: i64 = nanojson::parse("42").unwrap();
/// assert_eq!(n, 42);
/// ```
#[cfg(feature = "std")]
#[inline]
pub fn parse<T: for<'s> Deserialize<'s>>(
    src: impl AsRef<[u8]>,
) -> Result<T, ParseError> {
    let bytes = src.as_ref();
    let mut scratch = std::vec![0u8; bytes.len().max(1)];
    let mut parser = Parser::new(bytes, &mut scratch);
    T::deserialize(&mut parser)
}

/// Run a hand-written parsing closure over `src` with an automatically
/// allocated scratch buffer.
///
/// The buffer is heap-allocated at `src.len()` bytes (at least 1), a safe
/// upper bound for string decoding. `T` must be a fully owned type (no
/// borrows from the parser). The closure's result is returned as-is; this
/// function does not check whether the closure consumed all of `src`.
///
/// # Example
/// ```
/// let (x, y) = nanojson::parse_as(b"{\"x\":3,\"y\":4}", |p| {
///     p.object_begin()?;
///     let mut x = 0i64; let mut y = 0i64;
///     while let Some(k) = p.member()? {
///         match k {
///             "x" => x = p.integer()?,
///             "y" => y = p.integer()?,
///             _ => return Err(p.unknown_field()),
///         }
///     }
///     p.object_end()?;
///     Ok((x, y))
/// }).unwrap();
/// assert_eq!((x, y), (3, 4));
/// ```
#[cfg(feature = "std")]
#[inline]
pub fn parse_as<T>(
    src: impl AsRef<[u8]>,
    f: impl for<'a, 'b> FnOnce(&mut Parser<'a, 'b>) -> Result<T, ParseError>,
) -> Result<T, ParseError> {
    let bytes = src.as_ref();
    let mut scratch = std::vec![0u8; bytes.len().max(1)];
    f(&mut Parser::new(bytes, &mut scratch))
}

// ---- Unicode helpers (used by \uXXXX parsing in get_token) ----

/// Read the first four bytes of `bytes` as hexadecimal digits (either case)
/// and return their value as a `u16`.
///
/// Returns `None` when fewer than four bytes are available or when any of
/// the four is not a hex digit. Bytes beyond the fourth are ignored.
fn parse_hex4(bytes: &[u8]) -> Option<u16> {
    bytes.get(..4)?.iter().try_fold(0u16, |acc, &byte| {
        let digit = char::from(byte).to_digit(16)?;
        // Four nibbles fill a u16 exactly, so the shift never loses bits.
        Some((acc << 4) | digit as u16)
    })
}

/// Encode the code point `cp` as UTF-8.
///
/// Returns a four-byte array together with the number of leading bytes that
/// are meaningful (1–4); the remaining bytes are zero. The caller is expected
/// to pass a valid Unicode scalar value — no validation is done here.
fn encode_utf8_cp(cp: u32) -> ([u8; 4], usize) {
    // Continuation byte carrying the six bits of `cp` that start at `shift`.
    let cont = |shift: u32| 0x80 | ((cp >> shift) & 0x3F) as u8;

    if cp < 0x80 {
        ([cp as u8, 0, 0, 0], 1)
    } else if cp < 0x800 {
        ([0xC0 | (cp >> 6) as u8, cont(0), 0, 0], 2)
    } else if cp < 0x1_0000 {
        ([0xE0 | (cp >> 12) as u8, cont(6), cont(0), 0], 3)
    } else {
        // 0x10000..=0x10FFFF
        ([0xF0 | (cp >> 18) as u8, cont(12), cont(6), cont(0)], 4)
    }
}