//! neodyn_xc 0.4.0
//!
//! Neodyn Exchange is the serialization format for the Neodyn database engine.
//!
//! Deserializing from the human-readable text representation.

use std::io::{ Read, BufReader };
use std::str;
use std::borrow::Cow;
use std::convert::TryInto;
use serde::de::{
    Deserialize, DeserializeSeed, Deserializer, IntoDeserializer,
    SeqAccess, MapAccess, EnumAccess, VariantAccess,
    Visitor, IgnoredAny, Error as DeError,
    Expected, Unexpected,
};
use unicode_segmentation::UnicodeSegmentation;
use unicode_xid::UnicodeXID;
use ordered_float::NotNan;
use crate::error::{ Error, ResultExt };
use crate::span::Location;

/// Deserialize a strongly-typed value from the human-readable representation.
///
/// The input must contain exactly one value; trailing garbage is an error
/// (enforced by `finalize()`).
pub fn from_str<'a, T: Deserialize<'a>>(s: &'a str) -> Result<T, Error> {
    let mut deserializer = TextDeserializer::new(s);
    let parsed = T::deserialize(&mut deserializer)?;
    deserializer.finalize()?;
    Ok(parsed)
}

/// Deserialize a strongly-typed value from the human-readable representation
/// when it is given as UTF-8 encoded bytes.
///
/// Fails with a conversion error if the bytes are not valid UTF-8.
pub fn from_bytes<'a, T: Deserialize<'a>>(b: &'a [u8]) -> Result<T, Error> {
    str::from_utf8(b).then_conv_err(from_str)
}

/// Deserialize a strongly-typed value from the human-readable representation
/// when it is given as UTF-8 encoded bytes coming from a stream reader.
///
/// It does not perform buffering.
pub fn from_reader<R, T>(mut reader: R) -> Result<T, Error>
    where
        R: Read,
        T: for<'a> Deserialize<'a>,
{
    // Lazy (albeit correct) implementation: slurp the whole stream, then
    // delegate to `from_str`. We can improve it later, but the text
    // representation exists for debugging, not for efficiency.
    //
    // Rationale for a capacity of 4 kB: it's the page size on most platforms
    // one generally cares about, so many allocations will reserve at least
    // this amount of memory by default. It's also not excessively large.
    // We also expect most serialized data to be smaller than 4k, so this
    // should help us avoid the majority of reallocations and `memcpy()`ing.
    let mut contents = String::with_capacity(4096);
    reader.read_to_string(&mut contents)?;

    from_str(&contents)
}

/// The same as `from_reader` except that this function performs buffering.
///
/// Useful for readers with expensive `read()` calls (files, sockets).
pub fn from_reader_buffered<R, T>(reader: R) -> Result<T, Error>
    where
        R: Read,
        T: for<'a> Deserialize<'a>,
{
    from_reader(BufReader::new(reader))
}

/// When given a `Cow<'de, str>`, decide whether it is borrowed or owned,
/// then call the appropriate method on the visitor passed in accordingly.
fn visit_cow_str<'de, V: Visitor<'de>>(cow: Cow<'de, str>, visitor: V) -> Result<V::Value, Error> {
    match cow {
        // An owned string must be handed over by value.
        Cow::Owned(owned) => visitor.visit_string(owned),
        // A borrowed string lives as long as the input, which enables
        // zero-copy deserialization for the caller.
        Cow::Borrowed(borrowed) => visitor.visit_borrowed_str(borrowed),
    }
}

/// Checks if the given byte index lies at a Unicode word boundary.
#[must_use]
fn is_word_boundary(string: &str, index: usize) -> bool {
    // The very end of the string is always a boundary;
    // anything past the end never is.
    if index >= string.len() {
        return index == string.len();
    }

    // `index` is a boundary iff one of the word-bound start indices equals
    // it exactly; stop scanning as soon as the boundaries pass `index`.
    string
        .split_word_bound_indices()
        .take_while(|&(i, _)| i <= index)
        .any(|(i, _)| i == index)
}

/// Checks if the given byte index lies at a Unicode word boundary or starts
/// a punctuation character or is preceded by a punctuation character.
/// Perhaps surprisingly, not all ASCII punctuation characters are always
/// classified as a Unicode word boundary, depending on their context.
#[must_use]
fn is_word_boundary_or_punct(string: &str, index: usize) -> bool {
    if is_word_boundary(string, index) {
        return true;
    }
    if !string.is_char_boundary(index) {
        // Not even a valid code point boundary.
        return false;
    }

    let (head, tail) = string.split_at(index);

    // `head` is only empty iff `index == 0`, which is a boundary, although
    // this case should have been picked up by `is_word_boundary()` already.
    // The same applies to `tail` when `index == string.len()`.
    let punct_before = head
        .chars()
        .next_back()
        .map_or(true, |ch| ch.is_ascii_punctuation());
    let punct_after = tail
        .chars()
        .next()
        .map_or(true, |ch| ch.is_ascii_punctuation());

    punct_before || punct_after
}

/// Represents the unparsed lexeme of a token along with the parsed `TokenValue`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct Token<'de> {
    /// The length of the raw, unparsed lexeme substring, in bytes
    /// (it is used for advancing the cursor by byte index).
    len: usize,
    /// The parsed token value.
    parsed: TokenValue<'de>,
}

/// Represents the parsed payload of a token (e.g. parenthesis, literal, etc.)
///
/// Floats are stored as `NotNan` so that `Eq` and `Hash` can be derived.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum TokenValue<'de> {
    /// Literal `null`.
    Null,
    /// Prefix `?` denoting an optional.
    QuestionMark,
    /// Boolean literal.
    Bool(bool),
    /// Signed integer literal.
    Int(i64),
    /// Unsigned integer literal.
    Uint(u64),
    /// Floating-point literal.
    Float(NotNan<f64>),
    /// String literal. This is a `Cow` because a string containing no escaped
    /// characters doesn't need to be copied.
    String(Cow<'de, str>),
    /// Blob literal.
    Blob(Vec<u8>),
    /// Array start.
    LeftBracket,
    /// Array end.
    RightBracket,
    /// Map start.
    LeftBrace,
    /// Map end.
    RightBrace,
    /// Item separator.
    Comma,
    /// Key-value separator.
    Colon,
}

impl<'de> From<&'de TokenValue<'de>> for Unexpected<'de> {
    /// Maps each token to the closest `serde` "unexpected" category,
    /// for use in type-mismatch error messages.
    fn from(token: &'de TokenValue<'de>) -> Self {
        use TokenValue::*;

        match token {
            Null => Unexpected::Unit,
            QuestionMark => Unexpected::Option,
            Bool(b) => Unexpected::Bool(*b),
            Int(i) => Unexpected::Signed(*i),
            Uint(u) => Unexpected::Unsigned(*u),
            Float(f) => Unexpected::Float((*f).into()),
            String(s) => Unexpected::Str(s),
            Blob(bytes) => Unexpected::Bytes(bytes),
            LeftBracket => Unexpected::Seq,
            LeftBrace => Unexpected::Map,
            // Closing delimiters and separators are reported as the
            // character itself.
            RightBracket => Unexpected::Char(']'),
            RightBrace => Unexpected::Char('}'),
            Comma => Unexpected::Char(','),
            Colon => Unexpected::Char(':'),
        }
    }
}

/// Deserializer for parsing the human-readable format.
///
/// **The deserializer must always be `finalize()`d explicitly
/// after a value has been deserialized from it!**
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct TextDeserializer<'de> {
    /// The whole string, stored only for error reporting.
    string: &'de str,
    /// The remaining tail of the string, yet to be parsed.
    cursor: &'de str,
    /// Byte position **before** the latest token that was lexed,
    /// i.e. the starting point of the aforementioned token.
    /// Counted relative to the beginning of the entire (original) string.
    prev_byte_pos: usize,
    /// Byte position **after** the latest token that was lexed, i.e.,
    /// the starting point of the next token **or** the whitespace before it.
    /// Counted relative to the beginning of the entire (original) string.
    next_byte_pos: usize,
}

impl<'de> TextDeserializer<'de> {
    /// Creates a new text deserializer from a string,
    /// with the position starting at line 1, column 1.
    ///
    /// **The deserializer must always be `finalize()`d explicitly
    /// after a value has been deserialized from it!**
    #[must_use]
    pub const fn new(s: &'de str) -> Self {
        TextDeserializer {
            string: s,
            cursor: s,
            prev_byte_pos: 0,
            next_byte_pos: 0,
        }
    }

    /// Check invariants that need to hold at the end of the deserialization.
    ///
    /// In particular, there must not be any tokens left
    /// after having parsed a complete value.
    pub fn finalize(mut self) -> Result<(), Error> {
        match self.next() {
            // Any remaining token (even a lex error) counts as garbage.
            Some(_) => Err(self.error_at_prev(
                Unexpected::Other("garbage"), &"end of input"
            )),
            None => Ok(()),
        }
    }

    /// Return the source location (line, character) corresponding to
    /// the given byte position, computed by scanning the prefix of
    /// the original input up to that position.
    fn location_at(&self, byte_pos: usize) -> Location {
        let prefix = &self.string[..byte_pos];
        Location::default().advanced_by(prefix)
    }

    /// Create an error with a `Location` as its context.
    /// The source location is derived from the specified byte position.
    ///
    /// The message follows serde's "unexpected X, expected Y" convention.
    fn error_at<'a, U>(
        &self,
        byte_pos: usize,
        unexpected: U,
        expected: &dyn Expected,
    ) -> Error
        where
            U: Into<Unexpected<'a>>,
    {
        Error::custom(
            format_args!(
                "unexpected {}, expected {}", unexpected.into(), expected,
            )
        ).with_context::<Location>(
            self.location_at(byte_pos)
        )
    }

    /// Create an error pointing to the previous location,
    /// i.e. the start of the most recently lexed token.
    fn error_at_prev<'a, U>(&self, unexpected: U, expected: &dyn Expected) -> Error
        where
            U: Into<Unexpected<'a>>,
    {
        self.error_at(self.prev_byte_pos, unexpected, expected)
    }

    /// Create an error with the next location (line, column) as context,
    /// i.e. the position just past the most recently lexed token.
    fn error_at_next<'a, U>(&self, unexpected: U, expected: &dyn Expected) -> Error
        where
            U: Into<Unexpected<'a>>,
    {
        self.error_at(self.next_byte_pos, unexpected, expected)
    }

    /// Decorates an error with the next `Location`.
    fn with_next(&self, error: Error) -> Error {
        error.with_context::<Location>(self.location_at(self.next_byte_pos))
    }

    /// Generic helper for advancing the cursor and **only the next location**
    /// accordingly, until (up to but not including) the specified byte index.
    ///
    /// The byte `index` is counted **relative to the cursor,** i.e. this
    /// is **different** from how `prev_byte_pos` and `next_byte_pos` are
    /// interpreted!
    ///
    /// `index` must lie on a `char` boundary of the cursor,
    /// otherwise the slicing below panics.
    fn advance_to_byte_index(&mut self, index: usize) {
        self.next_byte_pos += index;
        self.cursor = &self.cursor[index..];
    }

    /// Generic helper for advancing the cursor and **only the next location**
    /// accordingly, until end-of-input is encountered.
    fn advance_to_end_of_input(&mut self) {
        let remaining = self.cursor.len();
        self.advance_to_byte_index(remaining);
    }

    /// Advances the cursor until the first character is not a whitespace.
    ///
    /// Once this returns, `self.prev_byte_pos` and `self.next_byte_pos`
    /// will both point to the beginning of the next (non-whitespace) token.
    fn skip_whitespace(&mut self) {
        // TODO(H2CO3): handle Right-to-Left text correctly (?)
        let trimmed = self.cursor.trim_start();
        let skipped = self.cursor.len() - trimmed.len();

        self.advance_to_byte_index(skipped);
        self.prev_byte_pos = self.next_byte_pos;
    }

    /// Attempts to lex a string literal.
    ///
    /// Returns a borrowed slice of the input when the literal contains no
    /// escape sequences; switches to an owned buffer on the first escape.
    /// The returned `len` includes both quote characters.
    fn lex_string(&mut self) -> Result<Token<'de>, Error> {
        // Skip only the opening quote, which sits at index 0 of the cursor.
        let mut iter = self.cursor
            .char_indices()
            .skip_while(|&it| it == (0, '"'));
        // Peek (via a clone) at the first content character to learn where
        // the string body starts, without consuming it from `iter`.
        let start = match iter.clone().next() {
            None => {
                self.advance_to_end_of_input();
                return Err(self.error_at_next(
                    Unexpected::Other("end of input in string"),
                    &"characters or '\"'",
                ));
            }
            Some((i, _)) => i
        };
        // Start with an empty borrowed slice anchored at the body start.
        let mut string = Cow::Borrowed(&self.cursor[start..start]);

        let len = loop {
            match iter.next() {
                Some((i, ch)) => {
                    match ch {
                        // Closing quote: total lexeme length is up to and
                        // including this character.
                        '"' => break i + ch.len_utf8(),
                        '\\' => {
                            // An escape forces a copy (`to_mut` converts the
                            // borrowed slice into an owned `String`).
                            let unescaped = self.unescape(&mut iter)?;
                            string.to_mut().push(unescaped);
                        }
                        _ => {
                            // As long as we have seen non-escaped characters
                            // only, we can keep borrowing a subslice.
                            // Otherwise, we need to push characters one-by-one.
                            match string {
                                Cow::Borrowed(_) => string = Cow::Borrowed(
                                    &self.cursor[start..i + ch.len_utf8()]
                                ),
                                Cow::Owned(ref mut buf) => buf.push(ch),
                            }
                        }
                    }
                }
                None => {
                    // Ran off the end before seeing the closing quote.
                    self.advance_to_end_of_input();
                    return Err(self.error_at_next(
                        Unexpected::Other("end of input in string"),
                        &"characters or '\"'"
                    ));
                }
            }
        };
        let parsed = TokenValue::String(string);

        Ok(Token { len, parsed })
    }

    /// Helper for `lex_string()`.
    ///
    /// Consumes one escape sequence from `iter` (the leading backslash has
    /// already been consumed by the caller) and returns the unescaped char.
    fn unescape<T>(&mut self, iter: &mut T) -> Result<char, Error>
        where
            T: Iterator<Item = (usize, char)>,
    {
        let (i, ch) = match iter.next() {
            Some(it) => it,
            None => {
                // Input ended right after the backslash.
                self.advance_to_end_of_input();
                return Err(self.error_at_next(
                    Unexpected::Other("end of input in escape sequence"),
                    &"escape sequence"
                ));
            }
        };

        match ch {
            'n' => Ok('\n'),
            'r' => Ok('\r'),
            't' => Ok('\t'),
            // Backslash and both quote characters escape to themselves.
            '\\' | '\'' | '"' => Ok(ch),
            // `\u{...}` escapes are handled separately.
            'u' => self.unescape_unicode(iter),
            _ => {
                // Position the error at the offending character.
                self.advance_to_byte_index(i);
                Err(self.error_at_next(
                    Unexpected::Char(ch), &"one of `nrtu\'\"\\`"
                ))
            }
        }
    }

    /// Helper for `unescape()`.
    ///
    /// Parses the `{XXXX}` part of a `\u{XXXX}` escape (the `\u` has already
    /// been consumed) and returns the decoded character.
    fn unescape_unicode<T>(
        &mut self,
        iter: &mut T,
    ) -> Result<char, Error>
        where
            T: Iterator<Item = (usize, char)>,
    {
        // Expect the opening brace; `start` is the byte index of the
        // first hex digit (relative to the cursor).
        let start = match iter.next() {
            Some((i, ch)) => if ch == '{' {
                i + ch.len_utf8()
            } else {
                self.advance_to_byte_index(i);
                return Err(self.error_at_next(
                    Unexpected::Char(ch), &"'{' in Unicode escape"
                ));
            }
            None => {
                self.advance_to_end_of_input();
                return Err(self.error_at_next(
                    Unexpected::Other("end of input in Unicode escape"), &"'{'"
                ));
            }
        };

        // Scan hex digits until the closing brace; `end` is the byte
        // index of the `}` itself, i.e. one past the last digit.
        let end = loop {
            let (i, ch) = match iter.next() {
                Some(it) => it,
                None => {
                    self.advance_to_end_of_input();
                    return Err(self.error_at_next(
                        Unexpected::Other("end of input in Unicode escape"),
                        &"'}'"
                    ));
                }
            };
            match ch {
                '}' => break i,
                '0'..='9' | 'a'..='f' | 'A'..='F' => {},
                _ => {
                    self.advance_to_byte_index(i);
                    return Err(self.error_at_next(
                        Unexpected::Char(ch), &"hex digits"
                    ));
                }
            }
        };

        // The indices from `iter` are relative to the (unadvanced) cursor,
        // so this slices out exactly the hex digit run.
        let hex_str = &self.cursor[start..end];
        // `from_str_radix` also rejects the empty string `\u{}`.
        let code_point = u32::from_str_radix(hex_str, 16).map_err(|cause| {
            self.advance_to_byte_index(start);
            self.with_next(Error::with_cause("invalid Unicode escape", cause))
        })?;

        // Reject surrogates and out-of-range values.
        std::char::from_u32(code_point).ok_or_else(|| {
            self.advance_to_byte_index(start);
            self.error_at_next(
                Unexpected::Other(&format!(
                    "invalid Unicode code point: U+{:04X}", code_point
                )),
                &"Unicode code point in the valid range"
            )
        })
    }

    /// Attempts to lex a blob literal: `#` followed by pairs of hex digits
    /// (optionally whitespace-separated) and a closing `#`.
    fn lex_blob(&mut self) -> Result<Token<'de>, Error> {
        // for the rationale of reserving 4k bytes worth of capacity,
        // see the comment in `from_reader()`.
        let mut buf = Vec::with_capacity(4096);
        let mut iter = self.cursor.char_indices();

        // Skip leading '#'. The error cases should never happen,
        // because `lex_blob()` is only called when the next char is `#`.
        match iter.next() {
            Some((_, '#')) => {}
            Some((_, ch)) => return Err(self.error_at_next(
                Unexpected::Char(ch), &"'#' at beginning of blob"
            )),
            None => return Err(self.error_at_next(
                Unexpected::Other("end of input"), &"'#' at beginning of blob"
            )),
        }

        // Decode one byte per iteration until the closing '#' yields
        // the total lexeme length.
        let len = loop {
            // there might be whitespace between bytes (i.e. between
            // each two consecutive hex digits)
            let res = self.expect_hex_pair_or_pound(
                iter.by_ref().skip_while(|&(_, ch)| ch.is_whitespace())
            )?;

            match res {
                Ok(byte) => buf.push(byte),
                Err(len) => break len,
            }
        };

        // Give back the unused part of the speculative 4k reservation.
        buf.shrink_to_fit();
        let parsed = TokenValue::Blob(buf);

        Ok(Token { len, parsed })
    }

    /// Helper for `lex_blob()`.
    ///
    /// The outer `Result` represents whether the lexing continued or ended
    /// successfully.
    ///
    /// The inner `Result` is `Ok` if there was a byte to consume, and
    /// `Err` if the closing `'#'` was encountered. In the latter case,
    /// the associated value is the total length of the blob literal.
    fn expect_hex_pair_or_pound<T>(
        &mut self,
        mut iter: T,
    ) -> Result<Result<u8, usize>, Error>
        where
            T: Iterator<Item = (usize, char)>,
    {
        // This assumes that both chars are hexadecimal digits; the `match`es
        // below validate them before calling this closure.
        let byte_from_hex = |hi: char, lo: char| {
            // `char::to_digit(16)` accepts exactly `0-9a-fA-F`, replacing the
            // previous hand-rolled conversion. The value is at most 15, so
            // the cast to `u8` cannot truncate.
            #[allow(clippy::cast_possible_truncation)]
            let nibble_from_hex = |ch: char| {
                ch.to_digit(16).expect("invalid hex character") as u8
            };

            nibble_from_hex(hi) << 4 | nibble_from_hex(lo)
        };

        let (i, hi) = match iter.next() {
            Some(it) => it,
            None => {
                self.advance_to_end_of_input();
                return Err(self.error_at_next(
                    Unexpected::Other("end of input"), &"'#' at end of blob"
                ));
            }
        };

        match hi {
            // Closing '#': report the total lexeme length.
            '#' => Ok(Err(i + hi.len_utf8())),
            '0'..='9' | 'a'..='f' | 'A'..='F' => {
                let (j, lo) = match iter.next() {
                    Some(it) => it,
                    None => {
                        self.advance_to_end_of_input();
                        return Err(self.error_at_next(
                            Unexpected::Other("end of input"),
                            &"hex digit"
                        ));
                    }
                };

                match lo {
                    '0'..='9' | 'a'..='f' | 'A'..='F' => {
                        Ok(Ok(byte_from_hex(hi, lo)))
                    }
                    '#' => {
                        // point error to preceding hex character
                        self.advance_to_byte_index(i);
                        Err(self.error_at_next(
                            Unexpected::Other("odd number of hex digits in blob"),
                            &"even number of hex digits",
                        ))
                    }
                    _ => {
                        // point error to current non-hex character
                        self.advance_to_byte_index(j);
                        Err(self.error_at_next(
                            Unexpected::Char(lo), &"hex digits"
                        ))
                    }
                }
            }
            _ => {
                // point error to current non-hex character
                self.advance_to_byte_index(i);
                Err(self.error_at_next(
                    Unexpected::Char(hi), &"hex digits or '#'"
                ))
            }
        }
    }

    /// Attempts to parse a number: signed or unsigned, int or floating-point.
    ///
    /// Accepted forms: optional sign(s), then either the keyword `inf`,
    /// or digits with an optional decimal point and fractional digits.
    /// A float token is produced when a `.` (or `inf`) is present, a signed
    /// integer when the lexeme starts with `+`/`-`, otherwise an unsigned one.
    ///
    /// Side note: I love me a good spaghetti!
    fn lex_number(&mut self) -> Result<Token<'de>, Error> {
        // Skip sign for processing the actual digits
        let iter = self.cursor
            .char_indices()
            .skip_while(|&(_, ch)| ch == '+' || ch == '-');

        match iter.clone().next() {
            None => {
                self.advance_to_end_of_input();
                Err(self.error_at_next(
                    Unexpected::Other("end of input"),
                    &"decimal digits or 'inf'",
                ))
            }
            Some((i, ch)) => {
                let inf = "inf";
                // Signed infinity: `inf` right after the sign(s), followed
                // by a word boundary or punctuation.
                if
                    self.cursor[i..].starts_with(inf)
                    && is_word_boundary_or_punct(&self.cursor[i..], inf.len())
                {
                    let len = i + inf.len();
                    let number = self.cursor[..len]
                        .parse()
                        .conv_err()
                        .map_err(|e| self.with_next(e))?;
                    let parsed = TokenValue::Float(number);

                    Ok(Token { len, parsed })
                } else if ch.is_numeric() || ch == '.' {
                    // 1. If there are digits before the decimal point,
                    // skip them. If we are at the decimal point, this correctly
                    // does nothing since `'.'` is not numeric.
                    let mut iter = iter.skip_while(|&(_, ch)| ch.is_numeric());

                    let len = match iter.next() {
                        // 2. If we are now at the decimal point, skip it
                        // and look for more digits. Otherwise, we're done.
                        Some((j, ch)) => if ch == '.' {
                            let mut iter = iter.skip_while(
                                |&(_, ch)| ch.is_numeric()
                            );

                            match iter.next() {
                                Some((k, _)) => k,
                                None => self.cursor.len(),
                            }
                        } else {
                            j
                        }
                        None => self.cursor.len(),
                    };

                    // 3. Check if the number is correctly separated
                    if is_word_boundary_or_punct(self.cursor, len) {
                        // 4. Decide whether to parse an int or a float, and
                        // if it's an int, whether it's signed or unsigned.
                        let num_str = &self.cursor[..len];
                        let parsed = if num_str.contains('.') {
                            num_str
                                .parse()
                                .conv_err()
                                .map(TokenValue::Float)
                                .map_err(|e| self.with_next(e))?
                        } else if num_str.starts_with(['+', '-'].as_ref()) {
                            num_str
                                .parse()
                                .conv_err()
                                .map(TokenValue::Int)
                                .map_err(|e| self.with_next(e))?
                        } else {
                            num_str
                                .parse()
                                .conv_err()
                                .map(TokenValue::Uint)
                                .map_err(|e| self.with_next(e))?
                        };

                        Ok(Token { len, parsed })
                    } else {
                        self.advance_to_byte_index(len);
                        Err(self.error_at_next(
                            Unexpected::Other("characters"),
                            &"Unicode word boundary or punctuation"
                        ))
                    }
                } else {
                    // Sign(s) followed by something that is neither a digit,
                    // a decimal point, nor `inf`.
                    self.advance_to_byte_index(i);
                    Err(self.error_at_next(
                        Unexpected::Char(ch), &"digits or decimal point"
                    ))
                }
            }
        }
    }

    /// Looks for a simple literal: one of `null`, `true`, `false`, `inf`.
    ///
    /// (Bare `inf` reaches this path; signed infinities go through
    /// `lex_number()` instead.)
    fn lex_word(&mut self) -> Result<Token<'de>, Error> {
        // The word extends to the first non-identifier-continuation char,
        // or to the end of input.
        let len = self.cursor
            .char_indices()
            .find(|&(_, ch)| !UnicodeXID::is_xid_continue(ch))
            .map_or(self.cursor.len(), |(i, _)| i);
        let word = &self.cursor[..len];

        let parsed = match word {
            "null"  => TokenValue::Null,
            "false" => TokenValue::Bool(false),
            "true"  => TokenValue::Bool(true),
            // `try_into` wraps infinity as a `NotNan`; it cannot fail
            // here since infinity is not NaN.
            "inf"   => TokenValue::Float(f64::INFINITY.try_into()?),
            _ => return Err(self.error_at_next(
                Unexpected::Other(&format!("word `{}`", word)),
                &"`true`, `false`, `null`, or `inf`"
            ))
        };

        Ok(Token { len, parsed })
    }

    /// Looks for a punctuation character: `?`, `[`, `]`, `{`, `}`
    fn lex_punctuation(&mut self, ch: char) -> Result<Token<'de>, Error> {
        let payload = match ch {
            '?' => TokenValue::QuestionMark,
            '[' => TokenValue::LeftBracket,
            ']' => TokenValue::RightBracket,
            '{' => TokenValue::LeftBrace,
            '}' => TokenValue::RightBrace,
            ',' => TokenValue::Comma,
            ':' => TokenValue::Colon,
            _ => return Err(self.error_at_next(
                Unexpected::Char(ch), &"one of `?[]{},:`"
            )),
        };

        Ok(Token {
            len: ch.len_utf8(),
            parsed: payload,
        })
    }

    /// Return the next token or an "unexpected end of input" error if there
    /// are no more tokens.
    fn expect_token(&mut self) -> Result<TokenValue<'de>, Error> {
        match self.next() {
            Some(result) => result,
            None => Err(self.error_at_next(
                Unexpected::Other("end of input"), &"any token"
            )),
        }
    }

    /// Deserialize a generic number.
    ///
    /// The text format only distinguishes signed integers, unsigned integers,
    /// and floats, so the value is forwarded to the visitor as `i64`, `u64`,
    /// or `f64` accordingly; any other token is a type error.
    fn deserialize_number<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value, Error> {
        match self.expect_token()? {
            TokenValue::Int(i)   => visitor.visit_i64(i),
            TokenValue::Uint(u)  => visitor.visit_u64(u),
            TokenValue::Float(f) => visitor.visit_f64(f.into()),
            // `token @ _` was redundant (clippy: `redundant_pattern`).
            token => Err(self.error_at_prev(&token, &visitor)),
        }
    }

    /// Deserialize a sequence, exhausting any remaining elements so that
    /// the cursor will be in the correct position even if the visitor
    /// does not use every value in the sequence.
    ///
    /// Assumes the opening `[` token has already been consumed.
    fn visit_and_exhaust_seq<V: Visitor<'de>>(
        &mut self,
        visitor: V,
    ) -> Result<V::Value, Error> {
        let mut seq = SeqAccess::new(self);
        let value = visitor.visit_seq(&mut seq)?;
        // Consume (and discard) any elements the visitor left unread,
        // including the closing `]`.
        seq.exhaust()?;
        Ok(value)
    }

    /// Deserialize a map, exhausting any remaining entries so that
    /// the cursor will be in the correct position even if the visitor
    /// does not use every entry in the map.
    ///
    /// Assumes the opening `{` token has already been consumed.
    fn visit_and_exhaust_map<V: Visitor<'de>>(
        &mut self,
        visitor: V,
    ) -> Result<V::Value, Error> {
        let mut map = MapDeserializer::new(self);
        let value = visitor.visit_map(&mut map)?;
        // Consume (and discard) any entries the visitor left unread,
        // including the closing `}`.
        map.exhaust()?;
        Ok(value)
    }

    /// Retrieves the next token.
    ///
    /// Returns `None` at end of input. On success, the cursor is advanced
    /// past the token's lexeme; the lexers position the cursor for error
    /// reporting themselves on failure.
    fn next(&mut self) -> Option<Result<TokenValue<'de>, Error>> {
        self.skip_whitespace();

        // `None` (end of input) short-circuits via `?`.
        let ch = self.cursor.chars().next()?;
        // Dispatch on the first character of the token.
        let result = match ch {
            '"' => self.lex_string(),
            '#' => self.lex_blob(),
            // A sign or a leading decimal point always starts a number.
            '+' | '-' | '.' => self.lex_number(),
            _ if ch.is_numeric() => self.lex_number(),
            _ if ch.is_ascii_punctuation() => self.lex_punctuation(ch),
            _ if UnicodeXID::is_xid_start(ch) => self.lex_word(),
            _ => Err(self.error_at_next(
                Unexpected::Char(ch), &"a Neodyn Exchange value"
            ))
        };

        Some(
            result.map(|Token { len, parsed }| {
                self.advance_to_byte_index(len);
                parsed
            })
        )
    }
}

impl<'de> Deserializer<'de> for &mut TextDeserializer<'de> {
    type Error = Error;

    fn is_human_readable(&self) -> bool {
        // This deserializer parses the textual representation.
        true
    }

    /// Self-describing deserialization: dispatch on the next token alone.
    fn deserialize_any<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        use TokenValue::*;

        match self.expect_token()? {
            Null         => visitor.visit_unit(),
            // `?` marks a present optional; the wrapped value follows.
            QuestionMark => visitor.visit_some(self),
            Bool(b)      => visitor.visit_bool(b),
            Int(i)       => visitor.visit_i64(i),
            Uint(u)      => visitor.visit_u64(u),
            Float(f)     => visitor.visit_f64(f.into()),
            String(s)    => visit_cow_str(s, visitor),
            Blob(bytes)  => visitor.visit_byte_buf(bytes),
            LeftBracket  => self.visit_and_exhaust_seq(visitor),
            LeftBrace    => self.visit_and_exhaust_map(visitor),
            // Closing delimiters and separators cannot start a value.
            // (`token @ _` was redundant — clippy: `redundant_pattern`.)
            token        => Err(self.error_at_prev(&token, &visitor)),
        }
    }

    /// Deserialize a Boolean; only a `true`/`false` token is accepted.
    fn deserialize_bool<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        match self.expect_token()? {
            TokenValue::Bool(b) => visitor.visit_bool(b),
            // `token @ _` was redundant (clippy: `redundant_pattern`).
            token => Err(self.error_at_prev(&token, &visitor)),
        }
    }

    // All integer and floating-point requests are routed through
    // `deserialize_number()`, which forwards the lexed value to the
    // visitor as `i64`, `u64`, or `f64` — the text format does not
    // distinguish specific numeric widths.
    fn deserialize_i8<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        self.deserialize_number(visitor)
    }

    fn deserialize_i16<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        self.deserialize_number(visitor)
    }

    fn deserialize_i32<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        self.deserialize_number(visitor)
    }

    fn deserialize_i64<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        self.deserialize_number(visitor)
    }

    fn deserialize_i128<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        self.deserialize_number(visitor)
    }

    fn deserialize_u8<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        self.deserialize_number(visitor)
    }

    fn deserialize_u16<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        self.deserialize_number(visitor)
    }

    fn deserialize_u32<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        self.deserialize_number(visitor)
    }

    fn deserialize_u64<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        self.deserialize_number(visitor)
    }

    fn deserialize_u128<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        self.deserialize_number(visitor)
    }

    fn deserialize_f32<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        self.deserialize_number(visitor)
    }

    fn deserialize_f64<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        self.deserialize_number(visitor)
    }

    fn deserialize_char<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        // There is no separate character literal;
        // a char is parsed from a string token.
        self.deserialize_str(visitor)
    }

    fn deserialize_str<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        // Borrowed and owned string requests are handled identically.
        self.deserialize_string(visitor)
    }

    /// Deserialize a string; a blob token is also accepted
    /// provided that it is valid UTF-8.
    fn deserialize_string<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        match self.expect_token()? {
            TokenValue::String(s) => visit_cow_str(s, visitor),
            TokenValue::Blob(bytes) => {
                // Fails (and propagates via `?`) on invalid UTF-8.
                let s = String::from_utf8(bytes)?;
                visitor.visit_string(s)
            }
            // `token @ _` was redundant (clippy: `redundant_pattern`).
            token => Err(self.error_at_prev(&token, &visitor)),
        }
    }

    // Borrowed byte-slice requests share the owned-buffer implementation.
    fn deserialize_bytes<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        self.deserialize_byte_buf(visitor)
    }

    fn deserialize_byte_buf<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        // Bytes may be spelled as a blob literal, as a string (taking its
        // UTF-8 bytes), or as a plain array of integers, i.e. a `[u8]`.
        let token = self.expect_token()?;
        match token {
            TokenValue::Blob(buffer) => visitor.visit_byte_buf(buffer),
            TokenValue::String(text) => visit_cow_str(text, visitor),
            TokenValue::LeftBracket => self.visit_and_exhaust_seq(visitor),
            other @ _ => Err(self.error_at_prev(&other, &visitor)),
        }
    }

    fn deserialize_option<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        // `null` stands for `None`, while a `?` sigil marks a present
        // value whose payload follows immediately after it.
        let token = self.expect_token()?;
        match token {
            TokenValue::QuestionMark => visitor.visit_some(self),
            TokenValue::Null => visitor.visit_none(),
            other @ _ => Err(self.error_at_prev(&other, &visitor)),
        }
    }

    fn deserialize_unit<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        // The unit type has exactly one value, spelled `null` in the text.
        let token = self.expect_token()?;
        if let TokenValue::Null = token {
            visitor.visit_unit()
        } else {
            Err(self.error_at_prev(&token, &visitor))
        }
    }

    // A unit struct carries no data, so it is encoded exactly like the
    // unit type; the type name is not part of the wire format.
    fn deserialize_unit_struct<V: Visitor<'de>>(
        self,
        _name: &'static str,
        visitor: V,
    ) -> Result<V::Value, Self::Error> {
        self.deserialize_unit(visitor)
    }

    // Newtype structs are transparent: the inner value is deserialized
    // directly, with no extra syntax for the wrapper.
    fn deserialize_newtype_struct<V: Visitor<'de>>(
        self,
        _name: &'static str,
        visitor: V,
    ) -> Result<V::Value, Self::Error> {
        visitor.visit_newtype_struct(self)
    }

    fn deserialize_seq<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        // A sequence must open with `[`; the elements and the closing `]`
        // are consumed by the sequence-access helper.
        let token = self.expect_token()?;
        if let TokenValue::LeftBracket = token {
            self.visit_and_exhaust_seq(visitor)
        } else {
            Err(self.error_at_prev(&token, &visitor))
        }
    }

    // Tuples are encoded exactly like sequences; the expected length is
    // not needed for parsing, because the array syntax is self-delimiting.
    fn deserialize_tuple<V: Visitor<'de>>(
        self,
        _len: usize,
        visitor: V,
    ) -> Result<V::Value, Self::Error> {
        self.deserialize_seq(visitor)
    }

    // Tuple structs are transparent wrappers around tuples, so the type
    // name is irrelevant for the wire format.
    fn deserialize_tuple_struct<V: Visitor<'de>>(
        self,
        _name: &'static str,
        len: usize,
        visitor: V,
    ) -> Result<V::Value, Self::Error> {
        self.deserialize_tuple(len, visitor)
    }

    fn deserialize_map<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        // A map must open with `{`; the entries and the closing `}` are
        // consumed by the map-access helper.
        let token = self.expect_token()?;
        if let TokenValue::LeftBrace = token {
            self.visit_and_exhaust_map(visitor)
        } else {
            Err(self.error_at_prev(&token, &visitor))
        }
    }

    // Structs are encoded as maps from field name to value; neither the
    // type name nor the static field list is needed for parsing.
    fn deserialize_struct<V: Visitor<'de>>(
        self,
        _name: &'static str,
        _fields: &'static [&'static str],
        visitor: V,
    ) -> Result<V::Value, Self::Error> {
        self.deserialize_map(visitor)
    }

    // An enum is either a bare string (a unit variant) or a map containing
    // a single key-value pair (a newtype, tuple, or struct variant).
    fn deserialize_enum<V: Visitor<'de>>(
        self,
        _type_name: &'static str,
        _variants: &'static [&'static str],
        visitor: V,
    ) -> Result<V::Value, Self::Error> {
        match self.expect_token()? {
            TokenValue::String(s) => {
                // Unit variant: the variant name itself is the value.
                visitor.visit_enum(s.into_deserializer())
            }
            TokenValue::LeftBrace => {
                // Newtype, tuple, or struct variant.
                let value = visitor.visit_enum(&mut *self)?;

                // An optional comma is allowed, then the closing brace must
                // follow, since the map representation of enums is supposed
                // to contain only one key-value pair. Skipping the optional
                // comma first lets both failure paths share a single error
                // site instead of duplicating the arm.
                let mut token = self.expect_token()?;
                if let TokenValue::Comma = token {
                    token = self.expect_token()?;
                }

                if let TokenValue::RightBrace = token {
                    Ok(value)
                } else {
                    Err(self.error_at_prev(&token, &"enum as single-key map"))
                }
            }
            token @ _ => Err(self.error_at_prev(
                &token, &"enum as string or single-key map"
            ))
        }
    }

    // Identifiers (struct field names, enum variant names) are plain
    // strings in this format.
    fn deserialize_identifier<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        self.deserialize_str(visitor)
    }

    fn deserialize_ignored_any<V: Visitor<'de>>(self, visitor: V) -> Result<V::Value, Self::Error> {
        // Parse (and throw away) one complete value of any type, then
        // report unit to the caller.
        self.deserialize_any(IgnoredAny)?;
        visitor.visit_unit()
    }
}

/// Helper for deserializing sequences, both variable-length (e.g. vectors,
/// sets, queues, etc.) and fixed-length (arrays, tuples, tuple structs and
/// tuple variants).
#[derive(Debug)]
struct SeqDeserializer<'a, 'de: 'a> {
    /// The underlying deserializer from which to read the sequence.
    deserializer: &'a mut TextDeserializer<'de>,
    /// Used for remembering whether the closing `]` was already seen.
    seen_closing_bracket: bool,
}

impl<'a, 'de> SeqDeserializer<'a, 'de> {
    /// Initializes a new sequence deserializer.
    fn new(de: &'a mut TextDeserializer<'de>) -> Self {
        SeqDeserializer {
            deserializer: de,
            seen_closing_bracket: false,
        }
    }

    /// Consume the remaining elements, if any, so that the cursor will
    /// correctly point to after the end of the sequence in case the visitor
    /// did not use up all elements.
    fn exhaust(&mut self) -> Result<(), Error> {
        while let Some(IgnoredAny) = self.next_element()? {}
        Ok(())
    }
}

impl<'a, 'de> SeqAccess<'de> for SeqDeserializer<'a, 'de> {
    type Error = Error;

    /// Deserializes the next element of the sequence, or returns `Ok(None)`
    /// once the closing `]` has been consumed.
    fn next_element_seed<T: DeserializeSeed<'de>>(
        &mut self,
        seed: T,
    ) -> Result<Option<T::Value>, Self::Error> {
        if self.seen_closing_bracket {
            return Ok(None);
        }

        // Save the cursor so that a peeked token can be "put back" below.
        let saved_state = *self.deserializer;

        #[allow(clippy::wildcard_enum_match_arm)]
        match self.deserializer.expect_token()? {
            // If the next token is a closing `]`, the array is over.
            TokenValue::RightBracket => {
                self.seen_closing_bracket = true;
                Ok(None)
            }
            _ => {
                // Otherwise, we deserialize a value, "putting back" the
                // already-extracted token, by using a saved copy of the state.
                *self.deserializer = saved_state;
                let value = seed.deserialize(&mut *self.deserializer)?;

                // We then check the next token.
                // If it's a comma, we eat it and restore the state as it is
                // *after* the comma;
                //
                // If it's a closing `]`, we note that we have encountered
                // the end of the sequence.
                //
                // Otherwise we emit an error. (The expected-token message
                // matches the map counterpart; it previously carried a
                // stray trailing comma.)
                match self.deserializer.expect_token()? {
                    TokenValue::Comma => {}
                    TokenValue::RightBracket => {
                        self.seen_closing_bracket = true;
                    }
                    token @ _ => return Err(self.deserializer.error_at_prev(
                        &token, &"',' or ']' after value in array"
                    ))
                }

                Ok(Some(value))
            }
        }
    }
}

/// Helper for deserializing maps.
#[derive(Debug)]
struct MapDeserializer<'a, 'de: 'a> {
    /// The underlying deserializer from which to read the map.
    deserializer: &'a mut TextDeserializer<'de>,
    /// Used for remembering whether the closing `}` was already seen.
    seen_closing_brace: bool,
}

impl<'a, 'de> MapDeserializer<'a, 'de> {
    /// Initializes a new map deserializer.
    fn new(de: &'a mut TextDeserializer<'de>) -> Self {
        MapDeserializer {
            deserializer: de,
            seen_closing_brace: false,
        }
    }

    /// Consume the remaining entries, if any, so that the cursor will
    /// correctly point to after the end of the map in case the visitor
    /// did not use up all entries.
    fn exhaust(&mut self) -> Result<(), Error> {
        while let Some((IgnoredAny, IgnoredAny)) = self.next_entry()? {}
        Ok(())
    }
}

impl<'a, 'de> MapAccess<'de> for MapDeserializer<'a, 'de> {
    type Error = Error;

    /// Deserializes the next key in the map, also consuming the `:`
    /// separator that must follow it. Returns `Ok(None)` once the
    /// closing `}` has been consumed.
    fn next_key_seed<K: DeserializeSeed<'de>>(
        &mut self,
        seed: K,
    ) -> Result<Option<K::Value>, Self::Error> {
        if self.seen_closing_brace {
            return Ok(None);
        }

        // Save the cursor so that a peeked token can be "put back" below.
        let saved_state = *self.deserializer;

        #[allow(clippy::wildcard_enum_match_arm)]
        match self.deserializer.expect_token()? {
            // If the next token is a closing `}`, the map is over.
            TokenValue::RightBrace => {
                self.seen_closing_brace = true;
                Ok(None)
            }
            _ => {
                // Otherwise, we deserialize the key, "putting back" the
                // already-extracted token, by using a saved copy of the state.
                *self.deserializer = saved_state;
                let key = seed.deserialize(&mut *self.deserializer)?;

                // Then a ':' must follow
                match self.deserializer.expect_token()? {
                    TokenValue::Colon => Ok(Some(key)),
                    token @ _ => Err(self.deserializer.error_at_prev(
                        &token, &"':' after key in map"
                    ))
                }
            }
        }
    }

    /// Deserializes the value half of an entry, then consumes either the
    /// `,` separator or the closing `}` (remembering the latter for the
    /// next call to `next_key_seed()`).
    fn next_value_seed<V: DeserializeSeed<'de>>(
        &mut self,
        seed: V,
    ) -> Result<V::Value, Self::Error> {
        // The value must always follow.
        let value = seed.deserialize(&mut *self.deserializer)?;

        match self.deserializer.expect_token()? {
            TokenValue::Comma => {},
            TokenValue::RightBrace => {
                self.seen_closing_brace = true;
            }
            token @ _ => return Err(self.deserializer.error_at_prev(
                &token, &"',' or '}' after value in map"
            )),
        }

        Ok(value)
    }
}

impl<'de> EnumAccess<'de> for &mut TextDeserializer<'de> {
    type Error = Error;
    type Variant = Self;

    /// Reads the variant identifier — the key of the single-entry map
    /// currently being parsed — then consumes the `:` separator that
    /// must follow it.
    fn variant_seed<V: DeserializeSeed<'de>>(
        self,
        seed: V
    ) -> Result<(V::Value, Self::Variant), Self::Error> {
        // The map key doubles as the variant identifier.
        let variant = seed.deserialize(&mut *self)?;

        // A colon must separate the variant name from its payload.
        let token = self.expect_token()?;
        if let TokenValue::Colon = token {
            Ok((variant, self))
        } else {
            Err(self.error_at_prev(&token, &"':' after key in map"))
        }
    }
}

impl<'de> VariantAccess<'de> for &mut TextDeserializer<'de> {
    type Error = <Self as EnumAccess<'de>>::Error;

    // A unit variant inside the map form carries the unit type as its
    // value, which is consumed here by deserializing `()` (i.e. `null`,
    // per `deserialize_unit()` above).
    fn unit_variant(self) -> Result<(), Self::Error> {
        Deserialize::deserialize(self)
    }

    // A newtype variant's payload is the value of the single map entry,
    // deserialized directly.
    fn newtype_variant_seed<T: DeserializeSeed<'de>>(
        self,
        seed: T,
    ) -> Result<T::Value, Self::Error> {
        seed.deserialize(self)
    }

    // A tuple variant's payload is an array.
    fn tuple_variant<V: Visitor<'de>>(
        self,
        len: usize,
        visitor: V,
    ) -> Result<V::Value, Self::Error> {
        self.deserialize_tuple(len, visitor)
    }

    // A struct variant's payload is a map; the static field list is not
    // needed for parsing.
    fn struct_variant<V: Visitor<'de>>(
        self,
        _fields: &'static [&'static str],
        visitor: V,
    ) -> Result<V::Value, Self::Error> {
        self.deserialize_map(visitor)
    }
}

#[cfg(test)]
mod tests {
    use super::{ is_word_boundary, is_word_boundary_or_punct };

    // In both tests below, the second argument is an offset into the
    // string; an offset past the end of the string is never a boundary.

    #[test]
    fn is_word_boundary_works() {
        // The start and end of the string, and the edges of each
        // whitespace-separated word, are boundaries; offsets inside a
        // word, past the end, or adjacent to punctuation are not.
        assert!(is_word_boundary("", 0));
        assert!(!is_word_boundary("", 1));
        assert!(is_word_boundary("foo", 0));
        assert!(is_word_boundary("foo", 3));
        assert!(!is_word_boundary("foo", 1));
        assert!(!is_word_boundary("foo", 4));
        assert!(is_word_boundary("bar qux", 0));
        assert!(is_word_boundary("bar qux", 3));
        assert!(is_word_boundary("bar qux", 4));
        assert!(is_word_boundary("bar qux", 7));
        assert!(!is_word_boundary("bar qux", 2));
        assert!(!is_word_boundary("bar qux", 5));
        assert!(!is_word_boundary("bar qux", 6));
        assert!(!is_word_boundary("bar qux", 8));
        // Punctuation such as ',' and '.' does not create a boundary
        // for the plain variant.
        assert!(!is_word_boundary("1,2,3", 1));
        assert!(!is_word_boundary("1,2,3", 2));
        assert!(!is_word_boundary("9.7", 1));
        assert!(!is_word_boundary("9.7", 2));
    }

    #[test]
    fn is_word_boundary_or_punct_works() {
        // The `_or_punct` variant agrees with `is_word_boundary` on
        // whitespace-delimited words...
        assert!(is_word_boundary_or_punct("", 0));
        assert!(!is_word_boundary_or_punct("", 1));
        assert!(is_word_boundary_or_punct("foo", 0));
        assert!(is_word_boundary_or_punct("foo", 3));
        assert!(!is_word_boundary_or_punct("foo", 1));
        assert!(!is_word_boundary_or_punct("foo", 4));
        assert!(is_word_boundary_or_punct("bar qux", 0));
        assert!(is_word_boundary_or_punct("bar qux", 3));
        assert!(is_word_boundary_or_punct("bar qux", 4));
        assert!(is_word_boundary_or_punct("bar qux", 7));
        assert!(!is_word_boundary_or_punct("bar qux", 2));
        assert!(!is_word_boundary_or_punct("bar qux", 5));
        assert!(!is_word_boundary_or_punct("bar qux", 6));
        assert!(!is_word_boundary_or_punct("bar qux", 8));
        // ...but additionally treats offsets around punctuation
        // characters as boundaries, while interior digits of a
        // multi-character run still are not.
        assert!(is_word_boundary_or_punct("1,2,345", 1));
        assert!(is_word_boundary_or_punct("1,2,345", 2));
        assert!(!is_word_boundary_or_punct("1,2,345", 5));
        assert!(is_word_boundary_or_punct("9.753", 1));
        assert!(is_word_boundary_or_punct("9.753", 2));
        assert!(!is_word_boundary_or_punct("9.753", 3));
    }
}