// trivet 3.1.0 - Docs.rs

// Trivet
// Copyright (c) 2025 by Stacy Prowell.  All rights reserved.
// https://gitlab.com/binary-tools/trivet

//! Provide a very simple parsing framework.
//!
//! This module allows for building very simple and hopefully very fast
//! parsers using simple recursive descent primitives.
//!
//! The primary item of interest here is the [`Parser`] struct,
//! which provides the parsing primitives with which you can build
//! recursive descent parsers.
//!
//! There are special methods to construct parsers around different sources.
//!
//!   * [`parse_from_string()`] creates a parser for a `&str` source
//!   * [`parse_from_bytes()`] creates a parser for a `&[u8]` source
//!   * [`parse_from_path()`] creates a parser for a `PathBuf` source
//!   * [`parse_from_stdin()`] creates a parser for the standard input

use crate::decoder::Decode;
use crate::decoder::Decoder;
use crate::errors::syntax_error;
use crate::errors::ParseResult;
use crate::loc::Loc;
use crate::numbers::NumberParser;
use crate::parsers::comments::CommentParser;
use crate::parsers::keyword::KeywordParser;
use crate::strings::StringParser;
use std::cmp::Ordering;
use std::io::Read;
use std::io::Result;
use std::path::PathBuf;

/// Maximum lookahead allowed.
pub const MAX_LOOKAHEAD: usize = 64 * 1024;

/// The limit on the number of times you can peek without consuming.  Excessive peeks without consuming
/// any characters indicate bad parsing logic and a stalled parse.
pub const PEEK_LIMIT: usize = 1000;

/// The limit on the number of times you can try to consume after reaching the end of the file.
/// Excessive attempts to consume characters after reaching the end of file indicate a stalled parse
/// and failure to check for the end of file condition.
pub const EOF_LIMIT: usize = 1000;

/// Simple macro rule to update the row and column based on a character value.   Provide `self` and
/// an expression for the character being considered.
///
/// A newline (`'\n'`) moves to the start of the following line: the column is reset to 1 and the
/// line is advanced by one.  Every other character advances the column by one.
#[cfg(not(feature = "no_tracking"))]
macro_rules! rc_update {
    ($self: ident, $ch: expr) => {
        if $ch == '\n' {
            $self.column = 1;
            $self.line += 1;
        } else {
            $self.column += 1;
        }
    };
}
/// Variant used when the `no_tracking` feature is enabled.  It expands to nothing, so the line
/// and column fields are never modified.
#[cfg(feature = "no_tracking")]
macro_rules! rc_update {
    ($self: ident, $ch: expr) => {};
}

/// Simple macro to update the row and column for a chunk of text in the unwind buffer.
///
/// Provide `self` plus the start (inclusive) and end (exclusive) indices into the unwind buffer.
/// Each character in that index range is passed, in order, to `rc_update!`.
#[cfg(not(feature = "no_tracking"))]
macro_rules! rc_chunk {
    ($self: ident, $start: expr, $end: expr) => {
        for index in ($start..$end) {
            rc_update!($self, $self.unwind_buffer[index]);
        }
    };
}
/// Variant used when the `no_tracking` feature is enabled.  It expands to nothing, so the
/// buffer is not scanned at all.
#[cfg(feature = "no_tracking")]
macro_rules! rc_chunk {
    ($self: ident, $start: expr, $end: expr) => {};
}

/// Macro to do peek stall detection.
///
/// Increments the `peek_count` field of `self` and panics once the count exceeds
/// [`PEEK_LIMIT`].  The count is reset elsewhere, when characters are consumed.
#[cfg(not(feature = "no_stall_detection"))]
macro_rules! check_peek_stall {
    ($self: ident) => {
        $self.peek_count += 1;
        if $self.peek_count > PEEK_LIMIT {
            panic!("Exceeded peek limit; parsing has stalled.  This is almost certainly an internal error.");
        }
    };
}
/// Variant used when the `no_stall_detection` feature is enabled.  It expands to nothing, so
/// peeks are neither counted nor limited.
#[cfg(feature = "no_stall_detection")]
macro_rules! check_peek_stall {
    ($self: ident) => {};
}

/// Macro to do eof stall detection.
///
/// Increments the `_eof_peek_count` field of `self` and panics once the count exceeds
/// [`EOF_LIMIT`].  It is invoked when a consume is attempted with nothing left to consume.
#[cfg(not(feature = "no_stall_detection"))]
macro_rules! check_eof_stall {
    ($self: ident) => {
        $self._eof_peek_count += 1;
        if $self._eof_peek_count > EOF_LIMIT {
            panic!("Exceeded EOF limit; parsing has stalled.  This is almost certainly an internal error.");
        }
    };
}
/// Variant used when the `no_stall_detection` feature is enabled.  It expands to nothing, so
/// consume attempts at the end of file are neither counted nor limited.
#[cfg(feature = "no_stall_detection")]
macro_rules! check_eof_stall {
    ($self: ident) => {};
}

/// Build a [`Parser`] that reads from a copy of the given bytes.
///
/// The bytes are copied into a new decoder, so the parser does not borrow from `source`.
/// The source name reported in locations is `<bytes>`.
pub fn parse_from_bytes(source: &[u8]) -> Parser {
    Parser::new("<bytes>", Decode::new(source.to_vec()))
}

/// Build a [`Parser`] that reads from a copy of the given string.
///
/// The string's bytes are copied into a new decoder, so the parser does not borrow from
/// `source`.  The source name reported in locations is `<string>`.
pub fn parse_from_string(source: &str) -> Parser {
    let bytes: Vec<u8> = source.as_bytes().to_vec();
    Parser::new("<string>", Decode::new(bytes))
}

/// Build a [`Parser`] over everything available on the standard input.
///
/// The whole of standard input is read up front.  The source name is the empty string, which
/// marks the source as the console.  A read error is deliberately ignored so that the parser
/// still works in pipelines; whatever bytes were read before the error are parsed.
#[cfg(not(tarpaulin_include))]
pub fn parse_from_stdin() -> Parser {
    let mut input: Vec<u8> = Vec::new();
    // Best effort by design: the outcome of the read is intentionally discarded.
    let _ = std::io::stdin().read_to_end(&mut input);
    Parser::new("", Decode::new(input))
}

/// Create a parser for the given file.  The source name is set to the given path.
/// This method can fail if it is unable to open the given file, or is unable to read
/// from it.
///
/// The entire file is read into memory before the parser is constructed.
///
/// If the path is not valid UTF-8, the source name is produced by a lossy conversion in
/// which each invalid sequence is replaced with U+FFFD (the replacement character).  The
/// file itself is still opened using the exact path given.
///
/// # Errors
///
/// Any I/O error encountered while opening or reading the file is returned.  For the
/// specific errors that can result, see `std::fs::OpenOptions::open()` in the
/// Rust standard library, and also `std::io::Read::read_to_end()`.
pub fn parse_from_path(path: &PathBuf) -> Result<Parser> {
    let bytes = std::fs::read(path)?;

    // The name is only used for reporting locations, so a lossy conversion is preferable to
    // panicking on a path that is not valid UTF-8.
    let name = path.to_string_lossy();
    Ok(Parser::new(&*name, Decode::new(bytes)))
}

/******************************************************************************

Implementation Details

The unwind buffer is a FIFO of fixed size.  It does not wrap; the parser counts on the
FIFO being periodically drained, at which point it is reset.  If the FIFO is not
completely drained, then it can overflow and fail.

The following invariants must be maintained at the end of every method.

* len is zero iff at_eof is true
* next_char is the next character if not at end of file; otherwise undefined
* next points to the correct slot in the unwind buffer holding next_char
* line and column are correct; if any characters are moved then these must be updated

Rule 1: Every method that consumes characters must update the line and column numbers.
Rule 2: Every method that consumes characters must update at_eof.
Rule 3: Every method that consumes characters must update next_char.

The only methods that are permitted to consume characters directly are: consume() and
consume_n().  These methods must set at_eof if appropriate, update the row and column
numbers, and make sure that next_char is set appropriately.

The only method that sets at_eof is reset().  This method is called by consume() and
consume_n() whenever it is possible to reset the FIFO (whenever it is empty).

Within ParserCore, the only methods that access the decoder are new(), reset(),
peek_offset(), peek_n_vec(), peek_n(), peek_chars(), and consume_n().

******************************************************************************/

/// The parser core.
///
/// This struct provides all the basic parsing primitives used elsewhere.  To use it, make
/// a decoder and pass it, along with a name for the source, to [`ParserCore::new()`].
///
/// In general you will probably prefer to use [`Parser`] instead, which will provide all
/// the functionality of the core, plus additional helper methods.
///
/// This struct exists to break a dependency cycle in the architecture.
///
/// Internally the core keeps a fixed-size lookahead ("unwind") buffer of decoded characters,
/// a cached copy of the next character, the current line and column, and counters used for
/// stall detection.
pub struct ParserCore {
    /// A name for the source for this parser.  The name is primarily used in [`Loc`] instances.
    /// An empty name marks the source as the console.
    name: String,

    /// The current one-based line number where the next character to be read will originate.
    line: usize,

    /// The current one-based column number of the next character to read.
    column: usize,

    /// The decoder that supplies the characters.
    decoder: Box<dyn Decoder>,

    /// Whether the end of file has been reached; that is, whether the decoder had no further
    /// character to offer when the buffer was last refilled.
    at_eof: bool,

    /// The number of peeks performed since the last consume.  Used for stall detection
    /// against [`PEEK_LIMIT`].
    peek_count: usize,

    /// The number of attempts to consume while at the end of file.  Used for stall detection
    /// against [`EOF_LIMIT`].
    _eof_peek_count: usize,

    /// The unwind buffer that is used to store lookahead prior to consumption.  It holds
    /// [`MAX_LOOKAHEAD`] characters and does not wrap.
    unwind_buffer: Box<[char; MAX_LOOKAHEAD]>,

    /// The index of the first character in the buffer.  Only relevant if the length is not zero.
    next: usize,

    /// The number of characters currently in the unwind buffer.  This value should
    /// only be zero if the end of stream has been reached.  Otherwise it should be
    /// at least one.
    len: usize,

    /// The next character in the stream, if there is one.  If there is not (the length is zero)
    /// then this is the null character (`'\0'`).
    next_char: char,

    /// A closure to define whitespace.  This permits changing the definition of whitespace if needed.
    /// It starts out as `char::is_whitespace`.
    whitespace: Box<dyn Fn(char) -> bool>,
}

impl ParserCore {
    /// Construct a parser core that draws its characters from `decoder`.
    ///
    /// `name` identifies the source and is used when building [`Loc`] instances.  The first
    /// character is pulled from the decoder immediately so that it is available to peek at.
    /// If the decoder yields nothing, the core starts out at the end of file with an empty
    /// buffer.  Line and column both start at 1, and the whitespace test starts out as
    /// `char::is_whitespace`.
    pub fn new<D: Decoder + 'static>(name: &str, decoder: D) -> Self {
        let mut source = Box::new(decoder);
        let mut buffer = Box::new(['\0'; MAX_LOOKAHEAD]);

        // Prime the buffer with the first character, if the stream has one.
        let (at_eof, len, next_char) = match source.next() {
            Some(first) => {
                buffer[0] = first;
                (false, 1, first)
            }
            None => (true, 0, '\0'),
        };

        ParserCore {
            name: name.to_string(),
            line: 1,
            column: 1,
            decoder: source,
            at_eof,
            peek_count: 0,
            _eof_peek_count: 0,
            unwind_buffer: buffer,
            next: 0,
            len,
            next_char,
            whitespace: Box::new(char::is_whitespace),
        }
    }

    /// Report the current position in the parse as a [`Loc`].
    ///
    /// A parser with an empty source name reports a console location; any other name yields
    /// a file location carrying a copy of that name.  Both carry the current line and column.
    pub fn loc(&self) -> Loc {
        let (line, column) = (self.line, self.column);
        if !self.name.is_empty() {
            return Loc::File {
                name: self.name.clone(),
                column,
                line,
            };
        }
        Loc::Console { line, column }
    }

    /// Return the one-based column of the next character to be read.
    ///
    /// This is handy for languages in which indentation is significant.  For general
    /// position reporting, [`Self::loc()`] is usually the better choice.
    pub fn get_column_number(&self) -> usize {
        self.column
    }

    /// Return the one-based line of the next character to be read.
    ///
    /// For general position reporting, [`Self::loc()`] is usually the better choice.
    pub fn get_line_number(&self) -> usize {
        self.line
    }

    /// Install a new definition of whitespace.
    ///
    /// `test` is a closure returning `true` for characters that count as whitespace and
    /// `false` for all others.  The previously installed test is handed back so that the
    /// caller can restore it later.
    pub fn replace_whitespace_test(
        &mut self,
        test: Box<dyn Fn(char) -> bool>,
    ) -> Box<dyn Fn(char) -> bool> {
        let mut previous = test;
        std::mem::swap(&mut self.whitespace, &mut previous);
        previous
    }

    /// To make sure everything is handled correctly, never set len to zero.  Instead, invoke this
    /// method to reset the buffer.  In short, the buffer is *never empty* unless the stream is
    /// exhausted.
    ///
    /// The buffer is rewound to its first slot and one character is requested from the decoder.
    /// If a character is available it becomes the sole buffered character and the cached next
    /// character.  If not, the buffer is left empty, the cached next character becomes the null
    /// character, and the end-of-file flag is set.
    ///
    /// This is where the end of the stream is detected, and the flag set.  After construction,
    /// it is the only place that this happens.
    ///
    /// No characters are processed here for line and column numbers; that must be done elsewhere.
    fn reset(&mut self) {
        // Either way, the buffer starts over from its first slot.
        self.next = 0;
        match self.decoder.next() {
            None => {
                // The source is exhausted and the buffer cannot be refilled, so this is the
                // end of the stream.
                self.at_eof = true;
                self.next_char = '\0';
                self.len = 0;
            }
            Some(ch) => {
                // Hold the character both in the buffer and as the cached next character.
                self.unwind_buffer[0] = ch;
                self.next_char = ch;
                self.len = 1;
            }
        }
    }

    /// Report whether the end of the stream has been reached.
    ///
    /// When this returns `true` the parser has no further characters to offer.
    #[inline(always)]
    pub fn is_at_eof(&self) -> bool {
        self.at_eof
    }

    /// Look at the next character in the stream without consuming it.
    ///
    /// For speed, the stream is not checked here.  At the end of the stream the result is the
    /// null character, but null is also a legal character in a file, so do not use it to
    /// detect the end; use [`Self::is_at_eof`] for that.
    ///
    /// # Panics
    ///
    /// Panics if too many peeks are made without any character being consumed, since that
    /// indicates parsing has stalled.  See [`PEEK_LIMIT`].
    #[inline]
    pub fn peek(&mut self) -> char {
        let upcoming = self.next_char;
        check_peek_stall!(self);
        upcoming
    }

    /// Discard the next character in the stream.  At the end of the stream this does nothing
    /// other than count the attempt.
    ///
    /// Consuming always clears the peek stall counter.  The line and column are updated for
    /// the character that is discarded.
    ///
    /// # Panics
    ///
    /// Panics if called too many times after the end of file has been reached, since that
    /// indicates parsing has stalled.  See [`EOF_LIMIT`].
    pub fn consume(&mut self) {
        // Any consume attempt counts as progress for peek stall detection.
        self.peek_count = 0;

        if self.len == 0 {
            // Nothing is left; count the attempt for end-of-file stall detection.
            check_eof_stall!(self);
            return;
        }

        // Account for the character being discarded.
        rc_update!(self, self.unwind_buffer[self.next]);
        match self.len {
            // The last buffered character is going away, so refill from the decoder.
            1 => self.reset(),
            // Otherwise just step forward within the buffer.
            _ => {
                self.next += 1;
                self.len -= 1;
                self.next_char = self.unwind_buffer[self.next];
            }
        }
    }

    /// Peek at an offset in the stream.  That is, peek at a character at a given position.
    /// The position index is zero-based, with the *next* character to read (the result of
    /// a simple [`Self::peek`]) being at index zero.
    ///
    /// If there are not enough characters in the stream, then null (`\0`) is returned.
    /// The distance is limited by the maximum lookahead ([`MAX_LOOKAHEAD`]); attempts to look
    /// at or past it will also return a null.
    ///
    /// Note the distinction between this method and [`Self::peek_n`]; `peek_n(1)` method
    /// will return the character at position zero, so it is equivalent to `peek()` and
    /// to `peek_offset(0)`.
    ///
    /// # Panics
    ///
    /// Panics if too many peeks are made without any character being consumed, since that
    /// indicates parsing has stalled.  See [`PEEK_LIMIT`].
    pub fn peek_offset(&mut self, n: usize) -> char {
        check_peek_stall!(self);

        // Offset zero is always the cached next character.
        if n == 0 {
            return self.next_char;
        }

        // Offsets at or beyond the lookahead limit can never be buffered.
        if n >= MAX_LOOKAHEAD {
            return '\0';
        }

        // self.len holds the number of valid characters starting at position self.next, so
        // offset n is only available once the length is at least n + 1.
        if self.len <= n {
            // The buffer does not wrap.  If the requested offset would land past the end of
            // the buffer, slide the live characters down to the front first.  Without this,
            // partial consumption can push self.next so far that the fill below would index
            // past the end of the buffer and panic.
            if self.next + n >= MAX_LOOKAHEAD {
                self.unwind_buffer
                    .copy_within(self.next..self.next + self.len, 0);
                self.next = 0;
            }

            // Fill from the first open slot up to and including offset n, then extend the
            // length by however many characters the decoder actually supplied.
            let start = self.next + self.len;
            let end = self.next + n + 1;
            let count = self
                .decoder
                .fill_n(end - start, &mut self.unwind_buffer[start..end]);
            self.len += count;
        }

        if n < self.len {
            self.unwind_buffer[self.next + n]
        } else {
            // The stream ended before reaching the requested offset.
            '\0'
        }
    }

    /// Peek at characters in the stream.  If there are fewer than `n` characters in the
    /// stream, then fewer are returned.  If the stream is exhausted, an empty vector is
    /// returned.
    ///
    /// If this method is invoked too many times without any characters being consumed, then it
    /// will panic to indicate that parsing has stalled.  See [`PEEK_LIMIT`].
    ///
    /// This method is similar to [`Self::peek_n`], but does not construct a string for the
    /// result, which can be better in some cases.
    pub fn peek_n_vec(&mut self, n: usize) -> Vec<char> {
        check_peek_stall!(self);

        // If there are already enough characters in the buffer, then just return
        // those.
        if self.len >= n {
            return self.unwind_buffer[self.next..(self.next + n)].to_vec();
        }

        // More characters have to be added to the buffer.  Figure out how many and then add them.
        let count = self.decoder.fill_n(
            n - self.len,
            &mut self.unwind_buffer[self.next + self.len..self.next + n],
        );
        self.len += count;
        self.unwind_buffer[self.next..(self.next + self.len)].to_vec()
    }

    /// Peek at characters in the stream.  If there are fewer than `n` characters in the
    /// stream, then fewer are returned.  If the stream is exhausted, an empty string is
    /// returned.
    ///
    /// If this method is invoked too many times without any characters being consumed, then it
    /// will panic to indicate that parsing has stalled.  See [`PEEK_LIMIT`].
    pub fn peek_n(&mut self, n: usize) -> String {
        check_peek_stall!(self);

        // If there are already enough characters in the buffer, then just return
        // those.
        if self.len >= n {
            return self.unwind_buffer[self.next..(self.next + n)]
                .iter()
                .collect();
        }

        // More characters have to be added to the buffer.  Figure out how many and then add them.
        let count = self.decoder.fill_n(
            n - self.len,
            &mut self.unwind_buffer[self.next + self.len..self.next + n],
        );
        self.len += count;
        self.unwind_buffer[self.next..(self.next + self.len)]
            .iter()
            .collect()
    }

    /// Consume a given number of characters from the stream.  The end of file is not checked during
    /// this.  If there are no characters to consume, nothing is done.
    ///
    /// If `n` is larger than the number of characters remaining, everything remaining is
    /// consumed.  The line and column are updated exactly once for each character actually
    /// consumed.
    ///
    /// # Panics
    ///
    /// Panics if called too many times after the end of file has been reached, since that
    /// indicates parsing has stalled.  See [`EOF_LIMIT`].
    pub fn consume_n(&mut self, n: usize) {
        if self.len == 0 {
            // Nothing is left; count the attempt for end-of-file stall detection.
            check_eof_stall!(self);
            return;
        }

        self.peek_count = 0;
        match self.len.cmp(&n) {
            Ordering::Equal => {
                // Consume everything in the buffer and refill.
                rc_chunk!(self, self.next, self.next + n);
                self.reset();
            }
            Ordering::Less => {
                // Only the first self.len characters are in the buffer, so only those may be
                // tracked from it.  Scanning n slots would count stale slots, count the
                // remaining characters a second time below, and could index past the end
                // of the buffer.
                rc_chunk!(self, self.next, self.next + self.len);

                // The rest come straight from the decoder.  Stop early if it runs dry.
                for _ in self.len..n {
                    match self.decoder.next() {
                        Some(_ch) => {
                            rc_update!(self, _ch);
                        }
                        None => break,
                    }
                }
                self.reset();
            }
            Ordering::Greater => {
                // Consume part of the buffer.
                rc_chunk!(self, self.next, self.next + n);
                self.next += n;
                self.len -= n;
                self.next_char = self.unwind_buffer[self.next];
            }
        }
    }

    /// Check the next characters in the stream.  If the next characters exactly match those
    /// given in the vector, in order, then true is returned.  Otherwise false is returned.
    /// Nothing is consumed.
    pub fn peek_chars(&mut self, chars: &[char]) -> bool {
        check_peek_stall!(self);

        let n = chars.len();
        if n == 0 {
            return true;
        }
        if self.len == 0 {
            return false;
        }

        // If there are not enough characters in the buffer, try to fill it now.  There is at least one
        // character at position self.next, so we need to fill after the current characters, which means
        // we start filling at position self.next + self.len, and we need to add n - self.len characters.
        if self.len < n {
            let count = self.decoder.fill_n(
                n - self.len,
                &mut self.unwind_buffer[self.next + self.len..self.next + n],
            );
            self.len += count;
            if self.len < n {
                return false;
            }
        }

        // Compare the vector to the buffer.  Note that clippy is wrong; we are not
        // iterating over chars.
        #[allow(clippy::needless_range_loop)]
        for index in 0..n {
            if self.unwind_buffer[self.next + index] != chars[index] {
                return false;
            }
        }
        true
    }

    /// Consume the next character if, and only if, it is `ch`.
    ///
    /// Returns `true` when the character matched and was consumed, and `false` when it did
    /// not match, in which case nothing is consumed.
    #[inline]
    pub fn peek_and_consume(&mut self, ch: char) -> bool {
        let matched = self.peek() == ch;
        if matched {
            self.consume();
        }
        matched
    }

    /// Consume the next characters if, and only if, they match `chars` in order.
    ///
    /// Returns `true` when the characters matched and were consumed, and `false` when they
    /// did not match, in which case nothing is consumed.
    pub fn peek_and_consume_chars(&mut self, chars: &[char]) -> bool {
        let matched = self.peek_chars(chars);
        if matched {
            self.consume_n(chars.len());
        }
        matched
    }

    /// Discard whitespace starting at the current position, stopping at the first character
    /// that is not whitespace or at the end of the stream.  Returns `true` if anything was
    /// discarded.
    ///
    /// Whitespace is decided by the parser's current whitespace test, which can be changed
    /// with [`Self::replace_whitespace_test`].  The initial test is `char::is_whitespace`,
    /// which follows the
    /// [Unicode standard](https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt).
    ///
    /// At the time of writing, the following is the Unicode definition of whitespace.
    ///
    /// ```text
    /// 0009..000D    ; White_Space # Cc   [5] <control-0009>..<control-000D>
    /// 0020          ; White_Space # Zs       SPACE
    /// 0085          ; White_Space # Cc       <control-0085>
    /// 00A0          ; White_Space # Zs       NO-BREAK SPACE
    /// 1680          ; White_Space # Zs       OGHAM SPACE MARK
    /// 2000..200A    ; White_Space # Zs  [11] EN QUAD..HAIR SPACE
    /// 2028          ; White_Space # Zl       LINE SEPARATOR
    /// 2029          ; White_Space # Zp       PARAGRAPH SEPARATOR
    /// 202F          ; White_Space # Zs       NARROW NO-BREAK SPACE
    /// 205F          ; White_Space # Zs       MEDIUM MATHEMATICAL SPACE
    /// 3000          ; White_Space # Zs       IDEOGRAPHIC SPACE
    /// ```
    ///
    pub fn consume_ws_only(&mut self) -> bool {
        let mut consumed_any = false;
        while !self.at_eof && (self.whitespace)(self.next_char) {
            self.consume();
            consumed_any = true;
        }
        consumed_any
    }

    /// Collect characters up to the first occurrence of `token`.
    ///
    /// The collected text excludes the token, but the token itself is consumed as well.  If
    /// the end of the stream is reached before the token is seen, everything read up to that
    /// point is returned.
    pub fn take_until(&mut self, token: &str) -> String {
        let terminator: Vec<char> = token.chars().collect();
        let mut collected = String::new();
        loop {
            // Stop at the end of the stream, or once the terminator has been matched and
            // consumed.
            if self.at_eof || self.peek_and_consume_chars(&terminator) {
                break;
            }
            collected.push(self.unwind_buffer[self.next]);
            self.consume();
        }
        collected
    }

    /// Collect the longest run of characters at the current position that satisfy `include`.
    ///
    /// The collected characters are consumed.  The first character that fails the test is
    /// left in the stream.  The result is empty if nothing matched.
    pub fn take_while<T: Fn(char) -> bool>(&mut self, include: T) -> String {
        let mut collected = String::new();
        loop {
            if self.at_eof {
                break;
            }
            let current = self.next_char;
            if !include(current) {
                break;
            }
            collected.push(current);
            self.consume();
        }
        collected
    }

    /// Consume characters for as long as either predicate accepts them, returning only the
    /// characters accepted by `include`.
    ///
    /// For each character `exclude` is tested *first*: a character it accepts is consumed and
    /// dropped without consulting `include`.  Otherwise a character accepted by `include` is
    /// consumed and kept.  The first character rejected by both is left in the stream and
    /// ends the scan.
    pub fn take_while_unless<T: Fn(char) -> bool, U: Fn(char) -> bool>(
        &mut self,
        include: T,
        exclude: U,
    ) -> String {
        let mut kept = String::new();
        while !self.at_eof {
            let current = self.next_char;
            if !exclude(current) {
                if !include(current) {
                    break;
                }
                kept.push(current);
            }
            self.consume();
        }
        kept
    }

    /// Collect characters from the stream, dropping some and stopping at others.
    ///
    /// Each character is handled as follows.
    ///
    /// If it satisfies `skip`, it is consumed and dropped.
    ///
    /// Otherwise, if it satisfies `stop`, collection ends and the result is returned.  The
    /// stopping character is *not* consumed.
    ///
    /// Any other character is consumed and added to the result.
    ///
    /// Because `skip` is checked *first*, and `stop` only afterwards, the following code works
    /// as expected.
    ///
    /// ```
    /// use trivet::parse_from_string;
    /// let mut parser = parse_from_string("12_232.14");
    /// assert_eq!(parser.take(
    ///   |ch| ch == '_',
    ///   |ch| ch != '.' && !ch.is_alphanumeric()
    /// ), ("12232.14".chars().collect(), None));
    /// ```
    ///
    /// For the same reason, the stop test below is never reached for `_`, so it has no effect.
    ///
    /// ```
    /// use trivet::parse_from_string;
    /// let mut parser = parse_from_string("12_232.14");
    /// assert_eq!(parser.take(
    ///     |ch| ch == '_',
    ///     |ch| ch == '_'
    /// ), ("12232.14".chars().collect(), None));
    /// ```
    ///
    /// The returned pair holds the collected characters and the character that caused the stop,
    /// or `None` if collection ended because the end of the stream was reached.
    ///
    pub fn take<S, K>(&mut self, skip: S, stop: K) -> (Vec<char>, Option<char>)
    where
        S: Fn(char) -> bool,
        K: Fn(char) -> bool,
    {
        let mut collected = Vec::new();
        loop {
            if self.at_eof {
                return (collected, None);
            }
            let current = self.next_char;
            if !skip(current) {
                if stop(current) {
                    // Leave the stopping character in the stream.
                    return (collected, Some(current));
                }
                collected.push(current);
            }
            self.consume();
        }
    }

    /// Discard the longest run of characters at the current position that satisfy `include`.
    ///
    /// Returns `true` if at least one character was discarded.  The first character that
    /// fails the test is left in the stream.
    pub fn consume_while<T: Fn(char) -> bool>(&mut self, include: T) -> bool {
        let mut consumed_any = false;
        loop {
            if self.at_eof || !include(self.next_char) {
                return consumed_any;
            }
            self.consume();
            consumed_any = true;
        }
    }

    /// Discard characters up to and including the first occurrence of `token`.
    ///
    /// The scan is not greedy: it stops at the first occurrence of the token, which is itself
    /// consumed.  If the end of the stream is reached first, everything up to it is discarded.
    ///
    /// Returns `true` if at least one character ahead of the token was discarded.  If the
    /// token is found immediately, it is consumed but `false` is returned.
    pub fn consume_until(&mut self, token: &str) -> bool {
        let terminator: Vec<char> = token.chars().collect();
        let mut skipped_any = false;
        loop {
            if self.at_eof {
                break;
            }
            if self.peek_and_consume_chars(&terminator) {
                break;
            }
            self.consume();
            skipped_any = true;
        }
        skipped_any
    }
}

/// Provide methods to implement a recursive descent parser.
///
/// This struct wraps a stream (an instance of `Read`) and
/// provides two primitives.
///
///   * You can "peek" at upcoming characters in the stream.
///   * You can "consume" characters from the stream.
///
/// These primitives are used to construct a series of more
/// complex methods, but essentially all you can do is look
/// at what is coming up in the stream, and then consume and
/// discard characters from the stream.
///
/// # Whitespace
///
/// Aside from methods specifically to consume whitespace, there
/// are special versions of some "consume" methods that consume whitespace
/// following a match.  These methods end with `_ws` and consume
/// any *trailing* whitespace following a successful match.
///
/// Whitespace includes only those characters that satisfy the
/// `char::is_whitespace()` predicate.  This includes Unicode whitespace
/// characters.  See [`Self::consume_ws_only`] for details.
///
/// # Comments
///
/// This is able to parse comments, using an embedded [`CommentParser`] instance.
/// The default comment parser handles C and C++-style comments.
///
/// If you want to configure the comment parser, you can borrow a mutable reference to it
/// using [`Self::borrow_comment_parser()`].
///
/// ```rust
/// use trivet::parsers::comments::CommentParser;
/// use trivet::Parser;
/// use trivet::parse_from_string;
///
/// let mut parser = parse_from_string("# This is a comment.");
/// let mut compar = parser.borrow_comment_parser();
/// compar.enable_c = false;
/// compar.enable_cpp = false;
/// compar.enable_python = true;
/// parser.consume_ws();
/// assert!(parser.is_at_eof());
/// ```
///
/// If your language does not support comments, you can disable them completely and
/// possibly gain a bit of performance by setting the [`Self::parse_comments`] flag to
/// `false`.
///
/// # Parsing Strings
///
/// String parsing is provided by a [`StringParser`] instance.  This instance can be
/// obtained and configured using [`Self::borrow_string_parser`].
///
/// Strings are parsed on-demand by the [`Self::parse_string`] method.  This takes an
/// optional terminator character.
///
/// ```rust
/// use trivet::Parser;
/// use trivet::errors::ParseResult;
/// use trivet::parse_from_string;
///
/// # fn main() -> ParseResult<()> {
/// // We will use «..» quotes here.
/// let mut parser = parse_from_string(r#"«This is\b\bwas\u{a}\x09some text»"#);
/// // Consume the opening quote mark.
/// parser.consume();
/// // Now consume until the closing mark.
/// let text = parser.parse_string_until_delimiter('\u{BB}')?;
/// assert_eq!(text, "This is\x08\x08was\n\tsome text");
/// # Ok(())
/// # }
/// ```
///
/// # Parsing Numbers
///
/// Number parsing is provided by a [`NumberParser`] instance.  This instance can be
/// obtained and configured using [`Self::borrow_number_parser`].
///
/// Numbers are parsed on-demand by the [`Self::parse_i128`],
/// [`Self::parse_u128`], and [`Self::parse_f64`] methods and their `_ws` counterparts.
///
/// # Parsing Keywords
///
/// Keyword parsing is provided by a [`KeywordParser`] instance.  This instance can be
/// obtained and configured using [`Self::borrow_keyword_parser`].
///
/// Keywords are parsed on-demand by the [`Self::parse_keyword`] method and its `_ws`
/// counterpart.
///
/// # Method Names
///
/// There are many methods.  Most method names have the following form:
///
/// ```text
/// [peek/consume][type][whitespace]
/// ```
///
/// `[peek/consume]`
/// - The `peek` methods look ahead at the character source.  Their return is either the
///   characters themselves or a Boolean indicating a match or failure to match.
/// - The `consume` methods discard characters from the character source.  Their return
///   is either nothing (for unconditional consumes) or a Boolean if something was
///   conditionally consumed.
/// - The `peek_and_consume` methods both match a string and consume that string when it
///   is matched, and only when it is matched.
///
/// `[type]`
/// - Methods that work with a single character have no additional specification.
/// - Methods that work with a specific number of characters are suffixed with `_n`.
///   These do not test characters, but return or consume them unconditionally.
/// - Methods that work with a vector of chars have `_chars` in the name.
/// - Methods that work with a string have `_str` in the name.  These methods are absent
///   from [`ParserCore`].
///
/// `[whitespace]`
/// - Methods that consume trailing whitespace (possibly including comments) are suffixed
///   with `_ws`.  These methods are absent from [`ParserCore`].
///
/// Some method name combinations do not make sense, and are not present.  For instance,
/// `peek_ws`.  The following is a quick reference to the implemented methods.  Methods
/// indicated with an `*` are in [`Parser`], but not in [`ParserCore`].
///
/// |Method                                     |Input      |Return     |Only in [`Parser`] |
/// |-------------------------------------------|-----------|-----------|-------------------|
/// |[`Self::peek`]                             |           |`char`     |  |
/// |[`Self::peek_n`]                           |`usize`    |`String`   |  |
/// |[`Self::peek_n_vec`]                       |`usize`    |`Vec<char>`|  |
/// |[`Self::peek_offset`]                      |`usize`    |`char`     |  |
/// |[`Self::peek_chars`]                       |`Vec<char>`|`bool`     |  |
/// |[`Self::peek_chars_greedy`] (see below)    |`Vec<char>`|`bool`     |* |
/// |[`Self::peek_str`]                         |`&str`     |`bool`     |* |
/// |[`Self::peek_str_greedy`] (see below)      |`&str`     |`bool`     |* |
/// |[`Self::consume`]                          |           |           |  |
/// |[`Self::consume_n`]                        |`usize`    |           |  |
/// |[`Self::consume_ws`]                       |           |`bool`     |* |
/// |[`Self::peek_and_consume`]                 |`char`     |`bool`     |  |
/// |[`Self::peek_and_consume_ws`]              |`char`     |`bool`     |* |
/// |[`Self::peek_and_consume_chars`]           |`Vec<char>`|`bool`     |  |
/// |[`Self::peek_and_consume_chars_ws`]        |`Vec<char>`|`bool`     |* |
/// |[`Self::peek_and_consume_str`]             |`&str`     |`bool`     |* |
/// |[`Self::peek_and_consume_str_ws`]          |`&str`     |`bool`     |* |
/// |[`Self::take_while`]                       |closure    |`String`   |  |
/// |[`Self::take_while_unless`]                |closure    |`String`   |  |
/// |[`Self::take_until`]                       |`String`   |`String`   |  |
/// |[`Self::take_until_greedy`] (see below)    |`String`   |`String`   |* |
/// |[`Self::consume_while`]                    |closure    |`bool`     |  |
/// |[`Self::consume_until`]                    |`String`   |`bool`     |  |
///
/// The additional method [`Self::consume_ws_only`] consumes whitespace and is
/// present in [`ParserCore`], but does not consume comments even if there is
/// a comment parser installed.
///
/// The special `_greedy` methods exist to handle a special case.
/// Suppose the input stream contains `"""""""` and you want to match
/// on three consecutive quotation marks.  The [`Self::peek_str`] method will match
/// on the *first* three quotation marks.  The [`Self::peek_str_greedy`] method will
/// match on the *last* three quotation marks.  These methods are not in
/// [`ParserCore`].
///
/// # Example
///
/// To use this, make an instance with [`Self::new()`], giving a stream and
/// a name for the stream.  The name given will be used when
/// constructing [`Loc`] instances.  If the name is the empty string,
/// then `Console` locations are created; otherwise `File` locations
/// are created.
///
/// The following shows how to create a parser around the standard input.
///
/// ```rust
/// use std::io;
/// use trivet::Parser;
/// use trivet::parse_from_stdin;
///
/// // Make a new parser around the standard input.
/// let mut parser = parse_from_stdin();
/// // Invoke methods to parse...
/// ```
///
/// Use [`Self::peek()`] to peek at the next character in the stream, and
/// use [`Self::consume()`] to discard that character.  Note that [`Self::peek()`]
/// returns a plain `char` rather than a result; use [`Self::is_at_eof()`] to detect
/// the end of the stream instead of inspecting the returned character.
/// The following uses these two methods to ignore whitespace
/// (though there actually is a method just for that).
///
/// ```rust
/// use trivet::Parser;
/// use trivet::parse_from_string;
///
/// # fn main() {
/// // Make a new parser around a string.
/// let mut parser = parse_from_string("\t\tx");
///
/// // While the next character is whitespace, consume it.
/// while parser.peek().is_whitespace() {
///     parser.consume();
/// }
///
/// // The stream should now be pointing to the first
/// // non-whitespace character.
/// assert_eq!(parser.peek(), 'x');
/// # }
/// ```
///
/// This would be a very tedious way to parse anything, so there
/// are other methods to help.  In particular, [`Self::consume_ws()`]
/// will do the job just outlined above.
///
/// More interestingly you can look to see if the next thing in the
/// stream is a given string with [`Self::peek_str()`] and [`Self::peek_str_greedy()`].
/// Usually once you match an upcoming string, you probably want to
/// consume it.  To both check for the string and consume it if
/// you find it, use [`Self::peek_and_consume()`] (for single characters) or
/// [`Self::peek_and_consume_str()`] (for strings).
///
/// You may also want to consume a sequence of characters, such as
/// all the digits or letters in a stream.  The method [`Self::take_while()`]
/// will do that.  It takes a predicate on characters (usually a
/// lambda) and consumes characters while the predicate is true.  All
/// the characters are then returned.  The following uses this to
/// obtain the next decimal integer from the stream.
///
/// ```rust
/// use std::io;
/// use trivet::Parser;
/// use trivet::parse_from_string;
///
/// # fn main() {
/// // Make a new parser around a string.
/// let mut parser = parse_from_string("-14x");
///
/// // Look for a minus sign.
/// let minus = parser.peek_and_consume('-');
///
/// // Read the next integer from the stream.
/// let digits = parser.take_while(|ch| ch.is_ascii_digit());
/// let number: i64 = (if minus { -1 } else { 1 })
///     * digits.parse().unwrap_or(0);
/// assert_eq!(number, -14i64);
///
/// // The stream now points to the first non-digit character.
/// assert_eq!(parser.peek(), 'x');
/// # }
/// ```
pub struct Parser {
    // Low-level character source and parsing primitives.  Boxed because, per the note
    // in `Parser::new`, this became necessary to avoid stack overflows in some cases.
    core: Box<ParserCore>,
    // Used by `consume_ws` when `parse_comments` is true.
    comment_parser: CommentParser,
    // Backs the `parse_string*` methods.
    string_parser: StringParser,
    // Backs the numeric `parse_*` methods.
    number_parser: NumberParser,
    // Backs `parse_keyword` and `parse_keyword_ws`.
    keyword_parser: KeywordParser,
    /// If true (default) then the [`Self::consume_ws`] method and other `_ws` methods
    /// look for and consume comments.  If false, then they consume whitespace only.
    pub parse_comments: bool,
}

impl Parser {
    /// Create a new parser using the given decoder as the source of characters.  A name is given
    /// that will be used when creating [`Loc`] instances.
    pub fn new(name: &str, decoder: Decode) -> Self {
        Parser {
            // This became necessary in some cases or there would be a stack overflow.
            core: Box::new(ParserCore::new(name, decoder)),
            comment_parser: CommentParser::new(),
            string_parser: StringParser::new(),
            number_parser: NumberParser::new(),
            keyword_parser: KeywordParser::new(),
            parse_comments: true,
        }
    }

    /// Borrow the parser core.  This returns a mutable borrow of the internal parser
    /// core.  This is probably only needed if you want to run or test an external
    /// comment parser.
    ///
    /// The borrow is tied to `self`, so the parser itself cannot be used until the
    /// returned reference is released.
    pub fn borrow_core(&mut self) -> &mut ParserCore {
        // The core is boxed; `&mut Box<ParserCore>` coerces to `&mut ParserCore`.
        &mut self.core
    }

    /// Borrow the comment parser.  This returns a mutable borrow of the internal
    /// comment parser that will allow you to configure the parser.  To swap in a
    /// different instance, see [`Self::replace_comment_parser`].
    pub fn borrow_comment_parser(&mut self) -> &mut CommentParser {
        &mut self.comment_parser
    }

    /// Replace the internal comment parser with the given instance of a comment parser.
    /// The prior comment parser is returned, so the caller may keep it and reinstall
    /// it later.
    pub fn replace_comment_parser(&mut self, compar: CommentParser) -> CommentParser {
        std::mem::replace(&mut self.comment_parser, compar)
    }

    /// Borrow the number parser.  This returns a mutable borrow of the internal number
    /// parser that will allow you to configure the parser.  To swap in a different
    /// instance, see [`Self::replace_number_parser`].
    pub fn borrow_number_parser(&mut self) -> &mut NumberParser {
        &mut self.number_parser
    }

    /// Replace the internal number parser with the given instance of a number parser.
    /// The prior number parser is returned.
    pub fn replace_number_parser(&mut self, numpar: NumberParser) -> NumberParser {
        std::mem::replace(&mut self.number_parser, numpar)
    }

    /// Borrow the string parser.  This returns a mutable borrow of the internal string
    /// parser that will allow you to configure the parser.  To swap in a different
    /// instance, see [`Self::replace_string_parser`].
    pub fn borrow_string_parser(&mut self) -> &mut StringParser {
        &mut self.string_parser
    }

    /// Replace the internal string parser with the given instance of a string parser.
    /// The prior string parser is returned, so the caller may keep it and reinstall
    /// it later.
    pub fn replace_string_parser(&mut self, strpar: StringParser) -> StringParser {
        std::mem::replace(&mut self.string_parser, strpar)
    }

    /// Borrow the keyword parser.  This returns a mutable borrow of the internal keyword
    /// parser that will allow you to configure the parser.  To swap in a different
    /// instance, see [`Self::replace_keyword_parser`].
    pub fn borrow_keyword_parser(&mut self) -> &mut KeywordParser {
        &mut self.keyword_parser
    }

    /// Replace the internal keyword parser with the given instance of a keyword parser.
    /// The prior keyword parser is returned, so the caller may keep it and reinstall
    /// it later.
    pub fn replace_keyword_parser(&mut self, strpar: KeywordParser) -> KeywordParser {
        std::mem::replace(&mut self.keyword_parser, strpar)
    }

    /// Parse a delimited string whose opening delimiter is the current character.
    /// That character is consumed and then handed to the embedded string parser as
    /// the closing delimiter to look for.
    ///
    /// If the parser is already at the end of the stream, an empty string is returned
    /// and nothing is consumed.
    pub fn parse_string_match_delimiter(&mut self) -> ParseResult<String> {
        if self.core.is_at_eof() {
            return Ok(String::new());
        }
        // Whatever character opens the string is also the one that must close it.
        let quote = self.core.peek();
        self.core.consume();
        self.string_parser.process(&mut self.core, Some(quote))
    }

    /// Parse the next string.  The opening delimiter, if any, should have already been
    /// consumed.  Provide the closing delimiter to look for.
    ///
    /// The work is done by the embedded string parser (see
    /// [`Self::borrow_string_parser`]), which is given the delimiter as its terminator.
    pub fn parse_string_until_delimiter(&mut self, delimiter: char) -> ParseResult<String> {
        self.string_parser.process(&mut self.core, Some(delimiter))
    }

    /// Same as [`Self::parse_string_match_delimiter`], followed by a call to
    /// [`Self::consume_ws`] to discard whitespace (and comments, when enabled) after the
    /// closing delimiter.  The whitespace is consumed whether or not the parse succeeded.
    pub fn parse_string_match_delimiter_ws(&mut self) -> ParseResult<String> {
        let parsed = self.parse_string_match_delimiter();
        // The Boolean returned by `consume_ws` is deliberately ignored here.
        let _ = self.consume_ws();
        parsed
    }

    /// Use the embedded string parser to parse the provided string.  This reads from the
    /// provided string; nothing is consumed from the parser's own stream during this
    /// process.  The primary use case for this is matching a more complex string environment
    /// and then using the embedded string parser to decode the result of the match.
    pub fn parse_string(&self, string: &str) -> ParseResult<String> {
        self.string_parser.parse_string(string)
    }

    /// Same as [`Self::parse_string_until_delimiter`], followed by a call to
    /// [`Self::consume_ws`] to discard whitespace (and comments, when enabled) after the
    /// closing delimiter.  The whitespace is consumed whether or not the parse succeeded.
    pub fn parse_string_until_delimiter_ws(&mut self, terminator: char) -> ParseResult<String> {
        let parsed = self.parse_string_until_delimiter(terminator);
        self.consume_ws();
        parsed
    }

    /// Parse an unsigned 128-bit integer (`u128`) using the embedded number parser.
    /// The parser should be on the first character of the integer.
    pub fn parse_u128(&mut self) -> ParseResult<u128> {
        self.number_parser.parse_u128(&mut self.core)
    }

    /// Parse a signed 128-bit integer (`i128`) using the embedded number parser.
    /// The parser should be on the first character of the integer.
    pub fn parse_i128(&mut self) -> ParseResult<i128> {
        self.number_parser.parse_i128(&mut self.core)
    }

    /// Parse an unsigned 64-bit integer (`u64`) using the embedded number parser.
    /// The parser should be on the first character of the integer.
    pub fn parse_u64(&mut self) -> ParseResult<u64> {
        self.number_parser.parse_u64(&mut self.core)
    }

    /// Parse a signed 64-bit integer (`i64`) using the embedded number parser.
    /// The parser should be on the first character of the integer.
    pub fn parse_i64(&mut self) -> ParseResult<i64> {
        self.number_parser.parse_i64(&mut self.core)
    }

    /// Parse a floating point number (`f64`) using the embedded number parser.  The
    /// parser should be on the first character of the number.  Radix specifiers are
    /// honored, if configured.  See [`Self::parse_f64_decimal`] for the decimal-only form.
    pub fn parse_f64(&mut self) -> ParseResult<f64> {
        self.number_parser.parse_f64(&mut self.core)
    }

    /// Parse a decimal floating point number (`f64`) using the embedded number parser.
    /// The parser should be on the first character of the number.  Radix specifiers are
    /// not allowed; the number is assumed to be decimal.
    pub fn parse_f64_decimal(&mut self) -> ParseResult<f64> {
        self.number_parser.parse_f64_decimal(&mut self.core)
    }

    /// Parse an unsigned 128-bit integer as [`Self::parse_u128`] does, then call
    /// [`Self::consume_ws`] to discard trailing whitespace (and comments, when enabled).
    /// The whitespace is consumed whether or not the parse succeeded.
    pub fn parse_u128_ws(&mut self) -> ParseResult<u128> {
        let parsed = self.parse_u128();
        self.consume_ws();
        parsed
    }

    /// Parse a signed 128-bit integer as [`Self::parse_i128`] does, then call
    /// [`Self::consume_ws`] to discard trailing whitespace (and comments, when enabled).
    /// The whitespace is consumed whether or not the parse succeeded.
    pub fn parse_i128_ws(&mut self) -> ParseResult<i128> {
        let parsed = self.parse_i128();
        self.consume_ws();
        parsed
    }

    /// Parse an unsigned 64-bit integer as [`Self::parse_u64`] does, then call
    /// [`Self::consume_ws`] to discard trailing whitespace (and comments, when enabled).
    /// The whitespace is consumed whether or not the parse succeeded.
    pub fn parse_u64_ws(&mut self) -> ParseResult<u64> {
        let parsed = self.parse_u64();
        self.consume_ws();
        parsed
    }

    /// Parse a signed 64-bit integer as [`Self::parse_i64`] does, then call
    /// [`Self::consume_ws`] to discard trailing whitespace (and comments, when enabled).
    /// The whitespace is consumed whether or not the parse succeeded.
    pub fn parse_i64_ws(&mut self) -> ParseResult<i64> {
        let parsed = self.parse_i64();
        self.consume_ws();
        parsed
    }

    /// Parse a floating point number as [`Self::parse_f64`] does (radix specifiers are
    /// honored, if configured), then call [`Self::consume_ws`] to discard trailing
    /// whitespace (and comments, when enabled).  The whitespace is consumed whether or
    /// not the parse succeeded.
    pub fn parse_f64_ws(&mut self) -> ParseResult<f64> {
        let parsed = self.parse_f64();
        self.consume_ws();
        parsed
    }

    /// Parse a decimal floating point number as [`Self::parse_f64_decimal`] does (radix
    /// specifiers are not allowed), then call [`Self::consume_ws`] to discard trailing
    /// whitespace (and comments, when enabled).  The whitespace is consumed whether or
    /// not the parse succeeded.
    pub fn parse_f64_decimal_ws(&mut self) -> ParseResult<f64> {
        let parsed = self.parse_f64_decimal();
        self.consume_ws();
        parsed
    }

    /// Parse a keyword from the stream using the keyword parser.  The parser should be on the first
    /// character of the keyword.  The keyword parser can be configured through
    /// [`Self::borrow_keyword_parser`].
    pub fn parse_keyword(&mut self) -> ParseResult<String> {
        self.keyword_parser.parse(&mut self.core)
    }

    /// Parse a keyword as [`Self::parse_keyword`] does, then call [`Self::consume_ws`]
    /// to discard trailing whitespace (and comments, when enabled).  The whitespace is
    /// consumed whether or not the parse succeeded.
    pub fn parse_keyword_ws(&mut self) -> ParseResult<String> {
        let parsed = self.parse_keyword();
        self.consume_ws();
        parsed
    }

    //-------------------------------------------------------------------------
    // Start of methods reflected to ParserCore
    //-------------------------------------------------------------------------

    /// Get the current location in the parse.  This will return either a console (if the name is
    /// the empty string) or a file location (if the name was not the empty string).
    ///
    /// Forwards to [`ParserCore::loc`].
    #[inline(always)]
    pub fn loc(&self) -> Loc {
        self.core.loc()
    }

    /// Determine if the parser has reached the end of the stream.  If this is true, then no further
    /// characters are available from this parser.
    ///
    /// Forwards to [`ParserCore::is_at_eof`].
    #[inline(always)]
    pub fn is_at_eof(&self) -> bool {
        self.core.is_at_eof()
    }

    /// Peek at the next character in the stream.  In order to be as fast as is reasonable,
    /// no stream checking is done.  If the stream is at the end, then you should get null
    /// characters, but you should not rely on that, since the null is also a valid character
    /// in a file.  Instead, be sure to check [`Parser::is_at_eof`].
    ///
    /// If this method is invoked too many times without any characters being consumed, then it
    /// will panic to indicate that parsing has stalled.  See [`PEEK_LIMIT`].
    ///
    /// Forwards to [`ParserCore::peek`].
    #[inline(always)]
    pub fn peek(&mut self) -> char {
        self.core.peek()
    }

    /// Consume the next character from the stream, if there is one.  If not, then do nothing.
    ///
    /// If this method is invoked too many times after reaching the end of file, then it will panic
    /// to indicate that parsing has stalled.  See [`EOF_LIMIT`].
    ///
    /// Forwards to [`ParserCore::consume`].
    #[inline(always)]
    pub fn consume(&mut self) {
        self.core.consume()
    }

    /// Peek at an offset in the stream.  That is, peek at a character at a given position.
    /// The position index is zero-based, with the *next* character to read (the result of
    /// a simple [`Self::peek`]) being at index zero.
    ///
    /// If there are not enough characters in the stream, then null (`\0`) is returned.
    /// The distance is limited by the maximum lookahead; attempts to look past it will
    /// also return a null.
    ///
    /// Note the distinction between this method and [`Self::peek_n`]: `peek_n(1)`
    /// returns the character at position zero, so it corresponds to `peek()` and
    /// to `peek_offset(0)`.
    ///
    /// Forwards to [`ParserCore::peek_offset`].
    #[inline(always)]
    pub fn peek_offset(&mut self, n: usize) -> char {
        self.core.peek_offset(n)
    }

    /// Peek at characters in the stream.  If there are fewer than `n` characters in the
    /// stream, then fewer are returned.  If the stream is exhausted, an empty string is
    /// returned.
    ///
    /// If this method is invoked too many times without any characters being consumed, then it
    /// will panic to indicate that parsing has stalled.  See [`PEEK_LIMIT`].
    ///
    /// Forwards to [`ParserCore::peek_n`].
    #[inline(always)]
    pub fn peek_n(&mut self, n: usize) -> String {
        self.core.peek_n(n)
    }

    /// Peek at characters in the stream.  If there are fewer than `n` characters in the
    /// stream, then fewer are returned.  If the stream is exhausted, an empty vector is
    /// returned.
    ///
    /// If this method is invoked too many times without any characters being consumed, then it
    /// will panic to indicate that parsing has stalled.  See [`PEEK_LIMIT`].
    ///
    /// This method is similar to [`Self::peek_n`], but does not construct a string for the
    /// result, which can be better in some cases.
    ///
    /// Forwards to [`ParserCore::peek_n_vec`].
    #[inline(always)]
    pub fn peek_n_vec(&mut self, n: usize) -> Vec<char> {
        self.core.peek_n_vec(n)
    }

    /// Consume a given number of characters from the stream.  The end of file is not checked during
    /// this.  If there are no characters to consume, nothing is done.
    ///
    /// If this method is invoked too many times after reaching the end of file, it will panic to
    /// indicate that parsing has stalled.  See [`EOF_LIMIT`].
    ///
    /// Forwards to [`ParserCore::consume_n`].
    #[inline(always)]
    pub fn consume_n(&mut self, n: usize) {
        self.core.consume_n(n)
    }

    /// Check the next characters in the stream.  If the next characters exactly match those
    /// given in the slice, then true is returned.  Otherwise false is returned.  Nothing is
    /// consumed.
    ///
    /// Forwards to [`ParserCore::peek_chars`].
    #[inline(always)]
    pub fn peek_chars(&mut self, chars: &[char]) -> bool {
        self.core.peek_chars(chars)
    }

    /// Check the next characters in the stream and, if they match, consume them and return true.
    /// Otherwise return false.
    ///
    /// Forwards to [`ParserCore::peek_and_consume_chars`].
    #[inline(always)]
    pub fn peek_and_consume_chars(&mut self, chars: &[char]) -> bool {
        self.core.peek_and_consume_chars(chars)
    }

    /// Consume characters until an end token is found.  The characters consumed are returned
    /// without the end token.
    ///
    /// Forwards to [`ParserCore::take_until`].
    #[inline(always)]
    pub fn take_until(&mut self, token: &str) -> String {
        self.core.take_until(token)
    }

    /// Consume characters so long as the test is true.  Return the characters that
    /// satisfy the test.
    ///
    /// Forwards to [`ParserCore::take_while`].
    #[inline(always)]
    pub fn take_while<T: Fn(char) -> bool>(&mut self, include: T) -> String {
        self.core.take_while(include)
    }

    /// Consume characters so long as either test is true.  Return only those characters that
    /// satisfy the first test (`include`).  The `exclude` predicate is checked *first*.
    ///
    /// Forwards to [`ParserCore::take_while_unless`].
    #[inline(always)]
    pub fn take_while_unless<T: Fn(char) -> bool, U: Fn(char) -> bool>(
        &mut self,
        include: T,
        exclude: U,
    ) -> String {
        self.core.take_while_unless(include, exclude)
    }

    /// Consume and return characters.  This works as follows.
    ///
    /// If the current character satisfies `stop`, then the parse is stopped and the result is returned,
    /// regardless of whether any other predicates match.
    ///
    /// If the current character satisfies `skip`, then the character is skipped.
    ///
    /// Other characters (those that do not match `skip` or `stop`) are collected and returned.
    ///
    /// The returned pair contains all matched characters and the character that caused the stop, or
    /// `None` if parsing stopped because the end of stream was reached.  Because the stopping character
    /// is returned, it is also consumed.
    ///
    /// Forwards to [`ParserCore::take`].
    #[inline]
    pub fn take<S, K>(&mut self, skip: S, stop: K) -> (Vec<char>, Option<char>)
    where
        S: Fn(char) -> bool,
        K: Fn(char) -> bool,
    {
        self.core.take(skip, stop)
    }

    /// Consume characters so long as the test is true.  Returns true if any characters are consumed.
    ///
    /// Forwards to [`ParserCore::consume_while`].
    #[inline(always)]
    pub fn consume_while<T: Fn(char) -> bool>(&mut self, include: T) -> bool {
        self.core.consume_while(include)
    }

    /// Consume characters until the given end token is found.  Returns true if any characters are
    /// consumed.  The end token is also consumed.
    ///
    /// Forwards to [`ParserCore::consume_until`].
    #[inline(always)]
    pub fn consume_until(&mut self, token: &str) -> bool {
        self.core.consume_until(token)
    }

    /// Consume all whitespace starting at the current position.  The definition of whitespace
    /// used here is the same as the
    /// [Unicode standard](https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt).
    ///
    /// At the time of writing, the following is the definition of whitespace used.
    ///
    /// ```text
    /// 0009..000D    ; White_Space # Cc   [5] <control-0009>..<control-000D>
    /// 0020          ; White_Space # Zs       SPACE
    /// 0085          ; White_Space # Cc       <control-0085>
    /// 00A0          ; White_Space # Zs       NO-BREAK SPACE
    /// 1680          ; White_Space # Zs       OGHAM SPACE MARK
    /// 2000..200A    ; White_Space # Zs  [11] EN QUAD..HAIR SPACE
    /// 2028          ; White_Space # Zl       LINE SEPARATOR
    /// 2029          ; White_Space # Zp       PARAGRAPH SEPARATOR
    /// 202F          ; White_Space # Zs       NARROW NO-BREAK SPACE
    /// 205F          ; White_Space # Zs       MEDIUM MATHEMATICAL SPACE
    /// 3000          ; White_Space # Zs       IDEOGRAPHIC SPACE
    /// ```
    ///
    /// This method is slightly faster than the [`Self::consume_ws`] method as it does not
    /// look for comments.
    ///
    /// Forwards to [`ParserCore::consume_ws_only`] and returns its Boolean result.
    pub fn consume_ws_only(&mut self) -> bool {
        self.core.consume_ws_only()
    }

    /// Peek at the next character in the stream.  If it is the given character, consume it and
    /// return true.  Otherwise return false.
    ///
    /// Forwards to [`ParserCore::peek_and_consume`].
    #[inline(always)]
    pub fn peek_and_consume(&mut self, ch: char) -> bool {
        self.core.peek_and_consume(ch)
    }

    //-------------------------------------------------------------------------
    // End of reflected methods
    //-------------------------------------------------------------------------

    /// Require the character `ch` at the current position in the stream.  If it is
    /// found, it is consumed and `Ok(())` is returned.  If it is not found, a syntax
    /// error naming both the expected character and the character actually present is
    /// returned.
    pub fn expect(&mut self, ch: char) -> ParseResult<()> {
        if self.core.peek_and_consume(ch) {
            return Ok(());
        }
        // Capture the location before peeking at the offending character.
        let loc = self.loc();
        let found = self.peek();
        Err(syntax_error(
            loc,
            &format!("Expected to find {:?}, but instead found {:?}", ch, found),
        ))
    }

    /// Require the character sequence `ch` at the current position in the stream.  If
    /// it is found, it is consumed and `Ok(())` is returned.  If it is not found, a
    /// syntax error is returned that shows the expected text and the text of the same
    /// length (or shorter, near the end of the stream) that was actually present.
    pub fn expect_chars(&mut self, ch: &[char]) -> ParseResult<()> {
        if self.core.peek_and_consume_chars(ch) {
            return Ok(());
        }
        let wanted: String = ch.iter().collect();
        let actual = self.core.peek_n(ch.len());
        let message = format!(
            "Expected to find {:?}, but instead found {:?}",
            wanted, actual
        );
        Err(syntax_error(self.loc(), &message))
    }

    /// Report whether the upcoming characters in the stream spell out `value`.  Returns
    /// true on a match and false otherwise.  Nothing is consumed either way.
    pub fn peek_str(&mut self, value: &str) -> bool {
        // The core matches on characters, so split the string up first.
        let wanted: Vec<char> = value.chars().collect();
        self.core.peek_chars(&wanted)
    }

    /// Greedy form of [`Self::peek_chars`].  Returns true when the upcoming characters
    /// match `chars` and the same sequence does *not* also match starting one position
    /// later in the stream.  Nothing is consumed.
    ///
    /// An empty `chars` always matches.
    pub fn peek_chars_greedy(&mut self, chars: &[char]) -> bool {
        if chars.is_empty() {
            return true;
        }
        let wanted = chars.len();
        // Read one character more than needed so the shifted-by-one match can be tested.
        let window = self.core.peek_n_vec(wanted + 1);
        match window.len().cmp(&wanted) {
            // Not enough characters left in the stream to match at all.
            Ordering::Less => false,
            // Exactly enough characters remain; there is no later position to check.
            Ordering::Equal => window == chars,
            // Match here, but reject if the sequence also matches one position on.
            Ordering::Greater => window[..wanted] == *chars && window[1..=wanted] != *chars,
        }
    }

    /// Peek ahead and see if the provided string is present in the stream.
    /// This method returns false if the token is also at an offset of one,
    /// which may seem an odd choice, but allows matching end tokens where
    /// there are repeated characters, such as `"""`.
    ///
    /// The lookahead is measured in characters rather than bytes, so tokens that
    /// contain non-ASCII characters are handled the same way as ASCII tokens.
    /// Nothing is consumed.
    ///
    /// This is a relatively costly operation.
    pub fn peek_str_greedy(&mut self, value: &str) -> bool {
        // `peek_n` counts characters, so the token must be measured in characters too.
        // The byte length (`str::len`) is larger for multi-byte characters, which made
        // the lookahead too long and the comparison below fail for such tokens.
        let count = value.chars().count();
        let here = self.peek_n(count);
        let next = self.peek_n(count + 1);
        if here == value {
            // Either the stream ends right after the token, or the token must not
            // also appear one character further along.
            here.len() == next.len() || !next.ends_with(value)
        } else {
            false
        }
    }

    /// Test whether the next character in the stream is `ch`.  On a match the character
    /// is consumed, trailing whitespace is consumed via [`Self::consume_ws`], and true is
    /// returned.  Otherwise nothing is consumed and false is returned.
    pub fn peek_and_consume_ws(&mut self, ch: char) -> bool {
        if self.core.peek() != ch {
            return false;
        }
        self.core.consume();
        self.consume_ws();
        true
    }

    /// Test whether the upcoming characters are exactly `chars`, in order.  On a match
    /// they are consumed, trailing whitespace is consumed via [`Self::consume_ws`], and
    /// true is returned.  Otherwise false is returned and no whitespace is consumed.
    pub fn peek_and_consume_chars_ws(&mut self, chars: &[char]) -> bool {
        if self.core.peek_and_consume_chars(chars) {
            self.consume_ws();
            true
        } else {
            false
        }
    }

    /// Test whether the upcoming characters spell out `value`.  On a match they are
    /// consumed and true is returned; otherwise false is returned.
    pub fn peek_and_consume_str(&mut self, value: &str) -> bool {
        // The core matches on characters, so split the string up first.
        let wanted: Vec<char> = value.chars().collect();
        self.core.peek_and_consume_chars(&wanted)
    }

    /// Test whether the upcoming characters spell out `value`.  On a match they are
    /// consumed, trailing whitespace is consumed via [`Self::consume_ws`], and true is
    /// returned.  Otherwise false is returned and no whitespace is consumed.
    pub fn peek_and_consume_str_ws(&mut self, value: &str) -> bool {
        let wanted: Vec<char> = value.chars().collect();
        if !self.core.peek_and_consume_chars(&wanted) {
            return false;
        }
        self.consume_ws();
        true
    }

    /// Consume whitespace.  Every `_ws` method form ends up here.
    ///
    /// When [`Self::parse_comments`] is true, the work is handed to the embedded comment
    /// parser so that comments are discarded as well.  When it is false, only whitespace
    /// is consumed, using the core's whitespace-only routine.  The Boolean produced by
    /// whichever routine ran is returned unchanged.
    pub fn consume_ws(&mut self) -> bool {
        if !self.parse_comments {
            return self.core.consume_ws_only();
        }
        self.comment_parser.process(&mut self.core)
    }

    /// Collect characters up to the given end token.  The token is located with greedy
    /// matching (see [`Self::peek_chars_greedy`]); once found it is consumed but left
    /// out of the returned string.
    ///
    /// If the end of the stream is reached without finding the token, the outcome
    /// depends on `must_match`: when true, a syntax error located at the position where
    /// this call started is returned; when false, the characters collected so far are
    /// returned.
    pub fn take_until_greedy(&mut self, chars: &[char], must_match: bool) -> ParseResult<String> {
        // Remember where we started so an unterminated run is reported at its origin.
        let start = self.loc();
        let mut collected = String::new();
        loop {
            if self.is_at_eof() {
                break;
            }
            if self.peek_chars_greedy(chars) {
                self.consume_n(chars.len());
                return Ok(collected);
            }
            collected.push(self.peek());
            self.consume();
        }
        if !must_match {
            return Ok(collected);
        }
        let token: String = chars.iter().collect();
        Err(syntax_error(
            start,
            &format!("Expected to find terminating {:?}, but did not.", token),
        ))
    }
}