math-core 0.6.1

use std::{collections::VecDeque, ops::Range};

use crate::{
    character_class::Class,
    error::{LatexErrKind, LatexError},
    lexer::Lexer,
    token::{EndToken, Span, TokSpan, Token},
};

/// A token queue that allows peeking at the next non-whitespace token.
///
/// Tokens are pulled from the lexer on demand and kept in a ring buffer until they
/// are consumed, so callers can look ahead past whitespace without consuming anything.
pub(super) struct TokenQueue<'config, 'source> {
    /// The lexer that new tokens are pulled from.
    pub lexer: Lexer<'config, 'source>,
    /// Tokens that were lexed (or pushed back in front) but not yet consumed.
    queue: VecDeque<TokSpan<'source>>,
    /// Set once a load hit `Token::Eoi` without the active predicate accepting it;
    /// from then on no further tokens are requested from the lexer.
    lexer_is_eoi: bool,
    /// Index into `queue` of the next non-whitespace token, or one past the end of
    /// `queue` if it currently holds no such token.
    next_non_whitespace: usize,
}

/// Shared end-of-input token, built with `Span::zero_width(0)`.
///
/// `TokenQueue` hands this out whenever its buffer has no token left to return.
static EOI_TOK: TokSpan = TokSpan::new(Token::Eoi, Span::zero_width(0));

impl<'config, 'source> TokenQueue<'config, 'source> {
    pub(super) fn new(lexer: Lexer<'config, 'source>) -> Result<Self, Box<LatexError>> {
        let mut tm = TokenQueue {
            lexer,
            queue: VecDeque::with_capacity(2),
            lexer_is_eoi: false,
            next_non_whitespace: 0,
        };
        // Ensure that we have at least one non-whitespace token in the buffer for peeking.
        let idx = tm.load_token_skip_whitespace()?;
        tm.next_non_whitespace = idx;
        Ok(tm)
    }

    /// Pull tokens from the lexer until a non-whitespace token has been buffered.
    ///
    /// Returns the queue index of that token. If no such token could be loaded, the
    /// returned index is one past the end of the queue.
    fn load_token_skip_whitespace(&mut self) -> Result<usize, Box<LatexError>> {
        let loaded = self.load_token(is_not_whitespace)?;
        Ok(match loaded {
            Some(idx) => idx,
            // Read the length only after loading, so it accounts for any tokens
            // that were buffered along the way.
            None => self.queue.len(),
        })
    }

    /// Pull tokens from the lexer into the buffer until `predicate` accepts one.
    ///
    /// `predicate` is called with the queue index the token will occupy and the token
    /// itself; every token is buffered regardless of the verdict. The first `Some`
    /// verdict is returned. If `Token::Eoi` is buffered without being accepted, the
    /// queue is marked as exhausted and `None` is returned; once exhausted, this
    /// function returns `None` without touching the lexer.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the lexer.
    fn load_token<T>(
        &mut self,
        predicate: fn(usize, &Token) -> Option<T>,
    ) -> Result<Option<T>, Box<LatexError>> {
        if self.lexer_is_eoi {
            return Ok(None);
        }
        loop {
            let tokspan = self.lexer.next_token()?;
            // The token is appended below, so its index is the current length.
            let idx = self.queue.len();
            let verdict = predicate(idx, tokspan.token());
            let reached_eoi = matches!(tokspan.token(), Token::Eoi);
            self.queue.push_back(tokspan);
            if verdict.is_some() {
                return Ok(verdict);
            }
            if reached_eoi {
                self.lexer_is_eoi = true;
                return Ok(None);
            }
        }
    }

    /// Perform a linear search to find the next non-whitespace token in the buffer.
    fn find_next_non_whitespace(&self) -> Option<usize> {
        self.queue
            .iter()
            .position(|tokspan| !matches!(tokspan.token(), Token::Whitespace))
    }

    /// Ensure that `next_non_whitespace` points to the next non-whitespace token in the buffer,
    /// or to one past the end if there is none.
    fn ensure_next_non_whitespace(&mut self) -> Result<(), Box<LatexError>> {
        let pos = 'pos_calc: {
            // First, try to find the next non-whitespace token in the existing buffer.
            if !self.queue.is_empty()
                && let Some(pos) = self.find_next_non_whitespace()
            {
                break 'pos_calc pos;
            }
            // Then, try to load more tokens until we find one or reach EOI.
            self.load_token_skip_whitespace()?
        };
        self.next_non_whitespace = pos;
        Ok(())
    }

    /// Look at the next non-whitespace token without consuming it.
    ///
    /// When the buffer has no token at `next_non_whitespace`, the shared EOI token is
    /// returned instead. The other methods of `TokenQueue` maintain the invariant that
    /// this only happens after the end of the input has been reached; debug builds
    /// assert this.
    #[inline]
    pub(super) fn peek(&self) -> &TokSpan<'source> {
        match self.queue.get(self.next_non_whitespace) {
            Some(tokspan) => tokspan,
            // The index is one past the end: nothing is left to peek at.
            None => {
                debug_assert!(self.lexer_is_eoi, "peek called without ensure");
                &EOI_TOK
            }
        }
    }

    /// Return the first verdict `predicate` gives for a token located after
    /// `next_non_whitespace`.
    ///
    /// The token at `next_non_whitespace` itself is deliberately skipped: the caller
    /// has either inspected it already or does not care about it. Buffered tokens
    /// are tried first, each together with its queue index; if none is accepted,
    /// further tokens are loaded from the lexer.
    ///
    /// Returns `None` when `next_non_whitespace` is already past the end of the
    /// buffer or when loading ran out of tokens.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the lexer.
    fn find_or_load_after_next<T>(
        &mut self,
        predicate: fn(usize, &Token) -> Option<T>,
    ) -> Result<Option<T>, Box<LatexError>> {
        let next = self.next_non_whitespace;
        if next >= self.queue.len() {
            debug_assert!(
                self.lexer_is_eoi,
                "find_or_load_after_next called without ensure"
            );
            return Ok(None);
        }

        // Search the buffered tokens that come after `next_non_whitespace`.
        let buffered_hit = self
            .queue
            .iter()
            .enumerate()
            .skip(next + 1)
            .find_map(|(idx, tokspan)| predicate(idx, tokspan.token()));

        match buffered_hit {
            Some(hit) => Ok(Some(hit)),
            // Nothing suitable is buffered; keep lexing until a match or EOI.
            None => self.load_token(predicate),
        }
    }

    /// Look at the second non-whitespace token without consuming anything.
    ///
    /// Falls back to the shared EOI token when there is no such token.
    ///
    /// # Errors
    ///
    /// Propagates lexer errors raised while loading look-ahead tokens.
    pub(super) fn peek_second(&mut self) -> Result<&TokSpan<'source>, Box<LatexError>> {
        let second = match self.find_or_load_after_next(is_not_whitespace)? {
            Some(idx) => self.queue.get(idx),
            None => None,
        };
        match second {
            Some(tokspan) => Ok(tokspan),
            None => {
                debug_assert!(self.lexer_is_eoi, "peek_second called without ensure");
                Ok(&EOI_TOK)
            }
        }
    }

    /// Determine the character class of the first upcoming token that has one.
    ///
    /// Tokens for which `Token::class` returns `None` are skipped. When `in_sequence`
    /// is `false`, no look-ahead happens and `Class::Default` is returned. When the
    /// input ends before a token with a class is found, `Class::Close` is returned.
    ///
    /// # Errors
    ///
    /// Propagates lexer errors raised while loading look-ahead tokens.
    pub(super) fn peek_class_token(&mut self, in_sequence: bool) -> Result<Class, Box<LatexError>> {
        if !in_sequence {
            return Ok(Class::Default);
        }
        // Fast path: the token we would peek at anyway usually has a class.
        if let Some(class) = self.peek().token().class() {
            return Ok(class);
        }
        let Some(class) = self.find_or_load_after_next(has_class)? else {
            debug_assert!(self.lexer_is_eoi, "peek_class_token called without ensure");
            // EOI is treated as having class `Close`.
            return Ok(Class::Close);
        };
        Ok(class)
    }

    /// Consume and return the next math-mode token.
    ///
    /// Whitespace in front of the token is discarded and the token is passed through
    /// `unwrap_math`, so the result is neither whitespace nor a
    /// [`Token::MathOrTextMode`] (asserted in debug builds).
    ///
    /// Before returning, the queue is refilled so that a token is available for
    /// peeking again. If the buffer is already drained, the shared EOI token is
    /// returned.
    ///
    /// # Errors
    ///
    /// Propagates lexer errors raised while refilling the buffer.
    pub(super) fn next(&mut self) -> Result<TokSpan<'source>, Box<LatexError>> {
        // Throw away everything in front of `next_non_whitespace`. The count is
        // clamped so that an index one past the end simply empties the buffer.
        let discard = self.next_non_whitespace.min(self.queue.len());
        drop(self.queue.drain(..discard));

        let Some(raw) = self.queue.pop_front() else {
            // We must have reached EOI previously.
            debug_assert!(self.lexer_is_eoi, "next called without ensure");
            return Ok(EOI_TOK);
        };
        self.ensure_next_non_whitespace()?;

        let (tok, span) = raw.into_parts();
        let unwrapped = TokSpan::new(tok.unwrap_math(), span);
        debug_assert!(!matches!(
            unwrapped.token(),
            Token::Whitespace | Token::MathOrTextMode(_, _)
        ));
        Ok(unwrapped)
    }

    /// Consume and return the very next token, without skipping or unwrapping anything.
    ///
    /// Unlike `next`, this may return whitespace tokens and [`Token::MathOrTextMode`].
    /// If the buffer is already drained, the shared EOI token is returned.
    ///
    /// # Errors
    ///
    /// Propagates lexer errors raised while refilling the buffer.
    pub(super) fn next_any_token(&mut self) -> Result<TokSpan<'source>, Box<LatexError>> {
        let Some(front) = self.queue.pop_front() else {
            // We must have reached EOI previously.
            debug_assert!(self.lexer_is_eoi, "next_any_token called without ensure");
            return Ok(EOI_TOK);
        };
        match self.next_non_whitespace.checked_sub(1) {
            // A token in front of `next_non_whitespace` was removed, so the index
            // just moves one step towards the front.
            Some(shifted) => self.next_non_whitespace = shifted,
            // We popped `next_non_whitespace` itself, so we need to find the next one.
            None => self.ensure_next_non_whitespace()?,
        }
        Ok(front)
    }

    /// Push `tokens` back in front of the buffer, so that they are returned next and
    /// in the given order.
    ///
    /// The buffer is a ring buffer, so this is efficient as long as the number of
    /// tokens is not too large. `next_non_whitespace` is recomputed afterwards.
    pub(super) fn queue_in_front(&mut self, tokens: &[impl Into<TokSpan<'source>> + Copy]) {
        self.queue.reserve(tokens.len());
        // Pushing to the front reverses the order, so walk the slice backwards.
        for &tok in tokens.iter().rev() {
            self.queue.push_front(tok.into());
        }

        self.next_non_whitespace = self.find_next_non_whitespace().unwrap_or_else(|| {
            // There is only one scenario in which no non-whitespace token is found:
            // EOI was reached previously and all queued tokens are whitespace.
            debug_assert!(self.lexer_is_eoi, "queue_in_front called without ensure");
            self.queue.len()
        });
    }

    /// Record the tokens of a group into `tokens`, up to its (unopened) closing `}`.
    ///
    /// The opening `{` must already have been consumed. Nested `{`/`}` pairs are
    /// recorded like any other token; the closing `}` of the group itself is consumed
    /// but not recorded. With `preserve_all`, tokens are read with `next_any_token`
    /// (whitespace is kept); otherwise they are read with `next`.
    ///
    /// Returns the end position of the closing `}`.
    ///
    /// # Errors
    ///
    /// Returns `LatexErrKind::UnclosedGroup` if the input ends before the group is
    /// closed, and propagates any lexer error. Tokens recorded up to that point
    /// remain in `tokens`.
    pub(super) fn record_group(
        &mut self,
        tokens: &mut Vec<TokSpan<'source>>,
        preserve_all: bool,
    ) -> Result<usize, Box<LatexError>> {
        // Number of nested groups that are currently open inside this group.
        let mut depth = 0usize;
        loop {
            let tokspan = if preserve_all {
                self.next_any_token()?
            } else {
                self.next()?
            };
            match tokspan.token() {
                Token::GroupBegin => depth += 1,
                Token::GroupEnd => match depth.checked_sub(1) {
                    Some(outer) => depth = outer,
                    // This `}` closes the group itself: stop without recording it.
                    None => return Ok(tokspan.span().end()),
                },
                Token::Eoi => {
                    return Err(Box::new(LatexError(
                        tokspan.span().into(),
                        LatexErrKind::UnclosedGroup(EndToken::GroupClose),
                    )));
                }
                _ => {}
            }
            tokens.push(tokspan);
        }
    }

    /// Read one macro argument: either a single token or a braced group of tokens.
    ///
    /// Whitespace in front of the argument is always skipped. If the argument is a
    /// group, `preserve_all` decides whether whitespace tokens inside the group end up
    /// in the returned vector. With `preserve_all`, a single-token argument is also
    /// returned as it was peeked, i.e. without the unwrapping that `next` performs.
    ///
    /// # Errors
    ///
    /// Propagates lexer errors and the errors of `record_group`.
    pub fn read_argument(
        &mut self,
        preserve_all: bool,
    ) -> Result<MacroArgument<'source>, Box<LatexError>> {
        // `next()` both skips leading whitespace and unwraps the token. When
        // everything is to be preserved, keep a copy of the peeked token and use
        // `next()` only to advance the queue.
        let raw_first = preserve_all.then(|| *self.peek());
        let unwrapped_first = self.next()?;
        let first = raw_first.unwrap_or(unwrapped_first);

        if !matches!(first.token(), Token::GroupBegin) {
            return Ok(MacroArgument::Token(first));
        }
        // Read until the matching `}`.
        let mut group = Vec::new();
        let end_loc = self.record_group(&mut group, preserve_all)?;
        Ok(MacroArgument::Group(group, first.span().start()..end_loc))
    }
}

/// Predicate for the token search functions: yields `idx` for every token except
/// `Token::Whitespace`.
fn is_not_whitespace(idx: usize, tok: &Token) -> Option<usize> {
    match tok {
        Token::Whitespace => None,
        _ => Some(idx),
    }
}

/// Predicate for the token search functions: yields the token's class, if it has one.
///
/// The index is ignored; it is only part of the signature so that this function can
/// be passed wherever `is_not_whitespace` can.
fn has_class(_idx: usize, tok: &Token) -> Option<Class> {
    tok.class()
}

/// A macro argument, which is either a single token or a group of tokens.
pub enum MacroArgument<'source> {
    /// The argument was a single token that did not open a group.
    Token(TokSpan<'source>),
    /// The argument was a braced group; the vector holds the tokens between the braces.
    ///
    /// The `Range` is the range of the entire group, including the opening and closing braces.
    Group(Vec<TokSpan<'source>>, Range<usize>),
}

impl<'source> MacroArgument<'source> {
    /// Try to interpret this macro argument as at most one token.
    ///
    /// A single token and a group with exactly one token both yield
    /// `OneOrNone::One`; an empty group yields `OneOrNone::None` carrying the range
    /// of the group.
    ///
    /// # Errors
    ///
    /// Returns `LatexErrKind::ExpectedAtMostOneToken`, located at the group's range,
    /// if the group contains more than one token.
    pub fn into_one_or_none(self) -> Result<OneOrNone<'source>, Box<LatexError>> {
        let (tokens, span) = match self {
            MacroArgument::Token(tok) => return Ok(OneOrNone::One(tok)),
            MacroArgument::Group(tokens, span) => (tokens, span),
        };
        // Looking at the first two tokens is enough to tell the three cases apart.
        let mut contents = tokens.into_iter();
        match (contents.next(), contents.next()) {
            (None, _) => Ok(OneOrNone::None(span)),
            (Some(only), None) => Ok(OneOrNone::One(only)),
            (Some(_), Some(_)) => Err(Box::new(LatexError(
                span,
                LatexErrKind::ExpectedAtMostOneToken,
            ))),
        }
    }
}

/// The result of interpreting a macro argument as at most one token.
pub enum OneOrNone<'source> {
    /// The argument consisted of exactly one token.
    One(TokSpan<'source>),
    /// The argument was an empty group; the `Range` is the range of that group.
    None(Range<usize>),
}

impl<'source> From<OneOrNone<'source>> for Option<TokSpan<'source>> {
    /// Keep the token of `OneOrNone::One` and drop the range of `OneOrNone::None`.
    fn from(value: OneOrNone<'source>) -> Self {
        if let OneOrNone::One(tok) = value {
            Some(tok)
        } else {
            None
        }
    }
}

#[cfg(test)]
mod tests {
    use std::fmt::Write;

    use insta::assert_snapshot;

    use super::*;

    /// Snapshot the tokens (or the error report) that `record_group` produces for a
    /// range of well-formed and malformed groups.
    #[test]
    fn test_record_group() {
        let problems = [
            ("simple_group", r"{x+y}"),
            ("group_followed", r"{x+y} b"),
            ("nested_group", r"{x + {y - z}} c"),
            ("unclosed_group", r"{x + y"),
            ("unclosed_nested_group", r"{x + {y + z}"),
            ("too_many_closes", r"{x + y} + z}"),
            ("empty_group", r"{} d"),
            ("group_with_begin", r"{\begin{matrix}}"),
            ("early_error", r"{x + \unknowncmd + y}"),
        ];

        for (name, problem) in problems {
            let lexer = Lexer::new(problem, false, None);
            let mut manager = TokenQueue::new(lexer).expect("Failed to create TokenManager");
            // Pre-load a couple of tokens, so that recording starts from a buffer
            // that is already partially filled.
            for _ in 0..2 {
                manager.load_token_skip_whitespace().unwrap();
            }
            // The group opener has to be consumed before recording.
            let opener = manager.next().unwrap();
            assert!(matches!(opener.token(), Token::GroupBegin));

            let mut recorded = Vec::new();
            let rendered = match manager.record_group(&mut recorded, true) {
                Ok(_) => recorded
                    .into_iter()
                    .fold(String::new(), |mut out, tokloc| {
                        let (tok, span) = tokloc.into_parts();
                        writeln!(out, "{}..{}: {:?}", span.start(), span.end(), tok).unwrap();
                        out
                    }),
                Err(error) => {
                    let mut buf = Vec::new();
                    error
                        .to_report("<input>", false)
                        .write(("<input>", ariadne::Source::from(problem)), &mut buf)
                        .expect("failed to write report");
                    String::from_utf8(buf).expect("report should be valid UTF-8")
                }
            };
            assert_snapshot!(name, &rendered, problem);
        }
    }

    /// Snapshot every token that `next_any_token` returns, whitespace included, until
    /// the end of the input.
    #[test]
    fn test_get_whitespace_tokens() {
        let input = r"\text{  x +   y }";
        let mut manager =
            TokenQueue::new(Lexer::new(input, false, None)).expect("Failed to create TokenManager");

        let mut rendered = String::new();
        loop {
            let (tok, span) = manager.next_any_token().unwrap().into_parts();
            if matches!(tok, Token::Eoi) {
                break;
            }
            writeln!(rendered, "{}..{}: {:?}", span.start(), span.end(), tok).unwrap();
        }

        assert_snapshot!("next_with_whitespace", &rendered, input);
    }

    /// Check that `find_or_load_after_next` gives the same answer whether it has to
    /// load the token from the lexer or finds it in the buffer.
    #[test]
    fn test_find_or_load_after_next() {
        let input = r"x y z";
        let lexer = Lexer::new(input, false, None);
        let mut queue = TokenQueue::new(lexer).expect("Failed to create TokenManager");
        queue.next().unwrap(); // Consume 'x'
        assert_eq!(queue.next_non_whitespace, 1);
        assert_eq!(queue.queue.len(), 2);
        assert!(matches!(queue.queue[0].token(), Token::Whitespace));
        assert!(matches!(queue.peek().token(), Token::Letter('y', _)));

        // The first pass has to load more tokens from the lexer; the second pass
        // finds the token in the existing buffer. Both must agree on the outcome.
        for _pass in 0..2 {
            let tok_idx = queue.find_or_load_after_next(is_not_whitespace).unwrap();
            assert_eq!(tok_idx, Some(3));
            assert_eq!(queue.queue.len(), 4);
            assert!(matches!(queue.queue[0].token(), Token::Whitespace));
            assert!(matches!(queue.queue[2].token(), Token::Whitespace));
            assert!(matches!(queue.queue[3].token(), Token::Letter('z', _)));
        }
    }

    /// Snapshot the result of `read_argument` for single-token and group arguments,
    /// with and without `preserve_all`.
    #[test]
    fn text_read_argument() {
        let problems = [
            ("hyphen", r"-xy", true),
            ("hyphen_math_mode", r"-xy", false),
            ("consecutive_whitespace", r"{x   y} z", true),
            ("consecutive_whitespace_skip", r"{x   y} z", false),
        ];

        for (name, problem, preserve_all) in problems {
            let lexer = Lexer::new(problem, false, None);
            let mut manager = TokenQueue::new(lexer).expect("Failed to create TokenManager");
            let rendered = match manager.read_argument(preserve_all) {
                Ok(MacroArgument::Group(tokens, _)) => {
                    tokens.into_iter().fold(String::new(), |mut out, tokloc| {
                        let (tok, span) = tokloc.into_parts();
                        writeln!(out, "{}..{}: {:?}", span.start(), span.end(), tok).unwrap();
                        out
                    })
                }
                Ok(MacroArgument::Token(tok)) => {
                    let (tok, span) = tok.into_parts();
                    format!("{}..{}: {:?}\n", span.start(), span.end(), tok)
                }
                Err(error) => {
                    let mut buf = Vec::new();
                    error
                        .to_report("<input>", false)
                        .write(("<input>", ariadne::Source::from(problem)), &mut buf)
                        .expect("failed to write report");
                    String::from_utf8(buf).expect("report should be valid UTF-8")
                }
            };
            assert_snapshot!(name, &rendered, problem);
        }
    }
}