saphyr-parser-bw 0.0.612

Saphyr-parser with changes from Bourumir Wyngs to be used in serde-saphyr
Documentation
use crate::char_traits::is_breakz;
use crate::input::{BorrowedInput, Input};

use arraydeque::ArrayDeque;

/// The size of the [`BufferedInput`] buffer.
///
/// The buffer is statically allocated to avoid conditions for reallocations each time we
/// consume/push a character. As of now, almost all lookaheads are 4 characters maximum, except:
///   - Escape sequences parsing: some escape codes are 8 characters
///   - Scanning indent in scalars: this looks ahead `indent + 2` characters
///
/// This constant must be set to at least 8. When scanning indent in scalars, the lookahead is done
/// in a single call if and only if the indent is `BUFFER_LEN - 2` or less. If the indent is higher
/// than that, the code will fall back to a loop of lookaheads.
const BUFFER_LEN: usize = 16;

/// A wrapper around an [`Iterator`] of [`char`]s with a buffer.
///
/// The YAML scanner often needs some lookahead. With fully allocated buffers such as `String` or
/// `&str`, this is not an issue. However, with streams, we need to have a way of peeking multiple
/// characters at a time and sometimes pushing some back into the stream.
/// There is no "easy" way of doing this without itertools. In order to avoid pulling the entierty
/// of itertools for one method, we use this structure.
#[allow(clippy::module_name_repetitions)]
pub struct BufferedInput<T: Iterator<Item = char>> {
    /// The iterator source,
    input: T,
    /// Buffer for the next characters to consume.
    buffer: ArrayDeque<char, BUFFER_LEN>,
}

impl<T: Iterator<Item = char>> BufferedInput<T> {
    /// Create a new [`BufferedInput`] with the given input.
    pub fn new(input: T) -> Self {
        Self {
            input,
            buffer: ArrayDeque::default(),
        }
    }
}

impl<T: Iterator<Item = char>> Input for BufferedInput<T> {
    #[inline]
    fn lookahead(&mut self, count: usize) {
        let target = count.min(BUFFER_LEN);

        if self.buffer.len() >= target {
            return;
        }
        for _ in 0..(target - self.buffer.len()) {
            self.buffer
                .push_back(self.input.next().unwrap_or('\0'))
                .unwrap();
        }
    }

    #[inline]
    fn buflen(&self) -> usize {
        self.buffer.len()
    }

    #[inline]
    fn bufmaxlen(&self) -> usize {
        BUFFER_LEN
    }

    #[inline]
    fn raw_read_ch(&mut self) -> char {
        self.input.next().unwrap_or('\0')
    }

    #[inline]
    fn raw_read_non_breakz_ch(&mut self) -> Option<char> {
        if let Some(c) = self.input.next() {
            if is_breakz(c) {
                self.buffer.push_back(c).unwrap();
                None
            } else {
                Some(c)
            }
        } else {
            None
        }
    }

    #[inline]
    fn skip(&mut self) {
        self.buffer.pop_front();
    }

    #[inline]
    fn skip_n(&mut self, count: usize) {
        self.buffer.drain(0..count);
    }

    #[inline]
    fn peek(&self) -> char {
        self.buffer[0]
    }

    #[inline]
    fn peek_nth(&self, n: usize) -> char {
        self.buffer[n]
    }
}

/// `BufferedInput` does not support zero-copy slicing since it's a streaming input
/// without stable backing storage.
impl<T: Iterator<Item = char>> BorrowedInput<'static> for BufferedInput<T> {
    #[inline]
    fn slice_borrowed(&self, _start: usize, _end: usize) -> Option<&'static str> {
        None
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn lookahead_larger_than_buffer_is_clamped() {
        let mut input = BufferedInput::new("abc".chars());

        input.lookahead(BUFFER_LEN + 8);

        assert_eq!(input.buflen(), BUFFER_LEN);
        assert_eq!(input.peek(), 'a');
        assert_eq!(input.peek_nth(1), 'b');
        assert_eq!(input.peek_nth(2), 'c');
        assert_eq!(input.peek_nth(3), '\0');
    }
}