// endbasic-core 0.13.0
//
// The EndBASIC programming language - core
// Documentation

// EndBASIC
// Copyright 2020 Julio Merino
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program.  If not, see <https://www.gnu.org/licenses/>.

//! Character-based reader for an input stream with position tracking.

use std::char;
use std::fmt;
use std::io::{self, BufRead};

/// Distance between tab stops, in columns.
///
/// When a tab character is read, the column assigned to the following character is advanced to
/// the next tab stop, that is, the next column of the form `k * TAB_LENGTH + 1`.
const TAB_LENGTH: usize = 8;

/// Location of a character within a source stream, expressed as a line and column pair.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct LineCol {
    /// One-based line number.
    pub line: usize,

    /// One-based column number.
    pub col: usize,
}

impl fmt::Display for LineCol {
    /// Renders the position in the conventional `line:col` form.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Self { line, col } = *self;
        write!(f, "{line}:{col}")
    }
}

/// A character read from the source stream, tagged with the position where it appears.
///
/// Equality comparisons are only derived for test builds.
#[derive(Debug)]
#[cfg_attr(test, derive(Eq, PartialEq))]
pub struct CharSpan {
    /// The character value.
    pub(crate) ch: char,

    /// The position of `ch` in the source, as tracked by the reader when it was consumed.
    pub(crate) pos: LineCol,
}

/// State of the line buffer that backs the character reader.
///
/// The reader starts in `Unknown`, moves to `Chars` every time a line is read successfully, and
/// settles in either `Eof` or `Error`, after which no further reads are attempted.
enum Pending {
    /// Initial state where no data has been buffered yet.  The first request for a character
    /// triggers a read of the first line.
    Unknown,

    /// Holds a buffered line of characters and the index of the next character to return.  When
    /// the index reaches the length of the vector, the line is exhausted and a new one is read.
    Chars(Vec<char>, usize),

    /// Terminal state indicating end-of-file has been reached.
    Eof,

    /// Terminal state indicating an I/O error occurred.  Contains the original error if not yet
    /// consumed, or `None` if the error was already returned to the caller.
    Error(Option<io::Error>),
}

/// Wraps an `io::Read` to provide an iterator over characters with position tracking.
///
/// Input is consumed one line at a time.  Once end-of-file or an I/O error has been observed, the
/// underlying stream is not read again.
pub struct CharReader<'a> {
    /// The buffered reader wrapping the input stream.
    reader: io::BufReader<&'a mut dyn io::Read>,

    /// Current state of buffered character data.
    pending: Pending,

    /// If set, contains the result of a peek operation to be returned by the next `next()` call.
    ///
    /// The outer `Option` tells whether a peek is outstanding; the inner value is exactly what
    /// `next()` produced at the time of the peek, so an inner `None` records a peeked end-of-file.
    peeked: Option<Option<io::Result<CharSpan>>>,

    /// Position of the next character to be taken out of `pending`.
    next_pos: LineCol,
}

impl<'a> CharReader<'a> {
    /// Constructs a new character reader from an `io::Read`.
    ///
    /// Nothing is read from `reader` until the first character is requested.  Positions start at
    /// line 1, column 1.
    pub fn from(reader: &'a mut dyn io::Read) -> Self {
        Self {
            reader: io::BufReader::new(reader),
            pending: Pending::Unknown,
            peeked: None,
            next_pos: LineCol { line: 1, col: 1 },
        }
    }

    /// Replenishes `pending` with the next line to process and returns the first character that
    /// results from the new state.
    ///
    /// A zero-length read is recorded as end-of-file and a read failure is recorded as an error;
    /// both are terminal states that `next()` reports without reading again.
    fn refill_and_next(&mut self) -> Option<io::Result<CharSpan>> {
        let mut line = String::new();
        self.pending = match self.reader.read_line(&mut line) {
            Ok(0) => Pending::Eof,
            Ok(_) => Pending::Chars(line.chars().collect(), 0),
            Err(e) => Pending::Error(Some(e)),
        };
        self.next()
    }

    /// Peeks into the next character without consuming it.
    ///
    /// Returns `None` at end-of-file.  Repeated calls return the same value until `next()` is
    /// invoked, which then hands out the peeked value.
    pub(crate) fn peek(&mut self) -> Option<&io::Result<CharSpan>> {
        if self.peeked.is_none() {
            self.peeked = Some(self.next());
        }
        self.peeked.as_ref().and_then(Option::as_ref)
    }

    /// Gets the current position of the read, which is the position that the next character will
    /// carry.
    pub(crate) fn next_pos(&self) -> LineCol {
        match &self.peeked {
            // Peeking already advanced `self.next_pos` past the peeked character, yet that
            // character is still the next one callers will receive, so report its own position.
            Some(Some(Ok(span))) => span.pos,
            // Nothing peeked, or the peek hit end-of-file or an error, neither of which moves the
            // position.
            _ => self.next_pos,
        }
    }
}

impl Iterator for CharReader<'_> {
    type Item = io::Result<CharSpan>;

    /// Returns the next character in the input stream.
    fn next(&mut self) -> Option<Self::Item> {
        if let Some(peeked) = self.peeked.take() {
            return peeked;
        }

        match &mut self.pending {
            Pending::Unknown => self.refill_and_next(),
            Pending::Eof => None,
            Pending::Chars(chars, last) => {
                if *last == chars.len() {
                    self.refill_and_next()
                } else {
                    let ch = chars[*last];
                    *last += 1;

                    let pos = self.next_pos;
                    match ch {
                        '\n' => {
                            self.next_pos.line += 1;
                            self.next_pos.col = 1;
                        }
                        '\t' => {
                            self.next_pos.col =
                                (self.next_pos.col - 1 + TAB_LENGTH) / TAB_LENGTH * TAB_LENGTH + 1;
                        }
                        _ => {
                            self.next_pos.col += 1;
                        }
                    }

                    Some(Ok(CharSpan { ch, pos }))
                }
            }
            Pending::Error(e) => match e.take() {
                Some(e) => Some(Err(e)),
                None => Some(Err(io::Error::other("Invalid state; error already consumed"))),
            },
        }
    }
}

/// Unit tests for `CharReader`: position tracking, peeking, and terminal EOF/error states.
#[cfg(test)]
mod tests {
    use super::*;

    /// Syntactic sugar to instantiate a `CharSpan` for testing.
    fn cs(ch: char, line: usize, col: usize) -> CharSpan {
        CharSpan { ch, pos: LineCol { line, col } }
    }

    #[test]
    fn test_empty() {
        let mut input = b"".as_ref();
        let mut reader = CharReader::from(&mut input);
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_multibyte_chars() {
        let mut input = "Hi 훌리오".as_bytes();
        let mut reader = CharReader::from(&mut input);
        assert_eq!(cs('H', 1, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('i', 1, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs(' ', 1, 3), reader.next().unwrap().unwrap());
        assert_eq!(cs('훌', 1, 4), reader.next().unwrap().unwrap());
        assert_eq!(cs('리', 1, 5), reader.next().unwrap().unwrap());
        assert_eq!(cs('오', 1, 6), reader.next().unwrap().unwrap());
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_consecutive_newlines() {
        let mut input = b"a\n\nbc\n".as_ref();
        let mut reader = CharReader::from(&mut input);
        assert_eq!(cs('a', 1, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 1, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 2, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('b', 3, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('c', 3, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 3, 3), reader.next().unwrap().unwrap());
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_tabs() {
        let mut input = "1\t9\n1234567\t8\n12345678\t9".as_bytes();
        let mut reader = CharReader::from(&mut input);
        assert_eq!(cs('1', 1, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('\t', 1, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs('9', 1, 9), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 1, 10), reader.next().unwrap().unwrap());
        assert_eq!(cs('1', 2, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('2', 2, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs('3', 2, 3), reader.next().unwrap().unwrap());
        assert_eq!(cs('4', 2, 4), reader.next().unwrap().unwrap());
        assert_eq!(cs('5', 2, 5), reader.next().unwrap().unwrap());
        assert_eq!(cs('6', 2, 6), reader.next().unwrap().unwrap());
        assert_eq!(cs('7', 2, 7), reader.next().unwrap().unwrap());
        assert_eq!(cs('\t', 2, 8), reader.next().unwrap().unwrap());
        assert_eq!(cs('8', 2, 9), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 2, 10), reader.next().unwrap().unwrap());
        assert_eq!(cs('1', 3, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('2', 3, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs('3', 3, 3), reader.next().unwrap().unwrap());
        assert_eq!(cs('4', 3, 4), reader.next().unwrap().unwrap());
        assert_eq!(cs('5', 3, 5), reader.next().unwrap().unwrap());
        assert_eq!(cs('6', 3, 6), reader.next().unwrap().unwrap());
        assert_eq!(cs('7', 3, 7), reader.next().unwrap().unwrap());
        assert_eq!(cs('8', 3, 8), reader.next().unwrap().unwrap());
        assert_eq!(cs('\t', 3, 9), reader.next().unwrap().unwrap());
        assert_eq!(cs('9', 3, 17), reader.next().unwrap().unwrap());
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_crlf() {
        let mut input = b"a\r\nb".as_ref();
        let mut reader = CharReader::from(&mut input);
        assert_eq!(cs('a', 1, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('\r', 1, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 1, 3), reader.next().unwrap().unwrap());
        assert_eq!(cs('b', 2, 1), reader.next().unwrap().unwrap());
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_past_eof_returns_eof() {
        let mut input = b"a".as_ref();
        let mut reader = CharReader::from(&mut input);
        assert_eq!(cs('a', 1, 1), reader.next().unwrap().unwrap());
        assert!(reader.next().is_none());
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_next_pos() {
        let mut input = "Hi".as_bytes();
        let mut reader = CharReader::from(&mut input);
        assert_eq!(LineCol { line: 1, col: 1 }, reader.next_pos());
        assert_eq!(cs('H', 1, 1), reader.next().unwrap().unwrap());
        assert_eq!(LineCol { line: 1, col: 2 }, reader.next_pos());
        assert_eq!(cs('i', 1, 2), reader.next().unwrap().unwrap());
        assert_eq!(LineCol { line: 1, col: 3 }, reader.next_pos());
        assert!(reader.next().is_none());
        assert_eq!(LineCol { line: 1, col: 3 }, reader.next_pos());
    }

    #[test]
    fn test_peek_does_not_consume() {
        let mut input = b"a\nb".as_ref();
        let mut reader = CharReader::from(&mut input);
        assert_eq!(&cs('a', 1, 1), reader.peek().unwrap().as_ref().unwrap());
        assert_eq!(&cs('a', 1, 1), reader.peek().unwrap().as_ref().unwrap());
        assert_eq!(cs('a', 1, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 1, 2), reader.next().unwrap().unwrap());
        assert_eq!(&cs('b', 2, 1), reader.peek().unwrap().as_ref().unwrap());
        assert_eq!(cs('b', 2, 1), reader.next().unwrap().unwrap());
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_peek_at_eof() {
        let mut input = b"a".as_ref();
        let mut reader = CharReader::from(&mut input);
        assert_eq!(cs('a', 1, 1), reader.next().unwrap().unwrap());
        assert!(reader.peek().is_none());
        assert!(reader.peek().is_none());
        assert!(reader.next().is_none());
        assert_eq!(LineCol { line: 1, col: 2 }, reader.next_pos());
    }

    /// A reader that generates an error only on the Nth read operation.
    ///
    /// All other reads return a line with a single character in them with the assumption that the
    /// `CharReader` issues a single read per line.  If that assumption changes, the tests here may
    /// start failing.
    struct FaultyReader {
        /// Number of read operations served so far.
        current_read: usize,

        /// Zero-based index of the read operation that must fail.
        fail_at_read: usize,
    }

    impl FaultyReader {
        /// Creates a new reader that will fail at the `fail_at_read`th operation.
        fn new(fail_at_read: usize) -> Self {
            let current_read = 0;
            FaultyReader { current_read, fail_at_read }
        }
    }

    impl io::Read for FaultyReader {
        fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
            /// Canned content returned by every successful read.
            const LINE: &[u8] = b"1\n";

            // Every call counts as a read operation, whether it succeeds or not.
            let must_fail = self.current_read == self.fail_at_read;
            self.current_read += 1;
            if must_fail {
                return Err(io::Error::from(io::ErrorKind::InvalidInput));
            }

            assert!(buf.len() >= LINE.len(), "Read buffer too small to hold a full line");
            buf[..LINE.len()].copy_from_slice(LINE);
            Ok(LINE.len())
        }
    }

    #[test]
    fn test_errors_prevent_further_reads() {
        let mut reader = FaultyReader::new(2);
        let mut reader = CharReader::from(&mut reader);
        assert_eq!(cs('1', 1, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 1, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs('1', 2, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 2, 2), reader.next().unwrap().unwrap());
        assert_eq!(io::ErrorKind::InvalidInput, reader.next().unwrap().unwrap_err().kind());
        assert_eq!(io::ErrorKind::Other, reader.next().unwrap().unwrap_err().kind());
        assert_eq!(io::ErrorKind::Other, reader.next().unwrap().unwrap_err().kind());
    }

    #[test]
    fn test_peek_surfaces_errors() {
        let mut reader = FaultyReader::new(0);
        let mut reader = CharReader::from(&mut reader);
        assert_eq!(
            io::ErrorKind::InvalidInput,
            reader.peek().unwrap().as_ref().unwrap_err().kind()
        );
        assert_eq!(io::ErrorKind::InvalidInput, reader.next().unwrap().unwrap_err().kind());
        assert_eq!(io::ErrorKind::Other, reader.next().unwrap().unwrap_err().kind());
    }
}