//! rascal_scanner 0.1.2
//!
//! Rascal programming language scanner.
//! Documentation
use crate::token::{keywords, Token};
use alloc::{str::Chars, string::String};
use core::{
    iter::{Enumerate, Peekable},
    num::ParseFloatError,
};

/// Lexical scanner that walks source text one character at a time.
///
/// Positions are tracked with the index produced by `Enumerate<Chars>`, i.e.
/// they count characters, not bytes, despite the `byte` wording in some names.
#[derive(Debug, Clone)]
pub struct Scanner<'a> {
    /// Peekable stream of `(index, char)` pairs over the source text.
    iterator: Peekable<Enumerate<Chars<'a>>>,
    /// Current line counter; bumped each time a newline is consumed.
    line: usize,
    /// Index of the most recently consumed newline, used as the base when
    /// computing column offsets for error reports.
    line_start_byte: usize,
    /// Number of tokens produced so far.
    token_num: usize,
}
impl<'a> Iterator for Scanner<'a> {
    type Item = Result<Token, ScanError>;

    /// Produces the next token from the source text.
    ///
    /// Returns `None` once the input is exhausted, `Some(Ok(token))` for each
    /// recognised token and `Some(Err(error))` for each lexical error. An
    /// error does not end iteration: the next call resumes after the
    /// characters that were consumed while producing the error.
    ///
    /// Spaces, tabs, carriage returns, newlines and `//` line comments are
    /// skipped. Newlines (including those inside string literals) advance the
    /// line counter so later positions stay accurate.
    fn next(&mut self) -> Option<Self::Item> {
        // Looping (instead of recursing) after a comment keeps stack usage
        // constant no matter how many consecutive comment lines there are.
        loop {
            // Skip insignificant whitespace, tracking line starts as we go.
            while let Some(&(index, chr)) = self.iterator.peek() {
                match chr {
                    ' ' | '\t' | '\r' => {}
                    '\n' => {
                        self.line += 1;
                        self.line_start_byte = index;
                    }
                    _ => break,
                }
                self.iterator.next();
            }

            let (byte, chr) = self.iterator.next()?;
            let token = match chr {
                '(' => Token::LeftParen,
                ')' => Token::RightParen,
                '{' => Token::LeftBrace,
                '}' => Token::RightBrace,
                ',' => Token::Comma,
                '.' => Token::Dot,
                '-' => Token::Minus,
                '+' => Token::Plus,
                ';' => Token::Semicolon,
                '*' => Token::Star,
                // One- or two-character operators: consume the `=` only when
                // it immediately follows.
                '!' => {
                    if self.iterator.next_if(|&(_, c)| c == '=').is_some() {
                        Token::BangEqual
                    } else {
                        Token::Bang
                    }
                }
                '=' => {
                    if self.iterator.next_if(|&(_, c)| c == '=').is_some() {
                        Token::EqualEqual
                    } else {
                        Token::Equal
                    }
                }
                '<' => {
                    if self.iterator.next_if(|&(_, c)| c == '=').is_some() {
                        Token::LessEqual
                    } else {
                        Token::Less
                    }
                }
                '>' => {
                    if self.iterator.next_if(|&(_, c)| c == '=').is_some() {
                        Token::GreaterEqual
                    } else {
                        Token::Greater
                    }
                }
                '/' => {
                    if self.iterator.next_if(|&(_, c)| c == '/').is_some() {
                        // Line comment: discard everything up to (but not
                        // including) the newline, then scan again. The
                        // newline itself is handled by the whitespace pass so
                        // the line counter is updated.
                        while self.iterator.next_if(|&(_, c)| c != '\n').is_some() {}
                        continue;
                    }
                    Token::Slash
                }
                '"' => {
                    // Remember where the literal began so an unterminated
                    // string is reported at its opening quote even if it
                    // spans several lines.
                    let start_line = self.line;
                    let start_col = byte - self.line_start_byte;
                    let mut string_literal = String::new();
                    loop {
                        match self.iterator.next() {
                            Some((_, '"')) => break,
                            Some((index, c)) => {
                                if c == '\n' {
                                    self.line += 1;
                                    self.line_start_byte = index;
                                }
                                string_literal.push(c);
                            }
                            None => {
                                return Some(Err(ScanError::UnterminatedString {
                                    line_str: string_literal,
                                    line_index: start_line,
                                    col_index: start_col,
                                }));
                            }
                        }
                    }
                    Token::String(string_literal)
                }
                other_char if other_char.is_numeric() => {
                    match self.num_literal(other_char, byte) {
                        Ok(value) => value,
                        // `num_literal` hands back the ready-made return value
                        // for the error case.
                        Err(value) => return value,
                    }
                }
                other_char if other_char.is_alphabetic() => {
                    let mut identifier = String::from(other_char);
                    while let Some((_, c)) = self.iterator.next_if(|&(_, c)| c.is_alphanumeric()) {
                        identifier.push(c);
                    }
                    // Reserved words take precedence over plain identifiers.
                    if let Some(keyword) = keywords(&identifier) {
                        keyword.clone()
                    } else {
                        Token::Identifier(identifier)
                    }
                }
                other_char => {
                    return Some(Err(ScanError::UnexpectedChar {
                        char: other_char.into(),
                        line_index: self.line,
                        col_index: byte - self.line_start_byte,
                    }));
                }
            };
            self.token_num += 1;
            return Some(Ok(token));
        }
    }
}

impl<'a> Scanner<'a> {
    /// Runs the scanner to the end of the input, collecting every result.
    ///
    /// Scanning continues past errors so that all of them can be reported in
    /// one pass.
    ///
    /// # Errors
    ///
    /// Returns `Err((tokens, errors))` if at least one [`ScanError`] occurred;
    /// `tokens` holds the tokens that were scanned successfully.
    pub fn scan_all(&mut self) -> Result<Vec<Token>, (Vec<Token>, Vec<ScanError>)> {
        let mut tokens = Vec::new();
        let mut errors = Vec::new();
        for result in self {
            match result {
                Ok(token) => tokens.push(token),
                Err(error) => errors.push(error),
            }
        }
        if errors.is_empty() {
            Ok(tokens)
        } else {
            Err((tokens, errors))
        }
    }

    /// Returns the next `(index, char)` pair of the source without consuming it.
    pub fn peek(&mut self) -> Option<&(usize, char)> {
        self.iterator.peek()
    }

    /// Get a reference to the scanner's line.
    pub fn line(&self) -> &usize {
        &self.line
    }

    /// Get a reference to the scanner's line start byte.
    pub fn line_start_byte(&self) -> &usize {
        &self.line_start_byte
    }

    /// Scans the remainder of a number literal whose first character has
    /// already been consumed.
    ///
    /// * `other_char` - the first character of the literal.
    /// * `byte` - index of that first character, used for error positions.
    ///
    /// The literal is a run of numeric characters, optionally followed by a
    /// `.` and a further run of numeric characters. A `.` directly after the
    /// integer part is always consumed, even when no digits follow it.
    ///
    /// Returns `Ok(Token::Number)` on success. On failure the `Err` payload is
    /// the value the caller should return from `Iterator::next`.
    fn num_literal(
        &mut self,
        other_char: char,
        byte: usize,
    ) -> Result<Token, Option<Result<Token, ScanError>>> {
        let mut number_literal = String::from(other_char);
        // Integer part: append each scanned digit (not the first character
        // again) to the literal text.
        while let Some((_, digit)) = self.iterator.next_if(|&(_, c)| c.is_numeric()) {
            number_literal.push(digit);
        }
        // Optional decimal point followed by the fractional digits.
        if let Some((_, dot)) = self.iterator.next_if(|&(_, c)| c == '.') {
            number_literal.push(dot);
            while let Some((_, digit)) = self.iterator.next_if(|&(_, c)| c.is_numeric()) {
                number_literal.push(digit);
            }
        }
        match number_literal.parse::<f64>() {
            Ok(number) => Ok(Token::Number(number)),
            Err(parse_err) => Err(Some(Err(ScanError::NumberLiteralParse {
                parse_err,
                number: number_literal,
                line_index: self.line,
                col_index: byte - self.line_start_byte,
            }))),
        }
    }
}
impl<'a> From<&'a String> for Scanner<'a> {
    fn from(source: &'a String) -> Self {
        Scanner {
            iterator: source.chars().enumerate().peekable(),
            line: 1,
            line_start_byte: 0,
            token_num: 0,
        }
    }
}

/// Errors the scanner can report while turning source text into tokens.
///
/// In every variant `line_index` is the scanner's line counter and
/// `col_index` is the offending character's index in the source minus the
/// scanner's recorded line-start index.
#[derive(Debug, Clone)]
pub enum ScanError {
    /// A character that does not start any known token.
    UnexpectedChar {
        /// The offending character, stored as a string.
        char: String,
        /// Line counter at the point of the error.
        line_index: usize,
        /// Column offset of the offending character.
        col_index: usize,
    },
    /// The input ended before a string literal's closing `"` was found.
    UnterminatedString {
        /// The string contents collected before the input ran out.
        line_str: String,
        /// Line counter reported for the literal.
        line_index: usize,
        /// Column offset of the opening quote.
        col_index: usize,
    },
    /// The collected number text could not be parsed as a floating-point value.
    NumberLiteralParse {
        /// The underlying parse failure.
        parse_err: ParseFloatError,
        /// The literal text that failed to parse.
        number: String,
        /// Line counter at the point of the error.
        line_index: usize,
        /// Column offset of the literal's first character.
        col_index: usize,
    },
}