expr_solver/
lexer.rs

1use crate::token::Token;
2
/// Top-level lexer: turns an input expression string into a `Vec<Token>`.
pub struct Lexer {
    /// Original source string, kept for slicing out number literals.
    pub source_string: String,
    /// Source string as a vec of chars, used for position-based access.
    pub source_chars: Vec<char>,
    /// Start index (into `source_chars`) of the token currently being scanned.
    pub start: usize,
    /// Index of the character currently under consideration.
    pub current: usize,
    /// Tokens produced so far. After `scan` this vec is reversed so that
    /// `pop` yields tokens in source order.
    pub tokens: Vec<Token>,
    /// Total length of the input, used as the scan end bound.
    /// NOTE(review): initialized from `input.len()` (bytes) but compared
    /// against char indices in `is_at_end` — verify for non-ASCII input.
    pub len: usize,
    /// Set to true when an unrecognized character is encountered.
    pub has_errors: bool,
}
20
21impl Lexer {
22    /// Constructor for lexer
23    ///
24    /// # Arguments
25    /// * input - Input string to tokenize.
26    ///
27    /// # Returns
28    /// * Lexer - new instance of lexer with prepopulated fields.
29    pub fn new(input: &String) -> Self {
30        log::trace!(" [expr-solver] creating new lexer instance : {}", &input);
31        Self {
32            source_chars: input.chars().collect(),
33            source_string: input.clone(),
34            start: 0,
35            current: 0,
36            len: input.len(),
37            tokens: vec![],
38            has_errors: false,
39        }
40    }
41
42    /// Top level public function to start tokenizing the input string.
43    pub fn scan(&mut self) {
44        log::trace!("[expr-solver] starting scanning for tokens.");
45        while !self.is_at_end() {
46            self.scan_token();
47            self.start = self.current;
48        }
49
50        // we add a EOF token at the end, comes handy when parsing.
51        self.add_token(Token::Eof);
52
53        log::trace!(
54            "[expr-solver] Scanned {} token. tokens={:?}",
55            self.tokens.len(),
56            &self.tokens
57        );
58
59        // we reverse the entire tokens vec, because we will be
60        // using pop function to retrieve one token at time, which
61        // takes tokens from the end.
62        self.tokens.reverse();
63    }
64
65    /// Internal function which parses one token at a time.
66    fn scan_token(&mut self) {
67        let current_char = self.advance();
68
69        match current_char {
70            '+' => self.add_token(Token::Plus),
71            '-' => self.add_token(Token::Minus),
72            '*' => self.add_token(Token::Star),
73            '/' => self.add_token(Token::Slash),
74            '!' => self.add_token(Token::Bang),
75            '(' => self.add_token(Token::LeftParen),
76            ')' => self.add_token(Token::RightParen),
77            ' ' | '\t' | '\r' => {}
78            _ => {
79                if current_char.is_ascii_digit() {
80                    self.scan_number();
81                } else {
82                    self.has_errors = true;
83                }
84            }
85        }
86    }
87
88    /// Scans a number type of token.
89    fn scan_number(&mut self) {
90        while self.look_ahead().is_ascii_digit() {
91            self.advance();
92        }
93
94        // for floating point numbers.
95        if self.look_ahead() == '.' && self.look_ahead_twice().is_ascii_digit() {
96            self.advance();
97            while self.look_ahead().is_ascii_digit() {
98                self.advance();
99            }
100        }
101
102        // we take literal string of the number and parse it into rust's f64.
103        let number_literal = self.source_string[self.start..self.current]
104            .to_string()
105            .parse::<f64>();
106
107        match number_literal {
108            Ok(number_literal) => self.add_token(Token::Number(number_literal)),
109            // probably never going to happen but still you never know.
110            Err(_) => panic!("Failed to parse number literal as f64"),
111        }
112    }
113
114    /// consumes current character and returns it.
115    fn advance(&mut self) -> char {
116        self.current += 1;
117        self.source_chars[self.current - 1]
118    }
119
120    /// returns current character but doesn't consume it.
121    fn look_ahead(&mut self) -> char {
122        if self.is_at_end() {
123            return '\0';
124        }
125        self.source_chars[self.current]
126    }
127
128    /// returns next character but doesn't consume it.
129    fn look_ahead_twice(&mut self) -> char {
130        if self.current + 1 >= self.len {
131            return '\0';
132        }
133        self.source_chars[self.current + 1]
134    }
135
136    /// returns the next token, and also consumes it.
137    pub fn next_token(&mut self) -> Token {
138        self.tokens.pop().unwrap_or(Token::Eof)
139    }
140
141    /// returns the next token, but doesn't consume it.
142    pub fn peek(&self) -> Token {
143        self.tokens.last().copied().unwrap_or(Token::Eof)
144    }
145
146    // checks if reached the end of the input string.
147    fn is_at_end(&self) -> bool {
148        self.current >= self.len
149    }
150
151    // helper function to add tokens.
152    fn add_token(&mut self, token_type: Token) {
153        log::trace!("[expr-solver] adding token={}", token_type);
154        self.tokens.push(token_type);
155    }
156}