lox_scanner/
lib.rs

1mod token;
2mod source_iterator;
3
4use source_iterator::SourceIterator;
5use token::Token;
6
7pub struct Scanner<T: Iterator<Item=char>> {
8    source: SourceIterator<T>,
9    line: usize,
10    // TODO keep track of the column as well
11}
12
13impl Scanner<std::str::Chars<'_>> {
14    // TODO can I do this with a less restrictive lifetime?
15    pub fn from_str(s: &'static str) -> Self {
16        let chars = s.chars();
17        let source = SourceIterator::new(chars);
18        Scanner {
19            source: source,
20            line: 0,
21        }
22    }
23}
24
25impl<T> Scanner<T>
26    where
27        T: Iterator<Item=char>,
28{
29    // TODO implement from iterator instead
30    pub fn new(source: T) -> Self {
31        let source = SourceIterator::new(source);
32        Scanner {
33            source,
34            line: 0,
35        }
36    }
37
38    pub fn next_nonblank(&mut self) -> Option<char> {
39        while let Some(c) = self.source.next() {
40            match c {
41                ' ' | '\r' | '\t' => (),
42                // update the line count
43                '\n' => self.line = self.line.saturating_add(1),
44                _ => return Some(c),
45            }
46        }
47        None
48    }
49
50    fn scan_token(&mut self) -> Option<Token> {
51        while let Some(c) = self.next_nonblank() {
52            if let Some(token) = self.scan_single_char(c) {
53                return Some(token);
54            } else if let Some(token) = self.scan_two_chars(c) {
55                return Some(token);
56            } else if let Some(token) = self.scan_multi_chars(c) {
57                return Some(token);
58            }
59        }
60        None
61    }
62
63    fn scan_single_char(&mut self, c: char) -> Option<Token> {
64        use Token::*;
65        let token = match c {
66            '(' => LeftParen,
67            ')' => RightParen,
68            '{' => LeftBrace,
69            '}' => RightBrace,
70            '[' => LeftBracket,
71            ']' => RightBracket,
72            ',' => Comma,
73            '.' => Dot,
74            '-' => Minus,
75            '+' => Plus,
76            '*' => Star,
77            ';' => Semicolon,
78            '=' => Equal,
79            _ => return None,
80        };
81        Some(token)
82    }
83
84    fn scan_two_chars(&mut self, c: char) -> Option<Token> {
85        use Token::*;
86        let token = match c {
87            '!' if self.source.next_if_matches('=') => BangEqual,
88            '!' => Bang,
89
90            '=' if self.source.next_if_matches('=') => EqualEqual,
91            '=' => Equal,
92
93            '<' if self.source.next_if_matches('=')=> LessEqual,
94            '<' => Less,
95
96            '>' if self.source.next_if_matches('=') => GreaterEqual,
97            '>' => Greater,
98
99            _ => return None,
100        };
101        Some(token)
102    }
103
104    fn scan_multi_chars(&mut self, c: char) -> Option<Token> {
105        use Token::*;
106        match c {
107            '/' if self.source.next_if_matches('/') => {
108                // the comment goes until the end of the line
109                while !matches!(self.source.peek(), None | Some(&'\n')) {
110                    self.source.next();
111                }
112                None // comment consumed
113            }
114            '/' => Some(Slash),
115            '"' => return self.scan_string(),
116            // no match found
117            _ => Some(Invalid("no match found".to_string(), self.line)),
118        }
119    }
120
121    fn scan_string(&mut self) -> Option<Token> /* <-- this has to be either a token or an error but not an optional */ {
122        // TODO optimize by allocating the optimal capacity
123        let mut lexeme = String::new();
124        while !matches!(self.source.peek(), Some(&'"') | None) {
125            // neither end of input nor end of string
126            if matches!(self.source.peek(), Some(&'\n')) {
127                // bypassing the next_nonblank(), soneed to keep track of new lines
128                self.line = self.line.saturating_add(1);
129            }
130            let c = self.source.next();
131            lexeme.push(c.unwrap());
132        }
133        // either end of input or closing double quotes found
134        match self.source.next() {
135            None => return Some(Token::Invalid("unterminated string".to_string(), self.line)),
136            Some('"') => return Some(Token::String(lexeme)),
137            _ => unreachable!(),
138        }
139    }
140}
141
142impl<T> IntoIterator for Scanner<T>
143    where
144        T: Iterator<Item=char>,
145{
146    type Item = Token;
147    type IntoIter = TokenIterator<T>;
148    fn into_iter(self) -> Self::IntoIter {
149        TokenIterator {
150            scanner: self
151        }
152    }
153}
154
155pub struct TokenIterator<T: Iterator<Item=char>> {
156    scanner: Scanner<T>,
157}
158
159impl<T> Iterator for TokenIterator<T>
160    where T:
161        Iterator<Item=char>,
162{
163    type Item = Token;
164    fn next(&mut self) -> Option<Self::Item> {
165        self.scanner.scan_token()
166    }
167}
168
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty source produces no token.
    #[test]
    fn test_empty_source() {
        let mut scanner = Scanner::from_str("");
        assert!(scanner.scan_token().is_none());
    }

    /// A lone `+` is scanned as `Token::Plus`.
    #[test]
    fn test_single_char() {
        let mut scanner = Scanner::from_str("+");
        assert!(matches!(scanner.scan_token(), Some(Token::Plus)));
    }

    /// Every one-character lexeme is scanned, in source order.
    #[test]
    fn test_list_single_char_tokens() {
        use Token::*;
        let source = "(){}[],.;-+/*=!><";
        let expected = vec![
            LeftParen,
            RightParen,
            LeftBrace,
            RightBrace,
            LeftBracket,
            RightBracket,
            Comma,
            Dot,
            Semicolon,
            Minus,
            Plus,
            Slash,
            Star,
            Equal,
            Bang,
            Greater,
            Less,
        ];

        // Compare the whole sequence: asserting only inside a `for` loop
        // would pass vacuously if the scanner produced too few tokens.
        let scanned: Vec<Token> = Scanner::from_str(source).into_iter().collect();
        assert_eq!(scanned, expected);
    }

    /// A string literal without a closing quote is reported as invalid.
    #[test]
    fn test_unterminated_string() {
        let source = "\"this is unterminated\nstring";
        let mut scanner = Scanner::from_str(source);
        let token = scanner.scan_token();
        assert!(matches!(token, Some(Token::Invalid(_, _))));
    }

    /// A terminated string literal yields its contents without the quotes.
    #[test]
    fn test_string() {
        let source = "\"FooBarBuzz\"";
        let mut scanner = Scanner::from_str(source);
        match scanner.scan_token() {
            Some(Token::String(s)) => assert_eq!(s, "FooBarBuzz"),
            other => panic!("it should have returned a String token, got {:?}", other),
        }
    }
}