1mod token;
2mod source_iterator;
3
4use source_iterator::SourceIterator;
5use token::Token;
6
7pub struct Scanner<T: Iterator<Item=char>> {
8 source: SourceIterator<T>,
9 line: usize,
10 }
12
13impl Scanner<std::str::Chars<'_>> {
14 pub fn from_str(s: &'static str) -> Self {
16 let chars = s.chars();
17 let source = SourceIterator::new(chars);
18 Scanner {
19 source: source,
20 line: 0,
21 }
22 }
23}
24
25impl<T> Scanner<T>
26 where
27 T: Iterator<Item=char>,
28{
29 pub fn new(source: T) -> Self {
31 let source = SourceIterator::new(source);
32 Scanner {
33 source,
34 line: 0,
35 }
36 }
37
38 pub fn next_nonblank(&mut self) -> Option<char> {
39 while let Some(c) = self.source.next() {
40 match c {
41 ' ' | '\r' | '\t' => (),
42 '\n' => self.line = self.line.saturating_add(1),
44 _ => return Some(c),
45 }
46 }
47 None
48 }
49
50 fn scan_token(&mut self) -> Option<Token> {
51 while let Some(c) = self.next_nonblank() {
52 if let Some(token) = self.scan_single_char(c) {
53 return Some(token);
54 } else if let Some(token) = self.scan_two_chars(c) {
55 return Some(token);
56 } else if let Some(token) = self.scan_multi_chars(c) {
57 return Some(token);
58 }
59 }
60 None
61 }
62
63 fn scan_single_char(&mut self, c: char) -> Option<Token> {
64 use Token::*;
65 let token = match c {
66 '(' => LeftParen,
67 ')' => RightParen,
68 '{' => LeftBrace,
69 '}' => RightBrace,
70 '[' => LeftBracket,
71 ']' => RightBracket,
72 ',' => Comma,
73 '.' => Dot,
74 '-' => Minus,
75 '+' => Plus,
76 '*' => Star,
77 ';' => Semicolon,
78 '=' => Equal,
79 _ => return None,
80 };
81 Some(token)
82 }
83
84 fn scan_two_chars(&mut self, c: char) -> Option<Token> {
85 use Token::*;
86 let token = match c {
87 '!' if self.source.next_if_matches('=') => BangEqual,
88 '!' => Bang,
89
90 '=' if self.source.next_if_matches('=') => EqualEqual,
91 '=' => Equal,
92
93 '<' if self.source.next_if_matches('=')=> LessEqual,
94 '<' => Less,
95
96 '>' if self.source.next_if_matches('=') => GreaterEqual,
97 '>' => Greater,
98
99 _ => return None,
100 };
101 Some(token)
102 }
103
104 fn scan_multi_chars(&mut self, c: char) -> Option<Token> {
105 use Token::*;
106 match c {
107 '/' if self.source.next_if_matches('/') => {
108 while !matches!(self.source.peek(), None | Some(&'\n')) {
110 self.source.next();
111 }
112 None }
114 '/' => Some(Slash),
115 '"' => return self.scan_string(),
116 _ => Some(Invalid("no match found".to_string(), self.line)),
118 }
119 }
120
121 fn scan_string(&mut self) -> Option<Token> {
122 let mut lexeme = String::new();
124 while !matches!(self.source.peek(), Some(&'"') | None) {
125 if matches!(self.source.peek(), Some(&'\n')) {
127 self.line = self.line.saturating_add(1);
129 }
130 let c = self.source.next();
131 lexeme.push(c.unwrap());
132 }
133 match self.source.next() {
135 None => return Some(Token::Invalid("unterminated string".to_string(), self.line)),
136 Some('"') => return Some(Token::String(lexeme)),
137 _ => unreachable!(),
138 }
139 }
140}
141
142impl<T> IntoIterator for Scanner<T>
143 where
144 T: Iterator<Item=char>,
145{
146 type Item = Token;
147 type IntoIter = TokenIterator<T>;
148 fn into_iter(self) -> Self::IntoIter {
149 TokenIterator {
150 scanner: self
151 }
152 }
153}
154
155pub struct TokenIterator<T: Iterator<Item=char>> {
156 scanner: Scanner<T>,
157}
158
159impl<T> Iterator for TokenIterator<T>
160 where T:
161 Iterator<Item=char>,
162{
163 type Item = Token;
164 fn next(&mut self) -> Option<Self::Item> {
165 self.scanner.scan_token()
166 }
167}
168
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty source produces no token at all.
    #[test]
    fn test_empty_source() {
        let mut scanner = Scanner::from_str("");
        assert!(scanner.scan_token().is_none());
    }

    /// A lone single-character lexeme is recognised.
    #[test]
    fn test_single_char() {
        let mut scanner = Scanner::from_str("+");
        assert!(matches!(scanner.scan_token(), Some(Token::Plus)));
    }

    /// A run of one-character lexemes produces exactly the expected tokens,
    /// in order.
    #[test]
    fn test_list_single_char_tokens() {
        use Token::*;
        let source = "(){}[],.;-+/*=!><";
        let expected = vec![
            LeftParen,
            RightParen,
            LeftBrace,
            RightBrace,
            LeftBracket,
            RightBracket,
            Comma,
            Dot,
            Semicolon,
            Minus,
            Plus,
            Slash,
            Star,
            Equal,
            Bang,
            Greater,
            Less,
        ];

        // Compare the whole list at once so that a scanner yielding too few
        // (or too many) tokens fails the test instead of passing vacuously.
        let actual: Vec<Token> = Scanner::from_str(source).into_iter().collect();
        assert_eq!(actual, expected);
    }

    /// A string literal with no closing quote is reported as invalid.
    #[test]
    fn test_unterminated_string() {
        let source = "\"this is unterminated\nstring";
        let mut scanner = Scanner::from_str(source);
        let token = scanner.scan_token();
        assert!(matches!(token, Some(Token::Invalid(_, _))));
    }

    /// A terminated string literal yields its contents without the quotes.
    #[test]
    fn test_string() {
        let source = "\"FooBarBuzz\"";
        let mut scanner = Scanner::from_str(source);
        match scanner.scan_token() {
            Some(Token::String(s)) => assert_eq!(s, "FooBarBuzz"),
            other => panic!("it should have returned a String token, got {:?}", other),
        }
    }
}