1use crate::error::*;
4
/// The lexical category of a token.
///
/// Variants are plain, field-less tags; the text a token was built from is
/// stored alongside the class in `Token`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenClass {
    /// A name: a letter or `_` followed by letters, digits or `_`.
    Identifier,
    /// A numeric literal that parses as an `i64`.
    Int,
    /// A numeric literal that parses as an `f64` but not as an `i64`.
    Float,
    /// The `=` operator.
    Assignment,
    /// The `+` operator.
    Plus,
    /// The `-` operator.
    Minus,
    /// The `*` operator.
    Multiply,
    /// The `/` operator.
    Divide,
    /// The `==` operator.
    Eq,
    /// The `;` terminator.
    Semicolon,
    /// The `,` separator.
    Comma,
    /// A line break (`\n`).
    Newline,
    /// The `'` (prime) character.
    Prime,
    /// The `(` delimiter.
    OpenParen,
    /// The `)` delimiter.
    CloseParen,
    /// The `[` delimiter.
    OpenBracket,
    /// The `]` delimiter.
    CloseBracket,
}
26
27#[derive(Clone, Debug)]
29pub struct Token {
30 class: TokenClass,
31 value: String,
32}
33
34impl Token {
35 pub fn new(class: TokenClass, value: String) -> Self {
37 Self {
38 class,
39 value,
40 }
41 }
42
43 pub fn get_class(&self) -> TokenClass {
45 self.class
46 }
47
48 pub fn get_value(&self) -> String {
50 self.value.to_owned()
51 }
52
53 pub fn check(&self, class: TokenClass) -> bool {
55 self.class == class
56 }
57}
58
59
/// A forward-only cursor over the characters of an input string.
///
/// The input is decoded into a `Vec<char>` up front so that peeking and
/// lookahead are plain index lookups.
pub struct CharStream {
    /// Every character of the input, in order.
    characters: Vec<char>,
    /// Position of the next character to be returned.
    index: usize,
}

impl CharStream {
    /// Creates a stream positioned at the first character of `input`.
    pub fn from(input: String) -> Self {
        Self {
            characters: input.chars().collect(),
            index: 0,
        }
    }

    /// Consumes and returns the current character, or `None` at end of input.
    ///
    /// The cursor never moves past the end, so repeated calls at end of input
    /// keep returning `None`.
    pub fn next(&mut self) -> Option<char> {
        let character = self.peek();
        if character.is_some() {
            self.index += 1;
        }
        character
    }

    /// Returns the current character without consuming it, or `None` at end
    /// of input.
    pub fn peek(&self) -> Option<char> {
        self.characters.get(self.index).copied()
    }

    /// Returns the character `n` positions after the current one without
    /// consuming anything; `lookahead(0)` is equivalent to [`CharStream::peek`].
    ///
    /// Returns `None` when that position lies beyond the end of the input.
    pub fn lookahead(&self, n: usize) -> Option<char> {
        // Bounds-check the position actually read (`index + n`), not just
        // `index`; `checked_add` also guards against overflow for huge `n`.
        self.index
            .checked_add(n)
            .and_then(|position| self.characters.get(position))
            .copied()
    }

    /// Consumes the longest run of characters that all occur in `superstring`
    /// and returns it. Returns an empty string if the current character is not
    /// in `superstring` or the stream is exhausted.
    pub fn get(&mut self, superstring: &str) -> String {
        let mut current = String::new();
        while let Some(c) = self.peek() {
            if !superstring.contains(c) {
                break;
            }
            self.next();
            current.push(c);
        }
        current
    }

    /// Skips any number of consecutive `//` line comments starting at the
    /// current position.
    ///
    /// Each comment is discarded up to and including its terminating `\n`.
    /// A comment that runs to the end of the input simply ends there.
    pub fn skip_comments(&mut self) {
        while self.peek() == Some('/') && self.lookahead(1) == Some('/') {
            // `next` yields `None` at end of input, which terminates the loop
            // even when the comment has no trailing newline.
            while let Some(c) = self.next() {
                if c == '\n' {
                    break;
                }
            }
        }
    }
}
136
137
/// Characters that may continue an identifier after its first character:
/// ASCII letters, decimal digits and the underscore.
const IDENTIFIER: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";

/// Characters that may continue a numeric literal: the ten decimal digits and
/// the decimal point.
const NUMERIC: &str = "0123456789.";

/// Characters that separate tokens and are otherwise discarded: space, tab
/// and newline.
const SEPARATORS: &str = " \t\n";
153
154
155pub struct Tokenizer {
157 tokens: Vec<Token>,
158 index: usize,
159}
160
161impl Tokenizer {
162 pub fn from(input: String) -> Self {
164 let index = 0;
165 let mut charstream = CharStream::from(input);
166 let mut tokens = Vec::new();
167
168 charstream.skip_comments();
170
171 while let Some(c) = charstream.next() {
172 if SEPARATORS.contains(c) {
173 continue;
174 }
175
176 let token = match c {
177 'a'..='z' | 'A'..='Z' | '_' => {
178 let name = format!(
179 "{}{}",
180 c,
181 charstream.get(IDENTIFIER),
182 );
183 Token::new(TokenClass::Identifier, name)
184 },
185 '0'..='9' => {
186 let raw = format!(
187 "{}{}",
188 c,
189 charstream.get(NUMERIC),
190 );
191
192 let token = match str::parse::<i64>(&raw) {
193 Ok(_) => Token::new(TokenClass::Int, raw),
194 Err(_) => match str::parse::<f64>(&raw) {
195 Ok(_) => Token::new(TokenClass::Float, raw),
196 Err(_) => {
197 throw(CouldNotParseNumeric);
198 Token::new(TokenClass::Float, "0.0".to_string())
199 },
200 },
201 };
202 token
203 },
204 '=' => if charstream.peek() == Some('=') {
205 Token::new(TokenClass::Eq, "==".to_string())
206 } else if let Some(_) = charstream.peek() {
207 Token::new(TokenClass::Assignment, "=".to_string())
208 } else {
209 throw(UnexpectedEof);
210 Token::new(TokenClass::Newline, '\n'.to_string())
211 },
212 '\n' => Token::new(TokenClass::Newline, '\n'.to_string()),
213 '+' => Token::new(TokenClass::Plus, '+'.to_string()),
214 '-' => {
215 let chr = match charstream.peek() {
216 Some(p) => p,
217 None => {
218 throw(UnexpectedEof);
219 '\n'
220 },
221 };
222 if NUMERIC.contains(chr) {
223 let raw = format!(
224 "{}{}",
225 c,
226 charstream.get(NUMERIC),
227 );
228
229 let token = match str::parse::<i64>(&raw) {
230 Ok(_) => Token::new(TokenClass::Int, raw),
231 Err(_) => match str::parse::<f64>(&raw) {
232 Ok(_) => Token::new(TokenClass::Float, raw),
233 Err(_) => {
234 throw(CouldNotParseNumeric);
235 Token::new(TokenClass::Float, "0.0".to_string())
236 },
237 },
238 };
239 token
240 } else {
241 Token::new(TokenClass::Minus, '-'.to_string())
242 }
243 }
244 '*' => Token::new(TokenClass::Multiply, '*'.to_string()),
245 '/' => Token::new(TokenClass::Divide, '/'.to_string()),
246 ';' => Token::new(TokenClass::Semicolon, ';'.to_string()),
247 '(' => Token::new(TokenClass::OpenParen, '('.to_string()),
248 ')' => Token::new(TokenClass::CloseParen, ')'.to_string()),
249 '[' => Token::new(TokenClass::OpenBracket, '['.to_string()),
250 ']' => Token::new(TokenClass::CloseBracket, ']'.to_string()),
251 ',' => Token::new(TokenClass::Comma, ';'.to_string()),
252 '\'' => Token::new(TokenClass::Prime, '\''.to_string()),
253 _ => {
254 throw(UnexpectedEof);
255 Token::new(TokenClass::Newline, '\n'.to_string())
256 },
257 };
258 tokens.push(token);
259
260 charstream.skip_comments();
262 }
263
264 Self {
265 tokens,
266 index,
267 }
268 }
269
270 pub fn peek(&self) -> Option<Token> {
272 if self.index >= self.tokens.len() {
273 None
274 } else {
275 Some (self.tokens[self.index].to_owned())
276 }
277 }
278
279 pub fn next(&mut self) -> Option<Token> {
281 let token = self.peek();
282 self.index += 1;
283 token
284 }
285
286 pub fn get_tokens(&mut self) -> Vec<Token> {
288 self.tokens.to_owned()
289 }
290
291 pub fn chk_silent(&self) -> bool {
295 if self.tokens.len() != 0 {
296 self.tokens.len() != 0 && self.tokens[self.tokens.len() - 1].get_class() == TokenClass::Semicolon
297 } else {
298 true
299 }
300 }
301
302 pub fn get_next_precedence(&self) -> u8 {
304 if let Some(t) = self.peek() {
305 t.get_class().into()
306 } else {
307 0
308 }
309 }
310}
311
/// Two assignments separated by a newline lex into six tokens: the newline is
/// a separator and produces no token of its own.
#[test]
fn tokenize_00() {
    let input: String = "x = 1.3\ny = 2.6".to_string();
    let mut tokenizer = Tokenizer::from(input);
    let tokens = tokenizer.get_tokens();
    println!("Tokens: {:#?}", tokens);

    let classes: Vec<TokenClass> = tokens.iter().map(|token| token.get_class()).collect();
    assert_eq!(
        classes,
        vec![
            TokenClass::Identifier,
            TokenClass::Assignment,
            TokenClass::Float,
            TokenClass::Identifier,
            TokenClass::Assignment,
            TokenClass::Float,
        ]
    );

    let values: Vec<String> = tokens.iter().map(|token| token.get_value()).collect();
    assert_eq!(values, vec!["x", "=", "1.3", "y", "=", "2.6"]);
}