/// The kind of a lexical token produced by [`eat`].
///
/// The common marker traits are derived so that variants can be printed in
/// diagnostics and compared directly (for instance with `assert_eq!`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenVariant
{
    /// The `again` keyword.
    Again,
    /// The `defer` keyword.
    Defer,
    /// The `forget` keyword.
    Forget,
    /// The `N` keyword.
    N,
    /// The `print` keyword.
    Print,
    /// The `read` keyword.
    Read,
    /// The `U` keyword.
    U,

    /// The `+` sign.
    Plus,
    /// The `-` sign.
    Minus,
    /// A numeric literal, carrying its parsed value.
    Number(usize),
    /// A double-quoted string literal.
    String,
    /// The unary boolean operator `!`.
    UnBoolOp,
    /// A binary boolean operator (`&&` or `||`).
    BinBoolOp,
    /// A binary comparison operator such as `<`, `>=` or `==`.
    BinNumBoolOp,
    /// A multiplicative operator (`*` or `/`).
    MathOp,
    /// The `,` separator.
    Comma,
    /// An opening parenthesis `(`.
    LeftParens,
    /// A closing parenthesis `)`.
    RightParens,
    /// The `;` terminator.
    Semicolon,
    /// The `#` sign.
    Sharp,

    /// End of input: nothing but whitespace was left to read.
    EOI
}
35
36pub struct Token<'a>
37{
38 pub tok: &'a str,
39 pub variant: TokenVariant
40}
41
42pub fn eat<'a>(input: &'a str)
60 -> Result<(Token<'a>, &'a str), (String, &'a str)>
61{
62 let input = input.trim_start();
63
64macro_rules! make_token
65{
66 ($type: expr, $input: ident, $pos: expr) =>
67 {
68 {
69 let (token, output) = $input.split_at($pos);
70 Ok((Token { tok: token, variant: $type }, output))
71 }
72 };
73}
74
75 match input.chars().next()
76 {
77 None => make_token!(TokenVariant::EOI, input, 0),
78 Some('+') => make_token!(TokenVariant::Plus, input, 1),
79 Some('-') => make_token!(TokenVariant::Minus, input, 1),
80 Some('*') | Some('/') =>
81 make_token!(TokenVariant::MathOp, input, 1),
82 Some('!') => make_token!(TokenVariant::UnBoolOp, input, 1),
83 Some(',') => make_token!(TokenVariant::Comma, input, 1),
84 Some('(') => make_token!(TokenVariant::LeftParens, input, 1),
85 Some(')') => make_token!(TokenVariant::RightParens, input, 1),
86 Some(';') => make_token!(TokenVariant::Semicolon, input, 1),
87 Some('#') => make_token!(TokenVariant::Sharp, input, 1),
88 Some('"') =>
89 {
90 let mut escape = false;
91
92 for (i, c) in input.char_indices().skip(1)
93 {
94 match c
95 {
96 '"' if escape == false => {
97 return make_token!(TokenVariant::String, input, i + 1)
98 },
99 '\\' if escape == false => escape = true,
100 _ => escape = false,
101 }
102 }
103
104 Err((String::from("End of input while reading string"), input))
105 }
106 Some(c) if c.is_ascii_digit() => {
108 let mut base = 10;
109 let mut start = 0;
110 let mut end = 0;
111 if c == '0'
112 {
113 match input.chars().skip(1).next()
114 {
115 Some('b') => {
117 let mut pos = 2;
118 for (i, c) in input.char_indices().skip(2)
119 {
120 if c != '0' && c != '1'
121 {
122 break;
123 }
124 pos = i + 1;
125 }
126 if 2 < pos
127 {
128 base = 2;
129 start = 2;
130 end = pos
131 }
132 },
133 Some('x') | Some('X') => {
135 let mut pos = 2;
136 for (i, c) in input.char_indices().skip(2)
137 {
138 if !c.is_ascii_hexdigit()
139 {
140 break;
141 }
142 pos = i + 1;
143 }
144 if 2 < pos
145 {
146 base = 16;
147 start = 2;
148 end = pos;
149 }
150 }
151 Some(d) if d.is_ascii_digit() => {
153 base = 8;
155 start = 1;
156 }
157 Some(_) | None => () }
159 }
160 if end == 0
161 {
162 for (i, c) in input.char_indices().skip(start)
163 {
164 if !c.is_ascii_digit()
165 {
166 break;
167 }
168 end = i + 1;
169 }
170 }
171
172 let (tok, output) = input.split_at(end);
173 match usize::from_str_radix(&tok[start..], base)
174 {
175 Ok(num) => Ok((Token { tok,
176 variant: TokenVariant::Number(num) },
177 output)),
178 Err(error) => Err((error.to_string(), &tok[start..]))
179 }
180 }
181 Some(c) if c.is_ascii_alphabetic() =>
182 {
183 let mut end = 1;
184 for (i, c) in input.char_indices().skip(1)
185 {
186 if !c.is_ascii_alphabetic()
187 {
188 break;
189 }
190 end = i + 1
191 }
192
193 match &input[..end]
194 {
195 "again" => make_token!(TokenVariant::Again, input, end),
196 "defer" => make_token!(TokenVariant::Defer, input, end),
197 "forget" => make_token!(TokenVariant::Forget, input, end),
198 "N" => make_token!(TokenVariant::N, input, end),
199 "print" => make_token!(TokenVariant::Print, input, end),
200 "read" => make_token!(TokenVariant::Read, input, end),
201 "U" => make_token!(TokenVariant::U, input, end),
202 _ => Err((String::from("Unknown token"), input))
203 }
204 }
205 Some('<') | Some('>') =>
206 {
207 match input.chars().skip(1).next()
208 {
209 Some('=') => make_token!(TokenVariant::BinNumBoolOp, input, 2),
210 _ => make_token!(TokenVariant::BinNumBoolOp, input, 1)
211 }
212 }
213 Some(_) =>
214 {
215 match input.get(..2)
217 {
218 Some("&&") | Some("||") =>
219 make_token!(TokenVariant::BinBoolOp, input, 2),
220 Some("==") | Some("!=") =>
221 make_token!(TokenVariant::BinNumBoolOp, input, 2),
222 _ => Err((String::from("Unknown token"), input))
223 }
224 }
225 }
226}
227
#[cfg(test)]
mod tests
{
    use super::*;

    /// Lexes `$input` and checks that the first token matches the pattern
    /// `$type`, covers bytes `$from..$at` of `$input`, and leaves
    /// `$input[$at..]` as the remainder.
    macro_rules! check_token
    {
        ($input: expr, $type: pat, $from: expr, $at: expr) =>
        {
            {
                let input = $input;

                match eat(input)
                {
                    Ok((token, output)) =>
                    {
                        match token.variant
                        {
                            $type => (),
                            _ => panic!("Not a {}", stringify!($type)),
                        }
                        assert_eq!(token.tok, &input[$from..$at]);
                        assert_eq!(output, &input[$at..]);
                    }
                    Err((error, _)) => panic!("Not OK: {}", error),
                }
            }
        };
    }

    /// Like `check_token!`, for numeric literals: additionally checks that
    /// the parsed value equals `$val`.
    macro_rules! check_token_number
    {
        ($input: expr, $val: expr, $from: expr, $at: expr) =>
        {
            {
                let input = $input;

                match eat(input)
                {
                    Ok((Token { tok, variant: TokenVariant::Number(num) },
                        output)) =>
                    {
                        assert_eq!(tok, &input[$from..$at]);
                        assert_eq!(num, $val);
                        assert_eq!(output, &input[$at..]);
                    }
                    Ok(_) => panic!("Not a TokenVariant::Number"),
                    Err((error, _)) => panic!("Not OK: {}", error),
                }
            }
        };
    }

    /// Checks that lexing `$input` fails.
    macro_rules! check_token_error
    {
        ($input: expr) =>
        {
            {
                let input = $input;

                assert!(eat(input).is_err(),
                        "expected an error for {:?}", input);
            }
        }
    }

    #[test]
    fn end_of_input_check()
    {
        check_token!("", TokenVariant::EOI, 0, 0);
        // Seven bytes of whitespace, so the empty token sits at offset 7.
        check_token!("  \t\n   ", TokenVariant::EOI, 7, 7)
    }

    #[test]
    fn plus_check()
    {
        check_token!("+abc", TokenVariant::Plus, 0, 1)
    }

    #[test]
    fn string_check()
    {
        check_token!("\"ab\\\"\\\\\"de", TokenVariant::String, 0, 8)
    }

    #[test]
    fn skip_spaces()
    {
        // Ten bytes of whitespace precede the `!`.
        check_token!("    \t\n    !", TokenVariant::UnBoolOp, 10, 11)
    }

    #[test]
    fn operator_check()
    {
        check_token!("&& x", TokenVariant::BinBoolOp, 0, 2);
        check_token!("||x", TokenVariant::BinBoolOp, 0, 2);
        check_token!("==1", TokenVariant::BinNumBoolOp, 0, 2);
        check_token!("<=1", TokenVariant::BinNumBoolOp, 0, 2);
        check_token!(">1", TokenVariant::BinNumBoolOp, 0, 1);
        check_token!("*2", TokenVariant::MathOp, 0, 1);
    }

    #[test]
    fn number_check()
    {
        check_token_number!("42abc", 42, 0, 2);
        check_token_number!("  1337", 1337, 2, 6);
        check_token_number!("0", 0, 0, 1);
        check_token_number!("0b", 0, 0, 1);
        check_token_number!("0x", 0, 0, 1);
        check_token_number!("00", 0, 0, 2);
        check_token_number!("0a", 0, 0, 1);
        check_token_number!("0b101010", 0b101010, 0, 8);
        check_token_number!("0xdeadbeef", 0xdeadbeef, 0, 10);
        check_token_number!("0XCaFe", 0xCAFE, 0, 6);
        check_token_number!("0777", 0x1ff, 0, 4);
        check_token_number!(" 42💻", 42, 1, 3);
    }

    #[test]
    fn keyword_check()
    {
        check_token!("  again ", TokenVariant::Again, 2, 7);
        check_token!("print()", TokenVariant::Print, 0, 5);
    }

    #[test]
    fn error_check()
    {
        check_token_error!("\"abcd");
        check_token_error!("089");
        check_token_error!("Again");
        check_token_error!("abcd");
        check_token_error!("💻");
    }

    #[test]
    fn string_tokenization()
    {
        let line =
            "1 again (1) defer (3 || N(1)<=N(2) || N(7)>99) 2#N(1),3,7;";
        let mut input : &str = line;

        let expected = vec![
            "1", "again", "(", "1", ")", "defer", "(", "3", "||", "N", "(", "1",
            ")", "<=", "N", "(", "2", ")", "||", "N", "(", "7", ")", ">", "99",
            ")", "2", "#", "N", "(", "1", ")", ",", "3", ",", "7", ";"];
        let mut actual = Vec::new();

        // Keep lexing until the end-of-input token shows up.
        loop
        {
            match eat(input)
            {
                Err((error, _)) => panic!("{}. input: \"{}\"", error, input),
                Ok((token, remainder)) =>
                {
                    if let TokenVariant::EOI = token.variant
                    {
                        break;
                    }
                    actual.push(token.tok);
                    input = remainder;
                }
            }
        }

        assert_eq!(actual, expected);
    }
}