whenever_parser/
lexer.rs

/// Kind of a token produced by the lexer.
///
/// Only `Number` carries a payload (the parsed value); for every other kind
/// the matched text is available through the `tok` field of the token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenVariant
{
    // Whenever specific tokens
    /// The `again` keyword.
    Again,
    /// The `defer` keyword.
    Defer,
    /// The `forget` keyword.
    Forget,
    /// The `N` keyword.
    N,
    /// The `print` keyword.
    Print,
    /// The `read` keyword.
    Read,
    /// The `U` keyword.
    U,

    /// `+`: same token for number and string contexts.
    Plus,
    /// `-`: same token for unary and binary operators.
    Minus,

    // Usual tokens
    /// A number literal, with its parsed value.
    Number(usize),
    /// A double-quoted string literal; the token text includes the quotes.
    String,
    /// Unary boolean operator: `!`.
    UnBoolOp,
    /// Binary boolean operator: `&&` or `||`.
    BinBoolOp,
    /// Binary comparison operator: `==`, `!=`, `<`, `<=`, `>` or `>=`.
    BinNumBoolOp,
    /// Multiplicative operator: `*` or `/`.
    MathOp,

    // Separator tokens
    /// `,`
    Comma,
    /// `(`
    LeftParens,
    /// `)`
    RightParens,
    /// `;`
    Semicolon,
    /// `#`
    Sharp,

    /// End of input.
    EOI,
}
35
36pub struct Token<'a>
37{
38    pub tok: &'a str,
39    pub variant: TokenVariant
40}
41
42/// Reads a token from the input.
43///
44/// Returns a token and a slice from the end of the token to the end of the
45/// input.
46///
47/// Note: difference between input and output slices may be more than the length
48/// of the token, as leading whitespace is ignored.
49///
50/// # Errors
51///
52/// Will return `Err` if:
53/// * end of input is reached in an unfinished token (such as a string)
54/// * a number contains an invalid digit
55/// * token is not known (reminder: keywords are case sensitive)
56///
57/// `Err`'s contents are a description of the issue and the slice where it
58/// happened.
59pub fn eat<'a>(input: &'a str)
60    -> Result<(Token<'a>, &'a str), (String, &'a str)>
61{
62    let input = input.trim_start();
63
64macro_rules! make_token
65{
66    ($type: expr, $input: ident, $pos: expr) =>
67    {
68        {
69            let (token, output) = $input.split_at($pos);
70            Ok((Token { tok: token, variant: $type }, output))
71        }
72    };
73}
74
75    match input.chars().next()
76    {
77        None => make_token!(TokenVariant::EOI, input, 0),
78        Some('+') => make_token!(TokenVariant::Plus, input, 1),
79        Some('-') => make_token!(TokenVariant::Minus, input, 1),
80        Some('*') | Some('/') =>
81            make_token!(TokenVariant::MathOp, input, 1),
82        Some('!') => make_token!(TokenVariant::UnBoolOp, input, 1),
83        Some(',') => make_token!(TokenVariant::Comma, input, 1),
84        Some('(') => make_token!(TokenVariant::LeftParens, input, 1),
85        Some(')') => make_token!(TokenVariant::RightParens, input, 1),
86        Some(';') => make_token!(TokenVariant::Semicolon, input, 1),
87        Some('#') => make_token!(TokenVariant::Sharp, input, 1),
88        Some('"') =>
89        {
90            let mut escape = false;
91
92            for (i, c) in input.char_indices().skip(1)
93            {
94                match c
95                {
96                    '"' if escape == false => {
97                        return make_token!(TokenVariant::String, input, i + 1)
98                    },
99                    '\\' if escape == false => escape = true,
100                    _ => escape = false,
101                }
102            }
103
104            Err((String::from("End of input while reading string"), input))
105        }
106        Some(c) if c.is_ascii_digit() => // Number
107        {
108            let mut base = 10;
109            let mut start = 0;
110            let mut end = 0;
111            if c == '0'
112            {
113                match input.chars().skip(1).next()
114                {
115                    Some('b') => // 0bXXXX...: binary
116                    {
117                        let mut pos = 2;
118                        for (i, c) in input.char_indices().skip(2)
119                        {
120                            if c != '0' && c != '1'
121                            {
122                                break;
123                            }
124                            pos = i + 1;
125                        }
126                        if 2 < pos
127                        {
128                            base = 2;
129                            start = 2;
130                            end = pos
131                        }
132                    },
133                    Some('x') | Some('X') => // 0xXXXX...: hexadecimal
134                    {
135                        let mut pos = 2;
136                        for (i, c) in input.char_indices().skip(2)
137                        {
138                            if !c.is_ascii_hexdigit()
139                            {
140                                break;
141                            }
142                            pos = i + 1;
143                        }
144                        if 2 < pos
145                        {
146                            base = 16;
147                            start = 2;
148                            end = pos;
149                        }
150                    }
151                    Some(d) if d.is_ascii_digit() => // 0XXX...: octal
152                    {
153                        // We will use the same digit reading as base 10
154                        base = 8;
155                        start = 1;
156                    }
157                    Some(_) | None => () // End of input
158                }
159            }
160            if end == 0
161            {
162                for (i, c) in input.char_indices().skip(start)
163                {
164                    if !c.is_ascii_digit()
165                    {
166                        break;
167                    }
168                    end = i + 1;
169                }
170            }
171
172            let (tok, output) = input.split_at(end);
173            match usize::from_str_radix(&tok[start..], base)
174            {
175                Ok(num) => Ok((Token { tok,
176                                       variant: TokenVariant::Number(num) },
177                               output)),
178                Err(error) => Err((error.to_string(), &tok[start..]))
179            }
180        }
181        Some(c) if c.is_ascii_alphabetic() =>
182        {
183            let mut end = 1;
184            for (i, c) in input.char_indices().skip(1)
185            {
186                if !c.is_ascii_alphabetic()
187                {
188                    break;
189                }
190                end = i + 1
191            }
192
193            match &input[..end]
194            {
195                "again" => make_token!(TokenVariant::Again, input, end),
196                "defer" => make_token!(TokenVariant::Defer, input, end),
197                "forget" => make_token!(TokenVariant::Forget, input, end),
198                "N" => make_token!(TokenVariant::N, input, end),
199                "print" => make_token!(TokenVariant::Print, input, end),
200                "read" => make_token!(TokenVariant::Read, input, end),
201                "U" => make_token!(TokenVariant::U, input, end),
202                _ => Err((String::from("Unknown token"), input))
203            }
204        }
205        Some('<') | Some('>') =>
206        {
207            match input.chars().skip(1).next()
208            {
209                Some('=') => make_token!(TokenVariant::BinNumBoolOp, input, 2),
210                _ => make_token!(TokenVariant::BinNumBoolOp, input, 1)
211            }
212        }
213        Some(_) =>
214        {
215            // Match the remaining tokens: && || == !=
216            match input.get(..2)
217            {
218                Some("&&") | Some("||") =>
219                    make_token!(TokenVariant::BinBoolOp, input, 2),
220                Some("==") | Some("!=") =>
221                    make_token!(TokenVariant::BinNumBoolOp, input, 2),
222                _ => Err((String::from("Unknown token"), input))
223            }
224        }
225    }
226}
227
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `$input` and checks that the first token matches the pattern
    /// `$type`, that its text is `$input[$from..$at]` and that the rest of
    /// the input starts at `$at`.
    macro_rules! check_token
    {
        ($input: expr, $type: pat, $from: expr, $at: expr) =>
        {
            {
                let source = $input;

                match eat(source)
                {
                    Ok((token, rest)) => match token.variant
                    {
                        $type =>
                        {
                            assert_eq!(token.tok, &source[$from..$at]);
                            assert_eq!(rest, &source[$at..]);
                        }
                        _ => panic!("Not a {}", stringify!($type)),
                    },
                    Err(_) => panic!("Not OK"),
                }
            }
        };
    }

    /// Lexes `input` and checks that the first token is a number of value
    /// `val` whose text is `input[from..at]`, followed by `input[at..]`.
    fn check_token_number(input: &str, val: usize, from: usize, at: usize)
    {
        match eat(input)
        {
            Ok((token, rest)) => match token.variant
            {
                TokenVariant::Number(num) =>
                {
                    assert_eq!(token.tok, &input[from..at]);
                    assert_eq!(num, val);
                    assert_eq!(rest, &input[at..]);
                }
                _ => panic!("Not a TokenVariant::Number"),
            },
            Err(_) => panic!("Not OK"),
        }
    }

    /// Checks that lexing `input` is rejected.
    fn check_token_error(input: &str)
    {
        assert!(eat(input).is_err());
    }

    #[test]
    fn end_of_input_check()
    {
        check_token!("", TokenVariant::EOI, 0, 0);
        check_token!("    \t\n ", TokenVariant::EOI, 7, 7)
    }

    #[test]
    fn plus_check()
    {
        check_token!("+abc", TokenVariant::Plus, 0, 1)
    }

    #[test]
    fn string_check()
    {
        // Actual bytes: "ab\"\\"de
        check_token!("\"ab\\\"\\\\\"de", TokenVariant::String, 0, 8)
    }

    #[test]
    fn skip_spaces()
    {
        check_token!("       \t\n !", TokenVariant::UnBoolOp, 10, 11)
    }

    #[test]
    fn number_check()
    {
        check_token_number("42abc", 42, 0, 2);
        check_token_number("  1337", 1337, 2, 6);
        check_token_number("0", 0, 0, 1);
        // A base prefix without digits is just the number 0
        check_token_number("0b", 0, 0, 1);
        check_token_number("0x", 0, 0, 1);
        check_token_number("00", 0, 0, 2);
        check_token_number("0a", 0, 0, 1);
        check_token_number("0b101010", 0b101010, 0, 8);
        check_token_number("0xdeadbeef", 0xdeadbeef, 0, 10);
        check_token_number("0XCaFe", 0xCAFE, 0, 6);
        check_token_number("0777", 0x1ff, 0, 4);
        check_token_number(" 42💻", 42, 1, 3);
    }

    #[test]
    fn keyword_check()
    {
        check_token!("  again ", TokenVariant::Again, 2, 7);
        check_token!("print()", TokenVariant::Print, 0, 5);
    }

    #[test]
    fn error_check()
    {
        check_token_error("\"abcd"); // End of input while in string
        check_token_error("089"); // Wrong digits for base
        check_token_error("Again"); // Wrong case keyword
        check_token_error("abcd"); // Unknown token
        check_token_error("💻"); // Unknown non-ASCII token
    }

    #[test]
    fn string_tokenization()
    {
        // First line of fibo.wnvr
        let line =
            "1 again (1) defer (3 || N(1)<=N(2) || N(7)>99) 2#N(1),3,7;";

        let expected = vec![
            "1", "again", "(", "1", ")", "defer", "(", "3", "||", "N", "(", "1",
            ")", "<=", "N", "(", "2", ")", "||", "N", "(", "7", ")", ">", "99",
            ")", "2", "#", "N", "(", "1", ")", ",", "3", ",", "7", ";"];

        // Feed the remainder back into the lexer until end of input
        let mut pending: &str = line;
        let mut actual = Vec::new();
        loop
        {
            let (token, remainder) = match eat(pending)
            {
                Ok(lexed) => lexed,
                Err((error, _)) =>
                    panic!("{}. input: \"{}\"", error, pending),
            };

            if let TokenVariant::EOI = token.variant
            {
                break;
            }
            actual.push(token.tok);
            pending = remainder;
        }

        assert_eq!(actual, expected);
    }
}