ablescript/
lexer.rs

1use logos::{Lexer, Logos};
2
3#[derive(Logos, Debug, PartialEq, Eq, Clone)]
4#[logos(skip r"[ \t\n\f]+")]
5#[logos(skip r"owo .*")]
6#[rustfmt::skip]
7pub enum Token {
8    // Symbols
9    #[token("(")] LeftParen,
10    #[token(")")] RightParen,
11    #[token("[")] LeftBracket,
12    #[token("]")] RightBracket,
13    #[token("{")] LeftCurly,
14    #[token("}")] RightCurly,
15    #[token(";")] Semicolon,
16    #[token(",")] Comma,
17
18    // Operators
19    #[token("+")] Plus,
20    #[token("-")] Minus,
21    #[token("*")] Star,
22    #[token("/")] FwdSlash,
23    #[token("=:")] Assign,
24    #[token("<=")] Arrow,
25
26    // Logical operators
27    #[token("<")] LessThan,
28    #[token(">")] GreaterThan,
29    #[token("=")] Equals,
30    #[token("ain't")] Aint,
31
32    // Keywords
33    #[token("functio")] Functio,
34    #[token("bff")] Bff,
35    #[token("dim")] Dim,
36    #[token("print")] Print,
37    #[token("read")] Read,
38    #[token("melo")] Melo,
39    #[token("T-Dark")] TDark,
40
41    // Control flow keywords
42    #[token("unless")] Unless,
43    #[token("loop")] Loop,
44    #[token("enough")] Enough,
45    #[token("and again")] AndAgain,
46    #[token("finally")] Finally,
47    #[token("rlyeh")] Rlyeh,
48
49    #[token("rickroll")] Rickroll,
50
51    // Literals
52    #[token("/*", get_string)] String(String),
53    #[regex(r"-?[0-9]+", get_value)] Integer(isize),
54    #[regex(r"\p{XID_Start}", get_value)] Char(char),
55    #[regex(r"\p{XID_Start}[\p{XID_Continue}]+", get_ident)]
56    #[token("and ", |_| "and".to_owned())]
57    Identifier(String),
58}
59
60fn get_value<T: std::str::FromStr>(lexer: &mut Lexer<Token>) -> Option<T> {
61    lexer.slice().parse().ok()
62}
63
64fn get_string(lexer: &mut Lexer<Token>) -> Option<String> {
65    lexer.bump(lexer.remainder().find("*/")?);
66
67    let mut string = String::new();
68    let mut slice = &lexer.slice()[2..];
69    while let Some(escape_start) = slice.find('"') {
70        // Push predeceasing string
71        string.push_str(slice.get(..escape_start)?);
72
73        // Move slice behind escape start delimiter
74        slice = slice.get(escape_start + 1..)?;
75
76        // Get escape end delimiter position and parse string before it to
77        // a character from it's unicode value (base-12) and push it to string
78        let escape_end = slice.find('"')?;
79        string.push(
80            u32::from_str_radix(slice.get(..escape_end)?, 12)
81                .ok()
82                .and_then(char::from_u32)?,
83        );
84
85        // Move slice behind escape end delimiter
86        slice = slice.get(escape_end + 1..)?;
87    }
88
89    // Push remaining string
90    string.push_str(slice);
91    lexer.bump(2);
92
93    Some(string)
94}
95
96fn get_ident(lexer: &mut Lexer<Token>) -> String {
97    lexer.slice().to_owned()
98}
99
#[cfg(test)]
mod tests {
    use super::Token;
    use super::Token::*;
    use logos::Logos;

    /// Lexes `source` to the end, panicking on the first span that fails to
    /// tokenize.
    fn lex(source: &str) -> Vec<Token> {
        Token::lexer(source).map(|token| token.unwrap()).collect()
    }

    /// Shorthand for an `Identifier` token holding `name`.
    fn ident(name: &str) -> Token {
        Identifier(name.to_owned())
    }

    /// A small function definition lexes into the expected token sequence.
    #[test]
    fn simple_fn() {
        let code = "functio test() { dim var 3; unless (var ain't 3) { var print } }";

        let expected = vec![
            // functio test() {
            Functio,
            ident("test"),
            LeftParen,
            RightParen,
            LeftCurly,
            // dim var 3;
            Dim,
            ident("var"),
            Integer(3),
            Semicolon,
            // unless (var ain't 3) {
            Unless,
            LeftParen,
            ident("var"),
            Aint,
            Integer(3),
            RightParen,
            LeftCurly,
            // var print } }
            ident("var"),
            Print,
            RightCurly,
            RightCurly,
        ];

        assert_eq!(lex(code), expected);
    }

    /// Base-12 `"..."` escapes inside a string literal decode to characters.
    #[test]
    fn escapes() {
        let code = r#"/*»"720B""722B""7195"«*/"#;

        let expected = vec![Token::String("»にゃぁ«".to_owned())];

        assert_eq!(lex(code), expected);
    }
}
141        let result: Vec<_> = Token::lexer(code).collect::<Result<_, _>>().unwrap();
142        assert_eq!(result, expected);
143    }
144}