1use logos::{Lexer, Logos};
2
3#[derive(Logos, Debug, PartialEq, Eq, Clone)]
4#[logos(skip r"[ \t\n\f]+")]
5#[logos(skip r"owo .*")]
6#[rustfmt::skip]
7pub enum Token {
8 #[token("(")] LeftParen,
10 #[token(")")] RightParen,
11 #[token("[")] LeftBracket,
12 #[token("]")] RightBracket,
13 #[token("{")] LeftCurly,
14 #[token("}")] RightCurly,
15 #[token(";")] Semicolon,
16 #[token(",")] Comma,
17
18 #[token("+")] Plus,
20 #[token("-")] Minus,
21 #[token("*")] Star,
22 #[token("/")] FwdSlash,
23 #[token("=:")] Assign,
24 #[token("<=")] Arrow,
25
26 #[token("<")] LessThan,
28 #[token(">")] GreaterThan,
29 #[token("=")] Equals,
30 #[token("ain't")] Aint,
31
32 #[token("functio")] Functio,
34 #[token("bff")] Bff,
35 #[token("dim")] Dim,
36 #[token("print")] Print,
37 #[token("read")] Read,
38 #[token("melo")] Melo,
39 #[token("T-Dark")] TDark,
40
41 #[token("unless")] Unless,
43 #[token("loop")] Loop,
44 #[token("enough")] Enough,
45 #[token("and again")] AndAgain,
46 #[token("finally")] Finally,
47 #[token("rlyeh")] Rlyeh,
48
49 #[token("rickroll")] Rickroll,
50
51 #[token("/*", get_string)] String(String),
53 #[regex(r"-?[0-9]+", get_value)] Integer(isize),
54 #[regex(r"\p{XID_Start}", get_value)] Char(char),
55 #[regex(r"\p{XID_Start}[\p{XID_Continue}]+", get_ident)]
56 #[token("and ", |_| "and".to_owned())]
57 Identifier(String),
58}
59
60fn get_value<T: std::str::FromStr>(lexer: &mut Lexer<Token>) -> Option<T> {
61 lexer.slice().parse().ok()
62}
63
64fn get_string(lexer: &mut Lexer<Token>) -> Option<String> {
65 lexer.bump(lexer.remainder().find("*/")?);
66
67 let mut string = String::new();
68 let mut slice = &lexer.slice()[2..];
69 while let Some(escape_start) = slice.find('"') {
70 string.push_str(slice.get(..escape_start)?);
72
73 slice = slice.get(escape_start + 1..)?;
75
76 let escape_end = slice.find('"')?;
79 string.push(
80 u32::from_str_radix(slice.get(..escape_end)?, 12)
81 .ok()
82 .and_then(char::from_u32)?,
83 );
84
85 slice = slice.get(escape_end + 1..)?;
87 }
88
89 string.push_str(slice);
91 lexer.bump(2);
92
93 Some(string)
94}
95
96fn get_ident(lexer: &mut Lexer<Token>) -> String {
97 lexer.slice().to_owned()
98}
99
#[cfg(test)]
mod tests {
    use super::Token;
    use super::Token::*;
    use logos::Logos;

    /// Lexes `code` to the end, panicking if any part of it fails to lex.
    fn lex(code: &str) -> Vec<Token> {
        Token::lexer(code).collect::<Result<_, _>>().unwrap()
    }

    /// Shorthand for an `Identifier` token carrying `name`.
    fn ident(name: &str) -> Token {
        Identifier(name.to_owned())
    }

    /// A small function definition lexes into the expected token sequence.
    #[test]
    fn simple_fn() {
        let tokens = lex("functio test() { dim var 3; unless (var ain't 3) { var print } }");

        assert_eq!(
            tokens,
            [
                // functio test()
                Functio,
                ident("test"),
                LeftParen,
                RightParen,
                // { dim var 3;
                LeftCurly,
                Dim,
                ident("var"),
                Integer(3),
                Semicolon,
                // unless (var ain't 3)
                Unless,
                LeftParen,
                ident("var"),
                Aint,
                Integer(3),
                RightParen,
                // { var print } }
                LeftCurly,
                ident("var"),
                Print,
                RightCurly,
                RightCurly,
            ]
        );
    }

    /// Base-12 escapes wrapped in `"` decode to the characters they name.
    #[test]
    fn escapes() {
        let tokens = lex(r#"/*»"720B""722B""7195"«*/"#);

        assert_eq!(tokens, [Token::String("»にゃぁ«".to_owned())]);
    }
}