1use logos::Logos;
2
/// Byte-offset range (`start..end`) locating a token in the source text.
pub type Span = logos::Span;
4
/// Lexical tokens of the language, derived via `logos`.
///
/// Horizontal whitespace (space, CR, tab, form feed) is skipped by the lexer;
/// `\n` is lexed as an explicit `NewLine` token and filtered out later in
/// `tokenize`, as are the comment tokens.
#[derive(Clone, Copy, Debug, Eq, Logos, PartialEq)]
#[logos(skip r"[ \r\t\f]+")]
pub enum Token {
    // --- Delimiters & punctuation ---
    #[token("(")]
    LParen,
    #[token(")")]
    RParen,
    #[token("[")]
    LBracket,
    #[token("]")]
    RBracket,
    #[token("{")]
    LBrace,
    #[token("}")]
    RBrace,
    #[token(":")]
    Colon,
    #[token(";")]
    Semi,
    #[token(",")]
    Comma,
    #[token(".")]
    Dot,
    #[token("|")]
    Bar,
    #[token("=")]
    Equal,
    // --- Arithmetic operators ---
    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    #[token("*")]
    Star,
    #[token("/")]
    Slash,
    #[token("%")]
    Percent,
    // --- Comparison operators ---
    #[token("<")]
    Less,
    #[token("<=")]
    LessEqual,
    #[token("==")]
    EqualEqual,
    #[token(">=")]
    GreaterEqual,
    #[token(">")]
    Greater,
    #[token("!=")]
    BangEqual,
    // --- Logical & misc operators ---
    #[token("^")]
    Caret,
    #[token("&&")]
    DoubleAmpersand,
    #[token("||")]
    DoubleBar,
    #[token("!")]
    Bang,
    // --- Arrows & binding operators ---
    #[token("->")]
    Arrow,
    #[token("=>")]
    FatArrow,
    #[token("<-")]
    LeftArrow,
    #[token(":=")]
    ColonEqual,
    // --- Keywords ---
    #[token("let")]
    Let,
    #[token("if")]
    If,
    #[token("then")]
    Then,
    #[token("else")]
    Else,
    #[token("condition")]
    Condition,
    #[token("alternative")]
    Alternative,
    #[token("match")]
    Match,
    #[token("with")]
    With,
    #[token("case")]
    Case,
    #[token("of")]
    Of,
    #[token("as")]
    As,
    #[token("begin")]
    Begin,
    #[token("end")]
    End,
    #[token("fresh")]
    Fresh,
    #[token("and")]
    And,
    #[token("or")]
    Or,
    #[token("guard")]
    Guard,
    #[token("undefined")]
    Undefined,
    #[token("datatype")]
    Datatype,
    #[token("function")]
    Function,
    #[token("query")]
    Query,
    #[token("where")]
    Where,
    // --- Literals ---
    // NOTE(review): the leading `-?` lets `-1` lex as a single Int rather
    // than `Minus` followed by Int — confirm this is intended.
    #[regex(r"-?[0-9]([0-9])*")]
    Int,
    #[regex(r"-?[0-9]([0-9])*\.[0-9]([0-9])*")]
    Float,
    #[token("true")]
    #[token("false")]
    Bool,
    // A single quoted character, or a backslash escape such as '\n'.
    #[regex(r"'(.|\\.)'")]
    Char,
    // --- Built-in type names ---
    #[token("Int")]
    TyInt,
    #[token("Float")]
    TyFloat,
    #[token("Bool")]
    TyBool,
    #[token("Char")]
    TyChar,
    #[token("()")]
    Unit,
    // --- Identifiers ---
    #[regex(r"([a-z]|_)([a-zA-Z0-9]|_)*")]
    LowerIdent,
    #[regex(r"[A-Z]([a-zA-Z0-9]|_)*")]
    UpperIdent,
    // `@`-prefixed primitive-operator names.
    #[regex(r"@[a-zA-Z]([a-zA-Z0-9]|_)*")]
    PrimOpr,
    // --- Trivia (dropped by `tokenize`) ---
    // The callbacks consume the comment body so it is part of this token.
    #[token("//", line_comment)]
    LineComment,
    #[token("/*", block_comment)]
    BlockComment,
    #[token("\n")]
    NewLine,
    // No pattern: substituted for lexer errors by `tokenize`.
    TokError,
    // No pattern: appended by `tokenize` once the input is exhausted.
    EndOfFile,
}
150
151fn line_comment(lex: &mut logos::Lexer<Token>) -> bool {
152 let mut rest = lex.remainder().chars();
153 loop {
154 if let Some(ch) = rest.next() {
155 lex.bump(ch.len_utf8());
156 if ch == '\n' {
157 return true;
158 }
159 } else {
160 return false;
161 }
162 }
163}
164
165fn block_comment(lex: &mut logos::Lexer<Token>) -> bool {
166 let mut rest = lex.remainder().chars();
167 let mut last_char = ' ';
168 let mut nested_level: usize = 1;
169 loop {
170 if let Some(ch) = rest.next() {
171 lex.bump(ch.len_utf8());
172 match ch {
173 '/' if last_char == '*' => {
174 nested_level -= 1;
175 }
176 '*' if last_char == '/' => {
177 nested_level += 1;
178 }
179 _ => {}
180 }
181 if nested_level == 0 {
182 return true;
183 }
184 last_char = ch;
185 } else {
186 return false;
187 }
188 }
189}
190
191pub struct TokenSpan {
192 pub token: Token,
193 pub span: Span,
194}
195
196pub fn tokenize(source: &str) -> Vec<TokenSpan> {
197 let mut lex = Token::lexer(source);
198 let mut vec = Vec::new();
199 while let Some(tok) = lex.next() {
200 let span = lex.span();
201 match tok {
202 Ok(Token::NewLine) | Ok(Token::LineComment) | Ok(Token::BlockComment) => {}
205 Ok(token) => {
206 vec.push(TokenSpan { token, span });
207 }
208 Err(()) => {
209 let token = Token::TokError;
210 vec.push(TokenSpan { token, span });
211 }
212 }
213 }
214 let token = Token::EndOfFile;
215 let span = lex.span();
216 vec.push(TokenSpan { token, span });
217 vec
218}
219
/// Ad-hoc smoke test: prints every token, span and slice for a sample
/// program. Ignored by default; run with `cargo test -- --ignored` to see
/// the lexer's output.
#[test]
#[ignore = "just to see result"]
fn lexer_test() {
    let s = r#"
// test line comment
/*
    /*
    test block comment
    */
*/
datatype IntList where
| Cons(Int, IntList)
| Nil
end

function append(xs: IntList, x: Int) -> Int
begin
    match xs with
    | Cons(head, tail) => Cons(head, append(tail, x))
    | Nil => Cons(x, Nil)
    end
end
"#;

    let mut lex = Token::lexer(s);

    // `while let` replaces the manual loop + if-let + break
    // (clippy: while_let_loop).
    while let Some(tok) = lex.next() {
        println!("{:?} {:?} {}", tok, lex.span(), lex.slice());
    }
}