1use std::fmt;
4
/// The kinds of tokens produced by the lexer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenType {
    /// End of input (also emitted for unrecognized bytes — see `next_token`).
    Eof,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `->`
    Arrow,
    /// A `:`-prefixed keyword, e.g. `:type` (literal keeps the `:`).
    Keyword,
    /// A bare identifier; may contain `-`, `[`, `]`, `.` (see `is_symbol_char`).
    Symbol,
    /// A double-quoted string literal (literal is unquoted, escapes resolved).
    Str,
    /// An integer literal, optionally negative.
    Number,
    /// A `{ ... }` guard expression (literal has the outer braces stripped).
    Guard,
}
18
19impl fmt::Display for TokenType {
20 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
21 match self {
22 TokenType::Eof => write!(f, "EOF"),
23 TokenType::LParen => write!(f, "("),
24 TokenType::RParen => write!(f, ")"),
25 TokenType::Arrow => write!(f, "->"),
26 TokenType::Keyword => write!(f, "keyword"),
27 TokenType::Symbol => write!(f, "symbol"),
28 TokenType::Str => write!(f, "string"),
29 TokenType::Number => write!(f, "number"),
30 TokenType::Guard => write!(f, "guard"),
31 }
32 }
33}
34
/// A single lexed token: its kind, its text, and where it started.
#[derive(Debug, Clone)]
pub struct Token {
    /// The token's kind.
    pub typ: TokenType,
    /// The token's text (escapes resolved for strings; `:` prefix kept for keywords).
    pub literal: String,
    /// Byte offset of the token's first character in the source input.
    pub pos: usize,
}
42
43impl fmt::Display for Token {
44 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
45 write!(f, "Token({}, {:?}, {})", self.typ, self.literal, self.pos)
46 }
47}
48
/// A byte-oriented lexer over the source input.
pub struct Lexer {
    // Raw input bytes; non-ASCII bytes pass through the read_* helpers
    // and are recovered lossily via `String::from_utf8_lossy`.
    input: Vec<u8>,
    // Byte offset of `ch`, the character currently under examination.
    pos: usize,
    // Byte offset of the next character to be read (always pos + 1 after read_char).
    read_pos: usize,
    // Current byte; 0 is the end-of-input sentinel.
    ch: u8,
}
56
impl Lexer {
    /// Creates a lexer over `input` and primes the first character.
    pub fn new(input: &str) -> Self {
        let mut l = Self {
            input: input.as_bytes().to_vec(),
            pos: 0,
            read_pos: 0,
            ch: 0,
        };
        // Load the first byte so `ch`/`pos` are valid before the first next_token().
        l.read_char();
        l
    }

    /// Advances one byte: `ch` becomes the byte at `read_pos` (or the 0
    /// sentinel past the end), `pos` becomes that byte's offset.
    fn read_char(&mut self) {
        if self.read_pos >= self.input.len() {
            self.ch = 0; // 0 marks end of input
        } else {
            self.ch = self.input[self.read_pos];
        }
        self.pos = self.read_pos;
        self.read_pos += 1;
    }

    /// Returns the next byte without consuming it (0 at end of input).
    fn peek_char(&self) -> u8 {
        if self.read_pos >= self.input.len() {
            0
        } else {
            self.input[self.read_pos]
        }
    }

    /// Skips spaces, tabs, and CR/LF line endings.
    fn skip_whitespace(&mut self) {
        while self.ch == b' ' || self.ch == b'\t' || self.ch == b'\n' || self.ch == b'\r' {
            self.read_char();
        }
    }

    /// Skips the rest of a `;` comment, stopping AT (not past) the newline
    /// so skip_whitespace can consume it on the next pass.
    fn skip_comment(&mut self) {
        while self.ch != 0 && self.ch != b'\n' {
            self.read_char();
        }
    }

    /// Produces the next token, consuming exactly the bytes it covers.
    /// Returns an `Eof` token at end of input (and, as currently written,
    /// for unrecognized bytes — see the catch-all arm below).
    pub fn next_token(&mut self) -> Token {
        // Discard whitespace and any number of `;`-to-end-of-line comments.
        loop {
            self.skip_whitespace();
            if self.ch == b';' {
                self.skip_comment();
                continue;
            }
            break;
        }

        // Start offset of the token we are about to produce.
        let pos = self.pos;

        match self.ch {
            0 => Token {
                typ: TokenType::Eof,
                literal: String::new(),
                pos,
            },
            b'(' => {
                self.read_char();
                Token {
                    typ: TokenType::LParen,
                    literal: "(".into(),
                    pos,
                }
            }
            b')' => {
                self.read_char();
                Token {
                    typ: TokenType::RParen,
                    literal: ")".into(),
                    pos,
                }
            }
            // `-` is three-way ambiguous: arrow `->`, a negative number,
            // or a symbol that begins with `-`. Disambiguate on the peek.
            b'-' => {
                if self.peek_char() == b'>' {
                    // Consume both `-` and `>`.
                    self.read_char();
                    self.read_char();
                    Token {
                        typ: TokenType::Arrow,
                        literal: "->".into(),
                        pos,
                    }
                } else if is_digit(self.peek_char()) {
                    // Skip the `-`, lex the digits, then re-attach the sign.
                    self.read_char();
                    let num = self.read_number();
                    Token {
                        typ: TokenType::Number,
                        literal: format!("-{}", num),
                        pos,
                    }
                } else {
                    // `-` is a valid symbol character, so read_symbol starts here.
                    let sym = self.read_symbol();
                    Token {
                        typ: TokenType::Symbol,
                        literal: sym,
                        pos,
                    }
                }
            }
            b':' => {
                // Consume the `:`, read the name, keep the `:` in the literal.
                self.read_char();
                let kw = self.read_symbol();
                Token {
                    typ: TokenType::Keyword,
                    literal: format!(":{}", kw),
                    pos,
                }
            }
            b'"' => {
                // Consume the opening quote; read_string eats the closing one.
                self.read_char();
                let s = self.read_string(b'"');
                Token {
                    typ: TokenType::Str,
                    literal: s,
                    pos,
                }
            }
            b'{' => {
                // Consume the opening brace; read_guard balances nested braces.
                self.read_char();
                let g = self.read_guard();
                Token {
                    typ: TokenType::Guard,
                    literal: g,
                    pos,
                }
            }
            ch if is_digit(ch) => {
                let num = self.read_number();
                Token {
                    typ: TokenType::Number,
                    literal: num,
                    pos,
                }
            }
            ch if is_symbol_start(ch) => {
                let sym = self.read_symbol();
                Token {
                    typ: TokenType::Symbol,
                    literal: sym,
                    pos,
                }
            }
            // NOTE(review): an unrecognized byte is consumed but reported as
            // Eof with an empty literal, which makes tokenize() stop early on
            // malformed input instead of surfacing an error — confirm intended.
            _ => {
                self.read_char();
                Token {
                    typ: TokenType::Eof,
                    literal: String::new(),
                    pos,
                }
            }
        }
    }

    /// Consumes a maximal run of symbol characters starting at `pos` and
    /// returns it as a String (lossy UTF-8 recovery for non-ASCII bytes).
    fn read_symbol(&mut self) -> String {
        let start = self.pos;
        while is_symbol_char(self.ch) {
            self.read_char();
        }
        String::from_utf8_lossy(&self.input[start..self.pos]).to_string()
    }

    /// Consumes a maximal run of ASCII digits (integers only — no decimal
    /// point or exponent) and returns it as a String.
    fn read_number(&mut self) -> String {
        let start = self.pos;
        while is_digit(self.ch) {
            self.read_char();
        }
        String::from_utf8_lossy(&self.input[start..self.pos]).to_string()
    }

    /// Reads until the unescaped closing `quote`, resolving `\n` `\t` `\r`
    /// `\\` `\"` escapes; an unknown escape keeps the escaped byte as-is.
    /// The closing quote is consumed if present; an unterminated string
    /// silently ends at end of input.
    fn read_string(&mut self, quote: u8) -> String {
        let mut result = Vec::new();
        while self.ch != 0 && self.ch != quote {
            if self.ch == b'\\' {
                // Step onto the escaped byte and translate it.
                self.read_char();
                match self.ch {
                    b'n' => result.push(b'\n'),
                    b't' => result.push(b'\t'),
                    b'r' => result.push(b'\r'),
                    b'\\' => result.push(b'\\'),
                    b'"' => result.push(b'"'),
                    other => result.push(other),
                }
            } else {
                result.push(self.ch);
            }
            self.read_char();
        }
        // Consume the closing quote (absent if we hit end of input).
        if self.ch == quote {
            self.read_char();
        }
        String::from_utf8_lossy(&result).to_string()
    }

    /// Reads a brace-balanced guard body. Assumes the opening `{` was already
    /// consumed (depth starts at 1); returns the body without the outer
    /// braces, keeping any nested ones. An unterminated guard silently ends
    /// at end of input.
    fn read_guard(&mut self) -> String {
        let mut result = Vec::new();
        let mut depth = 1;
        while self.ch != 0 && depth > 0 {
            if self.ch == b'{' {
                depth += 1;
            } else if self.ch == b'}' {
                depth -= 1;
                if depth == 0 {
                    // Consume the outer closing brace but don't emit it.
                    self.read_char();
                    break;
                }
            }
            result.push(self.ch);
            self.read_char();
        }
        String::from_utf8_lossy(&result).to_string()
    }
}
272
/// True if `b` may begin a symbol: an ASCII letter or underscore.
fn is_symbol_start(b: u8) -> bool {
    matches!(b, b'a'..=b'z' | b'A'..=b'Z' | b'_')
}
276
/// True if `b` may continue a symbol: alphanumerics plus `_ - [ ] .`
/// (brackets and dot allow table-access symbols like `balances[from]`).
fn is_symbol_char(b: u8) -> bool {
    matches!(
        b,
        b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-' | b'[' | b']' | b'.'
    )
}
280
/// True if `b` is an ASCII decimal digit.
fn is_digit(b: u8) -> bool {
    matches!(b, b'0'..=b'9')
}
284
285pub fn tokenize(input: &str) -> Vec<Token> {
287 let mut lexer = Lexer::new(input);
288 let mut tokens = Vec::new();
289 loop {
290 let tok = lexer.next_token();
291 let is_eof = tok.typ == TokenType::Eof;
292 tokens.push(tok);
293 if is_eof {
294 break;
295 }
296 }
297 tokens
298}
299
#[cfg(test)]
mod tests {
    use super::*;

    // Parens and symbols, including a symbol containing `-` (`ERC-020`).
    #[test]
    fn test_basic_tokens() {
        let tokens = tokenize("(schema ERC-020)");
        assert_eq!(tokens[0].typ, TokenType::LParen);
        assert_eq!(tokens[1].typ, TokenType::Symbol);
        assert_eq!(tokens[1].literal, "schema");
        assert_eq!(tokens[2].typ, TokenType::Symbol);
        assert_eq!(tokens[2].literal, "ERC-020");
        assert_eq!(tokens[3].typ, TokenType::RParen);
    }

    // Keywords keep their leading `:` in the literal.
    #[test]
    fn test_keywords() {
        let tokens = tokenize(":type :guard :keys");
        assert_eq!(tokens[0].typ, TokenType::Keyword);
        assert_eq!(tokens[0].literal, ":type");
        assert_eq!(tokens[1].typ, TokenType::Keyword);
        assert_eq!(tokens[1].literal, ":guard");
    }

    // `->` lexes as Arrow, not as a `-`-prefixed symbol.
    #[test]
    fn test_arrow() {
        let tokens = tokenize("balances -> transfer");
        assert_eq!(tokens[0].typ, TokenType::Symbol);
        assert_eq!(tokens[1].typ, TokenType::Arrow);
        assert_eq!(tokens[2].typ, TokenType::Symbol);
    }

    // Guard literals have the surrounding braces stripped.
    #[test]
    fn test_guard() {
        let tokens = tokenize("{balances[from] >= amount}");
        assert_eq!(tokens[0].typ, TokenType::Guard);
        assert_eq!(tokens[0].literal, "balances[from] >= amount");
    }

    // Positive and negative integers; the `-` sign is part of the literal.
    #[test]
    fn test_numbers() {
        let tokens = tokenize("123 -456");
        assert_eq!(tokens[0].typ, TokenType::Number);
        assert_eq!(tokens[0].literal, "123");
        assert_eq!(tokens[1].typ, TokenType::Number);
        assert_eq!(tokens[1].literal, "-456");
    }

    // `;` comments are skipped entirely and produce no tokens.
    #[test]
    fn test_comments() {
        let tokens = tokenize("; this is a comment\n(schema test)");
        assert_eq!(tokens[0].typ, TokenType::LParen);
        assert_eq!(tokens[1].literal, "schema");
    }
}