1use std::fmt;
2use thiserror::Error;
3
/// The kind of a lexical token, together with any literal payload it carries.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    // --- Literals and names ---
    /// Integer literal.
    Int(i64),
    /// Floating-point literal.
    Float(f64),
    /// String literal without interpolation.
    Str(String),
    /// String literal containing `{...}` interpolation. Each part is
    /// `(is_expression, text)`: `true` marks the raw text found between the
    /// braces, `false` marks literal text.
    InterpStr(Vec<(bool, String)>),
    /// The words `true` and `false`.
    Bool(bool),
    /// Any word that is not a keyword.
    Ident(String),

    // --- Keywords ---
    /// `module`
    Module,
    /// `depends`
    Depends,
    /// `exposes`
    Exposes,
    /// `intent`
    Intent,
    /// `type`
    Type,
    /// `record`
    Record,
    /// `fn`
    Fn,
    /// `effects`
    Effects,
    /// `decision`
    Decision,
    /// `verify`
    Verify,
    /// `match`
    Match,

    // --- Operators and punctuation ---
    /// `->`
    Arrow,
    /// `=>`
    FatArrow,
    /// `==`
    Eq,
    /// `!=`
    Neq,
    /// `<=`
    Lte,
    /// `>=`
    Gte,
    /// `=`
    Assign,
    /// `!`
    Bang,
    /// `?`
    Question,
    /// `<`
    Lt,
    /// `>`
    Gt,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `.`
    Dot,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `{`
    LBrace,
    /// `}`
    RBrace,

    // --- Layout ---
    /// Emitted when a line is indented deeper than the enclosing block.
    Indent,
    /// Emitted once for every indentation level that is closed.
    Dedent,
    /// End of a line that contained at least one token.
    Newline,
    /// End of input; always the last token produced.
    Eof,
}
57
58impl fmt::Display for TokenKind {
59 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
60 match self {
61 TokenKind::Int(n) => write!(f, "integer '{}'", n),
62 TokenKind::Float(n) => write!(f, "float '{}'", n),
63 TokenKind::Str(s) => write!(f, "string \"{}\"", s),
64 TokenKind::InterpStr(_) => write!(f, "interpolated string"),
65 TokenKind::Bool(b) => write!(f, "'{}'", b),
66 TokenKind::Ident(s) => write!(f, "'{}'", s),
67 TokenKind::Module => write!(f, "'module'"),
68 TokenKind::Depends => write!(f, "'depends'"),
69 TokenKind::Exposes => write!(f, "'exposes'"),
70 TokenKind::Intent => write!(f, "'intent'"),
71 TokenKind::Type => write!(f, "'type'"),
72 TokenKind::Record => write!(f, "'record'"),
73 TokenKind::Fn => write!(f, "'fn'"),
74 TokenKind::Effects => write!(f, "'effects'"),
75 TokenKind::Decision => write!(f, "'decision'"),
76 TokenKind::Verify => write!(f, "'verify'"),
77 TokenKind::Match => write!(f, "'match'"),
78 TokenKind::Arrow => write!(f, "'->'"),
79 TokenKind::FatArrow => write!(f, "'=>'"),
80 TokenKind::Eq => write!(f, "'=='"),
81 TokenKind::Neq => write!(f, "'!='"),
82 TokenKind::Lte => write!(f, "'<='"),
83 TokenKind::Gte => write!(f, "'>='"),
84 TokenKind::Assign => write!(f, "'='"),
85 TokenKind::Bang => write!(f, "'!'"),
86 TokenKind::Question => write!(f, "'?'"),
87 TokenKind::Lt => write!(f, "'<'"),
88 TokenKind::Gt => write!(f, "'>'"),
89 TokenKind::Plus => write!(f, "'+'"),
90 TokenKind::Minus => write!(f, "'-'"),
91 TokenKind::Star => write!(f, "'*'"),
92 TokenKind::Slash => write!(f, "'/'"),
93 TokenKind::Dot => write!(f, "'.'"),
94 TokenKind::Colon => write!(f, "':'"),
95 TokenKind::Comma => write!(f, "','"),
96 TokenKind::LParen => write!(f, "'('"),
97 TokenKind::RParen => write!(f, "')'"),
98 TokenKind::LBracket => write!(f, "'['"),
99 TokenKind::RBracket => write!(f, "']'"),
100 TokenKind::LBrace => write!(f, "'{{'"),
101 TokenKind::RBrace => write!(f, "'}}'"),
102 TokenKind::Indent => write!(f, "indentation"),
103 TokenKind::Dedent => write!(f, "end of block"),
104 TokenKind::Newline => write!(f, "end of line"),
105 TokenKind::Eof => write!(f, "end of file"),
106 }
107 }
108}
109
110#[derive(Debug, Clone)]
111pub struct Token {
112 pub kind: TokenKind,
113 pub line: usize,
114 pub col: usize,
115}
116
117#[derive(Debug, Error)]
118pub enum LexerError {
119 #[error("error[{line}:{col}]: {msg}")]
120 Error {
121 msg: String,
122 line: usize,
123 col: usize,
124 },
125}
126
127fn keyword(s: &str) -> Option<TokenKind> {
128 match s {
129 "module" => Some(TokenKind::Module),
130 "depends" => Some(TokenKind::Depends),
131 "exposes" => Some(TokenKind::Exposes),
132 "intent" => Some(TokenKind::Intent),
133 "type" => Some(TokenKind::Type),
134 "record" => Some(TokenKind::Record),
135 "fn" => Some(TokenKind::Fn),
136 "effects" => Some(TokenKind::Effects),
137 "decision" => Some(TokenKind::Decision),
138 "verify" => Some(TokenKind::Verify),
139 "match" => Some(TokenKind::Match),
140 "true" => Some(TokenKind::Bool(true)),
141 "false" => Some(TokenKind::Bool(false)),
142 _ => None,
143 }
144}
145
/// Converts source text into tokens, tracking line/column positions and an
/// indentation stack used to emit `Indent` / `Dedent` tokens.
pub struct Lexer {
    /// The whole input, decoded into characters so it can be indexed by `pos`.
    chars: Vec<char>,
    /// Index into `chars` of the next character to read.
    pos: usize,
    /// Line of the next character; starts at 1.
    line: usize,
    /// Column of the next character; starts at 1 and resets after each `\n`.
    col: usize,
    /// Indentation widths of the currently open blocks; the bottom entry is 0.
    indent_stack: Vec<usize>,
    /// True when the next character begins a line, so its leading spaces must
    /// be measured before scanning a token.
    at_line_start: bool,
}
154
155impl Lexer {
156 pub fn new(source: &str) -> Self {
157 Lexer {
158 chars: source.chars().collect(),
159 pos: 0,
160 line: 1,
161 col: 1,
162 indent_stack: vec![0],
163 at_line_start: true,
164 }
165 }
166
167 fn error(&self, msg: impl Into<String>) -> LexerError {
168 LexerError::Error {
169 msg: msg.into(),
170 line: self.line,
171 col: self.col,
172 }
173 }
174
175 fn peek(&self, offset: usize) -> Option<char> {
176 self.chars.get(self.pos + offset).copied()
177 }
178
179 fn current(&self) -> Option<char> {
180 self.chars.get(self.pos).copied()
181 }
182
183 fn advance(&mut self) -> Option<char> {
184 let ch = self.chars.get(self.pos).copied()?;
185 self.pos += 1;
186 if ch == '\n' {
187 self.line += 1;
188 self.col = 1;
189 } else {
190 self.col += 1;
191 }
192 Some(ch)
193 }
194
195 pub fn tokenize(&mut self) -> Result<Vec<Token>, LexerError> {
196 let mut tokens = Vec::new();
197
198 while self.pos < self.chars.len() {
199 self.scan_token(&mut tokens)?;
200 }
201
202 while self.indent_stack.len() > 1 {
204 self.indent_stack.pop();
205 tokens.push(Token {
206 kind: TokenKind::Dedent,
207 line: self.line,
208 col: self.col,
209 });
210 }
211
212 tokens.push(Token {
213 kind: TokenKind::Eof,
214 line: self.line,
215 col: self.col,
216 });
217
218 Ok(tokens)
219 }
220
221 fn scan_token(&mut self, tokens: &mut Vec<Token>) -> Result<(), LexerError> {
222 if self.at_line_start {
223 self.handle_indentation(tokens)?;
224 if self.pos >= self.chars.len() {
225 return Ok(());
226 }
227 }
228
229 let ch = match self.current() {
230 Some(c) => c,
231 None => return Ok(()),
232 };
233
234 if ch == ' ' {
236 self.advance();
237 return Ok(());
238 }
239
240 if ch == '\n' {
242 let line = self.line;
243 let col = self.col;
244 self.advance();
245
246 let last_is_structural = tokens
247 .last()
248 .map(|t| {
249 matches!(
250 t.kind,
251 TokenKind::Newline | TokenKind::Indent | TokenKind::Dedent
252 )
253 })
254 .unwrap_or(true);
255
256 if !tokens.is_empty() && !last_is_structural {
257 tokens.push(Token {
258 kind: TokenKind::Newline,
259 line,
260 col,
261 });
262 }
263 self.at_line_start = true;
264 return Ok(());
265 }
266
267 if ch == '\r' {
269 self.advance();
270 return Ok(());
271 }
272
273 if ch == '/' && self.peek(1) == Some('/') {
275 self.skip_comment();
276 return Ok(());
277 }
278
279 if ch == '"' {
281 let tok = self.scan_string()?;
282 tokens.push(tok);
283 return Ok(());
284 }
285
286 if ch.is_ascii_digit() {
288 let tok = self.scan_number()?;
289 tokens.push(tok);
290 return Ok(());
291 }
292
293 if ch.is_alphabetic() || ch == '_' {
295 let tok = self.scan_identifier();
296 tokens.push(tok);
297 return Ok(());
298 }
299
300 let tok = self.scan_operator()?;
302 tokens.push(tok);
303 Ok(())
304 }
305
306 fn handle_indentation(&mut self, tokens: &mut Vec<Token>) -> Result<(), LexerError> {
307 self.at_line_start = false;
308 let mut indent = 0;
309
310 while self.pos < self.chars.len() && self.chars[self.pos] == ' ' {
311 indent += 1;
312 self.pos += 1;
313 self.col += 1;
314 }
315
316 if self.pos < self.chars.len() {
318 let ch = self.chars[self.pos];
319 if ch == '\n' || ch == '\r' {
320 return Ok(());
321 }
322 if ch == '/' && self.pos + 1 < self.chars.len() && self.chars[self.pos + 1] == '/' {
323 return Ok(());
324 }
325 } else {
326 return Ok(());
327 }
328
329 let current = *self.indent_stack.last().unwrap();
330 let line = self.line;
331
332 if indent > current {
333 self.indent_stack.push(indent);
334 tokens.push(Token {
335 kind: TokenKind::Indent,
336 line,
337 col: 1,
338 });
339 } else if indent < current {
340 while self.indent_stack.len() > 1 && *self.indent_stack.last().unwrap() > indent {
341 self.indent_stack.pop();
342 tokens.push(Token {
343 kind: TokenKind::Dedent,
344 line,
345 col: 1,
346 });
347 }
348 if *self.indent_stack.last().unwrap() != indent {
349 return Err(self.error(format!("Invalid indentation level: {}", indent)));
350 }
351 }
352
353 Ok(())
354 }
355
356 fn skip_comment(&mut self) {
357 while self.pos < self.chars.len() && self.chars[self.pos] != '\n' {
358 self.advance();
359 }
360 }
361
362 fn scan_string(&mut self) -> Result<Token, LexerError> {
363 let line = self.line;
364 let col = self.col;
365 self.advance(); let mut parts: Vec<(bool, String)> = Vec::new(); let mut current = String::new();
369 let mut has_interp = false;
370
371 loop {
372 match self.current() {
373 None => return Err(self.error("Unterminated string literal")),
374 Some('"') => {
375 self.advance();
376 break;
377 }
378 Some('{') => {
379 if self.chars.get(self.pos + 1).copied() == Some('{') {
381 current.push('{');
382 self.advance(); self.advance(); } else {
385 has_interp = true;
386 if !current.is_empty() {
387 parts.push((false, current.clone()));
388 current.clear();
389 }
390 self.advance(); let mut expr_text = String::new();
392 let mut depth = 1usize;
393 while self.pos < self.chars.len() && depth > 0 {
394 match self.chars[self.pos] {
395 '{' => {
396 depth += 1;
397 expr_text.push('{');
398 self.advance();
399 }
400 '}' => {
401 depth -= 1;
402 if depth > 0 {
403 expr_text.push('}');
404 }
405 self.advance();
406 }
407 c => {
408 expr_text.push(c);
409 self.advance();
410 }
411 }
412 }
413 parts.push((true, expr_text));
414 }
415 }
416 Some('}') => {
417 if self.chars.get(self.pos + 1).copied() == Some('}') {
419 current.push('}');
420 self.advance(); self.advance(); } else {
423 current.push('}');
424 self.advance();
425 }
426 }
427 Some('\\') => {
428 self.advance();
429 match self.advance() {
430 Some('b') => current.push('\u{0008}'),
431 Some('f') => current.push('\u{000C}'),
432 Some('n') => current.push('\n'),
433 Some('t') => current.push('\t'),
434 Some('r') => current.push('\r'),
435 Some('"') => current.push('"'),
436 Some('\\') => current.push('\\'),
437 Some(c) => current.push(c),
438 None => return Err(self.error("Unterminated string literal")),
439 }
440 }
441 Some('\n') => return Err(self.error("Unterminated string literal")),
442 Some(c) => {
443 current.push(c);
444 self.advance();
445 }
446 }
447 }
448
449 if !current.is_empty() {
450 parts.push((false, current));
451 }
452
453 if has_interp {
454 Ok(Token {
455 kind: TokenKind::InterpStr(parts),
456 line,
457 col,
458 })
459 } else {
460 let plain = parts.into_iter().map(|(_, s)| s).collect::<String>();
461 Ok(Token {
462 kind: TokenKind::Str(plain),
463 line,
464 col,
465 })
466 }
467 }
468
469 fn scan_number(&mut self) -> Result<Token, LexerError> {
470 let line = self.line;
471 let col = self.col;
472 let mut num_str = String::new();
473 let mut is_float = false;
474
475 while let Some(c) = self.current() {
476 if c.is_ascii_digit() {
477 num_str.push(c);
478 self.advance();
479 } else {
480 break;
481 }
482 }
483
484 if self.current() == Some('.') && self.peek(1).map(|c| c.is_ascii_digit()).unwrap_or(false)
485 {
486 is_float = true;
487 num_str.push('.');
488 self.advance(); while let Some(c) = self.current() {
490 if c.is_ascii_digit() {
491 num_str.push(c);
492 self.advance();
493 } else {
494 break;
495 }
496 }
497 }
498
499 if is_float {
500 let f: f64 = num_str
501 .parse()
502 .map_err(|_| self.error("Invalid floating-point number"))?;
503 Ok(Token {
504 kind: TokenKind::Float(f),
505 line,
506 col,
507 })
508 } else {
509 let i: i64 = num_str
510 .parse()
511 .map_err(|_| self.error("Invalid integer literal"))?;
512 Ok(Token {
513 kind: TokenKind::Int(i),
514 line,
515 col,
516 })
517 }
518 }
519
520 fn scan_identifier(&mut self) -> Token {
521 let line = self.line;
522 let col = self.col;
523 let mut ident = String::new();
524
525 while let Some(c) = self.current() {
526 if c.is_alphanumeric() || c == '_' {
527 ident.push(c);
528 self.advance();
529 } else {
530 break;
531 }
532 }
533
534 let kind = keyword(&ident).unwrap_or(TokenKind::Ident(ident));
535 Token { kind, line, col }
536 }
537
538 fn scan_operator(&mut self) -> Result<Token, LexerError> {
539 let line = self.line;
540 let col = self.col;
541 let ch = self.advance().unwrap();
542
543 let kind = match ch {
544 '-' if self.current() == Some('>') => {
545 self.advance();
546 TokenKind::Arrow
547 }
548 '=' if self.current() == Some('>') => {
549 self.advance();
550 TokenKind::FatArrow
551 }
552 '=' if self.current() == Some('=') => {
553 self.advance();
554 TokenKind::Eq
555 }
556 '!' if self.current() == Some('=') => {
557 self.advance();
558 TokenKind::Neq
559 }
560 '<' if self.current() == Some('=') => {
561 self.advance();
562 TokenKind::Lte
563 }
564 '>' if self.current() == Some('=') => {
565 self.advance();
566 TokenKind::Gte
567 }
568 '=' => TokenKind::Assign,
569 '<' => TokenKind::Lt,
570 '>' => TokenKind::Gt,
571 '+' => TokenKind::Plus,
572 '-' => TokenKind::Minus,
573 '*' => TokenKind::Star,
574 '/' => TokenKind::Slash,
575 '!' => TokenKind::Bang,
576 '?' => TokenKind::Question,
577 '.' => TokenKind::Dot,
578 ':' => TokenKind::Colon,
579 ',' => TokenKind::Comma,
580 '(' => TokenKind::LParen,
581 ')' => TokenKind::RParen,
582 '[' => TokenKind::LBracket,
583 ']' => TokenKind::RBracket,
584 '{' => TokenKind::LBrace,
585 '}' => TokenKind::RBrace,
586 other => return Err(self.error(format!("Unknown character: {:?}", other))),
587 };
588
589 Ok(Token { kind, line, col })
590 }
591}