1use std::fmt;
2use thiserror::Error;
3
/// Every kind of token the lexer can produce.
///
/// Literal and identifier variants carry their decoded payload; keyword,
/// punctuation and layout variants are plain markers.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    // ---- literals and names ----
    /// Integer literal.
    Int(i64),
    /// Floating-point literal (digits, a `.`, then more digits).
    Float(f64),
    /// String literal without interpolation, escapes already decoded.
    Str(String),
    /// String literal containing at least one `{...}` interpolation.
    /// Each entry is `(is_expression, text)`: `true` marks the raw source
    /// text of an interpolated expression, `false` a literal segment.
    InterpStr(Vec<(bool, String)>),
    /// The `true` / `false` keywords.
    Bool(bool),
    /// Any identifier that is not a keyword.
    Ident(String),

    // ---- keywords ----
    Module,
    Depends,
    Exposes,
    Intent,
    Type,
    Record,
    Fn,
    Effects,
    Decision,
    Verify,
    Match,

    // ---- operators and punctuation ----
    /// `->`
    Arrow,
    /// `=>`
    FatArrow,
    /// `==`
    Eq,
    /// `!=`
    Neq,
    /// `<=`
    Lte,
    /// `>=`
    Gte,
    /// `=`
    Assign,
    /// `!`
    Bang,
    /// `?`
    Question,
    /// `<`
    Lt,
    /// `>`
    Gt,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `.`
    Dot,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `{`
    LBrace,
    /// `}`
    RBrace,

    // ---- layout ----
    /// A line is indented deeper than the enclosing block.
    Indent,
    /// A previously opened indentation level is closed.
    Dedent,
    /// End of a logical line.
    Newline,
    /// End of the input.
    Eof,
}
57
58impl fmt::Display for TokenKind {
59 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
60 match self {
61 TokenKind::Int(n) => write!(f, "integer '{}'", n),
62 TokenKind::Float(n) => write!(f, "float '{}'", n),
63 TokenKind::Str(s) => write!(f, "string \"{}\"", s),
64 TokenKind::InterpStr(_) => write!(f, "interpolated string"),
65 TokenKind::Bool(b) => write!(f, "'{}'", b),
66 TokenKind::Ident(s) => write!(f, "'{}'", s),
67 TokenKind::Module => write!(f, "'module'"),
68 TokenKind::Depends => write!(f, "'depends'"),
69 TokenKind::Exposes => write!(f, "'exposes'"),
70 TokenKind::Intent => write!(f, "'intent'"),
71 TokenKind::Type => write!(f, "'type'"),
72 TokenKind::Record => write!(f, "'record'"),
73 TokenKind::Fn => write!(f, "'fn'"),
74 TokenKind::Effects => write!(f, "'effects'"),
75 TokenKind::Decision => write!(f, "'decision'"),
76 TokenKind::Verify => write!(f, "'verify'"),
77 TokenKind::Match => write!(f, "'match'"),
78 TokenKind::Arrow => write!(f, "'->'"),
79 TokenKind::FatArrow => write!(f, "'=>'"),
80 TokenKind::Eq => write!(f, "'=='"),
81 TokenKind::Neq => write!(f, "'!='"),
82 TokenKind::Lte => write!(f, "'<='"),
83 TokenKind::Gte => write!(f, "'>='"),
84 TokenKind::Assign => write!(f, "'='"),
85 TokenKind::Bang => write!(f, "'!'"),
86 TokenKind::Question => write!(f, "'?'"),
87 TokenKind::Lt => write!(f, "'<'"),
88 TokenKind::Gt => write!(f, "'>'"),
89 TokenKind::Plus => write!(f, "'+'"),
90 TokenKind::Minus => write!(f, "'-'"),
91 TokenKind::Star => write!(f, "'*'"),
92 TokenKind::Slash => write!(f, "'/'"),
93 TokenKind::Dot => write!(f, "'.'"),
94 TokenKind::Colon => write!(f, "':'"),
95 TokenKind::Comma => write!(f, "','"),
96 TokenKind::LParen => write!(f, "'('"),
97 TokenKind::RParen => write!(f, "')'"),
98 TokenKind::LBracket => write!(f, "'['"),
99 TokenKind::RBracket => write!(f, "']'"),
100 TokenKind::LBrace => write!(f, "'{{'"),
101 TokenKind::RBrace => write!(f, "'}}'"),
102 TokenKind::Indent => write!(f, "indentation"),
103 TokenKind::Dedent => write!(f, "end of block"),
104 TokenKind::Newline => write!(f, "end of line"),
105 TokenKind::Eof => write!(f, "end of file"),
106 }
107 }
108}
109
110#[derive(Debug, Clone)]
111pub struct Token {
112 pub kind: TokenKind,
113 pub line: usize,
114 pub col: usize,
115}
116
117#[derive(Debug, Error)]
118pub enum LexerError {
119 #[error("error[{line}:{col}]: {msg}")]
120 Error {
121 msg: String,
122 line: usize,
123 col: usize,
124 },
125}
126
127fn keyword(s: &str) -> Option<TokenKind> {
128 match s {
129 "module" => Some(TokenKind::Module),
130 "depends" => Some(TokenKind::Depends),
131 "exposes" => Some(TokenKind::Exposes),
132 "intent" => Some(TokenKind::Intent),
133 "type" => Some(TokenKind::Type),
134 "record" => Some(TokenKind::Record),
135 "fn" => Some(TokenKind::Fn),
136 "effects" => Some(TokenKind::Effects),
137 "decision" => Some(TokenKind::Decision),
138 "verify" => Some(TokenKind::Verify),
139 "match" => Some(TokenKind::Match),
140 "true" => Some(TokenKind::Bool(true)),
141 "false" => Some(TokenKind::Bool(false)),
142 _ => None,
143 }
144}
145
/// Indentation-aware lexer state over a source text.
pub struct Lexer {
    /// The whole input, decoded into `char`s so it can be indexed by position.
    chars: Vec<char>,
    /// Index into `chars` of the next unread character.
    pos: usize,
    /// Line of the next unread character (starts at 1).
    line: usize,
    /// Column of the next unread character (starts at 1).
    col: usize,
    /// Currently open indentation widths, innermost last; the bottom entry is 0.
    indent_stack: Vec<usize>,
    /// `true` when the cursor sits at the start of a line whose indentation
    /// has not been measured yet.
    at_line_start: bool,
}
154
155impl Lexer {
156 pub fn new(source: &str) -> Self {
157 Lexer {
158 chars: source.chars().collect(),
159 pos: 0,
160 line: 1,
161 col: 1,
162 indent_stack: vec![0],
163 at_line_start: true,
164 }
165 }
166
167 fn error(&self, msg: impl Into<String>) -> LexerError {
168 LexerError::Error {
169 msg: msg.into(),
170 line: self.line,
171 col: self.col,
172 }
173 }
174
175 fn peek(&self, offset: usize) -> Option<char> {
176 self.chars.get(self.pos + offset).copied()
177 }
178
179 fn current(&self) -> Option<char> {
180 self.chars.get(self.pos).copied()
181 }
182
183 fn advance(&mut self) -> Option<char> {
184 let ch = self.chars.get(self.pos).copied()?;
185 self.pos += 1;
186 if ch == '\n' {
187 self.line += 1;
188 self.col = 1;
189 } else {
190 self.col += 1;
191 }
192 Some(ch)
193 }
194
195 pub fn tokenize(&mut self) -> Result<Vec<Token>, LexerError> {
196 let mut tokens = Vec::new();
197
198 while self.pos < self.chars.len() {
199 self.scan_token(&mut tokens)?;
200 }
201
202 while self.indent_stack.len() > 1 {
204 self.indent_stack.pop();
205 tokens.push(Token {
206 kind: TokenKind::Dedent,
207 line: self.line,
208 col: self.col,
209 });
210 }
211
212 tokens.push(Token {
213 kind: TokenKind::Eof,
214 line: self.line,
215 col: self.col,
216 });
217
218 Ok(tokens)
219 }
220
221 fn scan_token(&mut self, tokens: &mut Vec<Token>) -> Result<(), LexerError> {
222 if self.at_line_start {
223 self.handle_indentation(tokens)?;
224 if self.pos >= self.chars.len() {
225 return Ok(());
226 }
227 }
228
229 let ch = match self.current() {
230 Some(c) => c,
231 None => return Ok(()),
232 };
233
234 if ch == ' ' {
236 self.advance();
237 return Ok(());
238 }
239
240 if ch == '\n' {
242 let line = self.line;
243 let col = self.col;
244 self.advance();
245
246 let last_is_structural = tokens
247 .last()
248 .map(|t| {
249 matches!(
250 t.kind,
251 TokenKind::Newline | TokenKind::Indent | TokenKind::Dedent
252 )
253 })
254 .unwrap_or(true);
255
256 if !tokens.is_empty() && !last_is_structural {
257 tokens.push(Token {
258 kind: TokenKind::Newline,
259 line,
260 col,
261 });
262 }
263 self.at_line_start = true;
264 return Ok(());
265 }
266
267 if ch == '\r' {
269 self.advance();
270 return Ok(());
271 }
272
273 if ch == '/' && self.peek(1) == Some('/') {
275 self.skip_comment();
276 return Ok(());
277 }
278
279 if ch == '"' {
281 let tok = self.scan_string()?;
282 tokens.push(tok);
283 return Ok(());
284 }
285
286 if ch.is_ascii_digit() {
288 let tok = self.scan_number()?;
289 tokens.push(tok);
290 return Ok(());
291 }
292
293 if ch.is_alphabetic() || ch == '_' {
295 let tok = self.scan_identifier();
296 tokens.push(tok);
297 return Ok(());
298 }
299
300 let tok = self.scan_operator()?;
302 tokens.push(tok);
303 Ok(())
304 }
305
306 fn handle_indentation(&mut self, tokens: &mut Vec<Token>) -> Result<(), LexerError> {
307 self.at_line_start = false;
308 let mut indent = 0;
309
310 while self.pos < self.chars.len() && self.chars[self.pos] == ' ' {
311 indent += 1;
312 self.pos += 1;
313 self.col += 1;
314 }
315
316 if self.pos < self.chars.len() {
318 let ch = self.chars[self.pos];
319 if ch == '\n' || ch == '\r' {
320 return Ok(());
321 }
322 if ch == '/' && self.pos + 1 < self.chars.len() && self.chars[self.pos + 1] == '/' {
323 return Ok(());
324 }
325 } else {
326 return Ok(());
327 }
328
329 let current = *self.indent_stack.last().unwrap();
330 let line = self.line;
331
332 if indent > current {
333 self.indent_stack.push(indent);
334 tokens.push(Token {
335 kind: TokenKind::Indent,
336 line,
337 col: 1,
338 });
339 } else if indent < current {
340 while self.indent_stack.len() > 1 && *self.indent_stack.last().unwrap() > indent {
341 self.indent_stack.pop();
342 tokens.push(Token {
343 kind: TokenKind::Dedent,
344 line,
345 col: 1,
346 });
347 }
348 if *self.indent_stack.last().unwrap() != indent {
349 return Err(self.error(format!("Invalid indentation level: {}", indent)));
350 }
351 }
352
353 Ok(())
354 }
355
356 fn skip_comment(&mut self) {
357 while self.pos < self.chars.len() && self.chars[self.pos] != '\n' {
358 self.advance();
359 }
360 }
361
362 fn scan_string(&mut self) -> Result<Token, LexerError> {
363 let line = self.line;
364 let col = self.col;
365 self.advance(); let mut parts: Vec<(bool, String)> = Vec::new(); let mut current = String::new();
369 let mut has_interp = false;
370
371 loop {
372 match self.current() {
373 None => return Err(self.error("Unterminated string literal")),
374 Some('"') => {
375 self.advance();
376 break;
377 }
378 Some('{') => {
379 if self.chars.get(self.pos + 1).copied() == Some('{') {
381 current.push('{');
382 self.advance(); self.advance(); } else {
385 has_interp = true;
386 if !current.is_empty() {
387 parts.push((false, current.clone()));
388 current.clear();
389 }
390 self.advance(); let mut expr_text = String::new();
392 let mut depth = 1usize;
393 while self.pos < self.chars.len() && depth > 0 {
394 match self.chars[self.pos] {
395 '{' => {
396 depth += 1;
397 expr_text.push('{');
398 self.advance();
399 }
400 '}' => {
401 depth -= 1;
402 if depth > 0 {
403 expr_text.push('}');
404 }
405 self.advance();
406 }
407 c => {
408 expr_text.push(c);
409 self.advance();
410 }
411 }
412 }
413 parts.push((true, expr_text));
414 }
415 }
416 Some('}') => {
417 if self.chars.get(self.pos + 1).copied() == Some('}') {
419 current.push('}');
420 self.advance(); self.advance(); } else {
423 current.push('}');
424 self.advance();
425 }
426 }
427 Some('\\') => {
428 self.advance();
429 match self.advance() {
430 Some('n') => current.push('\n'),
431 Some('t') => current.push('\t'),
432 Some('r') => current.push('\r'),
433 Some('"') => current.push('"'),
434 Some('\\') => current.push('\\'),
435 Some(c) => current.push(c),
436 None => return Err(self.error("Unterminated string literal")),
437 }
438 }
439 Some('\n') => return Err(self.error("Unterminated string literal")),
440 Some(c) => {
441 current.push(c);
442 self.advance();
443 }
444 }
445 }
446
447 if !current.is_empty() {
448 parts.push((false, current));
449 }
450
451 if has_interp {
452 Ok(Token {
453 kind: TokenKind::InterpStr(parts),
454 line,
455 col,
456 })
457 } else {
458 let plain = parts.into_iter().map(|(_, s)| s).collect::<String>();
459 Ok(Token {
460 kind: TokenKind::Str(plain),
461 line,
462 col,
463 })
464 }
465 }
466
467 fn scan_number(&mut self) -> Result<Token, LexerError> {
468 let line = self.line;
469 let col = self.col;
470 let mut num_str = String::new();
471 let mut is_float = false;
472
473 while let Some(c) = self.current() {
474 if c.is_ascii_digit() {
475 num_str.push(c);
476 self.advance();
477 } else {
478 break;
479 }
480 }
481
482 if self.current() == Some('.') && self.peek(1).map(|c| c.is_ascii_digit()).unwrap_or(false)
483 {
484 is_float = true;
485 num_str.push('.');
486 self.advance(); while let Some(c) = self.current() {
488 if c.is_ascii_digit() {
489 num_str.push(c);
490 self.advance();
491 } else {
492 break;
493 }
494 }
495 }
496
497 if is_float {
498 let f: f64 = num_str
499 .parse()
500 .map_err(|_| self.error("Invalid floating-point number"))?;
501 Ok(Token {
502 kind: TokenKind::Float(f),
503 line,
504 col,
505 })
506 } else {
507 let i: i64 = num_str
508 .parse()
509 .map_err(|_| self.error("Invalid integer literal"))?;
510 Ok(Token {
511 kind: TokenKind::Int(i),
512 line,
513 col,
514 })
515 }
516 }
517
518 fn scan_identifier(&mut self) -> Token {
519 let line = self.line;
520 let col = self.col;
521 let mut ident = String::new();
522
523 while let Some(c) = self.current() {
524 if c.is_alphanumeric() || c == '_' {
525 ident.push(c);
526 self.advance();
527 } else {
528 break;
529 }
530 }
531
532 let kind = keyword(&ident).unwrap_or(TokenKind::Ident(ident));
533 Token { kind, line, col }
534 }
535
536 fn scan_operator(&mut self) -> Result<Token, LexerError> {
537 let line = self.line;
538 let col = self.col;
539 let ch = self.advance().unwrap();
540
541 let kind = match ch {
542 '-' if self.current() == Some('>') => {
543 self.advance();
544 TokenKind::Arrow
545 }
546 '=' if self.current() == Some('>') => {
547 self.advance();
548 TokenKind::FatArrow
549 }
550 '=' if self.current() == Some('=') => {
551 self.advance();
552 TokenKind::Eq
553 }
554 '!' if self.current() == Some('=') => {
555 self.advance();
556 TokenKind::Neq
557 }
558 '<' if self.current() == Some('=') => {
559 self.advance();
560 TokenKind::Lte
561 }
562 '>' if self.current() == Some('=') => {
563 self.advance();
564 TokenKind::Gte
565 }
566 '=' => TokenKind::Assign,
567 '<' => TokenKind::Lt,
568 '>' => TokenKind::Gt,
569 '+' => TokenKind::Plus,
570 '-' => TokenKind::Minus,
571 '*' => TokenKind::Star,
572 '/' => TokenKind::Slash,
573 '!' => TokenKind::Bang,
574 '?' => TokenKind::Question,
575 '.' => TokenKind::Dot,
576 ':' => TokenKind::Colon,
577 ',' => TokenKind::Comma,
578 '(' => TokenKind::LParen,
579 ')' => TokenKind::RParen,
580 '[' => TokenKind::LBracket,
581 ']' => TokenKind::RBracket,
582 '{' => TokenKind::LBrace,
583 '}' => TokenKind::RBrace,
584 other => return Err(self.error(format!("Unknown character: {:?}", other))),
585 };
586
587 Ok(Token { kind, line, col })
588 }
589}