1use std::fmt;
2use thiserror::Error;
3
/// Every kind of token the lexer can produce.
///
/// Literal and identifier variants carry their payload; keyword, operator,
/// punctuation and layout variants are plain markers.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    // ----- Literals -----
    /// Integer literal.
    Int(i64),
    /// Floating-point literal.
    Float(f64),
    /// String literal without interpolation.
    Str(String),
    /// String literal with interpolation; each part is `(is_expression, text)`.
    InterpStr(Vec<(bool, String)>),
    /// Boolean literal (`true` / `false`).
    Bool(bool),

    // ----- Names -----
    /// Any identifier that is not a reserved word.
    Ident(String),

    // ----- Keywords (spelled in source as the lowercase variant name) -----
    Module,
    Depends,
    Exposes,
    Intent,
    Type,
    Record,
    Reason,
    Fn,
    Effect,
    Effects,
    Service,
    Needs,
    Decision,
    Verify,
    Case,
    Match,
    Where,
    Input,
    Expect,
    Date,
    Author,
    Chosen,
    Rejected,
    Impacts,

    // ----- Multi-character operators -----
    /// `->`
    Arrow,
    /// `|>`
    Pipe,
    /// `=>`
    FatArrow,
    /// `==`
    Eq,
    /// `!=`
    Neq,
    /// `<=`
    Lte,
    /// `>=`
    Gte,

    // ----- Single-character operators and punctuation -----
    /// `=`
    Assign,
    /// `!`
    Bang,
    /// `?`
    Question,
    /// `<`
    Lt,
    /// `>`
    Gt,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `.`
    Dot,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `{`
    LBrace,
    /// `}`
    RBrace,

    // ----- Layout -----
    /// A line is indented deeper than the enclosing block.
    Indent,
    /// One open indentation level was closed.
    Dedent,
    /// End of a line that contained at least one token.
    Newline,
    /// End of the input.
    Eof,
}
71
72impl fmt::Display for TokenKind {
73 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
74 match self {
75 TokenKind::Int(n) => write!(f, "integer '{}'", n),
76 TokenKind::Float(n) => write!(f, "float '{}'", n),
77 TokenKind::Str(s) => write!(f, "string \"{}\"", s),
78 TokenKind::InterpStr(_) => write!(f, "interpolated string"),
79 TokenKind::Bool(b) => write!(f, "'{}'", b),
80 TokenKind::Ident(s) => write!(f, "'{}'", s),
81 TokenKind::Module => write!(f, "'module'"),
82 TokenKind::Depends => write!(f, "'depends'"),
83 TokenKind::Exposes => write!(f, "'exposes'"),
84 TokenKind::Intent => write!(f, "'intent'"),
85 TokenKind::Type => write!(f, "'type'"),
86 TokenKind::Record => write!(f, "'record'"),
87 TokenKind::Reason => write!(f, "'reason'"),
88 TokenKind::Fn => write!(f, "'fn'"),
89 TokenKind::Effect => write!(f, "'effect'"),
90 TokenKind::Effects => write!(f, "'effects'"),
91 TokenKind::Service => write!(f, "'service'"),
92 TokenKind::Needs => write!(f, "'needs'"),
93 TokenKind::Decision => write!(f, "'decision'"),
94 TokenKind::Verify => write!(f, "'verify'"),
95 TokenKind::Case => write!(f, "'case'"),
96 TokenKind::Match => write!(f, "'match'"),
97 TokenKind::Where => write!(f, "'where'"),
98 TokenKind::Input => write!(f, "'input'"),
99 TokenKind::Expect => write!(f, "'expect'"),
100 TokenKind::Date => write!(f, "'date'"),
101 TokenKind::Author => write!(f, "'author'"),
102 TokenKind::Chosen => write!(f, "'chosen'"),
103 TokenKind::Rejected => write!(f, "'rejected'"),
104 TokenKind::Impacts => write!(f, "'impacts'"),
105 TokenKind::Arrow => write!(f, "'->'"),
106 TokenKind::Pipe => write!(f, "'|>'"),
107 TokenKind::FatArrow => write!(f, "'=>'"),
108 TokenKind::Eq => write!(f, "'=='"),
109 TokenKind::Neq => write!(f, "'!='"),
110 TokenKind::Lte => write!(f, "'<='"),
111 TokenKind::Gte => write!(f, "'>='"),
112 TokenKind::Assign => write!(f, "'='"),
113 TokenKind::Bang => write!(f, "'!'"),
114 TokenKind::Question => write!(f, "'?'"),
115 TokenKind::Lt => write!(f, "'<'"),
116 TokenKind::Gt => write!(f, "'>'"),
117 TokenKind::Plus => write!(f, "'+'"),
118 TokenKind::Minus => write!(f, "'-'"),
119 TokenKind::Star => write!(f, "'*'"),
120 TokenKind::Slash => write!(f, "'/'"),
121 TokenKind::Dot => write!(f, "'.'"),
122 TokenKind::Colon => write!(f, "':'"),
123 TokenKind::Comma => write!(f, "','"),
124 TokenKind::LParen => write!(f, "'('"),
125 TokenKind::RParen => write!(f, "')'"),
126 TokenKind::LBracket => write!(f, "'['"),
127 TokenKind::RBracket => write!(f, "']'"),
128 TokenKind::LBrace => write!(f, "'{{'"),
129 TokenKind::RBrace => write!(f, "'}}'"),
130 TokenKind::Indent => write!(f, "indentation"),
131 TokenKind::Dedent => write!(f, "end of block"),
132 TokenKind::Newline => write!(f, "end of line"),
133 TokenKind::Eof => write!(f, "end of file"),
134 }
135 }
136}
137
138#[derive(Debug, Clone)]
139pub struct Token {
140 pub kind: TokenKind,
141 pub line: usize,
142 pub col: usize,
143}
144
145#[derive(Debug, Error)]
146pub enum LexerError {
147 #[error("error[{line}:{col}]: {msg}")]
148 Error {
149 msg: String,
150 line: usize,
151 col: usize,
152 },
153}
154
155fn keyword(s: &str) -> Option<TokenKind> {
156 match s {
157 "module" => Some(TokenKind::Module),
158 "depends" => Some(TokenKind::Depends),
159 "exposes" => Some(TokenKind::Exposes),
160 "intent" => Some(TokenKind::Intent),
161 "type" => Some(TokenKind::Type),
162 "record" => Some(TokenKind::Record),
163 "reason" => Some(TokenKind::Reason),
164 "fn" => Some(TokenKind::Fn),
165 "effect" => Some(TokenKind::Effect),
166 "effects" => Some(TokenKind::Effects),
167 "service" => Some(TokenKind::Service),
168 "needs" => Some(TokenKind::Needs),
169 "decision" => Some(TokenKind::Decision),
170 "verify" => Some(TokenKind::Verify),
171 "case" => Some(TokenKind::Case),
172 "match" => Some(TokenKind::Match),
173 "where" => Some(TokenKind::Where),
174 "input" => Some(TokenKind::Input),
175 "expect" => Some(TokenKind::Expect),
176 "date" => Some(TokenKind::Date),
177 "author" => Some(TokenKind::Author),
178 "chosen" => Some(TokenKind::Chosen),
179 "rejected" => Some(TokenKind::Rejected),
180 "impacts" => Some(TokenKind::Impacts),
181 "true" => Some(TokenKind::Bool(true)),
182 "false" => Some(TokenKind::Bool(false)),
183 _ => None,
184 }
185}
186
/// Converts source text into a flat token list, tracking indentation so that
/// block structure can be expressed with `Indent` / `Dedent` tokens.
pub struct Lexer {
    /// The whole source, one entry per character.
    chars: Vec<char>,
    /// Index into `chars` of the next character to read.
    pos: usize,
    /// 1-based line of the next character.
    line: usize,
    /// 1-based column of the next character.
    col: usize,
    /// Indentation widths of the currently open blocks; the bottom entry is 0.
    indent_stack: Vec<usize>,
    /// True when the next character starts a new line, so its indentation
    /// still has to be measured.
    at_line_start: bool,
}
195
196impl Lexer {
197 pub fn new(source: &str) -> Self {
198 Lexer {
199 chars: source.chars().collect(),
200 pos: 0,
201 line: 1,
202 col: 1,
203 indent_stack: vec![0],
204 at_line_start: true,
205 }
206 }
207
208 fn error(&self, msg: impl Into<String>) -> LexerError {
209 LexerError::Error {
210 msg: msg.into(),
211 line: self.line,
212 col: self.col,
213 }
214 }
215
216 fn peek(&self, offset: usize) -> Option<char> {
217 self.chars.get(self.pos + offset).copied()
218 }
219
220 fn current(&self) -> Option<char> {
221 self.chars.get(self.pos).copied()
222 }
223
224 fn advance(&mut self) -> Option<char> {
225 let ch = self.chars.get(self.pos).copied()?;
226 self.pos += 1;
227 if ch == '\n' {
228 self.line += 1;
229 self.col = 1;
230 } else {
231 self.col += 1;
232 }
233 Some(ch)
234 }
235
236 pub fn tokenize(&mut self) -> Result<Vec<Token>, LexerError> {
237 let mut tokens = Vec::new();
238
239 while self.pos < self.chars.len() {
240 self.scan_token(&mut tokens)?;
241 }
242
243 while self.indent_stack.len() > 1 {
245 self.indent_stack.pop();
246 tokens.push(Token {
247 kind: TokenKind::Dedent,
248 line: self.line,
249 col: self.col,
250 });
251 }
252
253 tokens.push(Token {
254 kind: TokenKind::Eof,
255 line: self.line,
256 col: self.col,
257 });
258
259 Ok(tokens)
260 }
261
262 fn scan_token(&mut self, tokens: &mut Vec<Token>) -> Result<(), LexerError> {
263 if self.at_line_start {
264 self.handle_indentation(tokens)?;
265 if self.pos >= self.chars.len() {
266 return Ok(());
267 }
268 }
269
270 let ch = match self.current() {
271 Some(c) => c,
272 None => return Ok(()),
273 };
274
275 if ch == ' ' {
277 self.advance();
278 return Ok(());
279 }
280
281 if ch == '\n' {
283 let line = self.line;
284 let col = self.col;
285 self.advance();
286
287 let last_is_structural = tokens
288 .last()
289 .map(|t| {
290 matches!(
291 t.kind,
292 TokenKind::Newline | TokenKind::Indent | TokenKind::Dedent
293 )
294 })
295 .unwrap_or(true);
296
297 if !tokens.is_empty() && !last_is_structural {
298 tokens.push(Token {
299 kind: TokenKind::Newline,
300 line,
301 col,
302 });
303 }
304 self.at_line_start = true;
305 return Ok(());
306 }
307
308 if ch == '\r' {
310 self.advance();
311 return Ok(());
312 }
313
314 if ch == '/' && self.peek(1) == Some('/') {
316 self.skip_comment();
317 return Ok(());
318 }
319
320 if ch == '"' {
322 let tok = self.scan_string()?;
323 tokens.push(tok);
324 return Ok(());
325 }
326
327 if ch.is_ascii_digit() {
329 let tok = self.scan_number()?;
330 tokens.push(tok);
331 return Ok(());
332 }
333
334 if ch.is_alphabetic() || ch == '_' {
336 let tok = self.scan_identifier();
337 tokens.push(tok);
338 return Ok(());
339 }
340
341 let tok = self.scan_operator()?;
343 tokens.push(tok);
344 Ok(())
345 }
346
347 fn handle_indentation(&mut self, tokens: &mut Vec<Token>) -> Result<(), LexerError> {
348 self.at_line_start = false;
349 let mut indent = 0;
350
351 while self.pos < self.chars.len() && self.chars[self.pos] == ' ' {
352 indent += 1;
353 self.pos += 1;
354 self.col += 1;
355 }
356
357 if self.pos < self.chars.len() {
359 let ch = self.chars[self.pos];
360 if ch == '\n' || ch == '\r' {
361 return Ok(());
362 }
363 if ch == '/' && self.pos + 1 < self.chars.len() && self.chars[self.pos + 1] == '/' {
364 return Ok(());
365 }
366 } else {
367 return Ok(());
368 }
369
370 let current = *self.indent_stack.last().unwrap();
371 let line = self.line;
372
373 if indent > current {
374 self.indent_stack.push(indent);
375 tokens.push(Token {
376 kind: TokenKind::Indent,
377 line,
378 col: 1,
379 });
380 } else if indent < current {
381 while self.indent_stack.len() > 1 && *self.indent_stack.last().unwrap() > indent {
382 self.indent_stack.pop();
383 tokens.push(Token {
384 kind: TokenKind::Dedent,
385 line,
386 col: 1,
387 });
388 }
389 if *self.indent_stack.last().unwrap() != indent {
390 return Err(self.error(format!("Invalid indentation level: {}", indent)));
391 }
392 }
393
394 Ok(())
395 }
396
397 fn skip_comment(&mut self) {
398 while self.pos < self.chars.len() && self.chars[self.pos] != '\n' {
399 self.advance();
400 }
401 }
402
403 fn scan_string(&mut self) -> Result<Token, LexerError> {
404 let line = self.line;
405 let col = self.col;
406 self.advance(); let mut parts: Vec<(bool, String)> = Vec::new(); let mut current = String::new();
410 let mut has_interp = false;
411
412 loop {
413 match self.current() {
414 None => return Err(self.error("Unterminated string literal")),
415 Some('"') => {
416 self.advance();
417 break;
418 }
419 Some('{') => {
420 if self.chars.get(self.pos + 1).copied() == Some('{') {
422 current.push('{');
423 self.advance(); self.advance(); } else {
426 has_interp = true;
427 if !current.is_empty() {
428 parts.push((false, current.clone()));
429 current.clear();
430 }
431 self.advance(); let mut expr_text = String::new();
433 let mut depth = 1usize;
434 while self.pos < self.chars.len() && depth > 0 {
435 match self.chars[self.pos] {
436 '{' => {
437 depth += 1;
438 expr_text.push('{');
439 self.advance();
440 }
441 '}' => {
442 depth -= 1;
443 if depth > 0 {
444 expr_text.push('}');
445 }
446 self.advance();
447 }
448 c => {
449 expr_text.push(c);
450 self.advance();
451 }
452 }
453 }
454 parts.push((true, expr_text));
455 }
456 }
457 Some('}') => {
458 if self.chars.get(self.pos + 1).copied() == Some('}') {
460 current.push('}');
461 self.advance(); self.advance(); } else {
464 current.push('}');
465 self.advance();
466 }
467 }
468 Some('\\') => {
469 self.advance();
470 match self.advance() {
471 Some('n') => current.push('\n'),
472 Some('t') => current.push('\t'),
473 Some('r') => current.push('\r'),
474 Some('"') => current.push('"'),
475 Some('\\') => current.push('\\'),
476 Some(c) => current.push(c),
477 None => return Err(self.error("Unterminated string literal")),
478 }
479 }
480 Some('\n') => return Err(self.error("Unterminated string literal")),
481 Some(c) => {
482 current.push(c);
483 self.advance();
484 }
485 }
486 }
487
488 if !current.is_empty() {
489 parts.push((false, current));
490 }
491
492 if has_interp {
493 Ok(Token {
494 kind: TokenKind::InterpStr(parts),
495 line,
496 col,
497 })
498 } else {
499 let plain = parts.into_iter().map(|(_, s)| s).collect::<String>();
500 Ok(Token {
501 kind: TokenKind::Str(plain),
502 line,
503 col,
504 })
505 }
506 }
507
508 fn scan_number(&mut self) -> Result<Token, LexerError> {
509 let line = self.line;
510 let col = self.col;
511 let mut num_str = String::new();
512 let mut is_float = false;
513
514 while let Some(c) = self.current() {
515 if c.is_ascii_digit() {
516 num_str.push(c);
517 self.advance();
518 } else {
519 break;
520 }
521 }
522
523 if self.current() == Some('.') && self.peek(1).map(|c| c.is_ascii_digit()).unwrap_or(false)
524 {
525 is_float = true;
526 num_str.push('.');
527 self.advance(); while let Some(c) = self.current() {
529 if c.is_ascii_digit() {
530 num_str.push(c);
531 self.advance();
532 } else {
533 break;
534 }
535 }
536 }
537
538 if is_float {
539 let f: f64 = num_str
540 .parse()
541 .map_err(|_| self.error("Invalid floating-point number"))?;
542 Ok(Token {
543 kind: TokenKind::Float(f),
544 line,
545 col,
546 })
547 } else {
548 let i: i64 = num_str
549 .parse()
550 .map_err(|_| self.error("Invalid integer literal"))?;
551 Ok(Token {
552 kind: TokenKind::Int(i),
553 line,
554 col,
555 })
556 }
557 }
558
559 fn scan_identifier(&mut self) -> Token {
560 let line = self.line;
561 let col = self.col;
562 let mut ident = String::new();
563
564 while let Some(c) = self.current() {
565 if c.is_alphanumeric() || c == '_' {
566 ident.push(c);
567 self.advance();
568 } else {
569 break;
570 }
571 }
572
573 let kind = keyword(&ident).unwrap_or_else(|| TokenKind::Ident(ident));
574 Token { kind, line, col }
575 }
576
577 fn scan_operator(&mut self) -> Result<Token, LexerError> {
578 let line = self.line;
579 let col = self.col;
580 let ch = self.advance().unwrap();
581
582 let kind = match ch {
583 '-' if self.current() == Some('>') => {
584 self.advance();
585 TokenKind::Arrow
586 }
587 '|' if self.current() == Some('>') => {
588 self.advance();
589 TokenKind::Pipe
590 }
591 '=' if self.current() == Some('>') => {
592 self.advance();
593 TokenKind::FatArrow
594 }
595 '=' if self.current() == Some('=') => {
596 self.advance();
597 TokenKind::Eq
598 }
599 '!' if self.current() == Some('=') => {
600 self.advance();
601 TokenKind::Neq
602 }
603 '<' if self.current() == Some('=') => {
604 self.advance();
605 TokenKind::Lte
606 }
607 '>' if self.current() == Some('=') => {
608 self.advance();
609 TokenKind::Gte
610 }
611 '=' => TokenKind::Assign,
612 '<' => TokenKind::Lt,
613 '>' => TokenKind::Gt,
614 '+' => TokenKind::Plus,
615 '-' => TokenKind::Minus,
616 '*' => TokenKind::Star,
617 '/' => TokenKind::Slash,
618 '!' => TokenKind::Bang,
619 '?' => TokenKind::Question,
620 '.' => TokenKind::Dot,
621 ':' => TokenKind::Colon,
622 ',' => TokenKind::Comma,
623 '(' => TokenKind::LParen,
624 ')' => TokenKind::RParen,
625 '[' => TokenKind::LBracket,
626 ']' => TokenKind::RBracket,
627 '{' => TokenKind::LBrace,
628 '}' => TokenKind::RBrace,
629 other => return Err(self.error(format!("Unknown character: {:?}", other))),
630 };
631
632 Ok(Token { kind, line, col })
633 }
634}