1use crate::error::{ReplError, Result};
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8
9#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
11pub enum TokenType {
12 String(String),
14 Number(f64),
15 Integer(i64),
16 Boolean(bool),
17 Duration(u64, String), Size(u64, String), Identifier(String),
22 Keyword(Keyword),
23
24 Plus,
26 Minus,
27 Multiply,
28 Divide,
29 Modulo,
30 Equal,
31 NotEqual,
32 LessThan,
33 LessThanOrEqual,
34 GreaterThan,
35 GreaterThanOrEqual,
36 And,
37 Or,
38 Not,
39 BitwiseAnd,
40 BitwiseOr,
41 BitwiseXor,
42 BitwiseNot,
43 LeftShift,
44 RightShift,
45 Assign,
46 Question,
47
48 LeftParen,
50 RightParen,
51 LeftBrace,
52 RightBrace,
53 LeftBracket,
54 RightBracket,
55 Comma,
56 Semicolon,
57 Colon,
58 Dot,
59 Arrow,
60 FatArrow,
61
62 Newline,
64 Eof,
65 Comment(String),
66}
67
68#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
70pub enum Keyword {
71 Agent,
72 Behavior,
73 Function,
74 Struct,
75 Let,
76 If,
77 Else,
78 Match,
79 For,
80 While,
81 Try,
82 Catch,
83 Return,
84 Emit,
85 Require,
86 Check,
87 On,
88 In,
89 Invoke,
90 True,
91 False,
92 Null,
93 Capability,
94 Capabilities,
95 Policy,
96 Has,
97 Name,
98 Version,
99 Author,
100 Description,
101 Resources,
102 Security,
103 Policies,
104 Input,
105 Output,
106 Steps,
107 Memory,
108 Cpu,
109 Network,
110 Storage,
111 Tier,
112 Sandbox,
113 Allow,
114 Strict,
115 Moderate,
116 Permissive,
117 Timeout,
118 Retry,
119 Failure,
120 Terminate,
121 Restart,
122 Escalate,
123 Ignore,
124 Tier1,
125 Tier2,
126 Tier3,
127 Tier4,
128}
129
130#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
132pub struct Token {
133 pub token_type: TokenType,
134 pub line: usize,
135 pub column: usize,
136 pub offset: usize,
137 pub length: usize,
138}
139
140pub struct Lexer {
142 input: Vec<char>,
143 position: usize,
144 line: usize,
145 column: usize,
146 keywords: HashMap<String, Keyword>,
147}
148
149impl Lexer {
150 pub fn new(input: &str) -> Self {
152 let mut keywords = HashMap::new();
153
154 keywords.insert("agent".to_string(), Keyword::Agent);
156 keywords.insert("behavior".to_string(), Keyword::Behavior);
157 keywords.insert("function".to_string(), Keyword::Function);
158 keywords.insert("struct".to_string(), Keyword::Struct);
159 keywords.insert("let".to_string(), Keyword::Let);
160 keywords.insert("if".to_string(), Keyword::If);
161 keywords.insert("else".to_string(), Keyword::Else);
162 keywords.insert("match".to_string(), Keyword::Match);
163 keywords.insert("for".to_string(), Keyword::For);
164 keywords.insert("while".to_string(), Keyword::While);
165 keywords.insert("try".to_string(), Keyword::Try);
166 keywords.insert("catch".to_string(), Keyword::Catch);
167 keywords.insert("return".to_string(), Keyword::Return);
168 keywords.insert("emit".to_string(), Keyword::Emit);
169 keywords.insert("require".to_string(), Keyword::Require);
170 keywords.insert("check".to_string(), Keyword::Check);
171 keywords.insert("on".to_string(), Keyword::On);
172 keywords.insert("in".to_string(), Keyword::In);
173 keywords.insert("invoke".to_string(), Keyword::Invoke);
174 keywords.insert("true".to_string(), Keyword::True);
175 keywords.insert("false".to_string(), Keyword::False);
176 keywords.insert("null".to_string(), Keyword::Null);
177 keywords.insert("capability".to_string(), Keyword::Capability);
178 keywords.insert("capabilities".to_string(), Keyword::Capabilities);
179 keywords.insert("policy".to_string(), Keyword::Policy);
180 keywords.insert("has".to_string(), Keyword::Has);
181 keywords.insert("name".to_string(), Keyword::Name);
182 keywords.insert("version".to_string(), Keyword::Version);
183 keywords.insert("author".to_string(), Keyword::Author);
184 keywords.insert("description".to_string(), Keyword::Description);
185 keywords.insert("resources".to_string(), Keyword::Resources);
186 keywords.insert("security".to_string(), Keyword::Security);
187 keywords.insert("policies".to_string(), Keyword::Policies);
188 keywords.insert("input".to_string(), Keyword::Input);
189 keywords.insert("output".to_string(), Keyword::Output);
190 keywords.insert("steps".to_string(), Keyword::Steps);
191 keywords.insert("memory".to_string(), Keyword::Memory);
192 keywords.insert("cpu".to_string(), Keyword::Cpu);
193 keywords.insert("network".to_string(), Keyword::Network);
194 keywords.insert("storage".to_string(), Keyword::Storage);
195 keywords.insert("tier".to_string(), Keyword::Tier);
196 keywords.insert("sandbox".to_string(), Keyword::Sandbox);
197 keywords.insert("allow".to_string(), Keyword::Allow);
198 keywords.insert("strict".to_string(), Keyword::Strict);
199 keywords.insert("moderate".to_string(), Keyword::Moderate);
200 keywords.insert("permissive".to_string(), Keyword::Permissive);
201 keywords.insert("timeout".to_string(), Keyword::Timeout);
202 keywords.insert("retry".to_string(), Keyword::Retry);
203 keywords.insert("failure".to_string(), Keyword::Failure);
204 keywords.insert("terminate".to_string(), Keyword::Terminate);
205 keywords.insert("restart".to_string(), Keyword::Restart);
206 keywords.insert("escalate".to_string(), Keyword::Escalate);
207 keywords.insert("ignore".to_string(), Keyword::Ignore);
208 keywords.insert("Tier1".to_string(), Keyword::Tier1);
209 keywords.insert("Tier2".to_string(), Keyword::Tier2);
210 keywords.insert("Tier3".to_string(), Keyword::Tier3);
211 keywords.insert("Tier4".to_string(), Keyword::Tier4);
212
213 Self {
214 input: input.chars().collect(),
215 position: 0,
216 line: 1,
217 column: 1,
218 keywords,
219 }
220 }
221
222 pub fn tokenize(&mut self) -> Result<Vec<Token>> {
224 let mut tokens = Vec::new();
225
226 loop {
227 let token = self.next_token()?;
228 let is_eof = matches!(token.token_type, TokenType::Eof);
229 tokens.push(token);
230
231 if is_eof {
232 break;
233 }
234 }
235
236 Ok(tokens)
237 }
238
239 pub fn next_token(&mut self) -> Result<Token> {
241 self.skip_whitespace();
242
243 let start_line = self.line;
244 let start_column = self.column;
245 let start_offset = self.position;
246
247 if self.position >= self.input.len() {
248 return Ok(Token {
249 token_type: TokenType::Eof,
250 line: start_line,
251 column: start_column,
252 offset: start_offset,
253 length: 0,
254 });
255 }
256
257 let ch = self.current_char();
258
259 let token_type = match ch {
260 '/' if self.peek_char() == Some('/') => {
262 let comment = self.read_line_comment();
263 TokenType::Comment(comment)
264 }
265 '/' if self.peek_char() == Some('*') => {
266 let comment = self.read_block_comment()?;
267 TokenType::Comment(comment)
268 }
269
270 '"' => {
272 let string = self.read_string()?;
273 TokenType::String(string)
274 }
275
276 c if c.is_ascii_digit() => self.read_number()?,
278
279 c if c.is_alphabetic() || c == '_' => {
281 let identifier = self.read_identifier();
282 if let Some(keyword) = self.keywords.get(&identifier) {
283 TokenType::Keyword(keyword.clone())
284 } else {
285 TokenType::Identifier(identifier)
286 }
287 }
288
289 '+' => {
291 self.advance();
292 TokenType::Plus
293 }
294 '-' if self.peek_char() == Some('>') => {
295 self.advance(); self.advance(); TokenType::Arrow
298 }
299 '-' => {
300 self.advance();
301 TokenType::Minus
302 }
303 '*' => {
304 self.advance();
305 TokenType::Multiply
306 }
307 '/' => {
308 self.advance();
309 TokenType::Divide
310 }
311 '%' => {
312 self.advance();
313 TokenType::Modulo
314 }
315 '=' if self.peek_char() == Some('=') => {
316 self.advance(); self.advance(); TokenType::Equal
319 }
320 '=' if self.peek_char() == Some('>') => {
321 self.advance(); self.advance(); TokenType::FatArrow
324 }
325 '=' => {
326 self.advance();
327 TokenType::Assign
328 }
329 '!' if self.peek_char() == Some('=') => {
330 self.advance(); self.advance(); TokenType::NotEqual
333 }
334 '!' => {
335 self.advance();
336 TokenType::Not
337 }
338 '<' if self.peek_char() == Some('=') => {
339 self.advance(); self.advance(); TokenType::LessThanOrEqual
342 }
343 '<' if self.peek_char() == Some('<') => {
344 self.advance(); self.advance(); TokenType::LeftShift
347 }
348 '<' => {
349 self.advance();
350 TokenType::LessThan
351 }
352 '>' if self.peek_char() == Some('=') => {
353 self.advance(); self.advance(); TokenType::GreaterThanOrEqual
356 }
357 '>' if self.peek_char() == Some('>') => {
358 self.advance(); self.advance(); TokenType::RightShift
361 }
362 '>' => {
363 self.advance();
364 TokenType::GreaterThan
365 }
366 '&' if self.peek_char() == Some('&') => {
367 self.advance(); self.advance(); TokenType::And
370 }
371 '&' => {
372 self.advance();
373 TokenType::BitwiseAnd
374 }
375 '|' if self.peek_char() == Some('|') => {
376 self.advance(); self.advance(); TokenType::Or
379 }
380 '|' => {
381 self.advance();
382 TokenType::BitwiseOr
383 }
384 '^' => {
385 self.advance();
386 TokenType::BitwiseXor
387 }
388 '~' => {
389 self.advance();
390 TokenType::BitwiseNot
391 }
392 '?' => {
393 self.advance();
394 TokenType::Question
395 }
396
397 '(' => {
399 self.advance();
400 TokenType::LeftParen
401 }
402 ')' => {
403 self.advance();
404 TokenType::RightParen
405 }
406 '{' => {
407 self.advance();
408 TokenType::LeftBrace
409 }
410 '}' => {
411 self.advance();
412 TokenType::RightBrace
413 }
414 '[' => {
415 self.advance();
416 TokenType::LeftBracket
417 }
418 ']' => {
419 self.advance();
420 TokenType::RightBracket
421 }
422 ',' => {
423 self.advance();
424 TokenType::Comma
425 }
426 ';' => {
427 self.advance();
428 TokenType::Semicolon
429 }
430 ':' => {
431 self.advance();
432 TokenType::Colon
433 }
434 '.' => {
435 self.advance();
436 TokenType::Dot
437 }
438
439 '\n' => {
441 self.advance();
442 self.line += 1;
443 self.column = 1;
444 TokenType::Newline
445 }
446
447 _ => {
449 return Err(ReplError::Lexing(format!(
450 "Unexpected character '{}' at line {}, column {}",
451 ch, self.line, self.column
452 )));
453 }
454 };
455
456 let length = self.position - start_offset;
457
458 Ok(Token {
459 token_type,
460 line: start_line,
461 column: start_column,
462 offset: start_offset,
463 length,
464 })
465 }
466
467 fn skip_whitespace(&mut self) {
469 while let Some(ch) = self.current_char_opt() {
470 if ch.is_whitespace() && ch != '\n' {
471 self.advance();
472 } else {
473 break;
474 }
475 }
476 }
477
478 fn read_string(&mut self) -> Result<String> {
480 self.advance(); let mut string = String::new();
482
483 while let Some(ch) = self.current_char_opt() {
484 match ch {
485 '"' => {
486 self.advance(); return Ok(string);
488 }
489 '\\' => {
490 self.advance(); if let Some(escaped) = self.current_char_opt() {
492 match escaped {
493 'n' => string.push('\n'),
494 't' => string.push('\t'),
495 'r' => string.push('\r'),
496 '\\' => string.push('\\'),
497 '"' => string.push('"'),
498 _ => {
499 string.push('\\');
500 string.push(escaped);
501 }
502 }
503 self.advance();
504 } else {
505 return Err(ReplError::Lexing("Unterminated string literal".to_string()));
506 }
507 }
508 '\n' => {
509 self.line += 1;
510 self.column = 1;
511 string.push(ch);
512 self.advance();
513 }
514 _ => {
515 string.push(ch);
516 self.advance();
517 }
518 }
519 }
520
521 Err(ReplError::Lexing("Unterminated string literal".to_string()))
522 }
523
524 fn read_number(&mut self) -> Result<TokenType> {
526 let mut number_str = String::new();
527 let mut has_dot = false;
528
529 while let Some(ch) = self.current_char_opt() {
531 if ch.is_ascii_digit() {
532 number_str.push(ch);
533 self.advance();
534 } else if ch == '.' && !has_dot {
535 has_dot = true;
536 number_str.push(ch);
537 self.advance();
538 } else {
539 break;
540 }
541 }
542
543 if let Some(ch) = self.current_char_opt() {
545 if ch.is_alphabetic() {
546 let unit = self.read_unit();
547 let value = if has_dot {
548 number_str
549 .parse::<f64>()
550 .map_err(|_| ReplError::Lexing(format!("Invalid number: {}", number_str)))?
551 as u64
552 } else {
553 number_str
554 .parse::<u64>()
555 .map_err(|_| ReplError::Lexing(format!("Invalid number: {}", number_str)))?
556 };
557
558 if matches!(unit.as_str(), "s" | "m" | "h" | "d" | "ms") {
560 return Ok(TokenType::Duration(value, unit));
561 } else if matches!(unit.as_str(), "B" | "KB" | "MB" | "GB" | "TB") {
562 return Ok(TokenType::Size(value, unit));
563 }
564 }
565 }
566
567 if has_dot {
569 let value = number_str
570 .parse::<f64>()
571 .map_err(|_| ReplError::Lexing(format!("Invalid number: {}", number_str)))?;
572 Ok(TokenType::Number(value))
573 } else {
574 let value = number_str
575 .parse::<i64>()
576 .map_err(|_| ReplError::Lexing(format!("Invalid number: {}", number_str)))?;
577 Ok(TokenType::Integer(value))
578 }
579 }
580
581 fn read_unit(&mut self) -> String {
583 let mut unit = String::new();
584 while let Some(ch) = self.current_char_opt() {
585 if ch.is_alphabetic() {
586 unit.push(ch);
587 self.advance();
588 } else {
589 break;
590 }
591 }
592 unit
593 }
594
595 fn read_identifier(&mut self) -> String {
597 let mut identifier = String::new();
598
599 while let Some(ch) = self.current_char_opt() {
600 if ch.is_alphanumeric() || ch == '_' {
601 identifier.push(ch);
602 self.advance();
603 } else {
604 break;
605 }
606 }
607
608 identifier
609 }
610
611 fn read_line_comment(&mut self) -> String {
613 self.advance(); self.advance(); let mut comment = String::new();
617 while let Some(ch) = self.current_char_opt() {
618 if ch == '\n' {
619 break;
620 }
621 comment.push(ch);
622 self.advance();
623 }
624
625 comment
626 }
627
628 fn read_block_comment(&mut self) -> Result<String> {
630 self.advance(); self.advance(); let mut comment = String::new();
634
635 while self.position < self.input.len() - 1 {
636 let ch = self.current_char();
637 let next_ch = self.peek_char();
638
639 if ch == '*' && next_ch == Some('/') {
640 self.advance(); self.advance(); return Ok(comment);
643 }
644
645 if ch == '\n' {
646 self.line += 1;
647 self.column = 1;
648 }
649
650 comment.push(ch);
651 self.advance();
652 }
653
654 Err(ReplError::Lexing("Unterminated block comment".to_string()))
655 }
656
657 fn current_char(&self) -> char {
659 self.input[self.position]
660 }
661
662 fn current_char_opt(&self) -> Option<char> {
664 self.input.get(self.position).copied()
665 }
666
667 fn peek_char(&self) -> Option<char> {
669 self.input.get(self.position + 1).copied()
670 }
671
672 fn advance(&mut self) {
674 if self.position < self.input.len() {
675 self.position += 1;
676 self.column += 1;
677 }
678 }
679}
680
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `source` and returns just the token kinds, in order.
    fn kinds(source: &str) -> Vec<TokenType> {
        Lexer::new(source)
            .tokenize()
            .unwrap()
            .into_iter()
            .map(|token| token.token_type)
            .collect()
    }

    #[test]
    fn test_basic_tokens() {
        let found = kinds("let x = 42");

        // let, x, =, 42, Eof
        assert_eq!(found.len(), 5);
        assert_eq!(found[0], TokenType::Keyword(Keyword::Let));
        assert!(matches!(found[1], TokenType::Identifier(_)));
        assert_eq!(found[2], TokenType::Assign);
        assert_eq!(found[3], TokenType::Integer(42));
        assert_eq!(found[4], TokenType::Eof);
    }

    #[test]
    fn test_string_literal() {
        let found = kinds(r#""Hello, world!""#);

        // string, Eof
        assert_eq!(found.len(), 2);
        assert_eq!(found[0], TokenType::String("Hello, world!".to_string()));
    }

    #[test]
    fn test_duration_literal() {
        let found = kinds("30s 5m 2h");

        // three durations, Eof
        assert_eq!(found.len(), 4);
        assert_eq!(found[0], TokenType::Duration(30, "s".to_string()));
        assert_eq!(found[1], TokenType::Duration(5, "m".to_string()));
        assert_eq!(found[2], TokenType::Duration(2, "h".to_string()));
    }

    #[test]
    fn test_size_literal() {
        let found = kinds("1KB 512MB 2GB");

        // three sizes, Eof
        assert_eq!(found.len(), 4);
        assert_eq!(found[0], TokenType::Size(1, "KB".to_string()));
        assert_eq!(found[1], TokenType::Size(512, "MB".to_string()));
        assert_eq!(found[2], TokenType::Size(2, "GB".to_string()));
    }

    #[test]
    fn test_comments() {
        let found = kinds("// line comment\n/* block comment */");

        // comment, newline, comment, Eof
        assert_eq!(found.len(), 4);
        assert!(matches!(found[0], TokenType::Comment(_)));
        assert_eq!(found[1], TokenType::Newline);
        assert!(matches!(found[2], TokenType::Comment(_)));
    }
}