helix/dna/atp/
lexer.rs

1use std::str::Chars;
2use std::iter::Peekable;
3use std::fmt;
4#[derive(Debug, Clone, PartialEq)]
5pub enum Token {
6    String(String),
7    Number(f64),
8    Bool(bool),
9    Duration(u64, TimeUnit),
10    Identifier(String),
11    Keyword(Keyword),
12    Assign,
13    Plus,
14    Arrow,
15    Tilde,
16    Pipe,
17    LeftBrace,
18    RightBrace,
19    LeftBracket,
20    RightBracket,
21    LeftParen,
22    RightParen,
23    Comma,
24    Dot,
25    LessThan,
26    GreaterThan,
27    Colon,
28    Semicolon,
29    Comment(String),
30    Variable(String),
31    Reference(String),
32    Newline,
33    Eof,
34}
/// Reserved words recognised by the lexer.
///
/// The enum carries no data, so it is `Copy` and can be compared, hashed and
/// used as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Keyword {
    /// `project`
    Project,
    /// `agent`
    Agent,
    /// `workflow`
    Workflow,
    /// `memory`
    Memory,
    /// `context`
    Context,
    /// `crew`
    Crew,
    /// `plugin`
    Plugin,
    //Database,
    /// `step`
    Step,
    /// `task`
    Task,
    /// `pipeline`
    Pipeline,
    /// `trigger`
    Trigger,
    /// `capabilities`
    Capabilities,
    /// `backstory`
    Backstory,
    /// `secrets`
    Secrets,
    /// `variables`
    Variables,
    /// `embeddings`
    Embeddings,
    /// `true`
    True,
    /// `false`
    False,
    /// `null`
    Null,
    /// `depends_on`
    DependsOn,
    /// `parallel`
    Parallel,
    /// `timeout`
    Timeout,
    /// `load`
    Load,
    /// `section`
    Section,
}
/// Unit attached to a [`Token::Duration`] value.
///
/// The enum carries no data, so it is `Copy` and can be compared, hashed and
/// used as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TimeUnit {
    /// Written `s`, `sec` or `seconds`.
    Seconds,
    /// Written `m`, `min` or `minutes`.
    Minutes,
    /// Written `h`, `hr` or `hours`.
    Hours,
    /// Written `d` or `days`.
    Days,
}
/// A position in the source text, as tracked by the lexer's cursor.
///
/// The struct is three plain counters, so it is `Copy` and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SourceLocation {
    /// Line number; the lexer starts counting at 1.
    pub line: usize,
    /// Column within `line`.
    pub column: usize,
    /// Running count of cursor advances since the start of the input
    /// (counted in characters, not bytes).
    pub position: usize,
}
76#[derive(Debug, Clone)]
77pub struct TokenWithLocation {
78    pub token: Token,
79    pub location: SourceLocation,
80}
81#[derive(Debug, Clone)]
82pub enum LexError {
83    UnterminatedString { location: SourceLocation },
84    InvalidNumber { location: SourceLocation, text: String },
85    UnexpectedCharacter { location: SourceLocation, char: char },
86    InvalidEscape { location: SourceLocation, char: char },
87}
88impl fmt::Display for LexError {
89    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
90        match self {
91            LexError::UnterminatedString { location } => {
92                write!(
93                    f, "Unterminated string at line {}, column {}", location.line,
94                    location.column
95                )
96            }
97            LexError::InvalidNumber { location, text } => {
98                write!(
99                    f, "Invalid number '{}' at line {}, column {}", text, location.line,
100                    location.column
101                )
102            }
103            LexError::UnexpectedCharacter { location, char } => {
104                write!(
105                    f, "Unexpected character '{}' at line {}, column {}", char, location
106                    .line, location.column
107                )
108            }
109            LexError::InvalidEscape { location, char } => {
110                write!(
111                    f, "Invalid escape sequence '\\{}' at line {}, column {}", char,
112                    location.line, location.column
113                )
114            }
115        }
116    }
117}
/// Character-level cursor over a source string that produces [`Token`]s.
pub struct Lexer<'a> {
    /// Characters that have not been consumed yet.
    input: Peekable<Chars<'a>>,
    /// The character under the cursor; `None` once the input is exhausted.
    current_char: Option<char>,
    /// Number of times the cursor has advanced.
    position: usize,
    /// Line of the cursor; starts at 1.
    line: usize,
    /// Column of the cursor within `line`.
    column: usize,
}
125impl<'a> Lexer<'a> {
126    pub fn new(input: &'a str) -> Self {
127        let mut lexer = Lexer {
128            input: input.chars().peekable(),
129            current_char: None,
130            position: 0,
131            line: 1,
132            column: 0,
133        };
134        lexer.advance();
135        lexer
136    }
137    fn current_location(&self) -> SourceLocation {
138        SourceLocation {
139            line: self.line,
140            column: self.column,
141            position: self.position,
142        }
143    }
144    fn advance(&mut self) {
145        self.current_char = self.input.next();
146        self.position += 1;
147        if let Some(ch) = self.current_char {
148            if ch == '\n' {
149                self.line += 1;
150                self.column = 0;
151            } else {
152                self.column += 1;
153            }
154        }
155    }
156    fn peek(&mut self) -> Option<&char> {
157        self.input.peek()
158    }
159    fn skip_whitespace(&mut self) {
160        while let Some(ch) = self.current_char {
161            if ch.is_whitespace() && ch != '\n' {
162                self.advance();
163            } else if ch == '\\' && self.peek() == Some(&'\n') {
164                self.advance();
165                self.advance();
166                self.line += 1;
167                self.column = 0;
168            } else {
169                break;
170            }
171        }
172    }
173    fn read_string(&mut self) -> Result<String, LexError> {
174        let quote_char = self.current_char.unwrap(); // Either '"' or '\''
175        let mut result = String::new();
176        self.advance();
177        while let Some(ch) = self.current_char {
178            if ch == quote_char {
179                self.advance();
180                return Ok(result);
181            } else if ch == '\\' {
182                self.advance();
183                if let Some(escaped) = self.current_char {
184                    result
185                        .push(
186                            match escaped {
187                                'n' => '\n',
188                                't' => '\t',
189                                'r' => '\r',
190                                '\\' => '\\',
191                                '"' => '"',
192                                '\'' => '\'',
193                                _ => {
194                                    return Err(LexError::InvalidEscape {
195                                        location: self.current_location(),
196                                        char: escaped,
197                                    });
198                                }
199                            },
200                        );
201                    self.advance();
202                }
203            } else {
204                result.push(ch);
205                self.advance();
206            }
207        }
208        Err(LexError::UnterminatedString {
209            location: self.current_location(),
210        })
211    }
212    fn read_number(&mut self) -> f64 {
213        let mut num_str = String::new();
214        while let Some(ch) = self.current_char {
215            if ch.is_numeric() || ch == '.' || ch == '_' {
216                num_str.push(ch);
217                self.advance();
218            } else {
219                break;
220            }
221        }
222        let clean_num_str = num_str.replace('_', "");
223        clean_num_str.parse().unwrap_or(0.0)
224    }
225    fn read_identifier(&mut self) -> String {
226        let mut ident = String::new();
227        while let Some(ch) = self.current_char {
228            if ch.is_alphanumeric() || ch == '_' || ch == '-' {
229                ident.push(ch);
230                self.advance();
231            } else if ch == '!' {
232                ident.push(ch);
233                self.advance();
234                break;
235            } else {
236                break;
237            }
238        }
239        ident
240    }
241    fn read_comment(&mut self) -> String {
242        let mut comment = String::new();
243        self.advance();
244        while let Some(ch) = self.current_char {
245            if ch == '\n' {
246                break;
247            }
248            comment.push(ch);
249            self.advance();
250        }
251        comment.trim().to_string()
252    }
253    fn read_variable(&mut self) -> String {
254        let mut var = String::new();
255        self.advance();
256        while let Some(ch) = self.current_char {
257            if ch.is_alphanumeric() || ch == '_' {
258                var.push(ch);
259                self.advance();
260            } else {
261                break;
262            }
263        }
264        var
265    }
266    fn read_reference(&mut self) -> String {
267        let mut reference = String::new();
268        self.advance();
269        while let Some(ch) = self.current_char {
270            if ch.is_alphanumeric() || ch == '_' || ch == '.' {
271                reference.push(ch);
272                self.advance();
273            } else {
274                break;
275            }
276        }
277        reference
278    }
279    fn check_keyword(&self, ident: &str) -> Option<Keyword> {
280        match ident {
281            "project" => Some(Keyword::Project),
282            "agent" => Some(Keyword::Agent),
283            "workflow" => Some(Keyword::Workflow),
284            "memory" => Some(Keyword::Memory),
285            "context" => Some(Keyword::Context),
286            "crew" => Some(Keyword::Crew),
287            "plugin" => Some(Keyword::Plugin),
288            //"database" => Some(Keyword::Database),
289            "step" => Some(Keyword::Step),
290            "task" => Some(Keyword::Task),
291            "pipeline" => Some(Keyword::Pipeline),
292            "trigger" => Some(Keyword::Trigger),
293            "capabilities" => Some(Keyword::Capabilities),
294            "backstory" => Some(Keyword::Backstory),
295            "secrets" => Some(Keyword::Secrets),
296            "variables" => Some(Keyword::Variables),
297            "embeddings" => Some(Keyword::Embeddings),
298            "true" => Some(Keyword::True),
299            "false" => Some(Keyword::False),
300            "null" => Some(Keyword::Null),
301            "depends_on" => Some(Keyword::DependsOn),
302            "parallel" => Some(Keyword::Parallel),
303            "timeout" => Some(Keyword::Timeout),
304            "load" => Some(Keyword::Load),
305            "section" => Some(Keyword::Section),
306            _ => None,
307        }
308    }
309    fn read_duration(&mut self, num: f64) -> Option<Token> {
310        let mut unit_str = String::new();
311        while let Some(ch) = self.current_char {
312            if ch.is_alphabetic() {
313                unit_str.push(ch);
314                self.advance();
315            } else {
316                break;
317            }
318        }
319        let unit = match unit_str.as_str() {
320            "s" | "sec" | "seconds" => Some(TimeUnit::Seconds),
321            "m" | "min" | "minutes" => Some(TimeUnit::Minutes),
322            "h" | "hr" | "hours" => Some(TimeUnit::Hours),
323            "d" | "days" => Some(TimeUnit::Days),
324            _ => None,
325        };
326        if let Some(u) = unit { Some(Token::Duration(num as u64, u)) } else { None }
327    }
328    pub fn next_token_with_location(&mut self) -> TokenWithLocation {
329        self.skip_whitespace();
330        let location = self.current_location();
331        let token = self.next_token_internal();
332        TokenWithLocation {
333            token,
334            location,
335        }
336    }
337    pub fn next_token(&mut self) -> Token {
338        self.next_token_internal()
339    }
340    fn next_token_internal(&mut self) -> Token {
341        self.skip_whitespace();
342        match self.current_char {
343            None => Token::Eof,
344            Some('\n') => {
345                self.advance();
346                Token::Newline
347            }
348            Some('#') => {
349                let comment = self.read_comment();
350                Token::Comment(comment)
351            }
352            Some('"') | Some('\'') => {
353                match self.read_string() {
354                    Ok(string) => Token::String(string),
355                    Err(_) => Token::String("".to_string()),
356                }
357            }
358            Some('$') => {
359                let var = self.read_variable();
360                Token::Variable(var)
361            }
362            Some('@') => {
363                let reference = self.read_reference();
364                Token::Reference(reference)
365            }
366            Some('{') => {
367                self.advance();
368                Token::LeftBrace
369            }
370            Some('}') => {
371                self.advance();
372                Token::RightBrace
373            }
374            Some('[') => {
375                self.advance();
376                Token::LeftBracket
377            }
378            Some(']') => {
379                self.advance();
380                Token::RightBracket
381            }
382            Some('(') => {
383                self.advance();
384                Token::LeftParen
385            }
386            Some(')') => {
387                self.advance();
388                Token::RightParen
389            }
390            Some(',') => {
391                self.advance();
392                Token::Comma
393            }
394            Some('.') => {
395                self.advance();
396                Token::Dot
397            }
398            Some('<') => {
399                self.advance();
400                Token::LessThan
401            }
402            Some('>') => {
403                self.advance();
404                Token::GreaterThan
405            }
406            Some(':') => {
407                self.advance();
408                Token::Colon
409            }
410            Some(';') => {
411                self.advance();
412                Token::Semicolon
413            }
414            Some('=') => {
415                self.advance();
416                Token::Assign
417            }
418            Some('-') => {
419                self.advance();
420                if self.current_char == Some('>') {
421                    self.advance();
422                    Token::Arrow
423                } else {
424                    if let Some(ch) = self.current_char {
425                        if ch.is_numeric() {
426                            let num = -self.read_number();
427                            Token::Number(num)
428                        } else {
429                            let mut ident = String::from("-");
430                            ident.push_str(&self.read_identifier());
431                            if let Some(keyword) = self.check_keyword(&ident) {
432                                Token::Keyword(keyword)
433                            } else {
434                                Token::Identifier(ident)
435                            }
436                        }
437                    } else {
438                        Token::Identifier("-".to_string())
439                    }
440                }
441            }
442            Some('|') => {
443                self.advance();
444                Token::Pipe
445            }
446            Some('~') => {
447                self.advance();
448                Token::Tilde
449            }
450            Some('+') => {
451                self.advance();
452                Token::Plus
453            }
454            Some('!') => {
455                let mut var_name = String::new();
456                self.advance();
457                while let Some(ch) = self.current_char {
458                    if ch.is_alphanumeric() || ch == '_' {
459                        var_name.push(ch);
460                        self.advance();
461                    } else {
462                        break;
463                    }
464                }
465                if self.current_char == Some('!') {
466                    self.advance();
467                    Token::String(format!("!{}!", var_name))
468                } else {
469                    Token::String(format!("!{}", var_name))
470                }
471            }
472            Some(ch) if ch.is_numeric() => {
473                let num = self.read_number();
474                while let Some(' ') | Some('\t') = self.current_char {
475                    self.advance();
476                }
477                if let Some(duration_token) = self.read_duration(num) {
478                    duration_token
479                } else {
480                    Token::Number(num)
481                }
482            }
483            Some(ch) if ch.is_alphabetic() || ch == '_' => {
484                let ident = self.read_identifier();
485                if ident.ends_with('!') {
486                    Token::String(ident)
487                } else if let Some(keyword) = self.check_keyword(&ident) {
488                    match keyword {
489                        Keyword::True => Token::Bool(true),
490                        Keyword::False => Token::Bool(false),
491                        _ => Token::Keyword(keyword),
492                    }
493                } else {
494                    Token::Identifier(ident)
495                }
496            }
497            Some(ch) => {
498                self.advance();
499                Token::Identifier(ch.to_string())
500            }
501        }
502    }
503}
504pub fn tokenize(input: &str) -> Result<Vec<Token>, String> {
505    let mut lexer = Lexer::new(input);
506    let mut tokens = Vec::new();
507    loop {
508        let token = lexer.next_token();
509        match &token {
510            Token::Eof => {
511                tokens.push(token);
512                break;
513            }
514            Token::Comment(_) => {}
515            _ => {
516                tokens.push(token);
517            }
518        }
519    }
520    Ok(tokens)
521}
522pub fn tokenize_with_locations(input: &str) -> Result<Vec<TokenWithLocation>, LexError> {
523    let mut lexer = Lexer::new(input);
524    let mut tokens = Vec::new();
525    loop {
526        let token_with_loc = lexer.next_token_with_location();
527        match &token_with_loc.token {
528            Token::Eof => {
529                tokens.push(token_with_loc);
530                break;
531            }
532            Token::Comment(_) => {}
533            _ => {
534                tokens.push(token_with_loc);
535            }
536        }
537    }
538    Ok(tokens)
539}
540pub struct SourceMap {
541    pub tokens: Vec<TokenWithLocation>,
542    pub source: String,
543}
544impl SourceMap {
545    pub fn new(source: String) -> Result<Self, LexError> {
546        let tokens = tokenize_with_locations(&source)?;
547        Ok(SourceMap { tokens, source })
548    }
549    pub fn get_line(&self, line_num: usize) -> Option<&str> {
550        self.source.lines().nth(line_num - 1)
551    }
552    pub fn get_context(
553        &self,
554        location: &SourceLocation,
555        context_lines: usize,
556    ) -> String {
557        let mut result = String::new();
558        let start_line = location.line.saturating_sub(context_lines);
559        let end_line = location.line + context_lines;
560        for (i, line) in self.source.lines().enumerate() {
561            let line_num = i + 1;
562            if line_num >= start_line && line_num <= end_line {
563                if line_num == location.line {
564                    result.push_str(&format!("{:4} | {}\n", line_num, line));
565                    result
566                        .push_str(
567                            &format!(
568                                "     | {}^\n", " ".repeat(location.column
569                                .saturating_sub(1))
570                            ),
571                        );
572                } else {
573                    result.push_str(&format!("{:4} | {}\n", line_num, line));
574                }
575            }
576        }
577        result
578    }
579}
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `input` and checks that the token stream begins with `expected`.
    fn assert_starts_with(input: &str, expected: &[Token]) {
        let tokens = tokenize(input).unwrap();
        assert_eq!(&tokens[..expected.len()], expected);
    }

    #[test]
    fn test_basic_tokens() {
        assert_starts_with(
            r#"project "test" { }"#,
            &[
                Token::Keyword(Keyword::Project),
                Token::String("test".to_string()),
                Token::LeftBrace,
                Token::RightBrace,
            ],
        );
    }

    #[test]
    fn test_duration() {
        assert_starts_with(
            "timeout = 30m",
            &[
                Token::Keyword(Keyword::Timeout),
                Token::Assign,
                Token::Duration(30, TimeUnit::Minutes),
            ],
        );
    }

    #[test]
    fn test_duration_with_space() {
        assert_starts_with(
            "timeout = 30 m",
            &[
                Token::Keyword(Keyword::Timeout),
                Token::Assign,
                Token::Duration(30, TimeUnit::Minutes),
            ],
        );
    }

    #[test]
    fn test_section_keyword() {
        assert_starts_with(
            "section test { }",
            &[
                Token::Keyword(Keyword::Section),
                Token::Identifier("test".to_string()),
                Token::LeftBrace,
                Token::RightBrace,
            ],
        );
    }

    #[test]
    fn test_variables_and_references() {
        assert_starts_with(
            "$API_KEY @memory.context",
            &[
                Token::Variable("API_KEY".to_string()),
                Token::Reference("memory.context".to_string()),
            ],
        );
    }
}