// helix_core/lexer.rs

use std::fmt;
use std::iter::Peekable;
use std::str::Chars;
4#[derive(Debug, Clone, PartialEq)]
5pub enum Token {
6    String(String),
7    Number(f64),
8    Bool(bool),
9    Duration(u64, TimeUnit),
10    Identifier(String),
11    Keyword(Keyword),
12    Assign,
13    Arrow,
14    Pipe,
15    LeftBrace,
16    RightBrace,
17    LeftBracket,
18    RightBracket,
19    LeftParen,
20    RightParen,
21    Comma,
22    Dot,
23    Comment(String),
24    Variable(String),
25    Reference(String),
26    Newline,
27    Eof,
28}
/// Reserved words of the language.
///
/// Each variant corresponds to the lower-case spelling of its name in source
/// text (`DependsOn` is written `depends_on`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Keyword {
    Project,
    Agent,
    Workflow,
    Memory,
    Context,
    Crew,
    Plugin,
    Database,
    Step,
    Pipeline,
    Trigger,
    Capabilities,
    Backstory,
    Secrets,
    Variables,
    Embeddings,
    /// `true`
    True,
    /// `false`
    False,
    /// `null`
    Null,
    /// `depends_on`
    DependsOn,
    Parallel,
    Timeout,
    Load,
}
/// Unit attached to a duration literal.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TimeUnit {
    Seconds,
    Minutes,
    Hours,
    Days,
}
/// A position inside the source text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SourceLocation {
    /// Line number, starting at 1.
    pub line: usize,
    /// Column within the line, counted in characters.
    pub column: usize,
    /// Running character count from the start of the input.
    pub position: usize,
}
68#[derive(Debug, Clone)]
69pub struct TokenWithLocation {
70    pub token: Token,
71    pub location: SourceLocation,
72}
73#[derive(Debug, Clone)]
74pub enum LexError {
75    UnterminatedString { location: SourceLocation },
76    InvalidNumber { location: SourceLocation, text: String },
77    UnexpectedCharacter { location: SourceLocation, char: char },
78    InvalidEscape { location: SourceLocation, char: char },
79}
80impl fmt::Display for LexError {
81    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
82        match self {
83            LexError::UnterminatedString { location } => {
84                write!(
85                    f, "Unterminated string at line {}, column {}", location.line,
86                    location.column
87                )
88            }
89            LexError::InvalidNumber { location, text } => {
90                write!(
91                    f, "Invalid number '{}' at line {}, column {}", text, location.line,
92                    location.column
93                )
94            }
95            LexError::UnexpectedCharacter { location, char } => {
96                write!(
97                    f, "Unexpected character '{}' at line {}, column {}", char, location
98                    .line, location.column
99                )
100            }
101            LexError::InvalidEscape { location, char } => {
102                write!(
103                    f, "Invalid escape sequence '\\{}' at line {}, column {}", char,
104                    location.line, location.column
105                )
106            }
107        }
108    }
109}
/// Character-by-character tokenizer over a borrowed source string.
#[derive(Debug, Clone)]
pub struct Lexer<'a> {
    /// Remaining characters after `current_char`.
    input: Peekable<Chars<'a>>,
    /// The character currently being examined; `None` once input is exhausted.
    current_char: Option<char>,
    /// Running count of characters pulled from `input` (the first character is 1).
    position: usize,
    /// Line of `current_char`, starting at 1.
    line: usize,
    /// Column of `current_char` within its line, counted in characters.
    column: usize,
}
117impl<'a> Lexer<'a> {
118    pub fn new(input: &'a str) -> Self {
119        let mut lexer = Lexer {
120            input: input.chars().peekable(),
121            current_char: None,
122            position: 0,
123            line: 1,
124            column: 0,
125        };
126        lexer.advance();
127        lexer
128    }
129    fn current_location(&self) -> SourceLocation {
130        SourceLocation {
131            line: self.line,
132            column: self.column,
133            position: self.position,
134        }
135    }
136    fn advance(&mut self) {
137        self.current_char = self.input.next();
138        self.position += 1;
139        if let Some(ch) = self.current_char {
140            if ch == '\n' {
141                self.line += 1;
142                self.column = 0;
143            } else {
144                self.column += 1;
145            }
146        }
147    }
148    fn peek(&mut self) -> Option<&char> {
149        self.input.peek()
150    }
151    fn skip_whitespace(&mut self) {
152        while let Some(ch) = self.current_char {
153            if ch.is_whitespace() && ch != '\n' {
154                self.advance();
155            } else if ch == '\\' && self.peek() == Some(&'\n') {
156                self.advance();
157                self.advance();
158                self.line += 1;
159                self.column = 0;
160            } else {
161                break;
162            }
163        }
164    }
165    fn read_string(&mut self) -> String {
166        let mut result = String::new();
167        self.advance();
168        while let Some(ch) = self.current_char {
169            if ch == '"' {
170                self.advance();
171                break;
172            } else if ch == '\\' {
173                self.advance();
174                if let Some(escaped) = self.current_char {
175                    result
176                        .push(
177                            match escaped {
178                                'n' => '\n',
179                                't' => '\t',
180                                'r' => '\r',
181                                '\\' => '\\',
182                                '"' => '"',
183                                _ => escaped,
184                            },
185                        );
186                    self.advance();
187                }
188            } else {
189                result.push(ch);
190                self.advance();
191            }
192        }
193        result
194    }
195    fn read_number(&mut self) -> f64 {
196        let mut num_str = String::new();
197        while let Some(ch) = self.current_char {
198            if ch.is_numeric() || ch == '.' {
199                num_str.push(ch);
200                self.advance();
201            } else {
202                break;
203            }
204        }
205        num_str.parse().unwrap_or(0.0)
206    }
207    fn read_identifier(&mut self) -> String {
208        let mut ident = String::new();
209        while let Some(ch) = self.current_char {
210            if ch.is_alphanumeric() || ch == '_' || ch == '-' {
211                ident.push(ch);
212                self.advance();
213            } else {
214                break;
215            }
216        }
217        ident
218    }
219    fn read_comment(&mut self) -> String {
220        let mut comment = String::new();
221        self.advance();
222        while let Some(ch) = self.current_char {
223            if ch == '\n' {
224                break;
225            }
226            comment.push(ch);
227            self.advance();
228        }
229        comment.trim().to_string()
230    }
231    fn read_variable(&mut self) -> String {
232        let mut var = String::new();
233        self.advance();
234        while let Some(ch) = self.current_char {
235            if ch.is_alphanumeric() || ch == '_' {
236                var.push(ch);
237                self.advance();
238            } else {
239                break;
240            }
241        }
242        var
243    }
244    fn read_reference(&mut self) -> String {
245        let mut reference = String::new();
246        self.advance();
247        while let Some(ch) = self.current_char {
248            if ch.is_alphanumeric() || ch == '_' || ch == '.' {
249                reference.push(ch);
250                self.advance();
251            } else {
252                break;
253            }
254        }
255        reference
256    }
257    fn check_keyword(&self, ident: &str) -> Option<Keyword> {
258        match ident {
259            "project" => Some(Keyword::Project),
260            "agent" => Some(Keyword::Agent),
261            "workflow" => Some(Keyword::Workflow),
262            "memory" => Some(Keyword::Memory),
263            "context" => Some(Keyword::Context),
264            "crew" => Some(Keyword::Crew),
265            "plugin" => Some(Keyword::Plugin),
266            "database" => Some(Keyword::Database),
267            "step" => Some(Keyword::Step),
268            "pipeline" => Some(Keyword::Pipeline),
269            "trigger" => Some(Keyword::Trigger),
270            "capabilities" => Some(Keyword::Capabilities),
271            "backstory" => Some(Keyword::Backstory),
272            "secrets" => Some(Keyword::Secrets),
273            "variables" => Some(Keyword::Variables),
274            "embeddings" => Some(Keyword::Embeddings),
275            "true" => Some(Keyword::True),
276            "false" => Some(Keyword::False),
277            "null" => Some(Keyword::Null),
278            "depends_on" => Some(Keyword::DependsOn),
279            "parallel" => Some(Keyword::Parallel),
280            "timeout" => Some(Keyword::Timeout),
281            "load" => Some(Keyword::Load),
282            _ => None,
283        }
284    }
285    fn read_duration(&mut self, num: f64) -> Option<Token> {
286        let mut unit_str = String::new();
287        while let Some(ch) = self.current_char {
288            if ch.is_alphabetic() {
289                unit_str.push(ch);
290                self.advance();
291            } else {
292                break;
293            }
294        }
295        let unit = match unit_str.as_str() {
296            "s" | "sec" | "seconds" => Some(TimeUnit::Seconds),
297            "m" | "min" | "minutes" => Some(TimeUnit::Minutes),
298            "h" | "hr" | "hours" => Some(TimeUnit::Hours),
299            "d" | "days" => Some(TimeUnit::Days),
300            _ => None,
301        };
302        if let Some(u) = unit { Some(Token::Duration(num as u64, u)) } else { None }
303    }
304    pub fn next_token_with_location(&mut self) -> TokenWithLocation {
305        self.skip_whitespace();
306        let location = self.current_location();
307        let token = self.next_token_internal();
308        TokenWithLocation {
309            token,
310            location,
311        }
312    }
313    pub fn next_token(&mut self) -> Token {
314        self.next_token_internal()
315    }
316    fn next_token_internal(&mut self) -> Token {
317        self.skip_whitespace();
318        match self.current_char {
319            None => Token::Eof,
320            Some('\n') => {
321                self.advance();
322                Token::Newline
323            }
324            Some('#') => {
325                let comment = self.read_comment();
326                Token::Comment(comment)
327            }
328            Some('"') => {
329                let string = self.read_string();
330                Token::String(string)
331            }
332            Some('$') => {
333                let var = self.read_variable();
334                Token::Variable(var)
335            }
336            Some('@') => {
337                let reference = self.read_reference();
338                Token::Reference(reference)
339            }
340            Some('{') => {
341                self.advance();
342                Token::LeftBrace
343            }
344            Some('}') => {
345                self.advance();
346                Token::RightBrace
347            }
348            Some('[') => {
349                self.advance();
350                Token::LeftBracket
351            }
352            Some(']') => {
353                self.advance();
354                Token::RightBracket
355            }
356            Some('(') => {
357                self.advance();
358                Token::LeftParen
359            }
360            Some(')') => {
361                self.advance();
362                Token::RightParen
363            }
364            Some(',') => {
365                self.advance();
366                Token::Comma
367            }
368            Some('.') => {
369                self.advance();
370                Token::Dot
371            }
372            Some('=') => {
373                self.advance();
374                Token::Assign
375            }
376            Some('-') => {
377                self.advance();
378                if self.current_char == Some('>') {
379                    self.advance();
380                    Token::Arrow
381                } else {
382                    if let Some(ch) = self.current_char {
383                        if ch.is_numeric() {
384                            let num = -self.read_number();
385                            Token::Number(num)
386                        } else {
387                            let mut ident = String::from("-");
388                            ident.push_str(&self.read_identifier());
389                            if let Some(keyword) = self.check_keyword(&ident) {
390                                Token::Keyword(keyword)
391                            } else {
392                                Token::Identifier(ident)
393                            }
394                        }
395                    } else {
396                        Token::Identifier("-".to_string())
397                    }
398                }
399            }
400            Some('|') => {
401                self.advance();
402                Token::Pipe
403            }
404            Some(ch) if ch.is_numeric() => {
405                let num = self.read_number();
406                if let Some(duration_token) = self.read_duration(num) {
407                    duration_token
408                } else {
409                    Token::Number(num)
410                }
411            }
412            Some(ch) if ch.is_alphabetic() || ch == '_' => {
413                let ident = self.read_identifier();
414                if let Some(keyword) = self.check_keyword(&ident) {
415                    match keyword {
416                        Keyword::True => Token::Bool(true),
417                        Keyword::False => Token::Bool(false),
418                        _ => Token::Keyword(keyword),
419                    }
420                } else {
421                    Token::Identifier(ident)
422                }
423            }
424            Some(ch) => {
425                self.advance();
426                Token::Identifier(ch.to_string())
427            }
428        }
429    }
430}
431pub fn tokenize(input: &str) -> Result<Vec<Token>, String> {
432    let mut lexer = Lexer::new(input);
433    let mut tokens = Vec::new();
434    loop {
435        let token = lexer.next_token();
436        match &token {
437            Token::Eof => {
438                tokens.push(token);
439                break;
440            }
441            Token::Comment(_) => {}
442            _ => {
443                tokens.push(token);
444            }
445        }
446    }
447    Ok(tokens)
448}
449pub fn tokenize_with_locations(input: &str) -> Result<Vec<TokenWithLocation>, LexError> {
450    let mut lexer = Lexer::new(input);
451    let mut tokens = Vec::new();
452    loop {
453        let token_with_loc = lexer.next_token_with_location();
454        match &token_with_loc.token {
455            Token::Eof => {
456                tokens.push(token_with_loc);
457                break;
458            }
459            Token::Comment(_) => {}
460            _ => {
461                tokens.push(token_with_loc);
462            }
463        }
464    }
465    Ok(tokens)
466}
467pub struct SourceMap {
468    pub tokens: Vec<TokenWithLocation>,
469    pub source: String,
470}
471impl SourceMap {
472    pub fn new(source: String) -> Result<Self, LexError> {
473        let tokens = tokenize_with_locations(&source)?;
474        Ok(SourceMap { tokens, source })
475    }
476    pub fn get_line(&self, line_num: usize) -> Option<&str> {
477        self.source.lines().nth(line_num - 1)
478    }
479    pub fn get_context(
480        &self,
481        location: &SourceLocation,
482        context_lines: usize,
483    ) -> String {
484        let mut result = String::new();
485        let start_line = location.line.saturating_sub(context_lines);
486        let end_line = location.line + context_lines;
487        for (i, line) in self.source.lines().enumerate() {
488            let line_num = i + 1;
489            if line_num >= start_line && line_num <= end_line {
490                if line_num == location.line {
491                    result.push_str(&format!("{:4} | {}\n", line_num, line));
492                    result
493                        .push_str(
494                            &format!(
495                                "     | {}^\n", " ".repeat(location.column
496                                .saturating_sub(1))
497                            ),
498                        );
499                } else {
500                    result.push_str(&format!("{:4} | {}\n", line_num, line));
501                }
502            }
503        }
504        result
505    }
506}
#[cfg(test)]
mod tests {
    use super::*;

    /// A keyword, a string literal and a pair of braces lex in order.
    #[test]
    fn test_basic_tokens() {
        let input = r#"project "test" { }"#;
        let tokens = tokenize(input).unwrap();
        assert_eq!(tokens[0], Token::Keyword(Keyword::Project));
        assert_eq!(tokens[1], Token::String("test".to_string()));
        assert_eq!(tokens[2], Token::LeftBrace);
        assert_eq!(tokens[3], Token::RightBrace);
    }

    /// A number directly followed by a unit becomes a single duration token.
    #[test]
    fn test_duration() {
        let input = "timeout = 30m";
        let tokens = tokenize(input).unwrap();
        assert_eq!(tokens[0], Token::Keyword(Keyword::Timeout));
        assert_eq!(tokens[1], Token::Assign);
        assert_eq!(tokens[2], Token::Duration(30, TimeUnit::Minutes));
    }

    /// `$name` and `@dotted.path` lex without their sigils.
    #[test]
    fn test_variables_and_references() {
        let input = "$API_KEY @memory.context";
        let tokens = tokenize(input).unwrap();
        assert_eq!(tokens[0], Token::Variable("API_KEY".to_string()));
        assert_eq!(tokens[1], Token::Reference("memory.context".to_string()));
    }
}