//! slash_lang/parser/lexer.rs — lexer for the slash-command language.
1use crate::parser::errors::ParseError;
2
/// A classified token produced by the lexer.
///
/// `Eq` and `Hash` are derived in addition to `PartialEq`: every payload is a
/// `String`, which supports both, so tokens can be compared for full equality
/// and used as `HashSet`/`HashMap` keys (clippy: `derive_partial_eq_without_eq`).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Token {
    /// A slash-command token, e.g. `"/Build.target(release)!"`
    Command(String),
    /// `|`
    Pipe,
    /// `|&`
    PipeErr,
    /// `&&`
    And,
    /// `||`
    Or,
    /// `>`
    Redirect,
    /// `>>`
    Append,
    /// Any other token (redirection target, bare word, standalone `!`, etc.)
    Word(String),
}
23
24/// Tokenize input, splitting on whitespace but respecting balanced parentheses.
25///
26/// Content inside `(...)` is kept as a single token even if it contains spaces.
27/// This allows `/echo(hello world)` to remain one token.
28pub fn lex(input: &str) -> Result<Vec<Token>, ParseError> {
29    let raw_tokens = split_respecting_parens(input);
30    let tokens = raw_tokens.into_iter().map(|s| classify(&s)).collect();
31    Ok(tokens)
32}
33
34fn classify(s: &str) -> Token {
35    match s {
36        "|" => Token::Pipe,
37        "|&" => Token::PipeErr,
38        "&&" => Token::And,
39        "||" => Token::Or,
40        ">" => Token::Redirect,
41        ">>" => Token::Append,
42        _ if s.starts_with('/') => Token::Command(s.to_string()),
43        _ => Token::Word(s.to_string()),
44    }
45}
46
/// Split on ASCII whitespace, but keep content inside balanced `()` together.
///
/// Notes on edge cases (deliberately lenient, mirroring shell-ish behavior):
/// - An unmatched `)` is kept literally; the nesting counter saturates at 0.
/// - An unterminated `(` leaves the counter positive, so the remainder of the
///   input is swallowed into a single token.
fn split_respecting_parens(input: &str) -> Vec<String> {
    let mut out: Vec<String> = Vec::new();
    let mut buf = String::new();
    let mut nesting: usize = 0;

    for c in input.chars() {
        if c == '(' {
            nesting += 1;
            buf.push(c);
        } else if c == ')' {
            // Saturate so a stray `)` at depth 0 is kept as a literal char.
            nesting = nesting.saturating_sub(1);
            buf.push(c);
        } else if nesting == 0 && c.is_ascii_whitespace() {
            // Token boundary; skip runs of whitespace (no empty tokens).
            if !buf.is_empty() {
                out.push(std::mem::take(&mut buf));
            }
        } else {
            buf.push(c);
        }
    }

    // Flush whatever trails the final separator.
    if !buf.is_empty() {
        out.push(buf);
    }

    out
}