Skip to main content

command_stream/
shell_parser.rs

//! Enhanced shell command parser that handles the `&&`, `||`, `;`, `|`, and `()` operators,
//! plus the `>`, `>>`, and `<` redirections.
//! This allows virtual commands to work properly with shell operators.
3
4use std::fmt;
5
/// Kinds of token the tokenizer can produce.
///
/// `Eq` is derived alongside `PartialEq` because equality on every variant
/// (including the `String` payload of `Word`) is total.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenType {
    /// A command name, argument, or redirect target. Holds the raw source
    /// text; quote characters and backslashes are still part of it.
    Word(String),
    /// `&&`
    And,
    /// `||`
    Or,
    /// `;`
    Semicolon,
    /// `|`
    Pipe,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `>`
    RedirectOut,
    /// `>>`
    RedirectAppend,
    /// `<`
    RedirectIn,
    /// End-of-input marker.
    Eof,
}
21
22impl fmt::Display for TokenType {
23    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
24        match self {
25            TokenType::Word(s) => write!(f, "Word({})", s),
26            TokenType::And => write!(f, "&&"),
27            TokenType::Or => write!(f, "||"),
28            TokenType::Semicolon => write!(f, ";"),
29            TokenType::Pipe => write!(f, "|"),
30            TokenType::LParen => write!(f, "("),
31            TokenType::RParen => write!(f, ")"),
32            TokenType::RedirectOut => write!(f, ">"),
33            TokenType::RedirectAppend => write!(f, ">>"),
34            TokenType::RedirectIn => write!(f, "<"),
35            TokenType::Eof => write!(f, "EOF"),
36        }
37    }
38}
39
40/// A token with its type and original value
41#[derive(Debug, Clone)]
42pub struct Token {
43    pub token_type: TokenType,
44    pub value: String,
45}
46
47/// Redirect information
48#[derive(Debug, Clone)]
49pub struct Redirect {
50    pub redirect_type: TokenType,
51    pub target: String,
52}
53
/// Parsed argument with quote information.
///
/// `PartialEq`/`Eq` are derived so arguments can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedArg {
    /// Argument text; when `quoted` is true the surrounding quote pair has been removed.
    pub value: String,
    /// Whether the whole argument was wrapped in one pair of matching quotes.
    pub quoted: bool,
    /// The wrapping quote character (`"` or `'`), or `None` when unquoted.
    pub quote_char: Option<char>,
}
61
62/// Types of parsed commands
63#[derive(Debug, Clone)]
64pub enum ParsedCommand {
65    /// A simple command with command name, arguments, and optional redirects
66    Simple {
67        cmd: String,
68        args: Vec<ParsedArg>,
69        redirects: Vec<Redirect>,
70    },
71    /// A sequence of commands connected by &&, ||, or ;
72    Sequence {
73        commands: Vec<ParsedCommand>,
74        operators: Vec<TokenType>,
75    },
76    /// A pipeline of commands connected by |
77    Pipeline { commands: Vec<ParsedCommand> },
78    /// A subshell (commands in parentheses)
79    Subshell { command: Box<ParsedCommand> },
80}
81
82/// Tokenize a shell command string
83pub fn tokenize(command: &str) -> Vec<Token> {
84    let mut tokens = Vec::new();
85    let chars: Vec<char> = command.chars().collect();
86    let mut i = 0;
87
88    while i < chars.len() {
89        // Skip whitespace
90        while i < chars.len() && chars[i].is_whitespace() {
91            i += 1;
92        }
93
94        if i >= chars.len() {
95            break;
96        }
97
98        // Check for operators
99        if chars[i] == '&' && i + 1 < chars.len() && chars[i + 1] == '&' {
100            tokens.push(Token {
101                token_type: TokenType::And,
102                value: "&&".to_string(),
103            });
104            i += 2;
105        } else if chars[i] == '|' && i + 1 < chars.len() && chars[i + 1] == '|' {
106            tokens.push(Token {
107                token_type: TokenType::Or,
108                value: "||".to_string(),
109            });
110            i += 2;
111        } else if chars[i] == '|' {
112            tokens.push(Token {
113                token_type: TokenType::Pipe,
114                value: "|".to_string(),
115            });
116            i += 1;
117        } else if chars[i] == ';' {
118            tokens.push(Token {
119                token_type: TokenType::Semicolon,
120                value: ";".to_string(),
121            });
122            i += 1;
123        } else if chars[i] == '(' {
124            tokens.push(Token {
125                token_type: TokenType::LParen,
126                value: "(".to_string(),
127            });
128            i += 1;
129        } else if chars[i] == ')' {
130            tokens.push(Token {
131                token_type: TokenType::RParen,
132                value: ")".to_string(),
133            });
134            i += 1;
135        } else if chars[i] == '>' && i + 1 < chars.len() && chars[i + 1] == '>' {
136            tokens.push(Token {
137                token_type: TokenType::RedirectAppend,
138                value: ">>".to_string(),
139            });
140            i += 2;
141        } else if chars[i] == '>' {
142            tokens.push(Token {
143                token_type: TokenType::RedirectOut,
144                value: ">".to_string(),
145            });
146            i += 1;
147        } else if chars[i] == '<' {
148            tokens.push(Token {
149                token_type: TokenType::RedirectIn,
150                value: "<".to_string(),
151            });
152            i += 1;
153        } else {
154            // Parse word (respecting quotes)
155            let mut word = String::new();
156            let mut in_quote = false;
157            let mut quote_char = ' ';
158
159            while i < chars.len() {
160                let c = chars[i];
161
162                if !in_quote {
163                    if c == '"' || c == '\'' {
164                        in_quote = true;
165                        quote_char = c;
166                        word.push(c);
167                        i += 1;
168                    } else if c.is_whitespace() || "&|;()<>".contains(c) {
169                        break;
170                    } else if c == '\\' && i + 1 < chars.len() {
171                        // Handle escape sequences
172                        word.push(c);
173                        i += 1;
174                        if i < chars.len() {
175                            word.push(chars[i]);
176                            i += 1;
177                        }
178                    } else {
179                        word.push(c);
180                        i += 1;
181                    }
182                } else {
183                    let prev_char = if i > 0 { Some(chars[i - 1]) } else { None };
184                    if c == quote_char && prev_char != Some('\\') {
185                        in_quote = false;
186                        word.push(c);
187                        i += 1;
188                    } else if c == '\\' && i + 1 < chars.len() {
189                        let next_char = chars[i + 1];
190                        if next_char == quote_char || next_char == '\\' {
191                            // Handle escaped quotes and backslashes inside quotes
192                            word.push(c);
193                            i += 1;
194                            if i < chars.len() {
195                                word.push(chars[i]);
196                                i += 1;
197                            }
198                        } else {
199                            word.push(c);
200                            i += 1;
201                        }
202                    } else {
203                        word.push(c);
204                        i += 1;
205                    }
206                }
207            }
208
209            if !word.is_empty() {
210                tokens.push(Token {
211                    token_type: TokenType::Word(word.clone()),
212                    value: word,
213                });
214            }
215        }
216    }
217
218    tokens.push(Token {
219        token_type: TokenType::Eof,
220        value: String::new(),
221    });
222
223    tokens
224}
225
226/// Shell command parser
227pub struct ShellParser {
228    tokens: Vec<Token>,
229    pos: usize,
230}
231
232impl ShellParser {
233    /// Create a new parser for the given command
234    pub fn new(command: &str) -> Self {
235        ShellParser {
236            tokens: tokenize(command),
237            pos: 0,
238        }
239    }
240
241    fn current(&self) -> Token {
242        self.tokens.get(self.pos).cloned().unwrap_or(Token {
243            token_type: TokenType::Eof,
244            value: String::new(),
245        })
246    }
247
248    fn consume(&mut self) -> Token {
249        let token = self.current().clone();
250        self.pos += 1;
251        token
252    }
253
254    /// Parse the main command sequence
255    pub fn parse(&mut self) -> Option<ParsedCommand> {
256        self.parse_sequence()
257    }
258
259    /// Parse a sequence of commands connected by &&, ||, ;
260    fn parse_sequence(&mut self) -> Option<ParsedCommand> {
261        let mut commands = Vec::new();
262        let mut operators = Vec::new();
263
264        // Parse first command
265        if let Some(cmd) = self.parse_pipeline() {
266            commands.push(cmd);
267        }
268
269        // Parse additional commands with operators
270        loop {
271            match &self.current().token_type {
272                TokenType::Eof | TokenType::RParen => break,
273                TokenType::And | TokenType::Or | TokenType::Semicolon => {
274                    let op = self.consume().token_type;
275                    operators.push(op);
276
277                    if let Some(cmd) = self.parse_pipeline() {
278                        commands.push(cmd);
279                    }
280                }
281                _ => break,
282            }
283        }
284
285        if commands.len() == 1 && operators.is_empty() {
286            return commands.into_iter().next();
287        }
288
289        if commands.is_empty() {
290            return None;
291        }
292
293        Some(ParsedCommand::Sequence {
294            commands,
295            operators,
296        })
297    }
298
299    /// Parse a pipeline (commands connected by |)
300    fn parse_pipeline(&mut self) -> Option<ParsedCommand> {
301        let mut commands = Vec::new();
302
303        if let Some(cmd) = self.parse_command() {
304            commands.push(cmd);
305        }
306
307        while matches!(self.current().token_type, TokenType::Pipe) {
308            self.consume();
309            if let Some(cmd) = self.parse_command() {
310                commands.push(cmd);
311            }
312        }
313
314        if commands.len() == 1 {
315            return commands.into_iter().next();
316        }
317
318        if commands.is_empty() {
319            return None;
320        }
321
322        Some(ParsedCommand::Pipeline { commands })
323    }
324
325    /// Parse a single command or subshell
326    fn parse_command(&mut self) -> Option<ParsedCommand> {
327        // Check for subshell
328        if matches!(self.current().token_type, TokenType::LParen) {
329            self.consume(); // consume (
330            let subshell = self.parse_sequence();
331
332            if matches!(self.current().token_type, TokenType::RParen) {
333                self.consume(); // consume )
334            }
335
336            return subshell.map(|cmd| ParsedCommand::Subshell {
337                command: Box::new(cmd),
338            });
339        }
340
341        // Parse simple command
342        self.parse_simple_command()
343    }
344
345    /// Parse a simple command (command + args + redirections)
346    fn parse_simple_command(&mut self) -> Option<ParsedCommand> {
347        let mut words = Vec::new();
348        let mut redirects = Vec::new();
349
350        loop {
351            match &self.current().token_type {
352                TokenType::Eof => break,
353                TokenType::Word(w) => {
354                    words.push(w.clone());
355                    self.consume();
356                }
357                TokenType::RedirectOut | TokenType::RedirectAppend | TokenType::RedirectIn => {
358                    let redirect_type = self.consume().token_type;
359                    if let TokenType::Word(target) = &self.current().token_type {
360                        redirects.push(Redirect {
361                            redirect_type,
362                            target: target.clone(),
363                        });
364                        self.consume();
365                    }
366                }
367                _ => break,
368            }
369        }
370
371        if words.is_empty() {
372            return None;
373        }
374
375        let cmd = words.remove(0);
376        let args: Vec<ParsedArg> = words
377            .into_iter()
378            .map(|word| {
379                // Remove quotes if present
380                if (word.starts_with('"') && word.ends_with('"'))
381                    || (word.starts_with('\'') && word.ends_with('\''))
382                {
383                    ParsedArg {
384                        value: word[1..word.len() - 1].to_string(),
385                        quoted: true,
386                        quote_char: Some(word.chars().next().unwrap()),
387                    }
388                } else {
389                    ParsedArg {
390                        value: word,
391                        quoted: false,
392                        quote_char: None,
393                    }
394                }
395            })
396            .collect();
397
398        Some(ParsedCommand::Simple {
399            cmd,
400            args,
401            redirects,
402        })
403    }
404}
405
406/// Parse a shell command with support for &&, ||, ;, and ()
407pub fn parse_shell_command(command: &str) -> Option<ParsedCommand> {
408    let mut parser = ShellParser::new(command);
409    parser.parse()
410}
411
/// Report whether `command` uses shell syntax this parser does not handle.
///
/// This is a plain substring scan over the whole command text: a marker is
/// reported wherever it appears, including inside quoted text.
pub fn needs_real_shell(command: &str) -> bool {
    // Markers for features we don't handle yet.
    const UNSUPPORTED: [&str; 12] = [
        "`",   // Command substitution
        "$(",  // Command substitution
        "${",  // Variable expansion
        "~",   // Home expansion (at start of word)
        "*",   // Glob patterns
        "?",   // Glob patterns
        "[",   // Glob patterns
        "2>",  // stderr redirection
        "&>",  // Combined redirection
        ">&",  // File descriptor duplication
        "<<",  // Here documents
        "<<<", // Here strings
    ];

    UNSUPPORTED.iter().any(|&marker| command.contains(marker))
}
438
#[cfg(test)]
mod tests {
    use super::*;

    // --- tokenizer ---

    /// Three plain words produce three `Word` tokens followed by `Eof`.
    #[test]
    fn test_tokenize_simple_command() {
        let tokens = tokenize("echo hello world");
        assert_eq!(tokens.len(), 4); // 3 words + EOF
        assert!(matches!(tokens[0].token_type, TokenType::Word(_)));
        assert_eq!(tokens[3].token_type, TokenType::Eof);
    }

    /// `&&` and `||` are recognised as separate operator tokens.
    #[test]
    fn test_tokenize_with_operators() {
        let tokens = tokenize("cmd1 && cmd2 || cmd3");
        assert_eq!(tokens.len(), 6); // 3 words + 2 operators + EOF
        assert_eq!(tokens[1].token_type, TokenType::And);
        assert_eq!(tokens[3].token_type, TokenType::Or);
    }

    /// A single `|` is a pipe token.
    #[test]
    fn test_tokenize_with_pipe() {
        let tokens = tokenize("ls | grep foo");
        assert_eq!(tokens.len(), 5); // 3 words + 1 pipe + EOF
        assert_eq!(tokens[1].token_type, TokenType::Pipe);
    }

    /// Quoted text stays in one word and keeps its quote characters.
    #[test]
    fn test_tokenize_with_quotes() {
        let tokens = tokenize("echo 'hello world'");
        assert_eq!(tokens.len(), 3); // echo + quoted string + EOF
        match &tokens[1].token_type {
            TokenType::Word(w) => assert_eq!(w, "'hello world'"),
            _ => panic!("Expected Word token"),
        }
    }

    // --- parser ---

    /// A command with two arguments parses to `Simple`.
    #[test]
    fn test_parse_simple_command() {
        let parsed = parse_shell_command("echo hello world").unwrap();
        if let ParsedCommand::Simple { cmd, args, .. } = parsed {
            assert_eq!(cmd, "echo");
            let values: Vec<&str> = args.iter().map(|arg| arg.value.as_str()).collect();
            assert_eq!(values, ["hello", "world"]);
        } else {
            panic!("Expected Simple command");
        }
    }

    /// Three commands joined by `|` parse to one `Pipeline`.
    #[test]
    fn test_parse_pipeline() {
        let parsed = parse_shell_command("ls | grep foo | wc -l").unwrap();
        if let ParsedCommand::Pipeline { commands } = parsed {
            assert_eq!(commands.len(), 3);
        } else {
            panic!("Expected Pipeline");
        }
    }

    /// `&&` / `||` produce a `Sequence` with the operators in source order.
    #[test]
    fn test_parse_sequence() {
        let parsed = parse_shell_command("cmd1 && cmd2 || cmd3").unwrap();
        if let ParsedCommand::Sequence {
            commands,
            operators,
        } = parsed
        {
            assert_eq!(commands.len(), 3);
            assert_eq!(operators.len(), 2);
            assert_eq!(operators[0], TokenType::And);
            assert_eq!(operators[1], TokenType::Or);
        } else {
            panic!("Expected Sequence");
        }
    }

    /// Unsupported syntax is flagged; plain commands and pipes are not.
    #[test]
    fn test_needs_real_shell() {
        for command in ["echo $(date)", "ls *.txt", "echo ${HOME}"].iter() {
            assert!(needs_real_shell(command));
        }
        for command in ["echo hello", "ls | grep foo"].iter() {
            assert!(!needs_real_shell(command));
        }
    }

    /// `> file` is attached to the command as a redirect, not as an argument.
    #[test]
    fn test_parse_with_redirect() {
        let parsed = parse_shell_command("echo hello > output.txt").unwrap();
        if let ParsedCommand::Simple {
            cmd,
            args,
            redirects,
        } = parsed
        {
            assert_eq!(cmd, "echo");
            assert_eq!(args.len(), 1);
            assert_eq!(redirects.len(), 1);
            assert_eq!(redirects[0].redirect_type, TokenType::RedirectOut);
            assert_eq!(redirects[0].target, "output.txt");
        } else {
            panic!("Expected Simple command with redirect");
        }
    }

    /// A parenthesised command becomes a `Subshell` inside the sequence.
    #[test]
    fn test_parse_subshell() {
        let parsed = parse_shell_command("(echo hello) && echo world").unwrap();
        if let ParsedCommand::Sequence { commands, .. } = parsed {
            assert_eq!(commands.len(), 2);
            assert!(matches!(commands[0], ParsedCommand::Subshell { .. }));
        } else {
            panic!("Expected Sequence with Subshell");
        }
    }
}