1use std::fmt;
5
/// Kinds of token produced by the shell tokenizer.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenType {
    /// A run of non-operator text, carrying that text.
    Word(String),
    /// The `&&` operator.
    And,
    /// The `||` operator.
    Or,
    /// The `;` separator.
    Semicolon,
    /// The `|` pipe operator.
    Pipe,
    /// An opening parenthesis, `(`.
    LParen,
    /// A closing parenthesis, `)`.
    RParen,
    /// The `>` redirection operator.
    RedirectOut,
    /// The `>>` redirection operator.
    RedirectAppend,
    /// The `<` redirection operator.
    RedirectIn,
    /// Marks the end of the token stream.
    Eof,
}
21
22impl fmt::Display for TokenType {
23 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
24 match self {
25 TokenType::Word(s) => write!(f, "Word({})", s),
26 TokenType::And => write!(f, "&&"),
27 TokenType::Or => write!(f, "||"),
28 TokenType::Semicolon => write!(f, ";"),
29 TokenType::Pipe => write!(f, "|"),
30 TokenType::LParen => write!(f, "("),
31 TokenType::RParen => write!(f, ")"),
32 TokenType::RedirectOut => write!(f, ">"),
33 TokenType::RedirectAppend => write!(f, ">>"),
34 TokenType::RedirectIn => write!(f, "<"),
35 TokenType::Eof => write!(f, "EOF"),
36 }
37 }
38}
39
40#[derive(Debug, Clone)]
42pub struct Token {
43 pub token_type: TokenType,
44 pub value: String,
45}
46
47#[derive(Debug, Clone)]
49pub struct Redirect {
50 pub redirect_type: TokenType,
51 pub target: String,
52}
53
/// One argument of a simple command, with its quoting recorded.
#[derive(Debug, Clone)]
pub struct ParsedArg {
    /// The argument text; when `quoted` is true the enclosing pair of quote
    /// characters has been removed.
    pub value: String,
    /// Whether the whole argument was wrapped in a matching pair of quotes.
    pub quoted: bool,
    /// The quote character that wrapped the argument, if any.
    pub quote_char: Option<char>,
}
61
62#[derive(Debug, Clone)]
64pub enum ParsedCommand {
65 Simple {
67 cmd: String,
68 args: Vec<ParsedArg>,
69 redirects: Vec<Redirect>,
70 },
71 Sequence {
73 commands: Vec<ParsedCommand>,
74 operators: Vec<TokenType>,
75 },
76 Pipeline { commands: Vec<ParsedCommand> },
78 Subshell { command: Box<ParsedCommand> },
80}
81
82pub fn tokenize(command: &str) -> Vec<Token> {
84 let mut tokens = Vec::new();
85 let chars: Vec<char> = command.chars().collect();
86 let mut i = 0;
87
88 while i < chars.len() {
89 while i < chars.len() && chars[i].is_whitespace() {
91 i += 1;
92 }
93
94 if i >= chars.len() {
95 break;
96 }
97
98 if chars[i] == '&' && i + 1 < chars.len() && chars[i + 1] == '&' {
100 tokens.push(Token {
101 token_type: TokenType::And,
102 value: "&&".to_string(),
103 });
104 i += 2;
105 } else if chars[i] == '|' && i + 1 < chars.len() && chars[i + 1] == '|' {
106 tokens.push(Token {
107 token_type: TokenType::Or,
108 value: "||".to_string(),
109 });
110 i += 2;
111 } else if chars[i] == '|' {
112 tokens.push(Token {
113 token_type: TokenType::Pipe,
114 value: "|".to_string(),
115 });
116 i += 1;
117 } else if chars[i] == ';' {
118 tokens.push(Token {
119 token_type: TokenType::Semicolon,
120 value: ";".to_string(),
121 });
122 i += 1;
123 } else if chars[i] == '(' {
124 tokens.push(Token {
125 token_type: TokenType::LParen,
126 value: "(".to_string(),
127 });
128 i += 1;
129 } else if chars[i] == ')' {
130 tokens.push(Token {
131 token_type: TokenType::RParen,
132 value: ")".to_string(),
133 });
134 i += 1;
135 } else if chars[i] == '>' && i + 1 < chars.len() && chars[i + 1] == '>' {
136 tokens.push(Token {
137 token_type: TokenType::RedirectAppend,
138 value: ">>".to_string(),
139 });
140 i += 2;
141 } else if chars[i] == '>' {
142 tokens.push(Token {
143 token_type: TokenType::RedirectOut,
144 value: ">".to_string(),
145 });
146 i += 1;
147 } else if chars[i] == '<' {
148 tokens.push(Token {
149 token_type: TokenType::RedirectIn,
150 value: "<".to_string(),
151 });
152 i += 1;
153 } else {
154 let mut word = String::new();
156 let mut in_quote = false;
157 let mut quote_char = ' ';
158
159 while i < chars.len() {
160 let c = chars[i];
161
162 if !in_quote {
163 if c == '"' || c == '\'' {
164 in_quote = true;
165 quote_char = c;
166 word.push(c);
167 i += 1;
168 } else if c.is_whitespace() || "&|;()<>".contains(c) {
169 break;
170 } else if c == '\\' && i + 1 < chars.len() {
171 word.push(c);
173 i += 1;
174 if i < chars.len() {
175 word.push(chars[i]);
176 i += 1;
177 }
178 } else {
179 word.push(c);
180 i += 1;
181 }
182 } else {
183 let prev_char = if i > 0 { Some(chars[i - 1]) } else { None };
184 if c == quote_char && prev_char != Some('\\') {
185 in_quote = false;
186 word.push(c);
187 i += 1;
188 } else if c == '\\' && i + 1 < chars.len() {
189 let next_char = chars[i + 1];
190 if next_char == quote_char || next_char == '\\' {
191 word.push(c);
193 i += 1;
194 if i < chars.len() {
195 word.push(chars[i]);
196 i += 1;
197 }
198 } else {
199 word.push(c);
200 i += 1;
201 }
202 } else {
203 word.push(c);
204 i += 1;
205 }
206 }
207 }
208
209 if !word.is_empty() {
210 tokens.push(Token {
211 token_type: TokenType::Word(word.clone()),
212 value: word,
213 });
214 }
215 }
216 }
217
218 tokens.push(Token {
219 token_type: TokenType::Eof,
220 value: String::new(),
221 });
222
223 tokens
224}
225
226pub struct ShellParser {
228 tokens: Vec<Token>,
229 pos: usize,
230}
231
232impl ShellParser {
233 pub fn new(command: &str) -> Self {
235 ShellParser {
236 tokens: tokenize(command),
237 pos: 0,
238 }
239 }
240
241 fn current(&self) -> Token {
242 self.tokens.get(self.pos).cloned().unwrap_or(Token {
243 token_type: TokenType::Eof,
244 value: String::new(),
245 })
246 }
247
248 fn consume(&mut self) -> Token {
249 let token = self.current().clone();
250 self.pos += 1;
251 token
252 }
253
254 pub fn parse(&mut self) -> Option<ParsedCommand> {
256 self.parse_sequence()
257 }
258
259 fn parse_sequence(&mut self) -> Option<ParsedCommand> {
261 let mut commands = Vec::new();
262 let mut operators = Vec::new();
263
264 if let Some(cmd) = self.parse_pipeline() {
266 commands.push(cmd);
267 }
268
269 loop {
271 match &self.current().token_type {
272 TokenType::Eof | TokenType::RParen => break,
273 TokenType::And | TokenType::Or | TokenType::Semicolon => {
274 let op = self.consume().token_type;
275 operators.push(op);
276
277 if let Some(cmd) = self.parse_pipeline() {
278 commands.push(cmd);
279 }
280 }
281 _ => break,
282 }
283 }
284
285 if commands.len() == 1 && operators.is_empty() {
286 return commands.into_iter().next();
287 }
288
289 if commands.is_empty() {
290 return None;
291 }
292
293 Some(ParsedCommand::Sequence {
294 commands,
295 operators,
296 })
297 }
298
299 fn parse_pipeline(&mut self) -> Option<ParsedCommand> {
301 let mut commands = Vec::new();
302
303 if let Some(cmd) = self.parse_command() {
304 commands.push(cmd);
305 }
306
307 while matches!(self.current().token_type, TokenType::Pipe) {
308 self.consume();
309 if let Some(cmd) = self.parse_command() {
310 commands.push(cmd);
311 }
312 }
313
314 if commands.len() == 1 {
315 return commands.into_iter().next();
316 }
317
318 if commands.is_empty() {
319 return None;
320 }
321
322 Some(ParsedCommand::Pipeline { commands })
323 }
324
325 fn parse_command(&mut self) -> Option<ParsedCommand> {
327 if matches!(self.current().token_type, TokenType::LParen) {
329 self.consume(); let subshell = self.parse_sequence();
331
332 if matches!(self.current().token_type, TokenType::RParen) {
333 self.consume(); }
335
336 return subshell.map(|cmd| ParsedCommand::Subshell {
337 command: Box::new(cmd),
338 });
339 }
340
341 self.parse_simple_command()
343 }
344
345 fn parse_simple_command(&mut self) -> Option<ParsedCommand> {
347 let mut words = Vec::new();
348 let mut redirects = Vec::new();
349
350 loop {
351 match &self.current().token_type {
352 TokenType::Eof => break,
353 TokenType::Word(w) => {
354 words.push(w.clone());
355 self.consume();
356 }
357 TokenType::RedirectOut | TokenType::RedirectAppend | TokenType::RedirectIn => {
358 let redirect_type = self.consume().token_type;
359 if let TokenType::Word(target) = &self.current().token_type {
360 redirects.push(Redirect {
361 redirect_type,
362 target: target.clone(),
363 });
364 self.consume();
365 }
366 }
367 _ => break,
368 }
369 }
370
371 if words.is_empty() {
372 return None;
373 }
374
375 let cmd = words.remove(0);
376 let args: Vec<ParsedArg> = words
377 .into_iter()
378 .map(|word| {
379 if (word.starts_with('"') && word.ends_with('"'))
381 || (word.starts_with('\'') && word.ends_with('\''))
382 {
383 ParsedArg {
384 value: word[1..word.len() - 1].to_string(),
385 quoted: true,
386 quote_char: Some(word.chars().next().unwrap()),
387 }
388 } else {
389 ParsedArg {
390 value: word,
391 quoted: false,
392 quote_char: None,
393 }
394 }
395 })
396 .collect();
397
398 Some(ParsedCommand::Simple {
399 cmd,
400 args,
401 redirects,
402 })
403 }
404}
405
406pub fn parse_shell_command(command: &str) -> Option<ParsedCommand> {
408 let mut parser = ShellParser::new(command);
409 parser.parse()
410}
411
/// Reports whether `command` contains any of a fixed list of shell syntax
/// markers (backticks, `$(`, `${`, `~`, glob characters, and several
/// redirection forms).
///
/// This is a plain substring check: a marker is detected anywhere in the
/// text, including inside quotes.
pub fn needs_real_shell(command: &str) -> bool {
    const UNSUPPORTED: &[&str] = &[
        "`", "$(", "${", "~", "*", "?", "[", "2>", "&>", ">&", "<<", "<<<",
    ];

    UNSUPPORTED.iter().any(|marker| command.contains(*marker))
}
438
#[cfg(test)]
mod tests {
    use super::*;

    /// Tokenizes `input` and keeps only the token kinds.
    fn token_kinds(input: &str) -> Vec<TokenType> {
        tokenize(input)
            .into_iter()
            .map(|token| token.token_type)
            .collect()
    }

    #[test]
    fn test_tokenize_simple_command() {
        let kinds = token_kinds("echo hello world");
        // Three words plus the trailing Eof.
        assert_eq!(kinds.len(), 4);
        assert!(matches!(kinds[0], TokenType::Word(_)));
        assert!(matches!(kinds[3], TokenType::Eof));
    }

    #[test]
    fn test_tokenize_with_operators() {
        let kinds = token_kinds("cmd1 && cmd2 || cmd3");
        // Three words, two operators and the trailing Eof.
        assert_eq!(kinds.len(), 6);
        assert!(matches!(kinds[1], TokenType::And));
        assert!(matches!(kinds[3], TokenType::Or));
    }

    #[test]
    fn test_tokenize_with_pipe() {
        let kinds = token_kinds("ls | grep foo");
        // Three words, one pipe and the trailing Eof.
        assert_eq!(kinds.len(), 5);
        assert!(matches!(kinds[1], TokenType::Pipe));
    }

    #[test]
    fn test_tokenize_with_quotes() {
        let kinds = token_kinds("echo 'hello world'");
        // The quoted text stays a single word, quotes included.
        assert_eq!(kinds.len(), 3);
        match &kinds[1] {
            TokenType::Word(text) => assert_eq!(text, "'hello world'"),
            _ => panic!("Expected Word token"),
        }
    }

    #[test]
    fn test_parse_simple_command() {
        let parsed = parse_shell_command("echo hello world").unwrap();
        if let ParsedCommand::Simple { cmd, args, .. } = parsed {
            assert_eq!(cmd, "echo");
            assert_eq!(args.len(), 2);
            assert_eq!(args[0].value, "hello");
            assert_eq!(args[1].value, "world");
        } else {
            panic!("Expected Simple command");
        }
    }

    #[test]
    fn test_parse_pipeline() {
        let parsed = parse_shell_command("ls | grep foo | wc -l").unwrap();
        if let ParsedCommand::Pipeline { commands } = parsed {
            assert_eq!(commands.len(), 3);
        } else {
            panic!("Expected Pipeline");
        }
    }

    #[test]
    fn test_parse_sequence() {
        let parsed = parse_shell_command("cmd1 && cmd2 || cmd3").unwrap();
        if let ParsedCommand::Sequence {
            commands,
            operators,
        } = parsed
        {
            assert_eq!(commands.len(), 3);
            assert_eq!(operators.len(), 2);
            assert!(matches!(operators[0], TokenType::And));
            assert!(matches!(operators[1], TokenType::Or));
        } else {
            panic!("Expected Sequence");
        }
    }

    #[test]
    fn test_needs_real_shell() {
        for flagged in &["echo $(date)", "ls *.txt", "echo ${HOME}"] {
            assert!(needs_real_shell(flagged));
        }
        for plain in &["echo hello", "ls | grep foo"] {
            assert!(!needs_real_shell(plain));
        }
    }

    #[test]
    fn test_parse_with_redirect() {
        let parsed = parse_shell_command("echo hello > output.txt").unwrap();
        if let ParsedCommand::Simple {
            cmd,
            args,
            redirects,
        } = parsed
        {
            assert_eq!(cmd, "echo");
            assert_eq!(args.len(), 1);
            assert_eq!(redirects.len(), 1);
            assert!(matches!(redirects[0].redirect_type, TokenType::RedirectOut));
            assert_eq!(redirects[0].target, "output.txt");
        } else {
            panic!("Expected Simple command with redirect");
        }
    }

    #[test]
    fn test_parse_subshell() {
        let parsed = parse_shell_command("(echo hello) && echo world").unwrap();
        if let ParsedCommand::Sequence { commands, .. } = parsed {
            assert_eq!(commands.len(), 2);
            assert!(matches!(commands[0], ParsedCommand::Subshell { .. }));
        } else {
            panic!("Expected Sequence with Subshell");
        }
    }
}