Skip to main content

xtask_todo_lib/devshell/
parser.rs

1use std::fmt;
2
/// A single I/O redirection attached to a command.
///
/// `fd` identifies the redirected stream: 0 = stdin, 1 = stdout, 2 = stderr.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Redirect {
    /// File descriptor being redirected (0 = stdin, 1 = stdout, 2 = stderr).
    pub fd: u8,
    /// Path token that followed the redirect operator, stored verbatim.
    pub path: String,
}

/// One command of a pipeline: its argument vector plus any redirects.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SimpleCommand {
    /// Non-redirect tokens in the order they appeared; may be empty.
    pub argv: Vec<String>,
    /// Redirects in the order they appeared on the command line.
    pub redirects: Vec<Redirect>,
}

/// A sequence of commands that were separated by `|` on the input line.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Pipeline {
    /// Commands in left-to-right order.
    pub commands: Vec<SimpleCommand>,
}
20
/// Error returned when a command line cannot be parsed.
///
/// The wrapped string is the human-readable message.
#[derive(Debug)]
pub struct ParseError(pub String);

impl fmt::Display for ParseError {
    /// Writes the stored message verbatim.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(message) = self;
        f.write_str(message)
    }
}

impl std::error::Error for ParseError {}
31
/// Break a line into tokens.
///
/// Whitespace separates words; `>`, `2>`, `<` and `|` always form tokens of
/// their own, even when glued to neighbouring text. A `2` immediately
/// followed by `>` is recognised as the `2>` operator wherever it occurs, so
/// `a2>` yields `a` and `2>`. Quoting and escaping are not handled.
fn tokenize(line: &str) -> Vec<String> {
    // Moves the word collected so far (if any) into the token list.
    fn flush(word: &mut String, out: &mut Vec<String>) {
        if !word.is_empty() {
            out.push(std::mem::take(word));
        }
    }

    let mut out = Vec::new();
    let mut word = String::new();
    let mut rest = line.chars().peekable();

    while let Some(ch) = rest.next() {
        match ch {
            ws if ws.is_whitespace() => flush(&mut word, &mut out),
            // The guard consumes the `>` only when it directly follows the `2`.
            '2' if rest.next_if_eq(&'>').is_some() => {
                flush(&mut word, &mut out);
                out.push("2>".to_string());
            }
            '>' | '<' | '|' => {
                flush(&mut word, &mut out);
                out.push(ch.to_string());
            }
            other => word.push(other),
        }
    }
    flush(&mut word, &mut out);
    out
}
67
/// Group tokens into per-command lists, using `|` tokens as separators.
///
/// The `|` tokens themselves are dropped, and so are empty groups (leading,
/// trailing or doubled pipes). The result is never empty: when no group has
/// any tokens, a single empty group is returned.
fn split_by_pipe(tokens: Vec<String>) -> Vec<Vec<String>> {
    let mut groups: Vec<Vec<String>> = Vec::new();
    let mut remaining = tokens.into_iter().peekable();

    while remaining.peek().is_some() {
        // `take_while` also swallows the terminating `|`, which is what we want.
        let group: Vec<String> = remaining
            .by_ref()
            .take_while(|token| token.as_str() != "|")
            .collect();
        if !group.is_empty() {
            groups.push(group);
        }
    }

    if groups.is_empty() {
        groups.push(Vec::new());
    }
    groups
}
89
90/// Parse one command's tokens into `SimpleCommand` (argv + redirects).
91fn parse_simple_command(tokens: &[String]) -> Result<SimpleCommand, ParseError> {
92    let mut argv = Vec::new();
93    let mut redirects = Vec::new();
94    let mut i = 0;
95    while i < tokens.len() {
96        let t = &tokens[i];
97        if t == ">" {
98            i += 1;
99            let path = tokens
100                .get(i)
101                .ok_or_else(|| ParseError("redirect '>' missing path".to_string()))?;
102            redirects.push(Redirect {
103                fd: 1,
104                path: path.clone(),
105            });
106        } else if t == "2>" {
107            i += 1;
108            let path = tokens
109                .get(i)
110                .ok_or_else(|| ParseError("redirect '2>' missing path".to_string()))?;
111            redirects.push(Redirect {
112                fd: 2,
113                path: path.clone(),
114            });
115        } else if t == "<" {
116            i += 1;
117            let path = tokens
118                .get(i)
119                .ok_or_else(|| ParseError("redirect '<' missing path".to_string()))?;
120            redirects.push(Redirect {
121                fd: 0,
122                path: path.clone(),
123            });
124        } else {
125            argv.push(t.clone());
126        }
127        i += 1;
128    }
129    Ok(SimpleCommand { argv, redirects })
130}
131
132/// Parse a single line into a pipeline of commands (split by `|`) with redirects.
133///
134/// # Errors
135/// Returns `ParseError` if a redirect is missing its path.
136pub fn parse_line(line: &str) -> Result<Pipeline, ParseError> {
137    let tokens = tokenize(line.trim());
138    let command_tokens_list = split_by_pipe(tokens);
139    let mut commands = Vec::new();
140    for ct in command_tokens_list {
141        commands.push(parse_simple_command(&ct)?);
142    }
143    Ok(Pipeline { commands })
144}
145
#[cfg(test)]
mod tests {
    use super::*;

    /// A bare word parses to one command with that word as argv and no redirects.
    #[test]
    fn parse_simple_pwd() {
        let p = parse_line("pwd").unwrap();
        assert_eq!(p.commands.len(), 1);
        assert_eq!(p.commands[0].argv, vec!["pwd"]);
        assert!(p.commands[0].redirects.is_empty());
    }

    /// `exit` and `quit` are ordinary one-word commands as far as the parser is concerned.
    #[test]
    fn parse_exit_quit() {
        let p = parse_line("exit").unwrap();
        assert_eq!(p.commands.len(), 1);
        assert_eq!(p.commands[0].argv, vec!["exit"]);

        let p = parse_line("quit").unwrap();
        assert_eq!(p.commands.len(), 1);
        assert_eq!(p.commands[0].argv, vec!["quit"]);
    }

    /// An empty or whitespace-only line yields a single empty command.
    #[test]
    fn parse_empty_line() {
        for line in &["", "   \t "] {
            let p = parse_line(line).unwrap();
            assert_eq!(p.commands.len(), 1);
            assert!(p.commands[0].argv.is_empty());
            assert!(p.commands[0].redirects.is_empty());
        }
    }

    #[test]
    fn parse_redirect_stdout() {
        let p = parse_line("echo hello > out").unwrap();
        assert_eq!(p.commands[0].argv, vec!["echo", "hello"]);
        assert_eq!(p.commands[0].redirects.len(), 1);
        assert_eq!(p.commands[0].redirects[0].fd, 1);
        assert_eq!(p.commands[0].redirects[0].path, "out");
    }

    #[test]
    fn parse_redirect_stderr() {
        let p = parse_line("cmd 2> err").unwrap();
        assert_eq!(p.commands[0].argv, vec!["cmd"]);
        assert_eq!(p.commands[0].redirects.len(), 1);
        assert_eq!(p.commands[0].redirects[0].fd, 2);
        assert_eq!(p.commands[0].redirects[0].path, "err");
    }

    /// Tokenize "2>" with preceding token (no space) to cover tokenize branch.
    #[test]
    fn parse_redirect_2_after_token() {
        let p = parse_line("a2> out").unwrap();
        assert_eq!(p.commands[0].argv, vec!["a"]);
        assert_eq!(p.commands[0].redirects[0].fd, 2);
        assert_eq!(p.commands[0].redirects[0].path, "out");
    }

    /// Tokenize ">" with preceding token (no space) to cover tokenize single-char branch.
    #[test]
    fn parse_redirect_gt_after_token() {
        let p = parse_line("a> out").unwrap();
        assert_eq!(p.commands[0].argv, vec!["a"]);
        assert_eq!(p.commands[0].redirects[0].fd, 1);
        assert_eq!(p.commands[0].redirects[0].path, "out");
    }

    #[test]
    fn parse_redirect_stdin() {
        let p = parse_line("cat < in").unwrap();
        assert_eq!(p.commands[0].argv, vec!["cat"]);
        assert_eq!(p.commands[0].redirects.len(), 1);
        assert_eq!(p.commands[0].redirects[0].fd, 0);
        assert_eq!(p.commands[0].redirects[0].path, "in");
    }

    /// Each operator reports its own "missing path" message when it ends the line.
    #[test]
    fn parse_redirect_missing_path_err() {
        assert_eq!(
            parse_line("echo >").unwrap_err().0,
            "redirect '>' missing path"
        );
        assert_eq!(
            parse_line("echo 2>").unwrap_err().0,
            "redirect '2>' missing path"
        );
        assert_eq!(
            parse_line("cat <").unwrap_err().0,
            "redirect '<' missing path"
        );
    }

    /// `Display` output is exactly the stored message.
    #[test]
    fn parse_error_display() {
        let e = parse_line("echo >").unwrap_err();
        assert_eq!(e.to_string(), "redirect '>' missing path");
        assert_eq!(e.to_string(), e.0);
    }

    #[test]
    fn parse_pipeline() {
        let p = parse_line("a | b").unwrap();
        assert_eq!(p.commands.len(), 2);
        assert_eq!(p.commands[0].argv, vec!["a"]);
        assert_eq!(p.commands[1].argv, vec!["b"]);
    }

    /// Pipes need no surrounding whitespace, and each stage keeps its own redirects.
    #[test]
    fn parse_pipeline_with_redirects() {
        let p = parse_line("cat < in|sort > out").unwrap();
        assert_eq!(p.commands.len(), 2);
        assert_eq!(p.commands[0].argv, vec!["cat"]);
        assert_eq!(p.commands[0].redirects[0].fd, 0);
        assert_eq!(p.commands[0].redirects[0].path, "in");
        assert_eq!(p.commands[1].argv, vec!["sort"]);
        assert_eq!(p.commands[1].redirects[0].fd, 1);
        assert_eq!(p.commands[1].redirects[0].path, "out");
    }

    #[test]
    fn parse_pipe_only_empty_command() {
        let p = parse_line("|").unwrap();
        assert_eq!(p.commands.len(), 1);
        assert!(p.commands[0].argv.is_empty());
    }

    /// A trailing pipe does not produce an extra empty command.
    #[test]
    fn parse_trailing_pipe_dropped() {
        let p = parse_line("a |").unwrap();
        assert_eq!(p.commands.len(), 1);
        assert_eq!(p.commands[0].argv, vec!["a"]);
    }

    /// `>` is its own token even with no whitespace on either side.
    #[test]
    fn parse_stdout_redirect_token() {
        let p = parse_line("echo x>out").unwrap();
        assert_eq!(p.commands[0].argv, vec!["echo", "x"]);
        assert_eq!(p.commands[0].redirects.len(), 1);
        assert_eq!(p.commands[0].redirects[0].fd, 1);
        assert_eq!(p.commands[0].redirects[0].path, "out");
    }
}