agentzero_tools/
/// Quoting context in which a character of a shell command line was read.
///
/// Attached to every character produced by `tokenize_annotated` so callers can
/// tell a shell metacharacter that appeared bare from one that was protected by
/// quotes.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum QuoteContext {
    /// The character appeared outside of any quotes.
    Unquoted,
    /// The character appeared between a pair of single quotes (`'...'`).
    SingleQuoted,
    /// The character appeared between a pair of double quotes (`"..."`).
    DoubleQuoted,
}
14
15#[derive(Debug, Clone, Copy)]
17pub struct AnnotatedChar {
18 pub ch: char,
19 pub context: QuoteContext,
20}
21
/// One word of a shell command line as produced by `tokenize`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ShellToken {
    /// Token text with the quoting characters removed and escapes resolved.
    pub text: String,
    /// `true` when a single or double quote was opened anywhere inside the token.
    pub was_quoted: bool,
}
28
/// Scanner state used by the tokenizers in this file while walking the input.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum State {
    /// Not inside any quotes.
    Unquoted,
    /// Inside `'...'`; ends only at the next single quote.
    SingleQuoted,
    /// Inside `"..."`; ends at the next unescaped double quote.
    DoubleQuoted,
}
35
36pub fn tokenize(input: &str) -> anyhow::Result<Vec<ShellToken>> {
40 let mut tokens = Vec::new();
41 let mut current = String::new();
42 let mut was_quoted = false;
43 let mut state = State::Unquoted;
44 let mut chars = input.chars().peekable();
45
46 while let Some(ch) = chars.next() {
47 match state {
48 State::Unquoted => match ch {
49 '\'' => {
50 state = State::SingleQuoted;
51 was_quoted = true;
52 }
53 '"' => {
54 state = State::DoubleQuoted;
55 was_quoted = true;
56 }
57 '\\' => {
58 if let Some(next) = chars.next() {
59 current.push(next);
60 }
61 }
62 c if c.is_ascii_whitespace() => {
63 if !current.is_empty() || was_quoted {
64 tokens.push(ShellToken {
65 text: std::mem::take(&mut current),
66 was_quoted,
67 });
68 was_quoted = false;
69 }
70 }
71 c => current.push(c),
72 },
73 State::SingleQuoted => match ch {
74 '\'' => state = State::Unquoted,
75 c => current.push(c),
76 },
77 State::DoubleQuoted => match ch {
78 '"' => state = State::Unquoted,
79 '\\' => {
80 if let Some(&next) = chars.peek() {
81 if matches!(next, '$' | '`' | '"' | '\\' | '\n') {
82 chars.next();
83 current.push(next);
84 } else {
85 current.push('\\');
86 }
87 } else {
88 current.push('\\');
89 }
90 }
91 c => current.push(c),
92 },
93 }
94 }
95
96 if state != State::Unquoted {
97 anyhow::bail!("unbalanced quotes in shell command");
98 }
99
100 if !current.is_empty() || was_quoted {
101 tokens.push(ShellToken {
102 text: current,
103 was_quoted,
104 });
105 }
106
107 Ok(tokens)
108}
109
110pub fn tokenize_annotated(input: &str) -> anyhow::Result<Vec<Vec<AnnotatedChar>>> {
117 let mut tokens: Vec<Vec<AnnotatedChar>> = Vec::new();
118 let mut current: Vec<AnnotatedChar> = Vec::new();
119 let mut in_token = false;
120 let mut state = State::Unquoted;
121 let mut chars = input.chars().peekable();
122
123 while let Some(ch) = chars.next() {
124 match state {
125 State::Unquoted => match ch {
126 '\'' => {
127 state = State::SingleQuoted;
128 in_token = true;
129 }
130 '"' => {
131 state = State::DoubleQuoted;
132 in_token = true;
133 }
134 '\\' => {
135 if let Some(next) = chars.next() {
136 current.push(AnnotatedChar {
137 ch: next,
138 context: QuoteContext::Unquoted,
139 });
140 in_token = true;
141 }
142 }
143 c if c.is_ascii_whitespace() => {
144 if !current.is_empty() || in_token {
145 tokens.push(std::mem::take(&mut current));
146 in_token = false;
147 }
148 }
149 c => {
150 current.push(AnnotatedChar {
151 ch: c,
152 context: QuoteContext::Unquoted,
153 });
154 in_token = true;
155 }
156 },
157 State::SingleQuoted => match ch {
158 '\'' => state = State::Unquoted,
159 c => {
160 current.push(AnnotatedChar {
161 ch: c,
162 context: QuoteContext::SingleQuoted,
163 });
164 }
165 },
166 State::DoubleQuoted => match ch {
167 '"' => state = State::Unquoted,
168 '\\' => {
169 if let Some(&next) = chars.peek() {
170 if matches!(next, '$' | '`' | '"' | '\\' | '\n') {
171 chars.next();
172 current.push(AnnotatedChar {
173 ch: next,
174 context: QuoteContext::DoubleQuoted,
175 });
176 } else {
177 current.push(AnnotatedChar {
178 ch: '\\',
179 context: QuoteContext::DoubleQuoted,
180 });
181 }
182 } else {
183 current.push(AnnotatedChar {
184 ch: '\\',
185 context: QuoteContext::DoubleQuoted,
186 });
187 }
188 }
189 c => {
190 current.push(AnnotatedChar {
191 ch: c,
192 context: QuoteContext::DoubleQuoted,
193 });
194 }
195 },
196 }
197 }
198
199 if state != State::Unquoted {
200 anyhow::bail!("unbalanced quotes in shell command");
201 }
202
203 if !current.is_empty() || in_token {
204 tokens.push(current);
205 }
206
207 Ok(tokens)
208}
209
#[cfg(test)]
mod tests {
    use super::*;

    /// Tokenizes well-formed `input` and returns only the text of each token.
    fn words(input: &str) -> Vec<String> {
        tokenize(input)
            .expect("input should tokenize")
            .into_iter()
            .map(|token| token.text)
            .collect()
    }

    #[test]
    fn tokenize_simple_command() {
        let parsed = tokenize("echo hello").unwrap();
        let seen: Vec<&str> = parsed.iter().map(|token| token.text.as_str()).collect();
        assert_eq!(seen, ["echo", "hello"]);
        assert!(parsed.iter().all(|token| !token.was_quoted));
    }

    #[test]
    fn tokenize_single_quoted() {
        let parsed = tokenize("echo 'hello world'").unwrap();
        let seen: Vec<&str> = parsed.iter().map(|token| token.text.as_str()).collect();
        assert_eq!(seen, ["echo", "hello world"]);
        assert!(parsed[1].was_quoted);
    }

    #[test]
    fn tokenize_double_quoted() {
        let parsed = tokenize(r#"echo "hello world""#).unwrap();
        let seen: Vec<&str> = parsed.iter().map(|token| token.text.as_str()).collect();
        assert_eq!(seen, ["echo", "hello world"]);
        assert!(parsed[1].was_quoted);
    }

    #[test]
    fn tokenize_semicolon_in_single_quotes() {
        assert_eq!(words("echo 'hello;world'"), ["echo", "hello;world"]);
    }

    #[test]
    fn tokenize_pipe_in_double_quotes() {
        assert_eq!(words(r#"echo "a|b""#), ["echo", "a|b"]);
    }

    #[test]
    fn tokenize_backslash_escape() {
        assert_eq!(words(r"echo hello\ world"), ["echo", "hello world"]);
    }

    #[test]
    fn tokenize_unbalanced_single_quote_errors() {
        let outcome = tokenize("echo 'hello");
        assert!(outcome.is_err());
    }

    #[test]
    fn tokenize_unbalanced_double_quote_errors() {
        let outcome = tokenize(r#"echo "hello"#);
        assert!(outcome.is_err());
    }

    #[test]
    fn tokenize_empty_input() {
        assert!(tokenize("").unwrap().is_empty());
    }

    #[test]
    fn tokenize_backslash_in_double_quotes() {
        assert_eq!(words(r#"echo "a\"b""#), ["echo", r#"a"b"#]);
    }

    #[test]
    fn tokenize_adjacent_quotes() {
        assert_eq!(words(r#"echo 'a'"b""#), ["echo", "ab"]);
    }

    #[test]
    fn annotated_preserves_context() {
        let parsed = tokenize_annotated("echo 'a;b'").unwrap();
        assert_eq!(parsed.len(), 2);

        // The command word is bare; the argument is entirely single-quoted.
        for annotated in &parsed[0] {
            assert!(annotated.context == QuoteContext::Unquoted);
        }
        for annotated in &parsed[1] {
            assert!(annotated.context == QuoteContext::SingleQuoted);
        }
        assert_eq!(parsed[1][1].ch, ';');
    }

    #[test]
    fn annotated_mixed_context() {
        let parsed = tokenize_annotated(r#"echo hello";"world"#).unwrap();
        assert_eq!(parsed.len(), 2);

        // `hello` and `world` are bare; only the `;` between them is double-quoted.
        let argument = &parsed[1];
        let expectations = [
            (0, 'h', QuoteContext::Unquoted),
            (5, ';', QuoteContext::DoubleQuoted),
            (6, 'w', QuoteContext::Unquoted),
        ];
        for &(index, ch, context) in &expectations {
            assert_eq!(argument[index].ch, ch);
            assert_eq!(argument[index].context, context);
        }
    }
}