agent_shell_parser/parse/
tokenize.rs1use super::resolve::{classify_surface, default_command_config};
2use super::types::{CommandArg, CommandCharacteristics, ParsedCommand, ParsedFlag};
3
4pub fn find_base_command(words: &[String]) -> String {
7 let cmd = words
8 .iter()
9 .find(|t| !is_env_assignment(t))
10 .map(String::as_str)
11 .unwrap_or("");
12
13 match cmd.rsplit_once('/') {
14 Some((_, name)) if !name.is_empty() => name.to_string(),
15 _ => cmd.to_string(),
16 }
17}
18
19pub fn command_characteristics(command: &str) -> CommandCharacteristics {
28 let tokens = shlex_or_whitespace(command);
29 let base = find_base_command(&tokens);
30 let has_dynamic_command = base.starts_with('$');
31 let indirect_execution = classify_surface(&base, &tokens, default_command_config());
32
33 CommandCharacteristics {
34 base_command: base,
35 indirect_execution,
36 has_dynamic_command,
37 }
38}
39
40pub fn base_command(command: &str) -> String {
45 command_characteristics(command).base_command
46}
47
48pub fn env_vars(command: &str) -> Vec<(String, String)> {
53 let tokens = shlex_or_whitespace(command);
54 let mut result = Vec::new();
55 for token in &tokens {
56 if let Some(eq_pos) = token.find('=') {
57 let key = &token[..eq_pos];
58 if is_valid_env_key(key) {
59 let val = &token[eq_pos + 1..];
60 result.push((key.to_string(), val.to_string()));
61 continue;
62 }
63 }
64 break;
65 }
66 result
67}
68
69pub fn tokenize(command: &str) -> Vec<String> {
75 shlex_or_whitespace(command)
76}
77
78pub(crate) fn is_env_assignment(token: &str) -> bool {
79 match token.find('=') {
80 Some(eq_pos) => is_valid_env_key(&token[..eq_pos]),
81 None => false,
82 }
83}
84
/// Reports whether `key` is acceptable as an environment variable name.
///
/// A valid key is non-empty, starts with an ASCII letter or `_`, and
/// continues with ASCII letters, ASCII digits or `_` only.
pub(crate) fn is_valid_env_key(key: &str) -> bool {
    let mut chars = key.chars();

    match chars.next() {
        // The first character may not be a digit; the rest may.
        Some(first) if first.is_ascii_alphabetic() || first == '_' => {
            chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
        }
        // Empty key, or a first character outside the allowed set.
        _ => false,
    }
}
93
94pub fn parse_command(command: &str) -> ParsedCommand {
102 let tokens = shlex_or_whitespace(command);
103
104 let cmd_idx = tokens.iter().position(|t| !is_env_assignment(t));
105 let Some(cmd_idx) = cmd_idx else {
106 return ParsedCommand {
107 command: String::new(),
108 args: vec![],
109 };
110 };
111
112 let cmd_token = &tokens[cmd_idx];
113 let base = match cmd_token.rsplit_once('/') {
114 Some((_, name)) if !name.is_empty() => name.to_string(),
115 _ => cmd_token.to_string(),
116 };
117
118 let mut args = Vec::new();
119 let mut past_double_dash = false;
120
121 for token in &tokens[cmd_idx + 1..] {
122 if past_double_dash {
123 args.push(CommandArg::Positional(token.clone()));
124 continue;
125 }
126 if token == "--" {
127 past_double_dash = true;
128 continue;
129 }
130 if let Some(rest) = token.strip_prefix("--") {
131 if let Some((name, value)) = rest.split_once('=') {
132 args.push(CommandArg::Flag(ParsedFlag {
133 name: format!("--{name}"),
134 value: Some(value.to_string()),
135 }));
136 } else {
137 args.push(CommandArg::Flag(ParsedFlag {
138 name: token.clone(),
139 value: None,
140 }));
141 }
142 } else if token.starts_with('-') && token.len() > 1 {
143 args.push(CommandArg::Flag(ParsedFlag {
144 name: token.clone(),
145 value: None,
146 }));
147 } else {
148 args.push(CommandArg::Positional(token.clone()));
149 }
150 }
151
152 ParsedCommand {
153 command: base,
154 args,
155 }
156}
157
158fn shlex_or_whitespace(command: &str) -> Vec<String> {
159 shlex::split(command).unwrap_or_else(|| command.split_whitespace().map(String::from).collect())
160}
161
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned `(key, value)` pair for comparing against `env_vars`.
    fn kv(key: &str, value: &str) -> (String, String) {
        (key.to_string(), value.to_string())
    }

    // ---- base_command ----------------------------------------------------

    #[test]
    fn base_command_simple() {
        assert_eq!(base_command("ls -la"), "ls");
    }

    #[test]
    fn base_command_with_env() {
        let name = base_command("GIT_CONFIG_GLOBAL=~/.gitconfig.ai git push");
        assert_eq!(name, "git");
    }

    #[test]
    fn base_command_absolute_path() {
        assert_eq!(base_command("/usr/bin/ls -la"), "ls");
    }

    #[test]
    fn base_command_relative_path() {
        assert_eq!(base_command("./script.sh --flag"), "script.sh");
    }

    #[test]
    fn base_command_deep_path() {
        let name = base_command("/home/user/dev/tool/target/release/tool --dump-config");
        assert_eq!(name, "tool");
    }

    #[test]
    fn base_command_env_with_path() {
        assert_eq!(base_command("FOO=bar /usr/local/bin/git status"), "git");
    }

    #[test]
    fn base_command_empty() {
        assert_eq!(base_command(""), "");
    }

    #[test]
    fn base_command_quoted_env_value() {
        let name = base_command(r#"GIT_AUTHOR_NAME="Jane Doe" git commit"#);
        assert_eq!(name, "git");
    }

    #[test]
    fn base_command_single_quoted_env_value() {
        assert_eq!(base_command("FOO='bar baz' git push"), "git");
    }

    #[test]
    fn base_command_multiple_quoted_env() {
        assert_eq!(base_command(r#"A="x y" B='1 2' git status"#), "git");
    }

    // ---- env_vars --------------------------------------------------------

    #[test]
    fn env_vars_single() {
        assert_eq!(env_vars("FOO=bar cmd"), vec![kv("FOO", "bar")]);
    }

    #[test]
    fn env_vars_multiple() {
        assert_eq!(env_vars("A=1 B=2 cmd"), vec![kv("A", "1"), kv("B", "2")]);
    }

    #[test]
    fn env_vars_none() {
        assert!(env_vars("cmd --flag").is_empty());
    }

    #[test]
    fn env_vars_quoted_value() {
        assert_eq!(env_vars(r#"FOO="bar baz" cmd"#), vec![kv("FOO", "bar baz")]);
    }

    #[test]
    fn env_vars_single_quoted_value() {
        assert_eq!(env_vars("FOO='bar baz' cmd"), vec![kv("FOO", "bar baz")]);
    }

    #[test]
    fn env_vars_value_with_equals() {
        // Only the first `=` separates key from value.
        assert_eq!(
            env_vars(r#"OPTS="--foo=bar" cmd"#),
            vec![kv("OPTS", "--foo=bar")]
        );
    }

    // ---- tokenize --------------------------------------------------------

    #[test]
    fn tokenize_simple() {
        assert_eq!(tokenize("ls -la /tmp"), vec!["ls", "-la", "/tmp"]);
    }

    #[test]
    fn tokenize_quoted() {
        assert_eq!(tokenize("echo 'hello world'"), vec!["echo", "hello world"]);
    }

    #[test]
    fn tokenize_double_quoted() {
        let words = tokenize("echo \"hello world\"");
        assert_eq!(words, vec!["echo", "hello world"]);
    }

    // ---- parse_command ---------------------------------------------------

    #[test]
    fn parse_simple_command() {
        let parsed = parse_command("ls -la /tmp");
        assert_eq!(parsed.command, "ls");
        assert_eq!(parsed.subcommand(), Some("/tmp"));
        assert_eq!(parsed.flags().count(), 1);
        assert_eq!(
            parsed.flags().next().map(|f| f.name.as_str()),
            Some("-la")
        );
        assert_eq!(parsed.positional().collect::<Vec<_>>(), vec!["/tmp"]);
    }

    #[test]
    fn parse_git_push() {
        let parsed = parse_command("git push --force origin main");
        assert_eq!(parsed.command, "git");
        assert_eq!(parsed.subcommand(), Some("push"));
        assert!(parsed.has_flag("--force"));
        assert_eq!(
            parsed.positional().collect::<Vec<_>>(),
            vec!["push", "origin", "main"]
        );
    }

    #[test]
    fn parse_flag_with_equals() {
        let parsed = parse_command("cargo build --color=always");
        assert_eq!(parsed.command, "cargo");
        let flags: Vec<_> = parsed.flags().collect();
        assert_eq!(flags.len(), 1);
        assert_eq!(flags[0].name, "--color");
        assert_eq!(flags[0].value.as_deref(), Some("always"));
    }

    #[test]
    fn parse_double_dash_separator() {
        let parsed = parse_command("git log -- file.rs");
        assert_eq!(parsed.command, "git");
        assert!(parsed.positional().any(|s| s == "file.rs"));
    }

    #[test]
    fn parse_with_env_vars() {
        let parsed = parse_command("FOO=bar git status");
        assert_eq!(parsed.command, "git");
        assert_eq!(parsed.subcommand(), Some("status"));
    }

    #[test]
    fn parse_path_command() {
        let parsed = parse_command("/usr/bin/git commit -m test");
        assert_eq!(parsed.command, "git");
        assert_eq!(parsed.subcommand(), Some("commit"));
    }

    #[test]
    fn parse_empty() {
        let parsed = parse_command("");
        assert_eq!(parsed.command, "");
        assert!(parsed.subcommand().is_none());
    }
}