Skip to main content

tracevault_core/
software.rs

1use std::collections::HashSet;
2
/// Wrapper commands whose argument is itself the program to run.
///
/// `extract_first_tool` steps over these names so that the wrapped program,
/// not the wrapper, is what gets reported.
const PREFIX_COMMANDS: &[&str] = &[
    "sudo", "env", "nix-shell", "command", "exec", "nice", "time",
];
12
/// Names the shell handles itself instead of launching an external program.
///
/// `extract_software` drops any tool name found in this list. The list covers
/// the Bourne and Bash builtins (including `.` and `:`), job-control, history,
/// completion and directory-stack builtins. `]` is kept for backward
/// compatibility even though it only ever appears as the closing argument of `[`.
const SHELL_BUILTINS: &[&str] = &[
    // Entries that were always filtered.
    "cd", "echo", "export", "source", "set", "unset", "true", "false", "test", "[", "]", "declare",
    "local", "readonly", "typeset", "alias", "bg", "fg", "jobs", "kill", "wait", "trap", "eval",
    "let", "shift", "return", "exit", "break", "continue", "pwd", "pushd", "popd", "dirs", "umask",
    "ulimit", "hash", "type", "builtin", "caller", "compgen", "complete", "printf", "read",
    // Builtins that were previously reported as external software.
    ".", ":", "unalias", "getopts", "shopt", "history", "fc", "disown", "suspend", "times", "bind",
    "enable", "help", "logout", "mapfile", "readarray", "compopt",
];
19
20/// Extract external software/CLI tool names from a Bash command string.
21///
22/// Splits on pipes and chain operators, takes the first token of each segment,
23/// strips path prefixes, skips prefix commands (sudo, env, etc.) and shell builtins.
24pub fn extract_software(command: &str) -> Vec<String> {
25    let mut seen = HashSet::new();
26    let mut result = Vec::new();
27
28    let segments = split_command(command);
29
30    for segment in segments {
31        if let Some(name) = extract_first_tool(segment.trim()) {
32            if !SHELL_BUILTINS.contains(&name.as_str()) && seen.insert(name.clone()) {
33                result.push(name);
34            }
35        }
36    }
37
38    result
39}
40
/// Split a shell command line into the segments between control operators.
///
/// Recognised operators are `|`, `|&`, `||`, `&&` and `;`. Operators inside
/// single or double quotes are ignored, and so is any character preceded by a
/// backslash (outside single quotes, where a backslash is literal). The
/// returned slices borrow from `command`, keep their surrounding whitespace,
/// and may be empty when two operators are adjacent. A trailing operator does
/// not produce a final empty segment.
fn split_command(command: &str) -> Vec<&str> {
    let bytes = command.as_bytes();
    let len = bytes.len();
    let mut segments = Vec::new();
    let mut start = 0;
    let mut i = 0;
    let mut in_single_quote = false;
    let mut in_double_quote = false;

    while i < len {
        let b = bytes[i];

        // A backslash escapes the following byte everywhere except inside single
        // quotes. Skipping both keeps `\"`, `\'` and `\|` from being mistaken for
        // quote delimiters or operators. Slicing below only ever happens at ASCII
        // operator bytes, so stepping into a multi-byte character here is harmless.
        if b == b'\\' && !in_single_quote {
            i += 2;
            continue;
        }

        if b == b'\'' && !in_double_quote {
            in_single_quote = !in_single_quote;
            i += 1;
            continue;
        }
        if b == b'"' && !in_single_quote {
            in_double_quote = !in_double_quote;
            i += 1;
            continue;
        }

        if in_single_quote || in_double_quote {
            i += 1;
            continue;
        }

        // Width of the control operator starting at `i`, or 0 if there is none.
        let next = bytes.get(i + 1).copied();
        let operator_len = match (b, next) {
            // `|&` pipes stdout+stderr; consuming both bytes keeps the `&` out of
            // the next segment.
            (b'|', Some(b'|')) | (b'&', Some(b'&')) | (b'|', Some(b'&')) => 2,
            (b'|', _) | (b';', _) => 1,
            _ => 0,
        };

        if operator_len == 0 {
            i += 1;
            continue;
        }

        segments.push(&command[start..i]);
        i += operator_len;
        start = i;
    }

    if start < len {
        segments.push(&command[start..]);
    }

    segments
}
97
98fn extract_first_tool(segment: &str) -> Option<String> {
99    let mut tokens = segment.split_whitespace();
100    let mut token = tokens.next()?;
101
102    // Skip prefix commands (may chain: `sudo env X=1 cargo build`)
103    while PREFIX_COMMANDS.contains(&strip_path(token).as_str()) {
104        token = tokens.next()?;
105        // Skip flags (e.g. `nice -n 10`, `sudo -u root`) and KEY=VALUE args (`env X=1`)
106        loop {
107            if token.starts_with('-') {
108                // Flag — consume it and its value if the next token looks like a flag argument
109                token = tokens.next()?;
110                // If the next token is a plain numeric value, it's likely the flag's argument
111                if token.parse::<f64>().is_ok() {
112                    token = tokens.next()?;
113                }
114            } else if token.contains('=') {
115                // KEY=VALUE style (env)
116                token = tokens.next()?;
117            } else {
118                break;
119            }
120        }
121    }
122
123    let name = strip_path(token);
124
125    if name.is_empty() {
126        return None;
127    }
128
129    Some(name)
130}
131
/// Return the part of `token` after its last `/`, or the whole token if it has none.
///
/// A token that ends in `/` yields an empty string.
fn strip_path(token: &str) -> String {
    // `rsplit` always yields at least one piece, so the fallback is never taken.
    let basename = token.rsplit('/').next().unwrap_or(token);
    basename.to_owned()
}
138
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `command` yields exactly the tools in `expected`, in order.
    #[track_caller]
    fn assert_tools(command: &str, expected: &[&str]) {
        assert_eq!(extract_software(command), expected, "command: {:?}", command);
    }

    #[test]
    fn simple_command() {
        assert_tools("git commit -m \"fix\"", &["git"]);
    }

    #[test]
    fn chained_same_tool() {
        assert_tools("cargo build && cargo test", &["cargo"]);
    }

    #[test]
    fn piped_different_tools() {
        assert_tools("cat foo.txt | grep bar | wc -l", &["cat", "grep", "wc"]);
    }

    #[test]
    fn sudo_prefix() {
        assert_tools("sudo docker compose up", &["docker"]);
    }

    #[test]
    fn cd_filtered_out() {
        assert_tools("cd /tmp && npm install", &["npm"]);
    }

    #[test]
    fn echo_filtered_out() {
        assert_tools("echo \"hello\"", &[]);
    }

    #[test]
    fn absolute_path_stripped() {
        assert_tools("/usr/local/bin/python3 script.py", &["python3"]);
    }

    #[test]
    fn empty_command() {
        assert_tools("", &[]);
    }

    #[test]
    fn whitespace_only() {
        assert_tools("   ", &[]);
    }

    #[test]
    fn semicolon_separator() {
        assert_tools("git add . ; git commit -m 'test'", &["git"]);
    }

    #[test]
    fn or_chain() {
        assert_tools("docker ps || docker start mycontainer", &["docker"]);
    }

    #[test]
    fn env_prefix() {
        assert_tools("env NODE_ENV=production node server.js", &["node"]);
    }

    #[test]
    fn time_prefix() {
        assert_tools("time cargo build", &["cargo"]);
    }

    #[test]
    fn nice_prefix() {
        assert_tools("nice -n 10 make -j4", &["make"]);
    }

    #[test]
    fn multiple_builtins_all_filtered() {
        assert_tools("export FOO=bar && cd /tmp && echo done", &[]);
    }

    #[test]
    fn mixed_builtins_and_real() {
        assert_tools("export PATH=$PATH:/foo && cargo build", &["cargo"]);
    }

    #[test]
    fn complex_pipeline() {
        assert_tools(
            "find . -name '*.rs' | xargs grep 'TODO' | sort -u",
            &["find", "xargs", "sort"],
        );
    }

    #[test]
    fn quoted_pipe_not_split() {
        assert_tools(r#"echo "hello | world""#, &[]);
    }

    #[test]
    fn flag_with_equals() {
        assert_tools("cargo build --target=x86_64", &["cargo"]);
    }

    #[test]
    fn trailing_operator() {
        assert_tools("git add . &&", &["git"]);
    }
}