/// Splits a command line into its individual commands.
///
/// The input is cut at `|`, `&`, `&&`, `;` and newline characters that appear
/// outside single quotes, outside double quotes and not directly after a
/// backslash. An `&` that immediately follows `>` (as in `2>&1`) is kept as
/// part of the current segment instead of being treated as a separator.
///
/// Each returned segment is trimmed; segments that are empty after trimming
/// are omitted. Quotes and backslashes are preserved verbatim in the output.
pub fn split_outside_quotes(cmd: &str) -> Vec<String> {
    /// Moves the trimmed buffer into `out` (unless it is blank) and resets it.
    fn flush_segment(buf: &mut String, out: &mut Vec<String>) {
        let trimmed = buf.trim();
        if !trimmed.is_empty() {
            out.push(trimmed.to_string());
        }
        buf.clear();
    }

    let mut pieces: Vec<String> = Vec::new();
    let mut buf = String::new();
    let mut single_quoted = false;
    let mut double_quoted = false;
    let mut after_backslash = false;
    let mut stream = cmd.chars().peekable();

    while let Some(ch) = stream.next() {
        // The character after a backslash is always copied through untouched.
        if after_backslash {
            after_backslash = false;
            buf.push(ch);
            continue;
        }

        match ch {
            // Backslashes are literal inside single quotes only.
            '\\' if !single_quoted => after_backslash = true,
            '\'' if !double_quoted => single_quoted = !single_quoted,
            '"' if !single_quoted => double_quoted = !double_quoted,
            // Inside any quotes nothing acts as a separator.
            _ if single_quoted || double_quoted => {}
            '|' | ';' | '\n' => {
                flush_segment(&mut buf, &mut pieces);
                continue;
            }
            // `>&` belongs to a redirection, so only split on other `&`.
            '&' if !buf.ends_with('>') => {
                flush_segment(&mut buf, &mut pieces);
                // Swallow the second half of `&&`.
                if stream.peek() == Some(&'&') {
                    stream.next();
                }
                continue;
            }
            _ => {}
        }
        buf.push(ch);
    }

    flush_segment(&mut buf, &mut pieces);
    pieces
}
59
60pub fn tokenize(segment: &str) -> Option<Vec<String>> {
61 shell_words::split(segment).ok()
62}
63
64pub fn has_unsafe_shell_syntax(segment: &str) -> bool {
65 let mut in_single = false;
66 let mut in_double = false;
67 let mut escaped = false;
68 let chars: Vec<char> = segment.chars().collect();
69
70 for (i, &c) in chars.iter().enumerate() {
71 if escaped {
72 escaped = false;
73 continue;
74 }
75 if c == '\\' && !in_single {
76 escaped = true;
77 continue;
78 }
79 if c == '\'' && !in_double {
80 in_single = !in_single;
81 continue;
82 }
83 if c == '"' && !in_single {
84 in_double = !in_double;
85 continue;
86 }
87 if !in_single && !in_double {
88 if c == '>' || c == '<' {
89 let next = chars.get(i + 1);
90 if next == Some(&'&')
91 && chars
92 .get(i + 2)
93 .is_some_and(|ch| ch.is_ascii_digit() || *ch == '-')
94 {
95 continue;
96 }
97 if is_dev_null_target(&chars, i + 1, c) {
98 continue;
99 }
100 return true;
101 }
102 if c == '`' {
103 return true;
104 }
105 if c == '$' && chars.get(i + 1) == Some(&'(') {
106 return true;
107 }
108 }
109 }
110 false
111}
112
/// The path `/dev/null`, one `char` per element, for comparison against
/// character slices.
const DEV_NULL: [char; 9] = ['/', 'd', 'e', 'v', '/', 'n', 'u', 'l', 'l'];

/// Checks whether the redirection whose operator ends just before `start`
/// points at exactly `/dev/null`.
///
/// * `chars` – the whole segment as characters.
/// * `start` – index of the first character after the redirection operator.
/// * `redirect_char` – the operator character (`'>'` or `'<'`); for `'>'` a
///   directly following second `>` (append form) is skipped.
///
/// Any run of spaces before the path is ignored. The path must be followed
/// by the end of input, whitespace, or one of `;|&)`, so `/dev/nullx` and
/// `/dev/null/foo` are rejected.
fn is_dev_null_target(chars: &[char], start: usize, redirect_char: char) -> bool {
    let mut pos = start;
    if redirect_char == '>' && chars.get(pos) == Some(&'>') {
        pos += 1;
    }
    while chars.get(pos) == Some(&' ') {
        pos += 1;
    }

    let tail = chars.get(pos..).unwrap_or(&[]);
    if !tail.starts_with(&DEV_NULL) {
        return false;
    }

    // Whatever follows the path must terminate the word.
    match tail.get(DEV_NULL.len()) {
        None => true,
        Some(&follower) => follower.is_whitespace() || ";|&)".contains(follower),
    }
}
132
/// Reports whether a tokenized command line carries the given option.
///
/// * `tokens` – the command's words; the first one (the program name) is
///   never inspected. An empty slice simply yields `false`.
/// * `short` – the short option, with or without its leading `-`
///   (`"-i"` and `"i"` are equivalent). An empty short option never matches.
/// * `long` – the long option including its dashes (for example
///   `"--in-place"`), or `None` if there is none.
///
/// A token matches the long option when it equals it or starts with it
/// followed by `=`. A token matches the short option when it starts with a
/// single `-` and contains the short option text anywhere after that dash,
/// which covers combined forms such as `-ni`.
///
/// Scanning stops at the first `--` token; anything after it is ignored.
pub fn has_flag(tokens: &[String], short: &str, long: Option<&str>) -> bool {
    let short_char = short.trim_start_matches('-');

    // `skip(1)` instead of `&tokens[1..]`: slicing would panic on an empty slice.
    for token in tokens.iter().skip(1) {
        if token == "--" {
            return false;
        }

        let matches_long = long.is_some_and(|long_flag| {
            token
                .strip_prefix(long_flag)
                .is_some_and(|rest| rest.is_empty() || rest.starts_with('='))
        });
        if matches_long {
            return true;
        }

        let is_short_cluster = token.starts_with('-') && !token.starts_with("--");
        // `contains("")` is always true, so an empty short flag must be excluded.
        if is_short_cluster && !short_char.is_empty() && token[1..].contains(short_char) {
            return true;
        }
    }
    false
}
150
/// Reports whether `token` is a file-descriptor duplication or close such as
/// `2>&1`, `>&2` or `2>&-`.
///
/// The accepted shape is: an optional single leading ASCII digit, the
/// operator `>&`, and then a **non-empty** target made only of ASCII digits
/// and `-`. A token with nothing after `>&` (for example `2>&`) is not a
/// descriptor redirect: its target is the next word, which may name a file.
pub fn is_fd_redirect(token: &str) -> bool {
    // Drop at most one leading descriptor digit.
    let rest = token
        .strip_prefix(|c: char| c.is_ascii_digit())
        .unwrap_or(token);

    rest.strip_prefix(">&").is_some_and(|target| {
        // `all` is vacuously true on an empty target, so check emptiness first.
        !target.is_empty() && target.bytes().all(|b| b.is_ascii_digit() || b == b'-')
    })
}
161
162pub fn strip_fd_redirects(s: &str) -> String {
163 match tokenize(s) {
164 Some(tokens) => {
165 let filtered: Vec<_> = tokens
166 .into_iter()
167 .filter(|t| !is_fd_redirect(t))
168 .collect();
169 shell_words::join(&filtered)
170 }
171 None => s.to_string(),
172 }
173}
174
/// Strips leading `NAME=value` environment assignments from a command
/// segment and returns the remainder (a sub-slice of `segment`).
///
/// An assignment is recognised when the text before the first `=` is a
/// non-empty name starting with an ASCII uppercase letter or `_` and made
/// only of ASCII uppercase letters, digits and `_`. The value ends at the
/// first space or tab that is outside single quotes, outside double quotes
/// and not escaped with a backslash, so `FOO="a b" cmd` yields `cmd`.
///
/// Leading whitespace is always trimmed from the result. Stripping stops —
/// and the current text is returned as is — as soon as the text does not
/// start with such an assignment, or when an assignment has nothing after
/// its value (including the case of an unterminated quote in the value).
pub fn strip_env_prefix(segment: &str) -> &str {
    let mut rest = segment;
    loop {
        let trimmed = rest.trim_start();

        let eq_pos = match trimmed.find('=') {
            Some(pos) => pos,
            None => return trimmed,
        };

        let key = &trimmed[..eq_pos];
        let starts_like_name = key
            .bytes()
            .next()
            .is_some_and(|b| b.is_ascii_uppercase() || b == b'_');
        let valid_key = starts_like_name
            && key
                .bytes()
                .all(|b| b.is_ascii_uppercase() || b.is_ascii_digit() || b == b'_');
        if !valid_key {
            return trimmed;
        }

        let value = &trimmed[eq_pos + 1..];
        match env_value_end(value) {
            // Continue after the value; the next round trims the separator.
            Some(end) => rest = &value[end..],
            // No command follows the assignment: leave the text untouched.
            None => return trimmed,
        }
    }
}

/// Returns the byte offset of the first space or tab in `value` that is not
/// quoted or backslash-escaped, or `None` if there is no such separator.
fn env_value_end(value: &str) -> Option<usize> {
    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;

    for (idx, ch) in value.char_indices() {
        if escaped {
            escaped = false;
            continue;
        }
        match ch {
            // Backslashes are literal inside single quotes.
            '\\' if !in_single => escaped = true,
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            ' ' | '\t' if !in_single && !in_double => return Some(idx),
            _ => {}
        }
    }
    None
}
203
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned token list from string literals.
    fn owned(words: &[&str]) -> Vec<String> {
        words.iter().map(|w| (*w).to_string()).collect()
    }

    // --- split_outside_quotes -------------------------------------------

    #[test]
    fn split_pipe() {
        assert_eq!(
            split_outside_quotes("grep foo | head -5"),
            vec!["grep foo", "head -5"]
        );
    }

    #[test]
    fn split_and() {
        assert_eq!(
            split_outside_quotes("ls && echo done"),
            vec!["ls", "echo done"]
        );
    }

    #[test]
    fn split_semicolon() {
        assert_eq!(
            split_outside_quotes("ls; echo done"),
            vec!["ls", "echo done"]
        );
    }

    #[test]
    fn split_preserves_quoted_pipes() {
        assert_eq!(
            split_outside_quotes("echo 'a | b' foo"),
            vec!["echo 'a | b' foo"]
        );
    }

    #[test]
    fn split_background_operator() {
        assert_eq!(
            split_outside_quotes("cat file & rm -rf /"),
            vec!["cat file", "rm -rf /"]
        );
    }

    #[test]
    fn split_newline() {
        assert_eq!(
            split_outside_quotes("echo foo\necho bar"),
            vec!["echo foo", "echo bar"]
        );
    }

    // --- has_unsafe_shell_syntax ----------------------------------------

    #[test]
    fn unsafe_redirect() {
        assert!(has_unsafe_shell_syntax("echo hello > file.txt"));
    }

    #[test]
    fn safe_fd_redirect_stderr_to_stdout() {
        assert!(!has_unsafe_shell_syntax("cargo clippy 2>&1"));
    }

    #[test]
    fn safe_fd_redirect_close() {
        assert!(!has_unsafe_shell_syntax("cmd 2>&-"));
    }

    #[test]
    fn unsafe_redirect_ampersand_no_digit() {
        assert!(has_unsafe_shell_syntax("echo hello >& file.txt"));
    }

    #[test]
    fn unsafe_backtick() {
        assert!(has_unsafe_shell_syntax("echo `rm -rf /`"));
    }

    #[test]
    fn unsafe_command_substitution() {
        assert!(has_unsafe_shell_syntax("echo $(rm -rf /)"));
    }

    #[test]
    fn safe_quoted_dollar_paren() {
        assert!(!has_unsafe_shell_syntax("echo '$(safe)' arg"));
    }

    #[test]
    fn safe_quoted_redirect() {
        assert!(!has_unsafe_shell_syntax("echo 'greater > than' test"));
    }

    #[test]
    fn safe_no_special_chars() {
        assert!(!has_unsafe_shell_syntax("grep pattern file"));
    }

    #[test]
    fn safe_redirect_to_dev_null() {
        assert!(!has_unsafe_shell_syntax("cmd >/dev/null"));
    }

    #[test]
    fn safe_redirect_stderr_to_dev_null() {
        assert!(!has_unsafe_shell_syntax("cmd 2>/dev/null"));
    }

    #[test]
    fn safe_redirect_append_to_dev_null() {
        assert!(!has_unsafe_shell_syntax("cmd >>/dev/null"));
    }

    #[test]
    fn safe_redirect_space_dev_null() {
        assert!(!has_unsafe_shell_syntax("cmd > /dev/null"));
    }

    #[test]
    fn safe_redirect_input_dev_null() {
        assert!(!has_unsafe_shell_syntax("cmd < /dev/null"));
    }

    #[test]
    fn safe_redirect_both_dev_null() {
        // Both stdout and stderr are sent to /dev/null in the same command.
        assert!(!has_unsafe_shell_syntax("cmd >/dev/null 2>/dev/null"));
    }

    #[test]
    fn unsafe_redirect_dev_null_prefix() {
        assert!(has_unsafe_shell_syntax("cmd > /dev/nullicious"));
    }

    #[test]
    fn unsafe_redirect_dev_null_path_traversal() {
        assert!(has_unsafe_shell_syntax("cmd > /dev/null/../etc/passwd"));
    }

    #[test]
    fn unsafe_redirect_dev_null_subpath() {
        assert!(has_unsafe_shell_syntax("cmd > /dev/null/foo"));
    }

    #[test]
    fn unsafe_redirect_to_file() {
        assert!(has_unsafe_shell_syntax("cmd > output.txt"));
    }

    // --- has_flag -------------------------------------------------------

    #[test]
    fn has_flag_short() {
        let tokens = owned(&["sed", "-i", "s/foo/bar/"]);
        assert!(has_flag(&tokens, "-i", Some("--in-place")));
    }

    #[test]
    fn has_flag_long_with_eq() {
        let tokens = owned(&["sed", "--in-place=.bak", "s/foo/bar/"]);
        assert!(has_flag(&tokens, "-i", Some("--in-place")));
    }

    #[test]
    fn has_flag_combined_short() {
        let tokens = owned(&["sed", "-ni", "s/foo/bar/p"]);
        assert!(has_flag(&tokens, "-i", Some("--in-place")));
    }

    #[test]
    fn has_flag_stops_at_double_dash() {
        let tokens = owned(&["cmd", "--", "-i"]);
        assert!(!has_flag(&tokens, "-i", Some("--in-place")));
    }

    // --- strip_env_prefix -----------------------------------------------

    #[test]
    fn strip_single_env_var() {
        assert_eq!(
            strip_env_prefix("RACK_ENV=test bundle exec rspec"),
            "bundle exec rspec"
        );
    }

    #[test]
    fn strip_multiple_env_vars() {
        assert_eq!(
            strip_env_prefix("RACK_ENV=test RAILS_ENV=test bundle exec rspec"),
            "bundle exec rspec"
        );
    }

    #[test]
    fn strip_no_env_var() {
        assert_eq!(strip_env_prefix("bundle exec rspec"), "bundle exec rspec");
    }

    // --- tokenize -------------------------------------------------------

    #[test]
    fn tokenize_simple() {
        assert_eq!(
            tokenize("grep foo file.txt"),
            Some(owned(&["grep", "foo", "file.txt"]))
        );
    }

    #[test]
    fn tokenize_quoted() {
        assert_eq!(
            tokenize("echo 'hello world'"),
            Some(owned(&["echo", "hello world"]))
        );
    }
}