/// A complete shell command line, kept as the raw text it was created from.
///
/// Use [`CommandLine::segments`] to break it into its individual commands.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CommandLine(String);
3
/// The text of one command inside a [`CommandLine`].
///
/// Segments are produced by [`CommandLine::segments`] or rebuilt from words
/// with [`Segment::from_words`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Segment(String);
6
7#[derive(Debug, Clone, Eq)]
8pub struct Token(String);
9
10impl CommandLine {
11 pub fn new(s: impl Into<String>) -> Self {
12 Self(s.into())
13 }
14
15 pub fn as_str(&self) -> &str {
16 &self.0
17 }
18
19 pub fn segments(&self) -> Vec<Segment> {
20 split_outside_quotes(&self.0)
21 .into_iter()
22 .map(Segment)
23 .collect()
24 }
25}
26
27impl Segment {
28 pub fn as_str(&self) -> &str {
29 &self.0
30 }
31
32 pub fn is_empty(&self) -> bool {
33 self.0.is_empty()
34 }
35
36 pub fn from_words<S: AsRef<str>>(words: &[S]) -> Self {
37 Segment(shell_words::join(words))
38 }
39
40 pub fn tokenize(&self) -> Option<Vec<Token>> {
41 shell_words::split(&self.0)
42 .ok()
43 .map(|v| v.into_iter().map(Token).collect())
44 }
45
46 pub fn has_unsafe_shell_syntax(&self) -> bool {
47 check_unsafe_shell_syntax(&self.0)
48 }
49
50 pub fn strip_env_prefix(&self) -> Segment {
51 Segment(strip_env_prefix_str(self.as_str()).trim().to_string())
52 }
53
54 pub fn strip_fd_redirects(&self) -> Segment {
55 match self.tokenize() {
56 Some(tokens) => {
57 let filtered: Vec<_> = tokens
58 .into_iter()
59 .filter(|t| !t.is_fd_redirect())
60 .collect();
61 Token::join(&filtered)
62 }
63 None => Segment(self.0.clone()),
64 }
65 }
66}
67
68impl Token {
69 pub fn as_str(&self) -> &str {
70 &self.0
71 }
72
73 pub fn join(tokens: &[Token]) -> Segment {
74 Segment::from_words(tokens)
75 }
76
77 pub fn as_command_line(&self) -> CommandLine {
78 CommandLine(self.0.clone())
79 }
80
81 pub fn command_name(&self) -> &str {
82 self.rsplit('/').next().unwrap_or(self.as_str())
83 }
84
85 pub fn is_fd_redirect(&self) -> bool {
86 let s = self.as_str();
87 let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
88 if rest.len() < 2 || !rest.starts_with(">&") {
89 return false;
90 }
91 let after = &rest[2..];
92 !after.is_empty() && after.bytes().all(|b| b.is_ascii_digit() || b == b'-')
93 }
94
95 pub fn is_dev_null_redirect(&self) -> bool {
96 let s = self.as_str();
97 let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
98 rest.strip_prefix(">>")
99 .or_else(|| rest.strip_prefix('>'))
100 .or_else(|| rest.strip_prefix('<'))
101 .is_some_and(|after| after == "/dev/null")
102 }
103
104 pub fn is_redirect_operator(&self) -> bool {
105 let s = self.as_str();
106 let rest = s.trim_start_matches(|c: char| c.is_ascii_digit());
107 matches!(rest, ">" | ">>" | "<")
108 }
109}
110
111impl std::ops::Deref for Token {
112 type Target = str;
113 fn deref(&self) -> &str {
114 &self.0
115 }
116}
117
118impl AsRef<str> for Token {
119 fn as_ref(&self) -> &str {
120 &self.0
121 }
122}
123
124impl PartialEq for Token {
125 fn eq(&self, other: &Self) -> bool {
126 self.0 == other.0
127 }
128}
129
130impl PartialEq<str> for Token {
131 fn eq(&self, other: &str) -> bool {
132 self.0 == other
133 }
134}
135
136impl PartialEq<&str> for Token {
137 fn eq(&self, other: &&str) -> bool {
138 self.0 == *other
139 }
140}
141
142impl PartialEq<Token> for str {
143 fn eq(&self, other: &Token) -> bool {
144 self == other.as_str()
145 }
146}
147
148impl PartialEq<Token> for &str {
149 fn eq(&self, other: &Token) -> bool {
150 *self == other.as_str()
151 }
152}
153
154impl std::fmt::Display for Token {
155 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
156 f.write_str(&self.0)
157 }
158}
159
160pub fn has_flag(tokens: &[Token], short: &str, long: Option<&str>) -> bool {
161 let short_char = short.trim_start_matches('-');
162 for token in &tokens[1..] {
163 if token == "--" {
164 return false;
165 }
166 if let Some(long_flag) = long
167 && (token == long_flag || token.starts_with(&format!("{long_flag}=")))
168 {
169 return true;
170 }
171 if token.starts_with('-') && !token.starts_with("--") && token[1..].contains(short_char) {
172 return true;
173 }
174 }
175 false
176}
177
/// Splits a command line into its individual commands.
///
/// Separators are `|`, `&` (with `&&` consumed as one separator), `;` and
/// newline, but only when they appear outside single quotes, outside double
/// quotes, and not directly after a backslash escape. An `&` that immediately
/// follows an unquoted, unescaped `>` is kept, so fd redirections such as
/// `2>&1` stay intact.
///
/// Quotes and backslashes are preserved in the output. Each piece is trimmed
/// and empty pieces are dropped, so `a || b` yields `["a", "b"]`.
fn split_outside_quotes(cmd: &str) -> Vec<String> {
    let mut segments = Vec::new();
    let mut current = String::new();
    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;
    // True only while the previous character was a real redirect `>`. Looking
    // at the last pushed character instead cannot tell `2>&1` apart from an
    // escaped literal such as `\>&`, which would hide the command after `&`.
    let mut after_redirect = false;
    let mut chars = cmd.chars().peekable();

    while let Some(c) = chars.next() {
        let follows_redirect = std::mem::replace(&mut after_redirect, false);

        if escaped {
            current.push(c);
            escaped = false;
            continue;
        }

        match c {
            // A backslash is literal inside single quotes only.
            '\\' if !in_single => escaped = true,
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            // Anything else inside quotes is plain text.
            _ if in_single || in_double => {}
            '|' | ';' | '\n' => {
                segments.push(std::mem::take(&mut current));
                continue;
            }
            '&' if !follows_redirect => {
                segments.push(std::mem::take(&mut current));
                // Treat `&&` as a single separator.
                if chars.peek() == Some(&'&') {
                    chars.next();
                }
                continue;
            }
            '>' => after_redirect = true,
            _ => {}
        }
        current.push(c);
    }
    segments.push(current);

    segments
        .into_iter()
        .map(|s| s.trim().to_string())
        .filter(|s| !s.is_empty())
        .collect()
}
233
234fn check_unsafe_shell_syntax(segment: &str) -> bool {
235 let mut in_single = false;
236 let mut in_double = false;
237 let mut escaped = false;
238 let chars: Vec<char> = segment.chars().collect();
239
240 for (i, &c) in chars.iter().enumerate() {
241 if escaped {
242 escaped = false;
243 continue;
244 }
245 if c == '\\' && !in_single {
246 escaped = true;
247 continue;
248 }
249 if c == '\'' && !in_double {
250 in_single = !in_single;
251 continue;
252 }
253 if c == '"' && !in_single {
254 in_double = !in_double;
255 continue;
256 }
257 if !in_single && !in_double {
258 if c == '>' || c == '<' {
259 let next = chars.get(i + 1);
260 if next == Some(&'&')
261 && chars
262 .get(i + 2)
263 .is_some_and(|ch| ch.is_ascii_digit() || *ch == '-')
264 {
265 continue;
266 }
267 if is_dev_null_target(&chars, i + 1, c) {
268 continue;
269 }
270 return true;
271 }
272 if c == '`' {
273 return true;
274 }
275 if c == '$' && chars.get(i + 1) == Some(&'(') {
276 return true;
277 }
278 }
279 }
280 false
281}
282
/// The characters of `/dev/null`, pre-split for comparison against a `char` slice.
const DEV_NULL: [char; 9] = ['/', 'd', 'e', 'v', '/', 'n', 'u', 'l', 'l'];

/// Reports whether a redirection points at exactly `/dev/null`.
///
/// `start` is the index just after the redirect character `redirect_char`.
/// For `>` a second `>` (append form) is skipped first; then any run of
/// spaces is skipped. The path must be followed by the end of input,
/// whitespace, or one of `;|&)` — so `/dev/nullicious` and `/dev/null/x`
/// do not qualify.
fn is_dev_null_target(chars: &[char], start: usize, redirect_char: char) -> bool {
    let mut pos = start;
    if redirect_char == '>' && chars.get(pos) == Some(&'>') {
        pos += 1;
    }
    pos += chars
        .get(pos..)
        .map_or(0, |tail| tail.iter().take_while(|&&ch| ch == ' ').count());

    let end = pos + DEV_NULL.len();
    // `get` yields `None` when fewer than nine characters remain.
    let is_path = chars
        .get(pos..end)
        .is_some_and(|target| *target == DEV_NULL);
    if !is_path {
        return false;
    }

    match chars.get(end) {
        None => true,
        Some(&next) => next.is_whitespace() || ";|&)".contains(next),
    }
}
302
/// Returns the byte index of the first space (`b' '`) in `s` that is outside
/// single quotes, outside double quotes, and not preceded by a backslash
/// escape; `None` when there is no such space.
///
/// A backslash is an escape everywhere except inside single quotes.
fn find_unquoted_space(s: &str) -> Option<usize> {
    let (mut single, mut double, mut skip_next) = (false, false, false);

    for (idx, byte) in s.bytes().enumerate() {
        if skip_next {
            skip_next = false;
            continue;
        }
        match byte {
            b'\\' if !single => skip_next = true,
            b'\'' if !double => single = !single,
            b'"' if !single => double = !double,
            b' ' if !single && !double => return Some(idx),
            _ => {}
        }
    }
    None
}
330
331fn strip_env_prefix_str(segment: &str) -> &str {
332 let mut rest = segment;
333 loop {
334 let trimmed = rest.trim_start();
335 if trimmed.is_empty() {
336 return trimmed;
337 }
338 let bytes = trimmed.as_bytes();
339 if !bytes[0].is_ascii_uppercase() && bytes[0] != b'_' {
340 return trimmed;
341 }
342 if let Some(eq_pos) = trimmed.find('=') {
343 let key = &trimmed[..eq_pos];
344 let valid_key = key
345 .bytes()
346 .all(|b| b.is_ascii_uppercase() || b.is_ascii_digit() || b == b'_');
347 if !valid_key {
348 return trimmed;
349 }
350 if let Some(space_pos) = find_unquoted_space(&trimmed[eq_pos..]) {
351 rest = &trimmed[eq_pos + space_pos..];
352 continue;
353 }
354 return trimmed;
355 }
356 return trimmed;
357 }
358}
359
/// Unit tests for command-line splitting, unsafe-syntax detection, flag
/// lookup, env-prefix stripping, tokenizing and token classification.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a [`Segment`] directly from text.
    fn seg(s: &str) -> Segment {
        Segment(s.to_string())
    }

    /// Builds a [`Token`] directly from text.
    fn tok(s: &str) -> Token {
        Token(s.to_string())
    }

    /// Builds one [`Token`] per word.
    fn toks(words: &[&str]) -> Vec<Token> {
        words.iter().map(|s| tok(s)).collect()
    }

    // --- CommandLine::segments -------------------------------------------

    #[test]
    fn split_pipe() {
        let segs = CommandLine::new("grep foo | head -5").segments();
        assert_eq!(segs, vec![seg("grep foo"), seg("head -5")]);
    }

    #[test]
    fn split_and() {
        let segs = CommandLine::new("ls && echo done").segments();
        assert_eq!(segs, vec![seg("ls"), seg("echo done")]);
    }

    #[test]
    fn split_semicolon() {
        let segs = CommandLine::new("ls; echo done").segments();
        assert_eq!(segs, vec![seg("ls"), seg("echo done")]);
    }

    #[test]
    fn split_preserves_quoted_pipes() {
        let segs = CommandLine::new("echo 'a | b' foo").segments();
        assert_eq!(segs, vec![seg("echo 'a | b' foo")]);
    }

    #[test]
    fn split_background_operator() {
        let segs = CommandLine::new("cat file & rm -rf /").segments();
        assert_eq!(segs, vec![seg("cat file"), seg("rm -rf /")]);
    }

    #[test]
    fn split_newline() {
        let segs = CommandLine::new("echo foo\necho bar").segments();
        assert_eq!(segs, vec![seg("echo foo"), seg("echo bar")]);
    }

    // --- Segment::has_unsafe_shell_syntax --------------------------------

    #[test]
    fn unsafe_redirect() {
        assert!(seg("echo hello > file.txt").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_fd_redirect_stderr_to_stdout() {
        assert!(!seg("cargo clippy 2>&1").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_fd_redirect_close() {
        assert!(!seg("cmd 2>&-").has_unsafe_shell_syntax());
    }

    #[test]
    fn unsafe_redirect_ampersand_no_digit() {
        assert!(seg("echo hello >& file.txt").has_unsafe_shell_syntax());
    }

    #[test]
    fn unsafe_backtick() {
        assert!(seg("echo `rm -rf /`").has_unsafe_shell_syntax());
    }

    #[test]
    fn unsafe_command_substitution() {
        assert!(seg("echo $(rm -rf /)").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_quoted_dollar_paren() {
        assert!(!seg("echo '$(safe)' arg").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_quoted_redirect() {
        assert!(!seg("echo 'greater > than' test").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_no_special_chars() {
        assert!(!seg("grep pattern file").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_redirect_to_dev_null() {
        assert!(!seg("cmd >/dev/null").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_redirect_stderr_to_dev_null() {
        assert!(!seg("cmd 2>/dev/null").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_redirect_append_to_dev_null() {
        assert!(!seg("cmd >>/dev/null").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_redirect_space_dev_null() {
        assert!(!seg("cmd > /dev/null").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_redirect_input_dev_null() {
        assert!(!seg("cmd < /dev/null").has_unsafe_shell_syntax());
    }

    #[test]
    fn safe_redirect_both_dev_null() {
        // Both stdout and stderr discarded in the same segment.
        assert!(!seg("cmd >/dev/null 2>/dev/null").has_unsafe_shell_syntax());
    }

    #[test]
    fn unsafe_redirect_dev_null_prefix() {
        assert!(seg("cmd > /dev/nullicious").has_unsafe_shell_syntax());
    }

    #[test]
    fn unsafe_redirect_dev_null_path_traversal() {
        assert!(seg("cmd > /dev/null/../etc/passwd").has_unsafe_shell_syntax());
    }

    #[test]
    fn unsafe_redirect_dev_null_subpath() {
        assert!(seg("cmd > /dev/null/foo").has_unsafe_shell_syntax());
    }

    #[test]
    fn unsafe_redirect_to_file() {
        assert!(seg("cmd > output.txt").has_unsafe_shell_syntax());
    }

    // --- has_flag ---------------------------------------------------------

    #[test]
    fn has_flag_short() {
        let tokens = toks(&["sed", "-i", "s/foo/bar/"]);
        assert!(has_flag(&tokens, "-i", Some("--in-place")));
    }

    #[test]
    fn has_flag_long_with_eq() {
        let tokens = toks(&["sed", "--in-place=.bak", "s/foo/bar/"]);
        assert!(has_flag(&tokens, "-i", Some("--in-place")));
    }

    #[test]
    fn has_flag_combined_short() {
        let tokens = toks(&["sed", "-ni", "s/foo/bar/p"]);
        assert!(has_flag(&tokens, "-i", Some("--in-place")));
    }

    #[test]
    fn has_flag_stops_at_double_dash() {
        let tokens = toks(&["cmd", "--", "-i"]);
        assert!(!has_flag(&tokens, "-i", Some("--in-place")));
    }

    // --- Segment::strip_env_prefix ---------------------------------------

    #[test]
    fn strip_single_env_var() {
        assert_eq!(
            seg("RACK_ENV=test bundle exec rspec").strip_env_prefix(),
            seg("bundle exec rspec")
        );
    }

    #[test]
    fn strip_multiple_env_vars() {
        assert_eq!(
            seg("RACK_ENV=test RAILS_ENV=test bundle exec rspec").strip_env_prefix(),
            seg("bundle exec rspec")
        );
    }

    #[test]
    fn strip_no_env_var() {
        assert_eq!(
            seg("bundle exec rspec").strip_env_prefix(),
            seg("bundle exec rspec")
        );
    }

    // --- Segment::tokenize -----------------------------------------------

    #[test]
    fn tokenize_simple() {
        assert_eq!(
            seg("grep foo file.txt").tokenize(),
            Some(vec![tok("grep"), tok("foo"), tok("file.txt")])
        );
    }

    #[test]
    fn tokenize_quoted() {
        assert_eq!(
            seg("echo 'hello world'").tokenize(),
            Some(vec![tok("echo"), tok("hello world")])
        );
    }

    // --- Segment::strip_env_prefix with quoted values --------------------

    #[test]
    fn strip_env_quoted_single() {
        assert_eq!(seg("FOO='bar baz' ls").strip_env_prefix(), seg("ls"));
    }

    #[test]
    fn strip_env_quoted_double() {
        assert_eq!(seg("FOO=\"bar baz\" ls").strip_env_prefix(), seg("ls"));
    }

    #[test]
    fn strip_env_quoted_with_equals() {
        assert_eq!(seg("FOO='a=b' ls").strip_env_prefix(), seg("ls"));
    }

    #[test]
    fn strip_env_quoted_multiple() {
        assert_eq!(
            seg("FOO='x y' BAR=\"a b\" cmd").strip_env_prefix(),
            seg("cmd")
        );
    }

    // --- Token helpers ----------------------------------------------------

    #[test]
    fn command_name_simple() {
        assert_eq!(tok("ls").command_name(), "ls");
    }

    #[test]
    fn command_name_with_path() {
        assert_eq!(tok("/usr/bin/ls").command_name(), "ls");
    }

    #[test]
    fn command_name_relative_path() {
        assert_eq!(tok("./scripts/test.sh").command_name(), "test.sh");
    }

    #[test]
    fn fd_redirect_detection() {
        assert!(tok("2>&1").is_fd_redirect());
        assert!(tok(">&2").is_fd_redirect());
        assert!(tok("10>&1").is_fd_redirect());
        assert!(tok("255>&2").is_fd_redirect());
        assert!(tok("2>&-").is_fd_redirect());
        assert!(tok("2>&10").is_fd_redirect());
        assert!(!tok(">").is_fd_redirect());
        assert!(!tok("/dev/null").is_fd_redirect());
        assert!(!tok(">&").is_fd_redirect());
        assert!(!tok("").is_fd_redirect());
        assert!(!tok("42").is_fd_redirect());
        assert!(!tok("123abc").is_fd_redirect());
    }

    #[test]
    fn dev_null_redirect_single_token() {
        assert!(tok(">/dev/null").is_dev_null_redirect());
        assert!(tok(">>/dev/null").is_dev_null_redirect());
        assert!(tok("2>/dev/null").is_dev_null_redirect());
        assert!(tok("2>>/dev/null").is_dev_null_redirect());
        assert!(tok("</dev/null").is_dev_null_redirect());
        assert!(tok("10>/dev/null").is_dev_null_redirect());
        assert!(tok("255>/dev/null").is_dev_null_redirect());
        assert!(!tok(">/tmp/file").is_dev_null_redirect());
        assert!(!tok(">/dev/nullicious").is_dev_null_redirect());
        assert!(!tok("ls").is_dev_null_redirect());
        assert!(!tok("").is_dev_null_redirect());
        assert!(!tok("42").is_dev_null_redirect());
        assert!(!tok("<</dev/null").is_dev_null_redirect());
    }

    #[test]
    fn redirect_operator_detection() {
        assert!(tok(">").is_redirect_operator());
        assert!(tok(">>").is_redirect_operator());
        assert!(tok("<").is_redirect_operator());
        assert!(tok("2>").is_redirect_operator());
        assert!(tok("2>>").is_redirect_operator());
        assert!(tok("10>").is_redirect_operator());
        assert!(tok("255>>").is_redirect_operator());
        assert!(!tok("ls").is_redirect_operator());
        assert!(!tok(">&1").is_redirect_operator());
        assert!(!tok("/dev/null").is_redirect_operator());
        assert!(!tok("").is_redirect_operator());
        assert!(!tok("42").is_redirect_operator());
        assert!(!tok("<<").is_redirect_operator());
    }

    #[test]
    fn reverse_partial_eq() {
        let t = tok("hello");
        assert!("hello" == t);
        assert!("world" != t);
        let s: &str = "hello";
        assert!(s == t);
    }
}