Skip to main content

kintsugi_core/
shell.rs

1//! A small, dependency-free shell tokenizer.
2//!
3//! Good enough to recover an `argv` from a command line for recording and for
4//! the Tier-1 rule engine: it understands single quotes, double quotes, and
5//! backslash escaping. It is deliberately *not* a full shell parser — it does
6//! not expand variables, globs, or handle here-docs. The raw command is always
7//! preserved separately, so this is only ever an aid, never the source of truth.
8
/// Split a command line into tokens, honoring `'…'`, `"…"`, and `\` escapes.
///
/// The quoting rules follow the POSIX shell for the subset implemented here:
///
/// * Unquoted whitespace separates tokens; runs of whitespace collapse.
/// * Inside `'…'` every character is literal.
/// * Inside `"…"` a backslash escapes only `"`, `\`, `$`, and `` ` ``; before
///   any other character the backslash itself is kept.
/// * Outside quotes a backslash makes the following character literal.
/// * A backslash immediately followed by a newline is a line continuation:
///   both characters are dropped, outside quotes and inside double quotes.
///
/// Adjacent quoted and unquoted pieces are joined into one token, and an empty
/// quoted string (`""` or `''`) still produces an empty token.
///
/// Unterminated quotes are tolerated (the partial token is still emitted) and
/// a trailing lone backslash is kept literally, so a malformed line never
/// panics or loses data.
pub fn split(line: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut cur = String::new();
    // Tracked separately from `cur.is_empty()` so that `""` yields a token.
    let mut has_token = false;
    let mut chars = line.chars().peekable();

    while let Some(c) = chars.next() {
        match c {
            c if c.is_whitespace() => {
                if has_token {
                    tokens.push(std::mem::take(&mut cur));
                    has_token = false;
                }
            }
            '\'' => {
                has_token = true;
                // `take_while` also consumes the closing quote; if there is
                // none, it simply drains the rest of the line.
                cur.extend(chars.by_ref().take_while(|&q| q != '\''));
            }
            '"' => {
                has_token = true;
                while let Some(q) = chars.next() {
                    match q {
                        '"' => break,
                        '\\' => match chars.peek().copied() {
                            // Line continuation: drop backslash and newline.
                            Some('\n') => {
                                chars.next();
                            }
                            // In double quotes, backslash escapes a few metachars.
                            Some(next) if matches!(next, '"' | '\\' | '$' | '`') => {
                                chars.next();
                                cur.push(next);
                            }
                            // Otherwise the backslash is literal; the following
                            // character (if any) is handled by the next iteration.
                            _ => cur.push('\\'),
                        },
                        _ => cur.push(q),
                    }
                }
            }
            '\\' => match chars.next() {
                // Line continuation: contributes nothing and, on its own,
                // does not start a token.
                Some('\n') => {}
                Some(next) => {
                    has_token = true;
                    cur.push(next);
                }
                // Trailing backslash with nothing to escape: keep it.
                None => {
                    has_token = true;
                    cur.push('\\');
                }
            },
            _ => {
                has_token = true;
                cur.push(c);
            }
        }
    }
    if has_token {
        tokens.push(cur);
    }
    tokens
}
76
#[cfg(test)]
mod tests {
    use super::split;

    /// Tokenize `line` and require the resulting argv to equal `expected`.
    #[track_caller]
    fn check(line: &str, expected: &[&str]) {
        assert_eq!(split(line), expected);
    }

    // Bare words are separated on whitespace.
    #[test]
    fn plain_words() {
        check("rm -rf /tmp/x", &["rm", "-rf", "/tmp/x"]);
    }

    // Leading, trailing, and repeated whitespace produce no empty tokens.
    #[test]
    fn collapses_whitespace() {
        check("  ls   -la  ", &["ls", "-la"]);
    }

    // A double-quoted span keeps its inner whitespace in a single token.
    #[test]
    fn double_quotes() {
        check(
            r#"git commit -m "two words""#,
            &["git", "commit", "-m", "two words"],
        );
    }

    // Double quotes inside single quotes are ordinary characters.
    #[test]
    fn single_quotes_are_literal() {
        check(r#"echo 'a "b" c'"#, &["echo", r#"a "b" c"#]);
    }

    // An unquoted backslash makes the following space part of the token.
    #[test]
    fn backslash_escape() {
        check(r"echo a\ b", &["echo", "a b"]);
    }

    // `""` is an argument in its own right, not nothing.
    #[test]
    fn empty_quoted_token_is_kept() {
        check(r#"x "" y"#, &["x", "", "y"]);
    }

    // Blank and empty input both yield an empty argv.
    #[test]
    fn empty_line_is_no_tokens() {
        check("   ", &[]);
        check("", &[]);
    }

    // A missing closing quote still emits what was read so far.
    #[test]
    fn unterminated_quote_tolerated() {
        check(r#"echo "oops"#, &["echo", "oops"]);
    }
}
120    #[test]
121    fn unterminated_quote_tolerated() {
122        assert_eq!(split(r#"echo "oops"#), vec!["echo", "oops"]);
123    }
124}