Skip to main content

cc_toolgate/parse/
tokenize.rs

/// Extract the first real command word, skipping leading `VAR=value` assignments.
///
/// Leading assignments are skipped as long as each one has a valid shell
/// variable name (`[A-Za-z_][A-Za-z0-9_]*`) and is followed by whitespace and
/// more text. Values may contain single quotes, double quotes, or backslash
/// escapes, so `FOO="bar baz" git push` resolves to `git`. An assignment whose
/// value runs to the end of the input (including one with an unterminated
/// quote) is not treated as a prefix.
///
/// The returned word is reduced to its basename: `/usr/bin/ls` becomes `ls`
/// and `./script.sh` becomes `script.sh`. A word ending in `/` is returned
/// unchanged. Empty or whitespace-only input yields an empty string.
///
/// Command substitutions such as `FOO=$(echo a b)` are not understood; the
/// value is considered to end at the first unquoted whitespace.
pub fn base_command(command: &str) -> String {
    // Byte offset of the first whitespace in `value` that is neither quoted
    // nor backslash-escaped, or `None` when the value runs to the end.
    fn value_end(value: &str) -> Option<usize> {
        let mut quote: Option<char> = None;
        let mut escaped = false;
        for (idx, ch) in value.char_indices() {
            if escaped {
                escaped = false;
                continue;
            }
            match quote {
                // Inside single quotes nothing is special except the closing quote.
                Some('\'') => {
                    if ch == '\'' {
                        quote = None;
                    }
                }
                // Inside double quotes a backslash still escapes the next character.
                Some(_) => match ch {
                    '\\' => escaped = true,
                    '"' => quote = None,
                    _ => {}
                },
                None => match ch {
                    '\\' => escaped = true,
                    '\'' | '"' => quote = Some(ch),
                    c if c.is_whitespace() => return Some(idx),
                    _ => {}
                },
            }
        }
        None
    }

    // Strip one leading `NAME=value` assignment and return what follows it.
    fn skip_assignment(input: &str) -> Option<&str> {
        let (name, value) = input.split_once('=')?;
        let mut name_chars = name.chars();
        let starts_ok = name_chars
            .next()
            .is_some_and(|c| c.is_ascii_alphabetic() || c == '_');
        if !starts_ok || !name_chars.all(|c| c.is_ascii_alphanumeric() || c == '_') {
            return None;
        }
        let end = value_end(value)?;
        Some(value[end..].trim_start())
    }

    let mut rest = command.trim();
    // Each successful skip consumes at least `NAME=`, so this terminates.
    while let Some(next) = skip_assignment(rest) {
        rest = next;
    }

    let word = rest.split_whitespace().next().unwrap_or("");
    // Extract basename: /usr/bin/ls → ls, ./script.sh → script.sh
    match word.rsplit_once('/') {
        Some((_, name)) if !name.is_empty() => name.to_string(),
        _ => word.to_string(),
    }
}
33
/// Extract leading `KEY=VALUE` pairs from a command string.
///
/// Pairs are collected from the front of `command` for as long as each one
/// has a valid shell variable name (`[A-Za-z_][A-Za-z0-9_]*`) and is followed
/// by whitespace and more text. Parsing stops at the first word that is not
/// such an assignment, so a trailing `KEY=VALUE` with nothing after it is not
/// reported.
///
/// Values may contain single quotes, double quotes, or backslash escapes;
/// whitespace inside them does not end the value. The value is returned as
/// the raw source text, so quotes and backslashes are kept verbatim:
/// `FOO="bar baz" cmd` yields `("FOO", "\"bar baz\"")`.
// NOTE: Command substitutions such as FOO=$(echo a b) are not understood; the
// value is considered to end at the first unquoted whitespace. A value with an
// unterminated quote stops parsing.
pub fn env_vars(command: &str) -> Vec<(String, String)> {
    // Byte offset of the first whitespace in `value` that is neither quoted
    // nor backslash-escaped, or `None` when the value runs to the end.
    fn value_end(value: &str) -> Option<usize> {
        let mut quote: Option<char> = None;
        let mut escaped = false;
        for (idx, ch) in value.char_indices() {
            if escaped {
                escaped = false;
                continue;
            }
            match quote {
                // Inside single quotes nothing is special except the closing quote.
                Some('\'') => {
                    if ch == '\'' {
                        quote = None;
                    }
                }
                // Inside double quotes a backslash still escapes the next character.
                Some(_) => match ch {
                    '\\' => escaped = true,
                    '"' => quote = None,
                    _ => {}
                },
                None => match ch {
                    '\\' => escaped = true,
                    '\'' | '"' => quote = Some(ch),
                    c if c.is_whitespace() => return Some(idx),
                    _ => {}
                },
            }
        }
        None
    }

    let mut result = Vec::new();
    let mut rest = command.trim();
    // Each iteration consumes at least `KEY=`, so this terminates.
    while let Some((key, after_eq)) = rest.split_once('=') {
        let mut key_chars = key.chars();
        let starts_ok = key_chars
            .next()
            .is_some_and(|c| c.is_ascii_alphabetic() || c == '_');
        if !starts_ok || !key_chars.all(|c| c.is_ascii_alphanumeric() || c == '_') {
            break;
        }
        let Some(end) = value_end(after_eq) else {
            break;
        };
        result.push((key.to_string(), after_eq[..end].to_string()));
        rest = after_eq[end..].trim_start();
    }
    result
}
67
68/// Tokenize a command segment into words using shlex (POSIX word splitting).
69pub fn tokenize(command: &str) -> Vec<String> {
70    shlex::split(command).unwrap_or_else(|| {
71        // Fallback: simple whitespace splitting if shlex can't parse
72        command.split_whitespace().map(String::from).collect()
73    })
74}
75
#[cfg(test)]
mod tests {
    use super::*;

    /// A bare command with flags resolves to the command itself.
    #[test]
    fn base_command_simple() {
        let got = base_command("ls -la");
        assert_eq!(got, "ls");
    }

    /// A leading environment assignment is skipped.
    #[test]
    fn base_command_with_env() {
        let got = base_command("GIT_CONFIG_GLOBAL=~/.gitconfig.ai git push");
        assert_eq!(got, "git");
    }

    /// An absolute path is reduced to its final component.
    #[test]
    fn base_command_absolute_path() {
        let got = base_command("/usr/bin/ls -la");
        assert_eq!(got, "ls");
    }

    /// A `./`-relative path is reduced to its final component.
    #[test]
    fn base_command_relative_path() {
        let got = base_command("./script.sh --flag");
        assert_eq!(got, "script.sh");
    }

    /// A deeply nested absolute path is reduced to its final component.
    #[test]
    fn base_command_deep_path() {
        let got =
            base_command("/home/user/dev/cc-toolgate/target/release/cc-toolgate --dump-config");
        assert_eq!(got, "cc-toolgate");
    }

    /// A `~`-prefixed path is reduced to its final component.
    #[test]
    fn base_command_tilde_path() {
        let got = base_command("~/dev/cc-toolgate/target/release/cc-toolgate --dump-config");
        assert_eq!(got, "cc-toolgate");
    }

    /// Assignment skipping and basename extraction compose.
    #[test]
    fn base_command_env_with_path() {
        let got = base_command("FOO=bar /usr/local/bin/git status");
        assert_eq!(got, "git");
    }

    /// Empty input yields an empty command.
    #[test]
    fn base_command_empty() {
        let got = base_command("");
        assert_eq!(got, "");
    }

    /// One leading assignment is reported as one pair.
    #[test]
    fn env_vars_single() {
        let expected = vec![("FOO".to_string(), "bar".to_string())];
        assert_eq!(env_vars("FOO=bar cmd"), expected);
    }

    /// Consecutive leading assignments are reported in order.
    #[test]
    fn env_vars_multiple() {
        let expected = vec![
            ("A".to_string(), "1".to_string()),
            ("B".to_string(), "2".to_string()),
        ];
        assert_eq!(env_vars("A=1 B=2 cmd"), expected);
    }

    /// A command without assignments reports no pairs.
    #[test]
    fn env_vars_none() {
        let found = env_vars("cmd --flag");
        assert!(found.is_empty());
    }

    /// Unquoted words are split on whitespace.
    #[test]
    fn tokenize_simple() {
        let words = tokenize("ls -la /tmp");
        assert_eq!(words, vec!["ls", "-la", "/tmp"]);
    }

    /// Single-quoted text stays together as one word, without the quotes.
    #[test]
    fn tokenize_quoted() {
        let words = tokenize("echo 'hello world'");
        assert_eq!(words, vec!["echo", "hello world"]);
    }

    /// Double-quoted text stays together as one word, without the quotes.
    #[test]
    fn tokenize_double_quoted() {
        let words = tokenize("echo \"hello world\"");
        assert_eq!(words, vec!["echo", "hello world"]);
    }
}