vtcode_core/command_safety/
shell_parser.rs1use std::sync::Mutex;
17use std::sync::OnceLock;
18
19static BASH_PARSER: OnceLock<Result<Mutex<tree_sitter::Parser>, String>> = OnceLock::new();
21
22fn get_bash_parser() -> Result<&'static Mutex<tree_sitter::Parser>, String> {
24 BASH_PARSER
25 .get_or_init(|| {
26 let mut parser = tree_sitter::Parser::new();
27 let lang: tree_sitter::Language = tree_sitter_bash::LANGUAGE.into();
28 parser
29 .set_language(&lang)
30 .map_err(|e| format!("Failed to load bash grammar: {e}"))?;
31 Ok(Mutex::new(parser))
32 })
33 .as_ref()
34 .map_err(Clone::clone)
35}
36
37pub fn prewarm_bash_parser() -> Result<(), String> {
39 let _ = get_bash_parser()?;
40 Ok(())
41}
42
43pub fn parse_shell_commands(script: &str) -> Result<Vec<Vec<String>>, String> {
54 match parse_with_tree_sitter(script) {
56 Ok(commands) if !commands.is_empty() => return Ok(commands),
57 Ok(_) => {} Err(e) => {
59 tracing::debug!(
60 "Tree-sitter bash parsing failed: {}, falling back to basic tokenization",
61 e
62 );
63 }
64 }
65
66 parse_with_basic_tokenization(script)
68}
69
70pub fn parse_shell_commands_tree_sitter(script: &str) -> Result<Vec<Vec<String>>, String> {
74 parse_with_tree_sitter(script)
75}
76
77fn parse_with_tree_sitter(script: &str) -> Result<Vec<Vec<String>>, String> {
79 let parser_guard = get_bash_parser()?;
80 let mut parser = parser_guard
81 .lock()
82 .map_err(|e| format!("Failed to lock parser: {}", e))?;
83
84 let tree = parser
85 .parse(script, None)
86 .ok_or_else(|| "Failed to parse script".to_string())?;
87
88 let mut commands = Vec::new();
89 let root = tree.root_node();
90
91 let mut cursor = root.walk();
93
94 for child in root.children(&mut cursor) {
95 if is_command_node(child)
96 && let Some(cmd) = extract_command_from_node(child, script)
97 && !cmd.is_empty()
98 {
99 commands.push(cmd);
100 }
101 }
102
103 Ok(commands)
104}
105
106fn is_command_node(node: tree_sitter::Node) -> bool {
108 matches!(
109 node.kind(),
110 "command" | "pipeline" | "compound_command" | "simple_command"
111 )
112}
113
114fn extract_command_from_node(node: tree_sitter::Node, source: &str) -> Option<Vec<String>> {
116 let mut command = Vec::new();
117 let mut cursor = node.walk();
118
119 if node.kind() == "pipeline" {
121 for child in node.children(&mut cursor) {
122 if child.kind() == "command" || child.kind() == "simple_command" {
123 return extract_command_from_node(child, source);
124 }
125 }
126 }
127
128 for child in node.children(&mut cursor) {
130 if child.kind() == "command_name" {
131 if let Ok(arg) = child.utf8_text(source.as_bytes()) {
132 let trimmed = arg.trim();
133 if !trimmed.is_empty() {
134 command.push(trimmed.to_string());
135 }
136 }
137 continue;
138 }
139
140 if matches!(
141 child.kind(),
142 "word" | "string" | "simple_expansion" | "variable_expansion"
143 ) {
144 let text = child.utf8_text(source.as_bytes());
145 if let Ok(arg) = text {
146 let trimmed = arg.trim();
147 if !trimmed.is_empty() {
148 command.push(trimmed.to_string());
149 }
150 }
151 }
152 }
153
154 if command.is_empty() {
155 None
156 } else {
157 Some(command)
158 }
159}
160
161fn parse_with_basic_tokenization(script: &str) -> Result<Vec<Vec<String>>, String> {
163 let mut commands = Vec::new();
164 let mut current_command = String::new();
165 let mut in_quotes = false;
166 let mut quote_char = ' ';
167 let mut escaped = false;
168
169 for ch in script.chars() {
170 if escaped {
171 current_command.push(ch);
172 escaped = false;
173 continue;
174 }
175
176 match ch {
177 '\\' => {
178 escaped = true;
179 }
180 '\'' | '"' if !in_quotes => {
181 in_quotes = true;
182 quote_char = ch;
183 }
184 c if c == quote_char && in_quotes => {
185 in_quotes = false;
186 }
187 '&' | '|' | ';' if !in_quotes => {
188 if !current_command.trim().is_empty()
189 && let Ok(cmd) = tokenize_command(¤t_command)
190 {
191 commands.push(cmd);
192 }
193 current_command.clear();
194 }
195 _ => current_command.push(ch),
196 }
197 }
198
199 if !current_command.trim().is_empty()
200 && let Ok(cmd) = tokenize_command(¤t_command)
201 {
202 commands.push(cmd);
203 }
204
205 Ok(commands)
206}
207
208fn tokenize_command(cmd: &str) -> Result<Vec<String>, String> {
211 shell_words::split(cmd).map_err(|err| format!("failed to tokenize command: {err}"))
212}
213
214pub fn parse_bash_lc_commands(command: &[String]) -> Option<Vec<Vec<String>>> {
222 if command.is_empty() {
223 return None;
224 }
225
226 let cmd_name = command[0].as_str();
227 let base_cmd = std::path::Path::new(cmd_name)
228 .file_name()
229 .and_then(|osstr| osstr.to_str())
230 .unwrap_or("");
231
232 if base_cmd != "bash" && base_cmd != "zsh" && base_cmd != "sh" {
233 return None;
234 }
235
236 for window in command.windows(2) {
238 if matches!(window[0].as_str(), "-lc" | "-c" | "-il" | "-ic") {
239 let script = &window[1];
240 return parse_shell_commands(script).ok();
241 }
242 }
243
244 None
245}
246
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned argument vector from string literals.
    fn argv(parts: &[&str]) -> Vec<String> {
        parts.iter().map(|part| part.to_string()).collect()
    }

    #[test]
    fn tokenize_simple_command() {
        let tokens = tokenize_command("git status").unwrap();
        assert_eq!(tokens, vec!["git", "status"]);
    }

    #[test]
    fn tokenize_quoted_arguments() {
        let tokens = tokenize_command(r#"echo "hello world""#).unwrap();
        assert_eq!(tokens, vec!["echo", "hello world"]);
    }

    #[test]
    fn parse_single_command() {
        let commands = parse_shell_commands("git status").unwrap();
        assert_eq!(commands.len(), 1);
        assert_eq!(commands[0][0], "git");
    }

    #[test]
    fn parse_chained_commands_with_and() {
        let commands = parse_shell_commands("git status && cargo check").unwrap();
        assert_eq!(commands.len(), 2);
        assert_eq!(commands[0][0], "git");
        assert_eq!(commands[1][0], "cargo");
    }

    #[test]
    fn parse_chained_commands_with_semicolon() {
        let commands = parse_shell_commands("git status; cargo check").unwrap();
        assert_eq!(commands.len(), 2);
    }

    #[test]
    fn parse_bash_lc_git_status() {
        let parsed = parse_bash_lc_commands(&argv(&["bash", "-lc", "git status"]));
        assert!(parsed.is_some());
        let commands = parsed.unwrap();
        assert_eq!(commands.len(), 1);
        assert_eq!(commands[0][0], "git");
    }

    #[test]
    fn parse_bash_lc_chained() {
        let parsed = parse_bash_lc_commands(&argv(&["bash", "-lc", "git status && cargo check"]));
        assert!(parsed.is_some());
        assert_eq!(parsed.unwrap().len(), 2);
    }

    #[test]
    fn parse_non_bash_command_returns_none() {
        let parsed = parse_bash_lc_commands(&argv(&["echo", "hello"]));
        assert!(parsed.is_none());
    }

    #[test]
    fn parse_bash_without_lc_returns_none() {
        let parsed = parse_bash_lc_commands(&argv(&["bash", "script.sh"]));
        assert!(parsed.is_none());
    }

    #[test]
    fn parse_complex_pipeline() {
        let commands = parse_shell_commands("cat file.txt | grep -i pattern | sort").unwrap();
        assert!(!commands.is_empty());
    }

    #[test]
    fn parse_with_pipes_and_redirects() {
        let commands = parse_shell_commands("ls -la | grep file > output.txt").unwrap();
        assert!(!commands.is_empty());
    }

    #[test]
    fn parse_command_substitution_fallback() {
        let commands = parse_shell_commands("echo $(git status)").unwrap();
        assert!(!commands.is_empty());
    }

    #[test]
    fn parse_escaped_quotes() {
        let commands = parse_shell_commands(r#"echo "hello \"world\"""#).unwrap();
        assert!(!commands.is_empty());
    }

    #[test]
    fn parse_tree_sitter_preserves_command_name_with_quoted_args() {
        let commands = parse_shell_commands_tree_sitter(r#"echo "fish and chips""#).unwrap();
        assert!(!commands.is_empty());
        assert_eq!(commands[0][0], "echo");
    }

    #[test]
    fn parse_bash_lc_with_pipe() {
        let parsed = parse_bash_lc_commands(&argv(&["bash", "-lc", "ls -la | head -5"]));
        assert!(parsed.is_some());
        assert!(!parsed.unwrap().is_empty());
    }

    #[test]
    fn parse_dangerous_shell_command() {
        let commands = parse_shell_commands("rm -rf /; echo done").unwrap();
        assert_eq!(commands.len(), 2);
        assert_eq!(commands[0][0], "rm");
    }

    #[test]
    fn prewarm_bash_parser_initializes_successfully() {
        prewarm_bash_parser().expect("bash parser should initialize");
    }
}
394
395use anyhow::{Result, bail};
398
/// Quoting context of the scanner at the current character.
#[derive(PartialEq, Eq, Clone, Copy)]
enum QuoteState {
    /// Not inside any quotes.
    None,
    /// Inside a `'…'` region.
    Single,
    /// Inside a `"…"` region.
    Double,
}
406
407pub(crate) fn split_shell_segments(command: &str) -> Result<Vec<String>> {
410 let mut segments = Vec::new();
411 let mut state = QuoteState::None;
412 let mut escaped = false;
413 let mut segment_start = 0usize;
414 let mut chars = command.char_indices().peekable();
415
416 while let Some((idx, ch)) = chars.next() {
417 match state {
418 QuoteState::Single => {
419 if ch == '\'' {
420 state = QuoteState::None;
421 }
422 }
423 QuoteState::Double => {
424 if escaped {
425 escaped = false;
426 continue;
427 }
428
429 match ch {
430 '\\' => escaped = true,
431 '"' => state = QuoteState::None,
432 '`' => bail!("Command injection pattern detected"),
433 '$' if matches!(chars.peek(), Some((_, '('))) => {
434 bail!("Command injection pattern detected");
435 }
436 _ => {}
437 }
438 }
439 QuoteState::None => {
440 if escaped {
441 escaped = false;
442 continue;
443 }
444
445 match ch {
446 '\\' => escaped = true,
447 '\'' => state = QuoteState::Single,
448 '"' => state = QuoteState::Double,
449 '`' => bail!("Command injection pattern detected"),
450 '$' if matches!(chars.peek(), Some((_, '('))) => {
451 bail!("Command injection pattern detected");
452 }
453 ';' => bail!("Unquoted command chaining detected"),
454 '\n' => bail!("Command injection pattern detected"),
455 '|' | '&' => {
456 push_segment(command, segment_start, idx, &mut segments);
457 segment_start = idx + ch.len_utf8();
458 if let Some((next_idx, next_ch)) = chars.peek().copied()
459 && next_ch == ch
460 {
461 chars.next();
462 segment_start = next_idx + next_ch.len_utf8();
463 }
464 }
465 _ => {}
466 }
467 }
468 }
469 }
470
471 push_segment(command, segment_start, command.len(), &mut segments);
472 Ok(segments)
473}
474
/// Appends the trimmed text of `command[start..end]` to `segments`, unless it
/// is empty after trimming.
///
/// `start` and `end` are byte offsets and must form a valid range on
/// character boundaries; slicing panics otherwise.
fn push_segment(command: &str, start: usize, end: usize, segments: &mut Vec<String>) {
    let trimmed = command[start..end].trim();
    if trimmed.is_empty() {
        return;
    }
    segments.push(trimmed.to_owned());
}
481
482pub(crate) fn additional_dangerous_pattern(segment: &str) -> Option<&'static str> {
484 let segment_lower = segment.to_ascii_lowercase();
485 if segment_lower.starts_with(":(){:|:&};:") {
486 return Some(":(){:|:&};:");
487 }
488
489 let tokens = shell_words::split(segment).unwrap_or_else(|_| {
490 segment
491 .split_whitespace()
492 .map(ToString::to_string)
493 .collect()
494 });
495 let first = tokens.first()?;
496 let command_name = base_command_name(strip_wrapping_quotes(first)).to_ascii_lowercase();
497
498 match command_name.as_str() {
499 "rmdir" => Some("rmdir"),
500 "wget" => Some("wget"),
501 "curl" => Some("curl"),
502 "chmod"
503 if tokens
504 .iter()
505 .skip(1)
506 .any(|arg| strip_wrapping_quotes(arg).starts_with("777")) =>
507 {
508 Some("chmod 777")
509 }
510 "chown"
511 if tokens.iter().skip(1).any(|arg| {
512 let arg = strip_wrapping_quotes(arg).to_ascii_lowercase();
513 arg == "root" || arg.starts_with("root:")
514 }) =>
515 {
516 Some("chown root")
517 }
518 _ => None,
519 }
520}
521
/// Removes one matching pair of surrounding quotes from `token`.
///
/// Single quotes are tried first, then double quotes. The token is returned
/// unchanged when it is not wrapped in a matching pair.
fn strip_wrapping_quotes(token: &str) -> &str {
    for quote in ['\'', '"'] {
        let inner = token
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote));
        if let Some(inner) = inner {
            return inner;
        }
    }
    token
}
533
/// Returns the final path component of `command` (`/usr/bin/rm` → `rm`).
///
/// Falls back to the input itself when the path has no final component (for
/// example an empty string or `..`).
fn base_command_name(command: &str) -> &str {
    match std::path::Path::new(command).file_name() {
        Some(name) => name.to_str().unwrap_or(command),
        None => command,
    }
}
539}