agentshield/parser/
shell.rs1use std::path::{Path, PathBuf};
2
3use once_cell::sync::Lazy;
4use regex::Regex;
5
6use super::{LanguageParser, ParsedFile};
7use crate::error::Result;
8use crate::ir::execution_surface::*;
9use crate::ir::{ArgumentSource, Language, SourceLocation};
10
/// Line-oriented, regex-based parser for shell scripts.
///
/// The type carries no state; all behaviour lives in its
/// `LanguageParser` implementation.
#[derive(Debug, Clone, Copy, Default)]
pub struct ShellParser;
12
13static CURL_WGET_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?m)\b(curl|wget)\s+").unwrap());
14
15static EVAL_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?m)\beval\s+").unwrap());
16
17static INSTALL_RE: Lazy<Regex> = Lazy::new(|| {
18 Regex::new(r"(?m)\b(pip3?\s+install|npm\s+install|npm\s+i\b|yarn\s+add|pnpm\s+add)").unwrap()
19});
20
21static BACKTICK_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"`[^`]+`").unwrap());
22
23static SENSITIVE_VAR_RE: Lazy<Regex> =
24 Lazy::new(|| Regex::new(r"(?i)\$\{?(AWS_|SECRET|TOKEN|PASSWORD|API_KEY|PRIVATE_KEY)").unwrap());
25
26impl LanguageParser for ShellParser {
27 fn language(&self) -> Language {
28 Language::Shell
29 }
30
31 fn parse_file(&self, path: &Path, content: &str) -> Result<ParsedFile> {
32 let mut parsed = ParsedFile::default();
33 let file_path = PathBuf::from(path);
34
35 for (line_idx, line) in content.lines().enumerate() {
36 let line_num = line_idx + 1;
37 let trimmed = line.trim();
38
39 if trimmed.starts_with('#') || trimmed.is_empty() {
40 continue;
41 }
42
43 if let Some(cap) = CURL_WGET_RE.find(trimmed) {
45 let func = cap.as_str().trim();
46 let arg_source = if trimmed.contains('$') {
47 ArgumentSource::Interpolated
48 } else {
49 ArgumentSource::Literal(trimmed.to_string())
50 };
51 parsed.network_operations.push(NetworkOperation {
52 function: func.to_string(),
53 url_arg: arg_source,
54 method: None,
55 sends_data: trimmed.contains("-d ") || trimmed.contains("--data"),
56 location: loc(&file_path, line_num),
57 });
58 }
59
60 if EVAL_RE.is_match(trimmed) {
62 parsed.dynamic_exec.push(DynamicExec {
63 function: "eval".into(),
64 code_arg: ArgumentSource::Interpolated,
65 location: loc(&file_path, line_num),
66 });
67 }
68
69 if BACKTICK_RE.is_match(trimmed) {
71 parsed.commands.push(CommandInvocation {
72 function: "backtick".into(),
73 command_arg: ArgumentSource::Interpolated,
74 location: loc(&file_path, line_num),
75 });
76 }
77
78 if INSTALL_RE.is_match(trimmed) {
80 parsed.commands.push(CommandInvocation {
81 function: "package_install".into(),
82 command_arg: ArgumentSource::Literal(trimmed.to_string()),
83 location: loc(&file_path, line_num),
84 });
85 }
86
87 for cap in SENSITIVE_VAR_RE.captures_iter(trimmed) {
89 let var = cap.get(0).map(|m| m.as_str()).unwrap_or("");
90 parsed.env_accesses.push(EnvAccess {
91 var_name: ArgumentSource::Literal(var.to_string()),
92 is_sensitive: true,
93 location: loc(&file_path, line_num),
94 });
95 }
96 }
97
98 Ok(parsed)
99 }
100}
101
102fn loc(file: &Path, line: usize) -> SourceLocation {
103 SourceLocation {
104 file: file.to_path_buf(),
105 line,
106 column: 0,
107 end_line: None,
108 end_column: None,
109 }
110}
111
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the shell parser over `script` as though it were `test.sh`.
    fn parse(script: &str) -> ParsedFile {
        ShellParser
            .parse_file(Path::new("test.sh"), script)
            .unwrap()
    }

    #[test]
    fn detects_curl() {
        let parsed = parse("curl https://example.com/data\n");
        assert_eq!(parsed.network_operations.len(), 1);
    }

    #[test]
    fn detects_eval() {
        let parsed = parse("eval $USER_INPUT\n");
        assert_eq!(parsed.dynamic_exec.len(), 1);
    }

    #[test]
    fn detects_pip_install() {
        let parsed = parse("pip install requests\n");
        assert_eq!(parsed.commands.len(), 1);
        assert!(parsed.commands[0].function.contains("package_install"));
    }
}