Skip to main content

bnto_shell/
validate.rs

// Security validation for shell-command parameters.
//
// This module is the security boundary between recipe JSON and system
// command execution. Every command passes through these checks before
// reaching ProcessContext::run_command().
6
/// Names of shell interpreters that a recipe may never use as its command.
///
/// Running a shell would defeat the argument separation that
/// `std::process::Command` provides, because the shell re-parses its own
/// arguments (e.g., `bash -c "rm -rf /"`).
///
/// Entries are lowercase; `validate_command` lowercases the candidate
/// command before comparing against this list.
const SHELL_DENYLIST: &[&str] = &[
    // Unix shells
    "bash",
    "sh",
    "zsh",
    "fish",
    "dash",
    "csh",
    "tcsh",
    "ksh",
    // Windows command interpreters
    "cmd",
    "cmd.exe",
    "powershell",
    "pwsh",
];
24
/// Environment variables that must never be set by recipe JSON.
/// These can hijack the subprocess in dangerous ways:
///   - LD_PRELOAD / LD_AUDIT / LD_LIBRARY_PATH / DYLD_*: make the dynamic
///     loader load arbitrary shared libraries
///   - PATH: redirect binary resolution to attacker-controlled dirs
///   - HOME / SHELL / TMPDIR: alter tool behavior unpredictably
///
/// Entries are uppercase; `sanitize_env` uppercases each key before the
/// lookup and additionally strips every key starting with `DYLD_`, so the
/// DYLD entries listed here are the well-known ones, not the full set.
const DENIED_ENV_VARS: &[&str] = &[
    "LD_PRELOAD",
    // LD_AUDIT also makes the glibc loader load caller-chosen shared objects.
    "LD_AUDIT",
    "LD_LIBRARY_PATH",
    "DYLD_INSERT_LIBRARIES",
    "DYLD_LIBRARY_PATH",
    "DYLD_FRAMEWORK_PATH",
    "PATH",
    "HOME",
    "SHELL",
    "TMPDIR",
];
41
/// Default size cap for a single output file, expressed in megabytes.
///
/// Nodes can override it through the `maxOutputSize` parameter (also in MB);
/// use `max_output_bytes` to turn the value into a byte count.
pub const DEFAULT_MAX_OUTPUT_MB: u64 = 500;
45
/// Convert a megabyte value to bytes for comparison.
///
/// One megabyte is treated as 1024 * 1024 bytes. The result saturates at
/// `usize::MAX` instead of overflowing or truncating, so an absurdly large
/// (e.g. recipe-supplied) limit can never wrap around into a tiny one.
pub fn max_output_bytes(mb: u64) -> usize {
    const BYTES_PER_MB: u64 = 1024 * 1024;

    // Multiply in u64 first so 32-bit targets do not truncate `mb` early.
    let bytes = mb.saturating_mul(BYTES_PER_MB);

    // Fully-qualified `TryFrom` keeps this independent of the edition prelude.
    <usize as std::convert::TryFrom<u64>>::try_from(bytes).unwrap_or(usize::MAX)
}
50
/// How long, in seconds, a command may run when no timeout is specified.
pub const DEFAULT_TIMEOUT_SECS: u64 = 300;
53
54/// Validate that a command name is safe to execute.
55///
56/// Rejects:
57///   - Empty commands
58///   - Shell interpreters (bash, sh, zsh, powershell, etc.)
59///   - Paths (absolute, relative, or parent traversal)
60///
61/// Commands must be bare binary names resolved via PATH by the OS.
62pub fn validate_command(cmd: &str) -> Result<(), String> {
63    if cmd.is_empty() {
64        return Err("command must not be empty".to_string());
65    }
66
67    // Reject shell interpreters — they bypass argument separation safety
68    let cmd_lower = cmd.to_lowercase();
69    for shell in SHELL_DENYLIST {
70        if cmd_lower == *shell {
71            return Err(format!(
72                "'{cmd}' is a shell interpreter and cannot be used as a command. \
73                 Use the specific tool directly (e.g., 'ffmpeg', 'yt-dlp')"
74            ));
75        }
76    }
77
78    // Reject paths — commands must be bare binary names
79    if cmd.contains('/') || cmd.contains('\\') {
80        return Err(format!(
81            "'{cmd}' contains a path separator. Commands must be bare binary \
82             names (e.g., 'ffmpeg', not './ffmpeg' or '/usr/bin/ffmpeg')"
83        ));
84    }
85
86    // Reject hidden-dot prefix (e.g., ".malicious")
87    if cmd.starts_with('.') {
88        return Err(format!(
89            "'{cmd}' starts with a dot. Commands must be bare binary names"
90        ));
91    }
92
93    Ok(())
94}
95
96/// Remove dangerous environment variables from a recipe-provided env map.
97///
98/// Returns the sanitized map (dangerous vars silently stripped).
99/// This is intentionally silent — we don't want to leak information about
100/// which vars are blocked, and benign recipes won't set these.
101pub fn sanitize_env(
102    env: &serde_json::Map<String, serde_json::Value>,
103) -> serde_json::Map<String, serde_json::Value> {
104    env.iter()
105        .filter(|(key, _)| {
106            let key_upper = key.to_uppercase();
107            !DENIED_ENV_VARS.contains(&key_upper.as_str()) && !key_upper.starts_with("DYLD_")
108        })
109        .map(|(k, v)| (k.clone(), v.clone()))
110        .collect()
111}
112
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::{Map, Value};

    /// Build a recipe-style env map from `(name, value)` string pairs.
    fn env_of(pairs: &[(&str, &str)]) -> Map<String, Value> {
        pairs
            .iter()
            .map(|&(name, value)| (name.to_string(), Value::String(value.to_string())))
            .collect()
    }

    /// Fail the test unless `validate_command` refuses `cmd`.
    fn assert_rejected(cmd: &str) {
        assert!(
            validate_command(cmd).is_err(),
            "expected {cmd:?} to be rejected"
        );
    }

    /// Return the rejection message for `cmd`, panicking if it was accepted.
    fn rejection(cmd: &str) -> String {
        validate_command(cmd).unwrap_err()
    }

    // --- Shell denylist ---

    #[test]
    fn rejects_bash_command() {
        assert!(rejection("bash").contains("shell interpreter"));
    }

    #[test]
    fn rejects_sh_command() {
        assert_rejected("sh");
    }

    #[test]
    fn rejects_zsh_command() {
        assert_rejected("zsh");
    }

    #[test]
    fn rejects_powershell_command() {
        assert_rejected("powershell");
    }

    #[test]
    fn rejects_cmd_exe_command() {
        assert_rejected("cmd.exe");
    }

    #[test]
    fn rejects_pwsh_command() {
        assert_rejected("pwsh");
    }

    #[test]
    fn rejects_fish_command() {
        assert_rejected("fish");
    }

    #[test]
    fn rejects_shell_case_insensitive() {
        for cmd in ["BASH", "Sh", "PowerShell"] {
            assert_rejected(cmd);
        }
    }

    // --- Path validation ---

    #[test]
    fn rejects_relative_path_command() {
        assert!(rejection("./malicious").contains("path separator"));
    }

    #[test]
    fn rejects_absolute_path_command() {
        assert!(rejection("/tmp/evil").contains("path separator"));
    }

    #[test]
    fn rejects_parent_traversal_command() {
        assert_rejected("../../../bin/evil");
    }

    #[test]
    fn rejects_windows_path_command() {
        assert_rejected("C:\\Windows\\System32\\cmd.exe");
    }

    #[test]
    fn rejects_dot_prefixed_command() {
        assert!(rejection(".hidden-binary").contains("starts with a dot"));
    }

    // --- Empty / missing ---

    #[test]
    fn rejects_empty_command() {
        assert!(rejection("").contains("must not be empty"));
    }

    // --- Valid commands ---

    #[test]
    fn accepts_legitimate_binaries() {
        for cmd in ["ffmpeg", "yt-dlp", "convert", "curl", "python3"] {
            assert!(
                validate_command(cmd).is_ok(),
                "expected {cmd:?} to be accepted"
            );
        }
    }

    // --- Env var sanitization ---

    #[test]
    fn strips_ld_preload_from_env() {
        let env = env_of(&[("LD_PRELOAD", "/tmp/evil.so"), ("RUST_LOG", "debug")]);
        let sanitized = sanitize_env(&env);
        assert!(!sanitized.contains_key("LD_PRELOAD"));
        assert!(sanitized.contains_key("RUST_LOG"));
    }

    #[test]
    fn strips_path_from_env() {
        let sanitized = sanitize_env(&env_of(&[("PATH", "/tmp/malicious")]));
        assert!(!sanitized.contains_key("PATH"));
    }

    #[test]
    fn strips_dyld_vars_from_env() {
        let env = env_of(&[
            ("DYLD_INSERT_LIBRARIES", "/tmp/evil.dylib"),
            ("DYLD_FALLBACK_LIBRARY_PATH", "/tmp"),
        ]);
        let sanitized = sanitize_env(&env);
        assert!(!sanitized.contains_key("DYLD_INSERT_LIBRARIES"));
        assert!(!sanitized.contains_key("DYLD_FALLBACK_LIBRARY_PATH"));
    }

    #[test]
    fn strips_home_shell_tmpdir() {
        let env = env_of(&[("HOME", "/tmp"), ("SHELL", "/bin/sh"), ("TMPDIR", "/tmp")]);
        assert!(sanitize_env(&env).is_empty());
    }

    #[test]
    fn allows_safe_env_vars() {
        let env = env_of(&[("RUST_LOG", "debug"), ("FFMPEG_THREADS", "4")]);
        let sanitized = sanitize_env(&env);
        assert_eq!(sanitized.len(), 2);
        assert!(sanitized.contains_key("RUST_LOG"));
        assert!(sanitized.contains_key("FFMPEG_THREADS"));
    }

    #[test]
    fn env_var_denylist_is_case_insensitive() {
        let env = env_of(&[("ld_preload", "/tmp/evil.so"), ("path", "/tmp")]);
        assert!(sanitize_env(&env).is_empty());
    }
}