Skip to main content

lean_ctx/core/shell_allowlist/
mod.rs

1//! Shell allowlist with AST-based command parsing.
2//!
3//! Security model (Information Bottleneck principle):
4//! - When allowlist is set: ALL segments of a compound command must be allowed (deny-by-default)
5//! - When empty: all commands pass (backwards-compatible blocklist-only mode)
6//! - Dangerous patterns (`eval`/`exec`/`source`/`.`, and `$()` or backticks at command position) are blocked in both modes
7
8#[cfg(test)]
9mod tests;
10
11/// Checks if a command is allowed by the shell allowlist.
12/// Returns `Ok(())` if allowed, `Err(message)` if blocked.
13///
14/// When the allowlist is empty, all commands pass (blocklist-only mode).
15/// When non-empty, EVERY command segment in the pipeline must match.
16pub fn check_shell_allowlist(command: &str) -> Result<(), String> {
17    let normalized = normalize_line_continuations(command);
18    let cmd = normalized.as_str();
19
20    if has_dangerous_patterns(cmd) {
21        return Err(format!(
22            "[BLOCKED — DO NOT RETRY] Command uses eval or $()/ backticks at command position, \
23             which is blocked regardless of allowlist. \
24             This is a permanent security restriction, not a transient error.\n\
25             Command: {command}"
26        ));
27    }
28
29    check_substitution_in_args(cmd);
30    check_pipe_to_bare_interpreter(cmd);
31
32    let allowlist = effective_allowlist();
33    if allowlist.is_empty() {
34        check_unconditional_blocked_only(cmd)?;
35        return Ok(());
36    }
37    check_all_segments(cmd, &allowlist)
38}
39
/// Normalize the command string before it is split into segments.
///
/// * A backslash immediately followed by a newline (`\n` or `\r\n`) is a shell
///   line continuation and is removed together with the newline.
/// * Any other backslash is copied together with the character it escapes, so
///   an *escaped* backslash can never be mistaken for the start of a
///   continuation. Without this, `echo a\\` followed by a newline and a second
///   command would be glued into one line and the second command would escape
///   inspection.
/// * Unicode line separators (U+2028, U+2029) become plain newlines.
fn normalize_line_continuations(command: &str) -> String {
    // Unicode line/paragraph separators are treated like a newline.
    let fold_separator = |c: char| match c {
        '\u{2028}' | '\u{2029}' => '\n',
        other => other,
    };

    let mut out = String::with_capacity(command.len());
    let mut chars = command.chars().peekable();
    while let Some(c) = chars.next() {
        if c != '\\' {
            out.push(fold_separator(c));
            continue;
        }
        match chars.next() {
            // `\` + LF: line continuation, drop both.
            Some('\n') => {}
            // `\` + CRLF: line continuation, drop all three.
            Some('\r') if chars.peek() == Some(&'\n') => {
                chars.next();
            }
            // Ordinary escape pair (including `\\`): keep it verbatim so the
            // second character cannot start a new escape.
            Some(escaped) => {
                out.push('\\');
                out.push(fold_separator(escaped));
            }
            // Trailing backslash with nothing after it.
            None => out.push('\\'),
        }
    }
    out
}
48
49/// WARN-FIRST: Log warning (or block if strict) for $(), backticks, <() in arguments.
50fn check_substitution_in_args(command: &str) {
51    let strict = crate::core::config::Config::load().shell_strict_mode;
52    if has_unquoted_substitution_in_args(command) {
53        if strict {
54            tracing::warn!(
55                "[SECURITY] Command substitution in arguments blocked (shell_strict_mode=true): {command}"
56            );
57        } else {
58            tracing::warn!(
59                "[SECURITY] Command substitution in arguments detected (warn-only, set shell_strict_mode=true to block): {command}"
60            );
61        }
62    }
63}
64
/// Returns `true` when `$(`, a backtick, `<(` or `>(` appears after the first
/// unquoted blank of `command`, outside single and double quotes.
///
/// Everything before the first unquoted space or tab is treated as the command
/// word and is not inspected here (command-position substitutions are handled
/// by `has_substitution_at_command_pos`). Text inside double quotes is skipped
/// as well, so `"$(x)"` is deliberately not reported by this scan.
///
/// Backslash escapes are honoured: inside double quotes and outside quotes a
/// backslash consumes the following byte. This keeps the quote state correct
/// for inputs such as `"a\\"` (escaped backslash before the closing quote) and
/// `\"` (literal quote character), which would otherwise hide every
/// substitution that follows them.
fn has_unquoted_substitution_in_args(command: &str) -> bool {
    let bytes = command.as_bytes();
    let len = bytes.len();
    let mut i = 0;
    let mut in_single_quote = false;
    let mut in_double_quote = false;
    // Set at the first unquoted blank; from then on we are scanning arguments.
    let mut in_args = false;

    while i < len {
        let ch = bytes[i];

        if in_single_quote {
            // No escapes exist inside single quotes.
            if ch == b'\'' {
                in_single_quote = false;
            }
            i += 1;
            continue;
        }

        if in_double_quote {
            match ch {
                // Skip the escaped byte so `\"` and `\\` cannot end/extend the string wrongly.
                b'\\' => i += 2,
                b'"' => {
                    in_double_quote = false;
                    i += 1;
                }
                _ => i += 1,
            }
            continue;
        }

        match ch {
            // An escaped byte is literal: it neither opens a quote nor a substitution.
            b'\\' => i += 2,
            b'\'' => {
                in_single_quote = true;
                i += 1;
            }
            b'"' => {
                in_double_quote = true;
                i += 1;
            }
            b' ' | b'\t' => {
                in_args = true;
                i += 1;
            }
            // Still inside the command word.
            _ if !in_args => i += 1,
            b'$' if bytes.get(i + 1) == Some(&b'(') => return true,
            b'`' => return true,
            b'<' | b'>' if bytes.get(i + 1) == Some(&b'(') => return true,
            _ => i += 1,
        }
    }
    false
}
124
125/// WARN-FIRST: Log warning for piping into bare interpreter (no script file).
126fn check_pipe_to_bare_interpreter(command: &str) {
127    let segments = split_on_operators(command);
128    let pipe_indices: Vec<usize> = {
129        let mut indices = Vec::new();
130        let bytes = command.as_bytes();
131        let len = bytes.len();
132        let mut j = 0;
133        let mut in_sq = false;
134        let mut in_dq = false;
135        while j < len {
136            if in_sq {
137                if bytes[j] == b'\'' {
138                    in_sq = false;
139                }
140                j += 1;
141                continue;
142            }
143            if in_dq {
144                if bytes[j] == b'"' && (j == 0 || bytes[j - 1] != b'\\') {
145                    in_dq = false;
146                }
147                j += 1;
148                continue;
149            }
150            match bytes[j] {
151                b'\'' => {
152                    in_sq = true;
153                    j += 1;
154                }
155                b'"' => {
156                    in_dq = true;
157                    j += 1;
158                }
159                b'|' if j + 1 < len && bytes[j + 1] != b'|' => {
160                    indices.push(j);
161                    j += 1;
162                }
163                _ => {
164                    j += 1;
165                }
166            }
167        }
168        indices
169    };
170    let _ = pipe_indices;
171
172    for (idx, seg) in segments.iter().enumerate() {
173        if idx == 0 {
174            continue;
175        }
176        if is_bare_interpreter_stdin(seg) {
177            let base = extract_base_from_segment(seg);
178            let strict = crate::core::config::Config::load().shell_strict_mode;
179            if strict {
180                tracing::warn!(
181                    "[SECURITY] Pipe to bare interpreter '{base}' blocked (shell_strict_mode=true)"
182                );
183            } else {
184                tracing::warn!("[SECURITY] Pipe to bare interpreter '{base}' detected (warn-only)");
185            }
186        }
187    }
188}
189
190/// For empty allowlists: still enforce UNCONDITIONAL_BLOCKED commands.
191fn check_unconditional_blocked_only(command: &str) -> Result<(), String> {
192    let segments = extract_all_commands(command);
193    for seg in &segments {
194        let base = extract_base_from_segment(seg);
195        if !base.is_empty() && UNCONDITIONAL_BLOCKED.contains(&base.as_str()) {
196            return Err(format!(
197                "[BLOCKED — DO NOT RETRY] '{base}' is unconditionally blocked \
198                 regardless of allowlist configuration.\n\
199                 Command: {command}"
200            ));
201        }
202        check_inline_env_block(seg)?;
203        check_interpreter_eval_only(seg)?;
204        check_dangerous_flags(seg)?;
205    }
206    Ok(())
207}
208
/// Splits a command segment into words, honouring single quotes, double quotes
/// and backslash escapes.
///
/// Quote characters that delimit a quoted section are removed from the output.
/// A backslash outside single quotes is dropped and the character after it is
/// taken literally (a trailing backslash is simply dropped). Words are
/// separated by unquoted whitespace; empty words are never emitted.
///
/// E.g. `git -C "Program Files" status` → `["git", "-C", "Program Files", "status"]`
pub fn shell_tokenize(input: &str) -> Vec<String> {
    #[derive(Clone, Copy)]
    enum Quote {
        Off,
        Single,
        Double,
    }

    let mut words: Vec<String> = Vec::new();
    let mut word = String::new();
    let mut quote = Quote::Off;
    // True when the previous character was an active backslash.
    let mut take_literally = false;

    for ch in input.chars() {
        if take_literally {
            word.push(ch);
            take_literally = false;
            continue;
        }
        match (quote, ch) {
            (Quote::Off, '\'') => quote = Quote::Single,
            (Quote::Single, '\'') => quote = Quote::Off,
            (Quote::Off, '"') => quote = Quote::Double,
            (Quote::Double, '"') => quote = Quote::Off,
            (Quote::Off | Quote::Double, '\\') => take_literally = true,
            (Quote::Off, space) if space.is_whitespace() => {
                if !word.is_empty() {
                    words.push(std::mem::take(&mut word));
                }
            }
            (_, other) => word.push(other),
        }
    }

    if !word.is_empty() {
        words.push(word);
    }
    words
}
241
/// Byte offset at which the first word of `input` ends.
///
/// The word ends at the first ASCII whitespace byte that is neither inside
/// single/double quotes nor preceded by an active backslash; if there is no
/// such byte the full length of `input` is returned. `skip_env_assignments`
/// relies on this to step over assignments with quoted values such as
/// `FOO="bar baz"`.
fn quote_aware_token_end(input: &str) -> usize {
    let mut single = false;
    let mut double = false;
    // Set by a backslash outside single quotes: the next byte is consumed as-is.
    let mut skip_next = false;

    for (pos, &byte) in input.as_bytes().iter().enumerate() {
        if skip_next {
            skip_next = false;
            continue;
        }
        match byte {
            b'\'' if !double => single = !single,
            b'"' if !single => double = !double,
            b'\\' if !single => skip_next = true,
            blank if !single && !double && blank.is_ascii_whitespace() => return pos,
            _ => {}
        }
    }
    input.len()
}
272
273/// Like `check_interpreter_abuse` but only checks for eval flags on interpreters.
274/// Skips delegation-command checks (which require an allowlist for membership test).
275/// Used in blocklist-only mode where there is no allowlist.
276fn check_interpreter_eval_only(segment: &str) -> Result<(), String> {
277    let trimmed = skip_env_assignments(segment.trim());
278    let tokens = shell_tokenize(trimmed);
279    if tokens.is_empty() {
280        return Ok(());
281    }
282    let base = tokens[0]
283        .rsplit('/')
284        .next()
285        .unwrap_or(&tokens[0])
286        .to_string();
287    if !INTERPRETER_COMMANDS.contains(&base.as_str()) {
288        return Ok(());
289    }
290    for tok in &tokens[1..] {
291        if EVAL_FLAGS.contains(&tok.as_str()) {
292            return Err(format!(
293                "[BLOCKED — DO NOT RETRY] Interpreter '{base}' with inline code execution \
294                 flag '{tok}' is blocked. Use a script file instead.\n\
295                 This is a permanent security restriction."
296            ));
297        }
298        if has_eval_flag_prefix(tok) {
299            return Err(format!(
300                "[BLOCKED — DO NOT RETRY] Interpreter '{base}' with combined flag '{tok}' \
301                 containing eval flag is blocked.\n\
302                 This is a permanent security restriction."
303            ));
304        }
305    }
306    if tokens[1..].iter().any(|t| t.contains("<<")) {
307        return Err(format!(
308            "[BLOCKED — DO NOT RETRY] Interpreter '{base}' with heredoc stdin is blocked. \
309             Use a script file instead.\n\
310             This is a permanent security restriction."
311        ));
312    }
313    Ok(())
314}
315
/// Commands that are unconditionally blocked regardless of allowlist membership.
/// These provide direct arbitrary code execution or re-enter the shell.
/// Compared against the base command name of each segment (and scanned for
/// textually by `has_dangerous_patterns`).
const UNCONDITIONAL_BLOCKED: &[&str] = &["eval", "exec", "source", "."];

/// Interpreters that can execute arbitrary code via -c/-e flags.
/// Matched against the base command name, i.e. the part of the first token
/// after the last `/`.
const INTERPRETER_COMMANDS: &[&str] = &[
    "python", "python3", "python2", "node", "ruby", "perl", "lua", "php", "bash", "sh", "zsh",
    "fish", "dash", "ksh",
];

/// Flags that indicate inline code execution for interpreters.
/// Each argument token of an interpreter segment is compared against this list
/// by exact equality; combined short flags are handled separately by
/// `has_eval_flag_prefix`.
const EVAL_FLAGS: &[&str] = &[
    "-c", "-e", "-r", "-p", "--eval", "--exec", "-exec", "--print", "--run",
];

/// Script file extensions that indicate a file argument (not stdin execution).
/// `is_bare_interpreter_stdin` treats a non-flag argument ending in one of
/// these suffixes as a script file.
const SCRIPT_EXTENSIONS: &[&str] = &[
    ".py", ".rb", ".js", ".ts", ".pl", ".lua", ".php", ".sh", ".bash", ".zsh", ".mjs", ".cjs",
    ".tsx", ".jsx",
];

/// Commands that delegate to another command (the delegated command must also be allowed).
/// Consulted by `check_interpreter_abuse_inner`, which looks up the delegated
/// command in the allowlist and then re-checks the remaining arguments.
const DELEGATION_COMMANDS: &[&str] = &["env", "nice", "timeout", "sudo", "doas"];
339
/// Check if a segment uses an interpreter with an eval flag, or a delegation command
/// whose target is not in the allowlist.
///
/// Thin entry point: starts the recursive check in
/// `check_interpreter_abuse_inner` at delegation depth 0 and returns its result.
fn check_interpreter_abuse(segment: &str, allowlist: &[String]) -> Result<(), String> {
    check_interpreter_abuse_inner(segment, allowlist, 0)
}
345
346fn check_interpreter_abuse_inner(
347    segment: &str,
348    allowlist: &[String],
349    depth: usize,
350) -> Result<(), String> {
351    if depth > 3 {
352        return Ok(());
353    }
354    let trimmed = skip_env_assignments(segment.trim());
355    let tokens = shell_tokenize(trimmed);
356    if tokens.is_empty() {
357        return Ok(());
358    }
359
360    let base = tokens[0]
361        .rsplit('/')
362        .next()
363        .unwrap_or(&tokens[0])
364        .to_string();
365
366    if INTERPRETER_COMMANDS.contains(&base.as_str()) {
367        for tok in &tokens[1..] {
368            if EVAL_FLAGS.contains(&tok.as_str()) {
369                return Err(format!(
370                    "[BLOCKED — DO NOT RETRY] Interpreter '{base}' with inline code execution \
371                     flag '{tok}' is blocked. Use a script file instead.\n\
372                     This is a permanent security restriction."
373                ));
374            }
375            if has_eval_flag_prefix(tok) {
376                return Err(format!(
377                    "[BLOCKED — DO NOT RETRY] Interpreter '{base}' with combined flag '{tok}' \
378                     containing eval flag is blocked.\n\
379                     This is a permanent security restriction."
380                ));
381            }
382        }
383        if tokens[1..].iter().any(|t| t.contains("<<")) {
384            return Err(format!(
385                "[BLOCKED — DO NOT RETRY] Interpreter '{base}' with heredoc stdin is blocked. \
386                 Use a script file instead.\n\
387                 This is a permanent security restriction."
388            ));
389        }
390    }
391
392    if DELEGATION_COMMANDS.contains(&base.as_str()) {
393        let rest_tokens: Vec<&str> = tokens[1..]
394            .iter()
395            .map(std::string::String::as_str)
396            .skip_while(|t| t.starts_with('-') || t.contains('='))
397            .collect();
398        if let Some(&delegated_tok) = rest_tokens.first() {
399            let delegated = delegated_tok.rsplit('/').next().unwrap_or(delegated_tok);
400            if !delegated.is_empty() && !allowlist.iter().any(|a| a == delegated) {
401                return Err(format!(
402                    "[BLOCKED — DO NOT RETRY] '{base}' delegates to '{delegated}' which is not \
403                     in the shell allowlist. This is a permanent restriction."
404                ));
405            }
406            let rest_str = rest_tokens.join(" ");
407            check_interpreter_abuse_inner(&rest_str, allowlist, depth + 1)?;
408        }
409    }
410
411    Ok(())
412}
413
/// Detects combined short flags such as `-pe`, `-ne` or `-ce`.
///
/// Returns `true` for a token that starts with a single `-`, is at least three
/// bytes long, and contains one of the letters `c`, `e`, `r` or `p` after the
/// dash. Long options (`--...`) and two-character flags are never matched here.
fn has_eval_flag_prefix(token: &str) -> bool {
    if token.len() < 3 || token.starts_with("--") {
        return false;
    }
    match token.strip_prefix('-') {
        Some(letters) => letters.chars().any(|ch| matches!(ch, 'c' | 'e' | 'r' | 'p')),
        None => false,
    }
}
423
424/// Check if a segment is a bare interpreter after a pipe (no script file argument).
425fn is_bare_interpreter_stdin(segment: &str) -> bool {
426    let trimmed = skip_env_assignments(segment.trim());
427    let tokens = shell_tokenize(trimmed);
428    if tokens.is_empty() {
429        return false;
430    }
431    let base = tokens[0]
432        .rsplit('/')
433        .next()
434        .unwrap_or(&tokens[0])
435        .to_string();
436    if !INTERPRETER_COMMANDS.contains(&base.as_str()) {
437        return false;
438    }
439    !tokens[1..]
440        .iter()
441        .any(|t| !t.starts_with('-') && SCRIPT_EXTENSIONS.iter().any(|ext| t.ends_with(ext)))
442}
443
/// Dangerous flag patterns for specific commands.
/// Entries for `git`; `check_dangerous_flags` matches each one as a prefix of
/// an argument token.
const DANGEROUS_GIT_FLAGS: &[&str] = &[
    "--upload-pack",
    "--receive-pack",
    "--config=core.sshcommand",
    "--config=core.gitproxy",
];

/// Entries for `tar`, matched by `check_dangerous_flags` as a prefix of an
/// argument token.
const DANGEROUS_TAR_FLAGS: &[&str] = &["--to-command", "--use-compress-program"];

/// Blocked inline environment assignments that can hijack execution.
/// Each entry is a `NAME=` prefix; `check_inline_env_block` rejects segments
/// that set one of these variables inline.
const BLOCKED_INLINE_ENV: &[&str] = &[
    "PATH=",
    "GIT_ASKPASS=",
    "GIT_SSH=",
    "GIT_SSH_COMMAND=",
    "GIT_EDITOR=",
    "GIT_EXTERNAL_DIFF=",
    "SSH_ASKPASS=",
    "LD_PRELOAD=",
    "DYLD_INSERT_LIBRARIES=",
];
466
467fn check_dangerous_flags(segment: &str) -> Result<(), String> {
468    let trimmed = skip_env_assignments(segment.trim());
469    let tokens = shell_tokenize(trimmed);
470    if tokens.is_empty() {
471        return Ok(());
472    }
473    let base = tokens[0]
474        .rsplit('/')
475        .next()
476        .unwrap_or(&tokens[0])
477        .to_string();
478
479    match base.as_str() {
480        "git" => {
481            for tok in &tokens[1..] {
482                for flag in DANGEROUS_GIT_FLAGS {
483                    if tok.starts_with(flag) {
484                        return Err(format!(
485                            "[BLOCKED — DO NOT RETRY] 'git' with dangerous flag '{tok}' is blocked.\n\
486                             This is a permanent security restriction."
487                        ));
488                    }
489                }
490            }
491        }
492        "tar" => {
493            for tok in &tokens[1..] {
494                for flag in DANGEROUS_TAR_FLAGS {
495                    if tok.starts_with(flag) {
496                        return Err(format!(
497                            "[BLOCKED — DO NOT RETRY] 'tar' with dangerous flag '{tok}' is blocked.\n\
498                             This is a permanent security restriction."
499                        ));
500                    }
501                }
502            }
503        }
504        "find" => {
505            for tok in &tokens[1..] {
506                if tok == "-exec" || tok == "-execdir" {
507                    return Err(format!(
508                        "[BLOCKED — DO NOT RETRY] 'find' with '{tok}' is blocked. \
509                         Use 'find ... -print' and pipe to xargs instead.\n\
510                         This is a permanent security restriction."
511                    ));
512                }
513            }
514        }
515        "awk" | "gawk" | "mawk" => {
516            for tok in &tokens[1..] {
517                if tok.contains("system(") {
518                    return Err(format!(
519                        "[BLOCKED — DO NOT RETRY] '{base}' with 'system()' call is blocked.\n\
520                         This is a permanent security restriction."
521                    ));
522                }
523            }
524        }
525        _ => {}
526    }
527    Ok(())
528}
529
530fn check_inline_env_block(segment: &str) -> Result<(), String> {
531    let trimmed = segment.trim();
532    for blocked in BLOCKED_INLINE_ENV {
533        if trimmed.starts_with(blocked) {
534            return Err(format!(
535                "[BLOCKED — DO NOT RETRY] Inline environment override '{blocked}' is blocked.\n\
536                 This is a permanent security restriction."
537            ));
538        }
539    }
540    Ok(())
541}
542
543fn check_all_segments(command: &str, allowlist: &[String]) -> Result<(), String> {
544    if allowlist.is_empty() {
545        return Ok(());
546    }
547
548    if has_dangerous_patterns(command) {
549        return Err(format!(
550            "[BLOCKED — DO NOT RETRY] Command uses eval or $()/ backticks at command position, \
551             which is blocked in restricted mode. \
552             This is a permanent security restriction, not a transient error.\n\
553             Command: {command}"
554        ));
555    }
556
557    let segments = extract_all_commands(command);
558    if segments.is_empty() {
559        return Err("[BLOCKED — DO NOT RETRY] Empty command".to_string());
560    }
561
562    for seg in &segments {
563        check_inline_env_block(seg)?;
564        let base = extract_base_from_segment(seg);
565        if base.is_empty() {
566            continue;
567        }
568        if UNCONDITIONAL_BLOCKED.contains(&base.as_str()) {
569            return Err(format!(
570                "[BLOCKED — DO NOT RETRY] '{base}' is unconditionally blocked \
571                 regardless of allowlist membership. \
572                 This is a permanent security restriction.\n\
573                 Command: {command}"
574            ));
575        }
576        check_interpreter_abuse(seg, allowlist)?;
577        check_dangerous_flags(seg)?;
578        if !allowlist.iter().any(|a| a == &base) {
579            return Err(allowlist_block_message(&base));
580        }
581    }
582    Ok(())
583}
584
585/// Detect dangerous shell patterns that bypass allowlist intent.
586///
587/// Only blocks patterns that are genuinely dangerous at command position.
588/// `$()` and backticks in *arguments* are allowed — the base command is
589/// already validated by the allowlist, and blocking substitutions in
590/// arguments breaks legitimate workflows (e.g. `git commit -m "$(cat ...)"`,
591/// pre-commit hooks, playwright scripts).
592fn has_dangerous_patterns(command: &str) -> bool {
593    let trimmed = command.trim();
594
595    for blocked in UNCONDITIONAL_BLOCKED {
596        let with_space = format!("{blocked} ");
597        if trimmed.starts_with(&with_space) {
598            return true;
599        }
600        for sep in ["; ", "&& ", "|| ", "| ", "\n"] {
601            if trimmed.contains(&format!("{sep}{blocked} ")) {
602                return true;
603            }
604        }
605    }
606
607    if has_substitution_at_command_pos(trimmed) {
608        return true;
609    }
610
611    false
612}
613
614/// Check if `$()` or backticks appear at command position (first token
615/// of any segment). Substitutions in *arguments* are intentionally
616/// allowed — the security boundary is the base-command allowlist check.
617fn has_substitution_at_command_pos(command: &str) -> bool {
618    let segments = split_on_operators(command);
619    for seg in segments {
620        let trimmed = seg.trim();
621        let cmd_start = skip_env_assignments(trimmed);
622
623        if cmd_start.starts_with("$(") {
624            return true;
625        }
626
627        let tokens = shell_tokenize(cmd_start);
628        let first_token = tokens.first().map_or("", std::string::String::as_str);
629        if first_token.starts_with('`') || first_token == "`" {
630            return true;
631        }
632    }
633    false
634}
635
636/// Extract ALL command segments from a compound shell command.
637/// Splits on: &&, ||, ;, | (pipe), and handles subshell grouping.
638fn extract_all_commands(command: &str) -> Vec<String> {
639    split_on_operators(command)
640        .into_iter()
641        .map(|s| s.trim().to_string())
642        .filter(|s| !s.is_empty())
643        .collect()
644}
645
/// Split a command string on the shell operators `;`, `&&`, `||`, `|`, `&`
/// and on line breaks, returning the raw (untrimmed) segments between them.
///
/// Operators are ignored inside single quotes, inside double quotes and while
/// inside parentheses. Redirections that contain `&` (`2>&1`, `>&file`,
/// `&>file`) are not treated as separators.
///
/// Backslash escapes are honoured, because getting the quote state wrong means
/// a later command is never inspected:
/// * inside double quotes a backslash consumes the next byte, so `"a\\"` ends
///   at its closing quote and `"a\"b"` does not;
/// * outside quotes a backslash consumes the next byte as well, so `\"`, `\'`
///   and `\(` do not open a quote or a group, and `\;` / `\|` / `\&` are
///   literal characters rather than separators.
fn split_on_operators(command: &str) -> Vec<&str> {
    let mut segments = Vec::new();
    let mut start = 0;
    let bytes = command.as_bytes();
    let len = bytes.len();
    let mut i = 0;
    let mut in_single_quote = false;
    let mut in_double_quote = false;
    let mut paren_depth: u32 = 0;

    while i < len {
        let ch = bytes[i];

        if in_single_quote {
            // No escapes exist inside single quotes.
            if ch == b'\'' {
                in_single_quote = false;
            }
            i += 1;
            continue;
        }

        if in_double_quote {
            match ch {
                // Skip the escaped byte; it can neither close the string nor
                // start another escape.
                b'\\' => i += 2,
                b'"' => {
                    in_double_quote = false;
                    i += 1;
                }
                _ => i += 1,
            }
            continue;
        }

        match ch {
            // Escaped byte outside quotes: literal, never an operator or quote.
            // All split points below are ASCII bytes, so skipping into the
            // middle of a multi-byte character cannot produce a bad slice.
            b'\\' => {
                i += 2;
            }
            b'\'' => {
                in_single_quote = true;
                i += 1;
            }
            b'"' => {
                in_double_quote = true;
                i += 1;
            }
            b'(' => {
                paren_depth += 1;
                i += 1;
            }
            b')' => {
                paren_depth = paren_depth.saturating_sub(1);
                i += 1;
            }
            b'\n' | b'\r' | b';' if paren_depth == 0 => {
                segments.push(&command[start..i]);
                i += 1;
                start = i;
            }
            b'&' if paren_depth == 0 => {
                if i + 1 < len && bytes[i + 1] == b'&' {
                    // &&
                    segments.push(&command[start..i]);
                    i += 2;
                    start = i;
                } else if (i > 0 && bytes[i - 1] == b'>') || (i + 1 < len && bytes[i + 1] == b'>') {
                    // Redirect operator, NOT a separator: `2>&1`, `1>&2`, `>&file` (prev is '>')
                    // or `&>file`, `&>>file` (next is '>'). The '&' belongs to the current
                    // command — splitting here would mistake the fd/target (e.g. `1`) for a
                    // standalone command and falsely block it (#334).
                    i += 1;
                } else {
                    // single & (background operator) — still a command separator
                    segments.push(&command[start..i]);
                    i += 1;
                    start = i;
                }
            }
            b'|' if paren_depth == 0 => {
                // `||` is two bytes wide, a plain pipe one.
                let width = if i + 1 < len && bytes[i + 1] == b'|' { 2 } else { 1 };
                segments.push(&command[start..i]);
                i += width;
                start = i;
            }
            _ => {
                i += 1;
            }
        }
    }

    if start < len {
        segments.push(&command[start..]);
    }

    segments
}
743
744/// Extract the base command name from a single segment (no operators).
745fn extract_base_from_segment(segment: &str) -> String {
746    let trimmed = segment.trim();
747    if trimmed.is_empty() {
748        return String::new();
749    }
750
751    let cmd_part = skip_env_assignments(trimmed);
752    if cmd_part.is_empty() {
753        return String::new();
754    }
755
756    let tokens = shell_tokenize(cmd_part);
757    let first_token = tokens.first().map_or("", std::string::String::as_str);
758
759    first_token
760        .rsplit('/')
761        .next()
762        .unwrap_or(first_token)
763        .to_string()
764}
765
766/// Skip leading KEY=VALUE environment variable assignments.
767/// Uses quote-aware scanning so `FOO="bar baz" git status` correctly
768/// skips the entire `FOO="bar baz"` token.
769fn skip_env_assignments(segment: &str) -> &str {
770    let mut rest = segment;
771    loop {
772        let rest_trimmed = rest.trim_start();
773        if rest_trimmed.is_empty() {
774            return rest_trimmed;
775        }
776        let end = quote_aware_token_end(rest_trimmed);
777        if end == 0 {
778            return rest_trimmed;
779        }
780        let raw_token = &rest_trimmed[..end];
781        let unquoted: String = raw_token
782            .chars()
783            .filter(|c| *c != '"' && *c != '\'')
784            .collect();
785        if unquoted.contains('=')
786            && !unquoted.starts_with('-')
787            && !unquoted.starts_with('/')
788            && !unquoted.starts_with('.')
789        {
790            rest = &rest_trimmed[end..];
791        } else {
792            return rest_trimmed;
793        }
794    }
795}
796
797fn effective_allowlist() -> Vec<String> {
798    // LEAN_CTX_SHELL_ALLOWLIST_OVERRIDE completely replaces the config (for testing)
799    if let Ok(ov) = std::env::var("LEAN_CTX_SHELL_ALLOWLIST_OVERRIDE") {
800        return ov
801            .split(',')
802            .map(|s| s.trim().to_string())
803            .filter(|s| !s.is_empty())
804            .collect();
805    }
806    let cfg = crate::core::config::Config::load();
807    let mut list = cfg.shell_allowlist;
808    // `shell_allowlist_extra` is purely additive (written by `lean-ctx allow <cmd>`),
809    // so users can permit a command without nuking the built-in defaults. It only
810    // matters in restricted mode — when the base list is empty all commands pass anyway.
811    if !list.is_empty() {
812        for entry in cfg.shell_allowlist_extra {
813            if !entry.is_empty() && !list.contains(&entry) {
814                list.push(entry);
815            }
816        }
817    }
818    if let Ok(env_val) = std::env::var("LEAN_CTX_SHELL_ALLOWLIST") {
819        for entry in env_val
820            .split(',')
821            .map(|s| s.trim().to_string())
822            .filter(|s| !s.is_empty())
823        {
824            if !list.contains(&entry) {
825                list.push(entry);
826            }
827        }
828    }
829    list
830}
831
832/// Builds the actionable, self-diagnosing message shown when a command's base binary
833/// is not in the allowlist. Unlike a bare "not allowed" string, it tells the user
834/// (1) the exact additive fix, (2) the real config path the MCP server reads, and
835/// (3) — crucially — whether their `config.toml` silently failed to parse (in which
836/// case lean-ctx is on defaults, which is the usual reason an allowlist edit "did
837/// nothing"). That last signal is otherwise invisible over an MCP/stdio transport.
838fn allowlist_block_message(base: &str) -> String {
839    let cfg_path = crate::core::config::Config::path().map_or_else(
840        || "~/.lean-ctx/config.toml".to_string(),
841        |p| p.display().to_string(),
842    );
843
844    let mut msg = format!(
845        "[BLOCKED — DO NOT RETRY] '{base}' is not in the shell allowlist. \
846         This is a permanent restriction, not a transient error.\n\
847         Fix (additive, keeps the defaults): run  lean-ctx allow {base}\n\
848         Config in effect: {cfg_path}\n\
849         Or disable the allowlist entirely: set  shell_allowlist = []\n\
850         Do NOT retry this command — it will fail again with the same error."
851    );
852
853    if let Some(parse_err) = crate::core::config::last_config_parse_error() {
854        msg.push_str(&format!(
855            "\n\n⚠ Your config.toml currently FAILS to parse, so lean-ctx is running on the \
856             built-in defaults — this is almost certainly why editing the allowlist had no \
857             effect. Fix the TOML error below, then retry:\n  {parse_err}\n  File: {cfg_path}"
858        ));
859    }
860
861    msg
862}
863
/// Public accessor for extracting all command segments.
///
/// Forwards to the private `extract_all_commands`: the result holds the
/// trimmed, non-empty segments of `command`.
pub fn extract_all_commands_pub(command: &str) -> Vec<String> {
    extract_all_commands(command)
}
868
/// Public accessor: the fully-resolved allowlist actually enforced by the MCP tools
/// (base `shell_allowlist` + additive `shell_allowlist_extra` + env), deduplicated.
/// Empty means blocklist-only mode (all commands pass). Used by `lean-ctx allow`
/// and `lean-ctx doctor` to show users exactly what the runtime sees.
///
/// Forwards to the private `effective_allowlist`.
#[must_use]
pub fn effective_allowlist_pub() -> Vec<String> {
    effective_allowlist()
}
877
878// Legacy compat: single-segment extraction (used by other callers)
879pub fn extract_base_command(command: &str) -> String {
880    let first_seg = split_on_operators(command)
881        .into_iter()
882        .next()
883        .unwrap_or(command);
884    extract_base_from_segment(first_seg)
885}