harness/
shell_risk.rs

1//! Static risk classification for shell commands.
2//!
3//! [`classify_shell_command`] inspects a bash command *before* it is
4//! dispatched to the sandbox and assigns one of four risk levels:
5//!
6//!   * [`ShellRiskLevel::SafeRead`] — provably read-only (display/inspect
7//!     commands, read-only git, pipelines/lists composed solely of such
8//!     commands). Safe to auto-approve as a read.
9//!   * [`ShellRiskLevel::BoundedWrite`] — writes, but only to well-known
10//!     project-local artifact locations (build/test caches: `cargo test`,
11//!     `go build`, `npm test`, …).
12//!   * [`ShellRiskLevel::NeedsApproval`] — everything we cannot statically
13//!     prove safe. This is the *default*: any parse failure, any shell
14//!     metacharacter we don't model, any unknown command lands here.
15//!   * [`ShellRiskLevel::Blocked`] — a short hard-deny list of operations
16//!     that would destroy the sandbox session itself (recursive deletion of
17//!     critical system paths, raw block-device writes, shutdown/reboot,
18//!     killing PID 1, fork bombs). These are rejected before dispatch.
19//!
20//! Design notes:
21//!
22//!   * Commands run inside a disposable sandbox, so the deny list is NOT a
23//!     host-security boundary (sandbox isolation is). It exists to stop a
24//!     weak model from accidentally killing its own session with an
25//!     unrecoverable command. It is deliberately short and precise; anything
26//!     ambiguous falls to `NeedsApproval` instead of `Blocked`.
27//!   * Classification is purely static and conservative. The safe-side
28//!     analysis refuses anything it cannot fully parse (command
29//!     substitution, globs, redirects, `||` chains — whose right side only
30//!     runs on failure and therefore can't be proven read-only statically).
31//!   * The deny-side analysis is the opposite: it scans best-effort through
32//!     constructs the safe side refuses (subshells, `&&`/`||`/`;`/`|`
33//!     chains, `sh -c '…'` wrappers) so destructive segments can't hide
34//!     behind syntax that merely fails the safe parse.
35
/// The verdict [`classify_shell_command`] reaches for a shell command.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ShellRiskLevel {
    /// The command is provably read-only and may be auto-approved.
    SafeRead,
    /// The command writes, but only to well-known project-local build/test
    /// artifacts.
    BoundedWrite,
    /// Nothing could be proven statically; this is the default outcome.
    NeedsApproval,
    /// The command is on the hard-deny list and must never be dispatched.
    Blocked,
}
48
49impl ShellRiskLevel {
50    pub fn as_str(&self) -> &'static str {
51        match self {
52            ShellRiskLevel::SafeRead => "safe_read",
53            ShellRiskLevel::BoundedWrite => "bounded_write",
54            ShellRiskLevel::NeedsApproval => "needs_approval",
55            ShellRiskLevel::Blocked => "blocked",
56        }
57    }
58}
59
60/// Classification result: a level plus a human-readable reason suitable for
61/// logging and for surfacing to the model when a command is blocked.
62#[derive(Debug, Clone, PartialEq, Eq)]
63pub struct ShellRiskDecision {
64    pub level: ShellRiskLevel,
65    pub reason: String,
66}
67
68fn safe_read(reason: &str) -> ShellRiskDecision {
69    ShellRiskDecision {
70        level: ShellRiskLevel::SafeRead,
71        reason: reason.to_string(),
72    }
73}
74
75fn bounded_write(reason: String) -> ShellRiskDecision {
76    ShellRiskDecision {
77        level: ShellRiskLevel::BoundedWrite,
78        reason,
79    }
80}
81
82fn needs_approval(reason: &str) -> ShellRiskDecision {
83    ShellRiskDecision {
84        level: ShellRiskLevel::NeedsApproval,
85        reason: reason.to_string(),
86    }
87}
88
89fn blocked(reason: &str) -> ShellRiskDecision {
90    ShellRiskDecision {
91        level: ShellRiskLevel::Blocked,
92        reason: reason.to_string(),
93    }
94}
95
96/// Classify a shell command's static risk. See module docs for the levels.
97pub fn classify_shell_command(command: &str) -> ShellRiskDecision {
98    if let Some(decision) = hard_deny(command) {
99        return decision;
100    }
101    classify_allowable(command)
102}
103
104// ---------------------------------------------------------------------------
105// Hard deny: session-destroying operations
106// ---------------------------------------------------------------------------
107
/// Top-level system directories. Recursively removing one of them, or
/// sweeping its permissions/ownership, kills the sandbox session outright.
const CRITICAL_SYSTEM_PATHS: &[&str] = &[
    "/bin",
    "/boot",
    "/dev",
    "/etc",
    "/lib",
    "/lib64",
    "/proc",
    "/sbin",
    "/sys",
    "/usr",
    "/var",
];
113
/// Path prefixes of raw block and memory devices. A redirect or `dd of=`
/// must never target a path starting with one of these.
const RAW_DEVICE_PREFIXES: &[&str] = &[
    // Block devices.
    "/dev/sd",
    "/dev/vd",
    "/dev/xvd",
    "/dev/hd",
    "/dev/nvme",
    // Memory and I/O-port devices.
    "/dev/mem",
    "/dev/kmem",
    "/dev/port",
];
125
/// `/dev` entries that are always acceptable as a write target.
const SAFE_DEVICE_SINKS: &[&str] = &[
    "/dev/null",
    "/dev/stdout",
    "/dev/stderr",
];
128
129fn hard_deny(command: &str) -> Option<ShellRiskDecision> {
130    if looks_like_fork_bomb(command) {
131        return Some(blocked(
132            "fork bomb pattern would exhaust sandbox PIDs and make the session unresponsive",
133        ));
134    }
135    for segment in deny_scan_segments(command) {
136        if let Some(decision) = deny_scan_segment(&segment) {
137            return Some(decision);
138        }
139    }
140    None
141}
142
/// Best-effort split of a raw command line into candidate simple commands
/// for the deny scan.
///
/// Unquoted `;`, `|`, `&`, newline, `(`, `)` and backtick all end the current
/// segment, so the scan keeps going through subshells, command substitution
/// and background lists that the conservative safe-side splitters refuse.
/// Quoted text and backslash-escaped characters are copied verbatim (quotes
/// and backslashes included). Segments are trimmed and empty ones dropped.
fn deny_scan_segments(command: &str) -> Vec<String> {
    // Move the trimmed buffer into the output (if non-empty) and reset it.
    fn emit(buffer: &mut String, out: &mut Vec<String>) {
        let piece = buffer.trim();
        if !piece.is_empty() {
            out.push(piece.to_owned());
        }
        buffer.clear();
    }

    let mut out = Vec::new();
    let mut buffer = String::new();
    let mut open_quote: Option<char> = None;
    let mut after_backslash = false;

    for c in command.chars() {
        match open_quote {
            // Double quotes honour backslash escapes, so `\"` does not
            // close the quote.
            Some('"') => {
                buffer.push(c);
                if after_backslash {
                    after_backslash = false;
                } else if c == '\\' {
                    after_backslash = true;
                } else if c == '"' {
                    open_quote = None;
                }
            }
            // Single quotes end only at the matching quote character.
            Some(closer) => {
                buffer.push(c);
                if c == closer {
                    open_quote = None;
                }
            }
            // Unquoted, right after a backslash: the character is literal.
            None if after_backslash => {
                after_backslash = false;
                buffer.push(c);
            }
            None => match c {
                ';' | '|' | '&' | '\n' | '(' | ')' | '`' => emit(&mut buffer, &mut out),
                _ => {
                    buffer.push(c);
                    if c == '\\' {
                        after_backslash = true;
                    } else if c == '\'' || c == '"' {
                        open_quote = Some(c);
                    }
                }
            },
        }
    }
    emit(&mut buffer, &mut out);
    out
}
200
/// Split a deny-scan segment into words.
///
/// Quote-aware: quotes are stripped and their content kept, so a
/// `sh -c '…'` body becomes a single word. Every unquoted run of `>` or `>>`
/// becomes a standalone `">"` token, which makes the redirect target the
/// following word.
///
/// Backslash handling:
///   * an escaped character is kept without its backslash and always counts
///     as part of a word, so `\/` yields the word `/`;
///   * backslash-newline is a line continuation and is dropped entirely.
fn deny_scan_words(segment: &str) -> Vec<String> {
    let mut words = Vec::new();
    let mut word = String::new();
    let mut in_word = false;
    let mut quote: Option<char> = None;
    let mut escaped = false;
    let mut chars = segment.chars().peekable();

    while let Some(ch) = chars.next() {
        // `escaped` is only ever set unquoted or inside double quotes, so
        // checking it first is safe for single-quoted text too.
        if escaped {
            escaped = false;
            if ch != '\n' {
                word.push(ch);
                // A word made only of escaped characters must still be
                // emitted; without this `rm -rf \/` loses its operand.
                in_word = true;
            }
            continue;
        }
        match quote {
            Some('"') => match ch {
                '\\' => escaped = true,
                '"' => quote = None,
                _ => word.push(ch),
            },
            Some(closer) => {
                if ch == closer {
                    quote = None;
                } else {
                    word.push(ch);
                }
            }
            None => match ch {
                '\\' => escaped = true,
                '\'' | '"' => {
                    quote = Some(ch);
                    // An empty quoted string is still a (empty) word.
                    in_word = true;
                }
                ' ' | '\t' => {
                    if in_word {
                        words.push(std::mem::take(&mut word));
                        in_word = false;
                    }
                }
                '>' => {
                    if in_word {
                        words.push(std::mem::take(&mut word));
                        in_word = false;
                    }
                    words.push(">".to_string());
                    // Collapse `>>` into a single redirect token.
                    let _ = chars.next_if_eq(&'>');
                }
                _ => {
                    in_word = true;
                    word.push(ch);
                }
            },
        }
    }
    if in_word {
        words.push(word);
    }
    words
}
275
276/// Drop leading wrappers (`sudo`, `env VAR=… `, `nohup`, `timeout 30`, …) so
277/// the real command name lands at index 0.
278fn strip_command_wrappers(words: &[String]) -> Vec<String> {
279    let mut rest: &[String] = words;
280    loop {
281        let Some(first) = rest.first() else {
282            return Vec::new();
283        };
284        match command_basename(first).as_str() {
285            "sudo" | "doas" => {
286                rest = &rest[1..];
287                while rest.first().is_some_and(|w| w.starts_with('-')) {
288                    rest = &rest[1..];
289                }
290            }
291            "env" => {
292                rest = &rest[1..];
293                while rest
294                    .first()
295                    .is_some_and(|w| w.contains('=') || w.starts_with('-'))
296                {
297                    rest = &rest[1..];
298                }
299            }
300            "nohup" | "command" | "exec" | "time" | "nice" | "ionice" | "stdbuf" => {
301                rest = &rest[1..];
302                while rest.first().is_some_and(|w| w.starts_with('-')) {
303                    rest = &rest[1..];
304                }
305            }
306            "timeout" => {
307                rest = &rest[1..];
308                while rest.first().is_some_and(|w| w.starts_with('-')) {
309                    rest = &rest[1..];
310                }
311                // Skip the duration operand.
312                if !rest.is_empty() {
313                    rest = &rest[1..];
314                }
315            }
316            _ => return rest.to_vec(),
317        }
318    }
319}
320
/// Lowercased final path component of `word`: everything after the last
/// `/`, or the whole word when it contains none.
fn command_basename(word: &str) -> String {
    let name = match word.rfind('/') {
        Some(slash) => &word[slash + 1..],
        None => word,
    };
    name.to_lowercase()
}
324
325fn deny_scan_segment(segment: &str) -> Option<ShellRiskDecision> {
326    let words = strip_command_wrappers(&deny_scan_words(segment));
327    let cmd = command_basename(words.first()?);
328    let args = &words[1..];
329
330    // A `sh -c '…'` wrapper executes its quoted argument: scan it too.
331    if matches!(cmd.as_str(), "sh" | "bash" | "zsh" | "dash" | "ksh") {
332        let mut iter = args.iter();
333        while let Some(arg) = iter.next() {
334            if arg.starts_with('-') && arg.contains('c') {
335                if let Some(script) = iter.next() {
336                    if let Some(decision) = hard_deny(script) {
337                        return Some(decision);
338                    }
339                }
340                break;
341            }
342        }
343    }
344
345    // Redirect straight into a raw device bricks the filesystem.
346    let mut expect_redirect_target = false;
347    for word in &words {
348        if word == ">" {
349            expect_redirect_target = true;
350            continue;
351        }
352        if std::mem::take(&mut expect_redirect_target) && is_raw_device_path(word) {
353            return Some(blocked(
354                "redirecting output to a raw device would corrupt the sandbox filesystem",
355            ));
356        }
357    }
358
359    match cmd.as_str() {
360        "rm" => deny_check_rm(args),
361        "chmod" | "chown" | "chgrp" => deny_check_permission_sweep(&cmd, args),
362        "mkswap" | "wipefs" | "blkdiscard" => Some(blocked(
363            "filesystem/block-device destruction would brick the sandbox",
364        )),
365        "fdisk" | "parted" | "sgdisk" => {
366            let listing_only = args.iter().any(|a| a == "-l" || a == "--list");
367            if listing_only {
368                None
369            } else {
370                Some(blocked(
371                    "partition-table manipulation would brick the sandbox",
372                ))
373            }
374        }
375        "dd" => {
376            for arg in args {
377                if let Some(target) = arg.strip_prefix("of=") {
378                    if target.starts_with("/dev/") && !SAFE_DEVICE_SINKS.contains(&target) {
379                        return Some(blocked(
380                            "dd writing to a raw device would corrupt the sandbox filesystem",
381                        ));
382                    }
383                }
384            }
385            None
386        }
387        "shutdown" | "reboot" | "halt" | "poweroff" | "telinit" => {
388            Some(blocked("shutting down the sandbox terminates the session"))
389        }
390        "init" => {
391            if args.iter().any(|a| a == "0" || a == "6") {
392                Some(blocked(
393                    "changing the runlevel to halt/reboot terminates the session",
394                ))
395            } else {
396                None
397            }
398        }
399        "systemctl" => {
400            let sub = args.iter().find(|a| !a.starts_with('-'));
401            if sub.is_some_and(|s| matches!(s.as_str(), "reboot" | "poweroff" | "halt" | "kexec")) {
402                Some(blocked("shutting down the sandbox terminates the session"))
403            } else {
404                None
405            }
406        }
407        "kill" => deny_check_kill(args),
408        "killall5" => Some(blocked(
409            "signalling every process kills the sandbox session",
410        )),
411        _ => {
412            if cmd.starts_with("mkfs") {
413                return Some(blocked(
414                    "creating a filesystem over an existing device would brick the sandbox",
415                ));
416            }
417            None
418        }
419    }
420}
421
422fn deny_check_rm(args: &[String]) -> Option<ShellRiskDecision> {
423    let mut recursive = false;
424    let mut no_preserve_root = false;
425    let mut operands: Vec<&String> = Vec::new();
426    let mut end_of_options = false;
427    for arg in args {
428        if end_of_options {
429            operands.push(arg);
430            continue;
431        }
432        if arg == "--" {
433            end_of_options = true;
434        } else if arg == "--recursive" {
435            recursive = true;
436        } else if arg == "--no-preserve-root" {
437            no_preserve_root = true;
438        } else if let Some(short) = arg.strip_prefix('-') {
439            if !short.starts_with('-') && short.chars().any(|c| c == 'r' || c == 'R') {
440                recursive = true;
441            }
442        } else {
443            operands.push(arg);
444        }
445    }
446    if !recursive {
447        return None;
448    }
449    if no_preserve_root {
450        return Some(blocked(
451            "rm --no-preserve-root with recursion would destroy the sandbox session",
452        ));
453    }
454    if operands.iter().any(|p| is_critical_system_path(p)) {
455        return Some(blocked(
456            "recursive deletion of a critical system path would destroy the sandbox session",
457        ));
458    }
459    None
460}
461
462fn deny_check_permission_sweep(cmd: &str, args: &[String]) -> Option<ShellRiskDecision> {
463    let mut recursive = false;
464    let mut operands: Vec<&String> = Vec::new();
465    let mut end_of_options = false;
466    for arg in args {
467        if end_of_options {
468            operands.push(arg);
469            continue;
470        }
471        if arg == "--" {
472            end_of_options = true;
473        } else if arg == "--recursive" {
474            recursive = true;
475        } else if let Some(short) = arg.strip_prefix('-') {
476            if !short.starts_with('-') && short.chars().any(|c| c == 'R' || c == 'r') {
477                recursive = true;
478            }
479        } else {
480            operands.push(arg);
481        }
482    }
483    if recursive && operands.iter().any(|p| is_critical_system_path(p)) {
484        return Some(blocked(
485            // chmod -R 000 /usr is as fatal as deleting it.
486            match cmd {
487                "chmod" => "recursive permission sweep over a critical system path would destroy the sandbox session",
488                _ => "recursive ownership sweep over a critical system path would destroy the sandbox session",
489            },
490        ));
491    }
492    None
493}
494
495fn deny_check_kill(args: &[String]) -> Option<ShellRiskDecision> {
496    let mut saw_signal = false;
497    let mut end_of_options = false;
498    for arg in args {
499        if !end_of_options && arg == "--" {
500            end_of_options = true;
501            continue;
502        }
503        if !end_of_options && arg.starts_with('-') {
504            // The first dash argument is the signal spec (`-9`, `-TERM`,
505            // `-s`); a later `-1` is process group "everything".
506            if saw_signal && arg == "-1" {
507                return Some(blocked(
508                    "kill -1 signals every process and kills the sandbox session",
509                ));
510            }
511            saw_signal = true;
512            continue;
513        }
514        if arg == "1" || arg == "-1" {
515            return Some(blocked("killing PID 1 terminates the sandbox session"));
516        }
517    }
518    None
519}
520
521fn is_critical_system_path(path: &str) -> bool {
522    let trimmed = path.trim();
523    // `/usr/`, `/usr/*` and `/usr` are all the same target.
524    let stripped = trimmed.strip_suffix("/*").unwrap_or(trimmed);
525    let normalized = if stripped.len() > 1 {
526        stripped.trim_end_matches('/')
527    } else {
528        stripped
529    };
530    if normalized == "/" || normalized == "/*" || trimmed == "/*" {
531        return true;
532    }
533    CRITICAL_SYSTEM_PATHS.contains(&normalized)
534}
535
536fn is_raw_device_path(path: &str) -> bool {
537    RAW_DEVICE_PREFIXES
538        .iter()
539        .any(|prefix| path.starts_with(prefix))
540}
541
/// Detect the classic fork-bomb shape: a function whose body pipes itself
/// into itself in the background (`:(){ :|:& };:` and renamed variants).
///
/// Every `name () {` definition in `command` is examined (whitespace
/// between the tokens is allowed). The name is the run of alphanumerics,
/// `_` and `:` directly before the parenthesis, read from the *original*
/// text so that a preceding line or the `function` keyword cannot fuse into
/// it. A definition matches when the whitespace-stripped text after its `{`
/// contains `name|name` and a `&`.
fn looks_like_fork_bomb(command: &str) -> bool {
    let chars: Vec<char> = command.chars().collect();
    let skip_whitespace = |mut at: usize| {
        while chars.get(at).is_some_and(|c| c.is_whitespace()) {
            at += 1;
        }
        at
    };
    let is_name_char = |c: char| c.is_alphanumeric() || c == '_' || c == ':';

    for open in (0..chars.len()).filter(|&at| chars[at] == '(') {
        // Require `(` `)` `{`, ignoring whitespace in between.
        let close = skip_whitespace(open + 1);
        if chars.get(close) != Some(&')') {
            continue;
        }
        let brace = skip_whitespace(close + 1);
        if chars.get(brace) != Some(&'{') {
            continue;
        }

        // Walk back over optional whitespace, then over the function name.
        let mut name_end = open;
        while name_end > 0 && chars[name_end - 1].is_whitespace() {
            name_end -= 1;
        }
        let mut name_start = name_end;
        while name_start > 0 && is_name_char(chars[name_start - 1]) {
            name_start -= 1;
        }
        if name_start == name_end {
            continue;
        }

        let name: String = chars[name_start..name_end].iter().collect();
        let body: String = chars[brace + 1..]
            .iter()
            .filter(|c| !c.is_whitespace())
            .collect();
        if body.contains(&format!("{name}|{name}")) && body.contains('&') {
            return true;
        }
    }
    false
}
563
564// ---------------------------------------------------------------------------
565// Conservative safe-side classification
566// ---------------------------------------------------------------------------
567
/// Command prefixes that are read-only by nature. Each entry is matched
/// word-by-word against the lowercased argv, so a two-word entry such as
/// `"go version"` only covers that exact subcommand.
const READ_ONLY_PREFIXES: &[&str] = &[
    // Single-word commands.
    "ls", "pwd", "echo", "cat", "head", "tail", "wc", "file", "tree", "find", "grep", "rg",
    "uptime", "cal", "free", "df", "du", "locale", "groups", "nproc",
    "stat", "strings", "hexdump", "od", "nl",
    "basename", "dirname", "realpath", "readlink",
    "cut", "paste", "tr", "column", "tac", "rev", "fold", "expand", "unexpand",
    "comm", "cmp", "numfmt",
    "true", "false", "type", "expr", "test", "getconf", "seq", "tsort", "pr",
    // Toolchain version probes (two-word prefixes).
    "go version",
    "rustc --version",
    "python --version",
    "python3 --version",
    "node --version",
    "npm --version",
    "npx --version",
    "cargo --version",
    "deno --version",
    "bun --version",
];
631
632fn classify_allowable(command: &str) -> ShellRiskDecision {
633    // A trailing stderr redirect to stdout or /dev/null doesn't change the
634    // risk of the underlying command.
635    if let Some(base) = strip_trailing_safe_stderr_redirect(command) {
636        return classify_allowable(&base);
637    }
638    if let Some(parts) = split_sequence(command) {
639        return classify_all_safe_read(&parts, "; list");
640    }
641    if let Some(parts) = split_and_list(command) {
642        return classify_all_safe_read(&parts, "&& list");
643    }
644    if let Some(parts) = split_pipeline(command) {
645        return classify_all_safe_read(&parts, "pipeline");
646    }
647    let Some(argv) = parse_simple_command(command) else {
648        return needs_approval("command is not a simple shell command");
649    };
650    let lower: Vec<String> = argv.iter().map(|a| a.to_lowercase()).collect();
651    if has_unsafe_args(&lower) {
652        return needs_approval(
653            "command contains arguments that may mutate files or execute arbitrary code",
654        );
655    }
656    if make_bounded_target_has_extra_args(&lower) {
657        return needs_approval("make bounded-write targets must not include extra targets or args");
658    }
659    if let Some(decision) = classify_builtin_read_only(&argv, &lower) {
660        return decision;
661    }
662    if lower[0] == "git" {
663        if git_command_read_only(&argv) {
664            return safe_read("git read-only command");
665        }
666        return needs_approval("git command is not classified as read-only");
667    }
668    if let Some(decision) = classify_bounded_write(&lower) {
669        return decision;
670    }
671    for prefix in READ_ONLY_PREFIXES {
672        if argv_has_prefix(&lower, prefix) {
673            return safe_read("built-in read-only command");
674        }
675    }
676    needs_approval("command is not classified as safe read-only or bounded-write")
677}
678
679/// `;`/`&&`/`|` lists are only safe when every element independently is.
680fn classify_all_safe_read(parts: &[String], kind: &str) -> ShellRiskDecision {
681    for part in parts {
682        let decision = classify_shell_command(part);
683        if decision.level == ShellRiskLevel::Blocked {
684            return decision;
685        }
686        if decision.level != ShellRiskLevel::SafeRead {
687            return needs_approval(&format!(
688                "{kind} contains a command that is not safe read-only"
689            ));
690        }
691    }
692    safe_read(&format!("{kind} of read-only commands"))
693}
694
695// --- splitters -------------------------------------------------------------
696
/// Shared quote-aware scanner: cuts the trimmed `command` at every unquoted
/// occurrence of `separator` and returns the trimmed elements.
///
/// Quotes and backslashes are kept in the elements. Single quotes are
/// literal; inside double quotes a backslash protects the next character.
/// A two-character separator (`&&`) must match in full: a lone `&` stays
/// part of the element, later fails the simple parse, and so lands the whole
/// command at NeedsApproval.
///
/// Returns `None` when the input has unbalanced quotes, a dangling escape,
/// no separator at all, or an empty element.
fn split_unquoted(command: &str, separator: &str) -> Option<Vec<String>> {
    let input: Vec<char> = command.trim().chars().collect();
    let sep: Vec<char> = separator.chars().collect();
    let two_char = sep.len() == 2;

    let mut pieces: Vec<String> = Vec::new();
    let mut buf = String::new();
    let mut in_single = false;
    let mut in_double = false;
    let mut escape_next = false;

    let mut pos = 0;
    while pos < input.len() {
        let c = input[pos];
        // From here on `pos` points at the character *after* `c`.
        pos += 1;

        if in_single {
            in_single = c != '\'';
            buf.push(c);
        } else if escape_next {
            escape_next = false;
            buf.push(c);
        } else if c == '\\' {
            // Only double quotes give the backslash escaping power here.
            escape_next = in_double;
            buf.push(c);
        } else if c == '"' {
            in_double = !in_double;
            buf.push(c);
        } else if c == '\'' {
            // A `'` inside double quotes is an ordinary character.
            in_single = !in_double;
            buf.push(c);
        } else if c == sep[0] && !in_double && (!two_char || input.get(pos) == Some(&sep[1])) {
            if two_char {
                // Consume the second separator character as well.
                pos += 1;
            }
            let piece = buf.trim();
            if piece.is_empty() {
                return None;
            }
            pieces.push(piece.to_owned());
            buf.clear();
        } else {
            buf.push(c);
        }
    }

    // `pieces` is non-empty exactly when at least one separator was seen.
    if in_single || in_double || escape_next || pieces.is_empty() {
        return None;
    }
    let last = buf.trim();
    if last.is_empty() {
        return None;
    }
    pieces.push(last.to_owned());
    Some(pieces)
}
780
781/// Split on `;` only.
782fn split_sequence(command: &str) -> Option<Vec<String>> {
783    split_unquoted(command, ";")
784}
785
786/// Split on `&&` only. `||` is intentionally not handled: its right side
787/// runs only when the left side *fails*, so an `||` chain can't be proven
788/// read-only statically.
789fn split_and_list(command: &str) -> Option<Vec<String>> {
790    split_unquoted(command, "&&")
791}
792
793/// Split on `|`. Note `a || b` splits into an empty middle element and is
794/// rejected, falling through to the (failing) simple parse — by design.
795fn split_pipeline(command: &str) -> Option<Vec<String>> {
796    split_unquoted(command, "|")
797}
798
799// --- trailing stderr redirect ----------------------------------------------
800
801fn strip_trailing_safe_stderr_redirect(command: &str) -> Option<String> {
802    let trimmed = command.trim();
803    for redirect in ["2>&1", "2>/dev/null", "2> /dev/null"] {
804        if let Some(base) = strip_trailing_redirect(trimmed, redirect) {
805            return Some(base);
806        }
807    }
808    None
809}
810
811fn strip_trailing_redirect(command: &str, redirect: &str) -> Option<String> {
812    let base = command.strip_suffix(redirect)?;
813    if base.is_empty() || !base.ends_with([' ', '\t']) {
814        return None;
815    }
816    if !offset_outside_quotes(command, base.len()) {
817        return None;
818    }
819    let base = base.trim();
820    if base.is_empty() {
821        return None;
822    }
823    Some(base.to_string())
824}
825
826/// True when byte offset `offset` sits outside any quoted region of
827/// `command` (double-quote backslash escapes respected).
828fn offset_outside_quotes(command: &str, offset: usize) -> bool {
829    let mut quote: Option<char> = None;
830    let mut escaped = false;
831    for (i, r) in command.char_indices() {
832        if i >= offset {
833            break;
834        }
835        if quote == Some('\'') {
836            if r == '\'' {
837                quote = None;
838            }
839            continue;
840        }
841        if escaped {
842            escaped = false;
843            continue;
844        }
845        match r {
846            '\\' => {
847                if quote == Some('"') {
848                    escaped = true;
849                }
850            }
851            '"' => {
852                if quote.is_none() {
853                    quote = Some('"');
854                } else if quote == Some('"') {
855                    quote = None;
856                }
857            }
858            '\'' if quote.is_none() => {
859                quote = Some('\'');
860            }
861            _ => {}
862        }
863    }
864    quote.is_none() && !escaped
865}
866
867// --- simple-command parsing -------------------------------------------------
868
869/// Parse a command into argv, refusing anything that isn't a plain word
870/// list: any unquoted shell metacharacter (globs, redirects, expansion,
871/// subshells, comments) fails the parse and the command defaults to
872/// NeedsApproval.
873fn parse_simple_command(command: &str) -> Option<Vec<String>> {
874    let mut argv = Vec::new();
875    let mut word = String::new();
876    let mut in_word = false;
877    let mut quote: Option<char> = None;
878    for r in command.trim().chars() {
879        match quote {
880            Some('\'') => {
881                if r == '\'' {
882                    quote = None;
883                    continue;
884                }
885                word.push(r);
886                continue;
887            }
888            Some('"') => {
889                match r {
890                    '"' => quote = None,
891                    // Expansion inside double quotes can run arbitrary code.
892                    '\\' | '$' | '`' => return None,
893                    _ => word.push(r),
894                }
895                continue;
896            }
897            _ => {}
898        }
899        match r {
900            ' ' | '\t' => {
901                if in_word {
902                    argv.push(std::mem::take(&mut word));
903                    in_word = false;
904                }
905            }
906            '\'' | '"' => {
907                quote = Some(r);
908                in_word = true;
909            }
910            _ if rejected_simple_command_char(r) => return None,
911            _ => {
912                in_word = true;
913                word.push(r);
914            }
915        }
916    }
917    if quote.is_some() {
918        return None;
919    }
920    if in_word {
921        argv.push(word);
922    }
923    if argv.is_empty() {
924        None
925    } else {
926        Some(argv)
927    }
928}
929
/// Reports whether `r` is one of the shell metacharacters that the
/// simple-command parser refuses when it appears outside quotes.
fn rejected_simple_command_char(r: char) -> bool {
    const REJECTED: &[char] = &[
        '\\', '$', '`', ';', '|', '&', '<', '>', '\n', '\r', '(', ')', '{', '}', '#', '*', '?',
        '[', ']',
    ];
    REJECTED.contains(&r)
}
953
954// --- unsafe argument detection ----------------------------------------------
955
956fn has_unsafe_args(argv: &[String]) -> bool {
957    for field in &argv[1..] {
958        if field.contains(['$', '`', '&', '<', '>', '\n', '\r']) {
959            return true;
960        }
961    }
962    if argv_has_prefix(argv, "find") {
963        for field in argv {
964            match field.as_str() {
965                "-delete" | "-exec" | "-execdir" | "-ok" | "-okdir" | "-fls" => return true,
966                _ => {}
967            }
968            if field.starts_with("-fprint") {
969                return true;
970            }
971        }
972    } else if argv_has_prefix(argv, "git diff")
973        || argv_has_prefix(argv, "git show")
974        || argv_has_prefix(argv, "git log")
975    {
976        for field in argv {
977            if field == "--output"
978                || field.starts_with("--output=")
979                || field == "--ext-diff"
980                || field == "--external-diff"
981                || field == "--textconv"
982            {
983                return true;
984            }
985        }
986    } else if argv_has_prefix(argv, "rg") {
987        for field in argv {
988            if field == "--pre" || field.starts_with("--pre=") {
989                return true;
990            }
991        }
992    }
993    for field in argv {
994        match field.as_str() {
995            "--fix" | "--write" | "--update" | "--update-snapshot" | "--updatesnapshot" => {
996                return true
997            }
998            _ => {}
999        }
1000        if field.starts_with("--fix=")
1001            || field.starts_with("--write=")
1002            || field.starts_with("--update=")
1003            || field.starts_with("--update-snapshot=")
1004            || field.starts_with("--updatesnapshot=")
1005        {
1006            return true;
1007        }
1008    }
1009    if (argv_has_prefix(argv, "npx jest") || argv_has_prefix(argv, "npx vitest"))
1010        && argv.iter().any(|a| a == "-u")
1011    {
1012        return true;
1013    }
1014    false
1015}
1016
/// Bounded `make` targets must be invoked bare (`make test`), with no extra
/// targets or variable overrides.
const MAKE_BOUNDED_TARGETS: &[&str] =
    &["build", "test", "check", "lint", "fmt", "fmt-check", "vet"];

/// Returns `true` when `argv` is `make <bounded-target>` followed by at least
/// one more argument. Anything else — a different program, a missing target,
/// or a target outside `MAKE_BOUNDED_TARGETS` — yields `false`.
fn make_bounded_target_has_extra_args(argv: &[String]) -> bool {
    match argv {
        [program, target, extra @ ..] if program == "make" => {
            MAKE_BOUNDED_TARGETS.contains(&target.as_str()) && !extra.is_empty()
        }
        _ => false,
    }
}
1031
1032// --- builtin read-only commands with option allowlists -----------------------
1033
1034fn classify_builtin_read_only(argv: &[String], lower: &[String]) -> Option<ShellRiskDecision> {
1035    match lower[0].as_str() {
1036        "date" => Some(classify_date(argv, lower)),
1037        "uname" => Some(classify_uname(lower)),
1038        "whoami" => Some(classify_whoami(lower)),
1039        "id" => Some(classify_id(lower)),
1040        "which" => Some(classify_command_lookup(&lower[1..])),
1041        "command" => {
1042            if lower.len() >= 2 && lower[1] == "-v" {
1043                Some(classify_command_lookup(&lower[2..]))
1044            } else {
1045                None
1046            }
1047        }
1048        "sed" => Some(classify_sed_read_only(argv)),
1049        "sort" => Some(classify_sort(argv)),
1050        "uniq" => Some(classify_uniq(argv)),
1051        "printf" => Some(classify_printf(lower)),
1052        _ => None,
1053    }
1054}
1055
1056fn classify_date(argv: &[String], lower: &[String]) -> ShellRiskDecision {
1057    const FLAGS_WITH_VALUES: &[&str] = &["-d", "--date", "-r", "--reference", "--rfc-3339"];
1058    const SAFE_NO_VALUE_FLAGS: &[&str] = &[
1059        "-u",
1060        "--utc",
1061        "--universal",
1062        "-I",
1063        "-R",
1064        "--iso-8601",
1065        "--rfc-email",
1066        "--debug",
1067        "--help",
1068        "--version",
1069    ];
1070    let mut i = 1;
1071    while i < lower.len() {
1072        let raw = argv[i].as_str();
1073        let arg = lower[i].as_str();
1074        if raw == "-s"
1075            || arg == "--set"
1076            || arg.starts_with("--set=")
1077            || raw == "-f"
1078            || arg == "--file"
1079            || arg.starts_with("--file=")
1080        {
1081            return needs_approval("date can set system time or read batch dates with this option");
1082        }
1083        if raw.starts_with('+') {
1084            i += 1;
1085            continue;
1086        }
1087        if FLAGS_WITH_VALUES.contains(&raw)
1088            || (raw.starts_with("--") && FLAGS_WITH_VALUES.contains(&arg))
1089        {
1090            i += 1;
1091            if i >= lower.len() {
1092                return needs_approval("date flag requires a value");
1093            }
1094            i += 1;
1095            continue;
1096        }
1097        if arg.starts_with("--date=")
1098            || arg.starts_with("--reference=")
1099            || arg.starts_with("--iso-8601=")
1100            || arg.starts_with("--rfc-3339=")
1101        {
1102            i += 1;
1103            continue;
1104        }
1105        if SAFE_NO_VALUE_FLAGS.contains(&raw) || SAFE_NO_VALUE_FLAGS.contains(&arg) {
1106            i += 1;
1107            continue;
1108        }
1109        if raw.starts_with('-') {
1110            return needs_approval("date option is not on the safe display allowlist");
1111        }
1112        return needs_approval("date positional arguments can set system time");
1113    }
1114    safe_read("date display command")
1115}
1116
1117fn classify_uname(lower: &[String]) -> ShellRiskDecision {
1118    const SAFE_LONG: &[&str] = &[
1119        "--all",
1120        "--kernel-name",
1121        "--nodename",
1122        "--kernel-release",
1123        "--kernel-version",
1124        "--machine",
1125        "--processor",
1126        "--hardware-platform",
1127        "--operating-system",
1128        "--help",
1129        "--version",
1130    ];
1131    for arg in &lower[1..] {
1132        if SAFE_LONG.contains(&arg.as_str()) {
1133            continue;
1134        }
1135        if arg.starts_with('-') && arg.len() > 1 && !arg.starts_with("--") {
1136            if arg[1..].chars().all(|r| "asnrvmpio".contains(r)) {
1137                continue;
1138            }
1139            return needs_approval("uname option is not on the safe display allowlist");
1140        }
1141        return needs_approval("uname only supports safe display flags in auto-allow");
1142    }
1143    safe_read("uname display command")
1144}
1145
1146fn classify_whoami(lower: &[String]) -> ShellRiskDecision {
1147    for arg in &lower[1..] {
1148        if arg != "--help" && arg != "--version" {
1149            return needs_approval("whoami only supports help/version args in auto-allow");
1150        }
1151    }
1152    safe_read("whoami display command")
1153}
1154
1155fn classify_id(lower: &[String]) -> ShellRiskDecision {
1156    const SAFE_LONG: &[&str] = &[
1157        "--user",
1158        "--group",
1159        "--groups",
1160        "--name",
1161        "--real",
1162        "--zero",
1163        "--help",
1164        "--version",
1165    ];
1166    for arg in &lower[1..] {
1167        if SAFE_LONG.contains(&arg.as_str()) || is_command_name(arg) {
1168            continue;
1169        }
1170        if arg.starts_with('-') && arg.len() > 1 && !arg.starts_with("--") {
1171            if arg[1..].chars().all(|r| "uggnrz".contains(r)) {
1172                continue;
1173            }
1174            return needs_approval("id option is not on the safe display allowlist");
1175        }
1176        return needs_approval("id argument is not safe for auto-allow");
1177    }
1178    safe_read("id display command")
1179}
1180
1181fn classify_command_lookup(args: &[String]) -> ShellRiskDecision {
1182    if args.is_empty() {
1183        return needs_approval("command lookup requires at least one command name");
1184    }
1185    for arg in args {
1186        if !is_command_name(arg) {
1187            return needs_approval("command lookup operands must be simple command names");
1188        }
1189    }
1190    safe_read("command lookup")
1191}
1192
1193fn classify_printf(lower: &[String]) -> ShellRiskDecision {
1194    for arg in &lower[1..] {
1195        if arg.contains('/') && arg.starts_with('-') {
1196            return needs_approval("printf option is not on the safe display allowlist");
1197        }
1198    }
1199    safe_read("printf display command")
1200}
1201
/// Reports whether `v` (after trimming surrounding whitespace) is a bare
/// command name: non-empty, not starting with `-`, and made up only of
/// alphanumeric characters plus `_`, `.`, `-`, and `+`. Path separators are
/// therefore never accepted.
fn is_command_name(v: &str) -> bool {
    let name = v.trim();
    match name.chars().next() {
        None | Some('-') => false,
        Some(_) => name
            .chars()
            .all(|c| c.is_alphanumeric() || "_.-+".contains(c)),
    }
}
1210
1211// --- sed: stream-only substitution / range print -----------------------------
1212
1213fn classify_sed_read_only(argv: &[String]) -> ShellRiskDecision {
1214    if sed_print_range_read_only(argv) {
1215        return safe_read("sed range print command");
1216    }
1217    if sed_substitution_read_only(argv) {
1218        return safe_read("sed stream substitution command");
1219    }
1220    needs_approval("sed command is not classified as read-only")
1221}
1222
1223fn sed_substitution_read_only(argv: &[String]) -> bool {
1224    if argv.len() < 2 || argv[0] != "sed" {
1225        return false;
1226    }
1227    let mut i = 1;
1228    while i < argv.len() {
1229        match argv[i].as_str() {
1230            "-E" | "-r" | "--regexp-extended" | "-n" | "--quiet" | "--silent" => i += 1,
1231            "--" => {
1232                i += 1;
1233                break;
1234            }
1235            _ => break,
1236        }
1237    }
1238    if i >= argv.len() || !sed_substitution_script_read_only(&argv[i]) {
1239        return false;
1240    }
1241    i += 1;
1242    // Remaining operands must be plain input files, not more options.
1243    argv[i..].iter().all(|a| !a.starts_with('-'))
1244}
1245
1246fn sed_substitution_script_read_only(script: &str) -> bool {
1247    if script.is_empty() || !script.starts_with('s') {
1248        return false;
1249    }
1250    let runes: Vec<char> = script.chars().collect();
1251    if runes.len() < 4 {
1252        return false;
1253    }
1254    let delim = runes[1];
1255    if delim == '\\' || delim == '\n' || delim == '\r' {
1256        return false;
1257    }
1258    let mut parts = 0;
1259    let mut escaped = false;
1260    let mut i = 2;
1261    while i < runes.len() {
1262        let r = runes[i];
1263        if escaped {
1264            escaped = false;
1265            i += 1;
1266            continue;
1267        }
1268        if r == '\\' {
1269            escaped = true;
1270            i += 1;
1271            continue;
1272        }
1273        if r == delim {
1274            parts += 1;
1275            if parts == 2 {
1276                let flags: String = runes[i + 1..].iter().collect();
1277                return sed_substitution_flags_read_only(&flags);
1278            }
1279        }
1280        i += 1;
1281    }
1282    false
1283}
1284
/// Reports whether every character of a substitution flag string is an ASCII
/// digit or one of `g`, `p`, `I`, `i`, `M`, `m`. An empty flag string passes.
fn sed_substitution_flags_read_only(flags: &str) -> bool {
    const ALLOWED: &str = "gpIiMm";
    !flags
        .chars()
        .any(|c| !(c.is_ascii_digit() || ALLOWED.contains(c)))
}
1290
1291fn sed_print_range_read_only(argv: &[String]) -> bool {
1292    if argv.len() < 3 || argv[0] != "sed" {
1293        return false;
1294    }
1295    let mut i = 1;
1296    let mut saw_quiet = false;
1297    while i < argv.len() {
1298        match argv[i].as_str() {
1299            "-n" | "--quiet" | "--silent" => {
1300                saw_quiet = true;
1301                i += 1;
1302            }
1303            "--" => {
1304                i += 1;
1305                break;
1306            }
1307            _ => break,
1308        }
1309    }
1310    if !saw_quiet || i >= argv.len() || !sed_range_print_script(&argv[i]) {
1311        return false;
1312    }
1313    i += 1;
1314    argv[i..].iter().all(|a| !a.starts_with('-'))
1315}
1316
/// Reports whether `script` is a plain line-range print: `Np`, `N,Mp`, or the
/// same with `$` standing in for either address.
///
/// The script must end in `p`; what precedes it must be one or two
/// comma-separated addresses, each either `$` or a non-empty run of ASCII
/// digits. A bare `p` (no address) is rejected because its single address is
/// empty.
fn sed_range_print_script(script: &str) -> bool {
    let Some(addr) = script.strip_suffix('p') else {
        return false;
    };
    // At most two addresses (`start,end`).
    if addr.split(',').count() > 2 {
        return false;
    }
    addr.split(',').all(|part| {
        part == "$" || (!part.is_empty() && part.chars().all(|c| c.is_ascii_digit()))
    })
}
1338
1339// --- sort / uniq: display-only option allowlists -----------------------------
1340
1341fn classify_sort(argv: &[String]) -> ShellRiskDecision {
1342    let mut end_options = false;
1343    let mut i = 1;
1344    while i < argv.len() {
1345        let arg = argv[i].as_str();
1346        if end_options || !arg.starts_with('-') || arg == "-" {
1347            i += 1;
1348            continue;
1349        }
1350        if arg == "--" {
1351            end_options = true;
1352            i += 1;
1353            continue;
1354        }
1355        if arg.starts_with("--") {
1356            if arg == "--output" || arg.starts_with("--output=") {
1357                return needs_approval(
1358                    "sort can write to an explicit output path with this option",
1359                );
1360            }
1361            if arg == "--compress-program" || arg.starts_with("--compress-program=") {
1362                return needs_approval("sort can execute an external compressor with this option");
1363            }
1364            if arg == "--temporary-directory" || arg.starts_with("--temporary-directory=") {
1365                return needs_approval(
1366                    "sort can write temporary files outside the input stream with this option",
1367                );
1368            }
1369            if sort_long_option_consumes_next(arg) && !arg.contains('=') {
1370                i += 1;
1371            }
1372            if !sort_long_option_safe(arg) {
1373                return needs_approval("sort option is not on the safe display allowlist");
1374            }
1375            i += 1;
1376            continue;
1377        }
1378        if !sort_short_options_safe(arg) {
1379            return needs_approval("sort option is not on the safe display allowlist");
1380        }
1381        i += 1;
1382    }
1383    safe_read("sort display command")
1384}
1385
/// Reports whether the long `sort` option `arg` (with any `=value` suffix
/// ignored) is on the display-only allowlist. Names must match exactly.
fn sort_long_option_safe(arg: &str) -> bool {
    const SAFE_NAMES: &[&str] = &[
        "--ignore-leading-blanks",
        "--dictionary-order",
        "--ignore-nonprinting",
        "--ignore-case",
        "--general-numeric-sort",
        "--human-numeric-sort",
        "--month-sort",
        "--numeric-sort",
        "--reverse",
        "--unique",
        "--stable",
        "--version-sort",
        "--zero-terminated",
        "--check",
        "--key",
        "--field-separator",
    ];
    let name = match arg.split_once('=') {
        Some((before, _)) => before,
        None => arg,
    };
    SAFE_NAMES.contains(&name)
}
1408
/// Reports whether the long `sort` option `arg` (with any `=value` suffix
/// ignored) is `--key` or `--field-separator`, the two allowlisted options
/// that take a value.
fn sort_long_option_consumes_next(arg: &str) -> bool {
    let name = arg.split_once('=').map_or(arg, |(before, _)| before);
    name == "--key" || name == "--field-separator"
}
1413
/// Short sort options: display-only flags pass; `-o` (output file) and `-T`
/// (temp dir) fail, as does any other unlisted letter. `-k`/`-t` take a
/// key/separator value, which is harmless, so the cluster is accepted as soon
/// as one of them is reached. A cluster with no flag characters is rejected.
fn sort_short_options_safe(arg: &str) -> bool {
    const DISPLAY_FLAGS: &str = "bcCdfghiMmnrsuVz";
    let mut saw_flag = false;
    // The first character is the leading dash; only what follows is a flag.
    for flag in arg.chars().skip(1) {
        saw_flag = true;
        if flag == 'k' || flag == 't' {
            return true;
        }
        if !DISPLAY_FLAGS.contains(flag) {
            return false;
        }
    }
    saw_flag
}
1428
1429fn classify_uniq(argv: &[String]) -> ShellRiskDecision {
1430    let mut operands = 0;
1431    let mut end_options = false;
1432    let mut i = 1;
1433    while i < argv.len() {
1434        let arg = argv[i].as_str();
1435        if end_options || !arg.starts_with('-') || arg == "-" {
1436            operands += 1;
1437            if operands > 1 {
1438                return needs_approval(
1439                    "uniq can write to an output file when given a second operand",
1440                );
1441            }
1442            i += 1;
1443            continue;
1444        }
1445        if arg == "--" {
1446            end_options = true;
1447            i += 1;
1448            continue;
1449        }
1450        if arg.starts_with("--") {
1451            if uniq_long_option_consumes_next(arg) && !arg.contains('=') {
1452                i += 1;
1453            }
1454            if !uniq_long_option_safe(arg) {
1455                return needs_approval("uniq option is not on the safe display allowlist");
1456            }
1457            i += 1;
1458            continue;
1459        }
1460        let Some(consumes_next) = uniq_short_options_safe(arg) else {
1461            return needs_approval("uniq option is not on the safe display allowlist");
1462        };
1463        if consumes_next {
1464            i += 1;
1465        }
1466        i += 1;
1467    }
1468    safe_read("uniq display command")
1469}
1470
/// Reports whether the long `uniq` option `arg` (with any `=value` suffix
/// ignored) is on the display-only allowlist. Names must match exactly.
fn uniq_long_option_safe(arg: &str) -> bool {
    const SAFE_NAMES: &[&str] = &[
        "--count",
        "--repeated",
        "--all-repeated",
        "--unique",
        "--ignore-case",
        "--zero-terminated",
        "--group",
        "--skip-fields",
        "--skip-chars",
        "--check-chars",
    ];
    let name = match arg.split_once('=') {
        Some((before, _)) => before,
        None => arg,
    };
    SAFE_NAMES.contains(&name)
}
1487
/// Reports whether the long `uniq` option `arg` (with any `=value` suffix
/// ignored) is one of the value-taking options `--skip-fields`,
/// `--skip-chars`, or `--check-chars`.
fn uniq_long_option_consumes_next(arg: &str) -> bool {
    let name = arg.split_once('=').map_or(arg, |(before, _)| before);
    ["--skip-fields", "--skip-chars", "--check-chars"].contains(&name)
}
1492
/// Returns `Some(consumes_next)` when every flag in the cluster is safe,
/// `None` otherwise. `-f`/`-s`/`-w` take a value and end the cluster:
/// `consumes_next` is `true` when such a flag is the last character (its
/// value is the following argument) and `false` when the value is attached.
/// A cluster with no flag characters yields `None`.
fn uniq_short_options_safe(arg: &str) -> Option<bool> {
    // The first character is the leading dash; only what follows is a flag.
    let mut flags = arg.chars().skip(1).peekable();
    if flags.peek().is_none() {
        return None;
    }
    while let Some(flag) = flags.next() {
        match flag {
            'c' | 'd' | 'u' | 'i' | 'z' => {}
            'f' | 's' | 'w' => return Some(flags.peek().is_none()),
            _ => return None,
        }
    }
    Some(false)
}
1515
1516// --- git read-only whitelist --------------------------------------------------
1517
1518fn git_command_read_only(argv: &[String]) -> bool {
1519    if argv.len() < 2 || argv[0] != "git" {
1520        return false;
1521    }
1522    if argv[1..].iter().any(|f| arg_contains_unsafe_meta(f)) {
1523        return false;
1524    }
1525    let mut subcommand_index = 1;
1526    while subcommand_index < argv.len() {
1527        let arg = argv[subcommand_index].as_str();
1528        if arg == "-c" || arg == "--config-env" || arg.starts_with("--config-env=") {
1529            return false;
1530        }
1531        if arg == "-C" {
1532            if subcommand_index + 1 >= argv.len()
1533                || !git_relative_path_allowed(&argv[subcommand_index + 1], false)
1534            {
1535                return false;
1536            }
1537            subcommand_index += 2;
1538            continue;
1539        }
1540        if let Some(path) = arg.strip_prefix("-C") {
1541            if !git_relative_path_allowed(path, false) {
1542                return false;
1543            }
1544            subcommand_index += 1;
1545            continue;
1546        }
1547        if arg.starts_with("-c") {
1548            // Inline config can change command behaviour arbitrarily.
1549            return false;
1550        }
1551        if arg.starts_with('-') {
1552            return false;
1553        }
1554        break;
1555    }
1556    if subcommand_index >= argv.len() {
1557        return false;
1558    }
1559    let subcommand = argv[subcommand_index].as_str();
1560    let args = &argv[subcommand_index + 1..];
1561    match subcommand {
1562        "status" | "rev-parse" => git_args_are_read_only(args),
1563        "symbolic-ref" => git_args_are_read_only(args) && git_symbolic_ref_args_read_only(args),
1564        "branch" => git_args_are_read_only(args) && git_branch_args_read_only(args),
1565        "remote" => git_args_are_read_only(args) && git_remote_args_read_only(args),
1566        "config" => !args.is_empty() && args[0] == "--get" && git_args_are_read_only(args),
1567        "diff" => git_args_are_read_only(args) && git_diff_args_read_only(args),
1568        "show" | "log" | "shortlog" | "ls-files" => git_args_are_read_only(args),
1569        _ => false,
1570    }
1571}
1572
/// Reports whether `git symbolic-ref` arguments amount to a read: exactly
/// one non-option argument, with `--short`, `-q`, and `--quiet` as the only
/// options allowed.
fn git_symbolic_ref_args_read_only(args: &[String]) -> bool {
    let mut ref_names = 0usize;
    for arg in args.iter().map(String::as_str) {
        if matches!(arg, "--short" | "-q" | "--quiet") {
            continue;
        }
        if arg.starts_with('-') {
            return false;
        }
        ref_names += 1;
    }
    ref_names == 1
}
1591
/// Reports whether `git branch` arguments only list branches.
///
/// Listing/display flags are accepted anywhere. A non-option argument is
/// accepted only after `--list` / `-l` has been seen; any other argument
/// rejects the command.
fn git_branch_args_read_only(args: &[String]) -> bool {
    let mut listing = false;
    for arg in args.iter().map(String::as_str) {
        match arg {
            "--list" | "-l" => listing = true,
            "--show-current" | "--all" | "--remotes" | "--verbose" | "--color" | "--no-color"
            | "-a" | "-r" | "-v" | "-vv" => {}
            _ if arg.starts_with("--color=") => {}
            _ if listing && !arg.starts_with('-') => {}
            _ => return false,
        }
    }
    true
}
1616
/// Reports whether `git remote` arguments are one of the accepted read-only
/// forms: no arguments, a lone `-v`, or `get-url` followed by at least one
/// more argument.
fn git_remote_args_read_only(args: &[String]) -> bool {
    match args {
        [] => true,
        [only] => only == "-v",
        [first, ..] => first == "get-url",
    }
}
1626
/// Reports whether none of `args` is `--output` (bare or `--output=…`),
/// `--ext-diff`, `--external-diff`, or `--textconv`.
fn git_args_are_read_only(args: &[String]) -> bool {
    !args.iter().any(|arg| {
        arg.starts_with("--output=")
            || matches!(
                arg.as_str(),
                "--output" | "--ext-diff" | "--external-diff" | "--textconv"
            )
    })
}
1639
1640fn git_diff_args_read_only(args: &[String]) -> bool {
1641    if !args.iter().any(|a| a == "--no-index") {
1642        return true;
1643    }
1644    let paths = git_diff_no_index_paths(args);
1645    if paths.len() != 2 {
1646        return false;
1647    }
1648    git_relative_path_allowed(paths[0], true) && git_relative_path_allowed(paths[1], false)
1649}
1650
1651fn git_diff_no_index_paths(args: &[String]) -> Vec<&String> {
1652    let mut paths = Vec::with_capacity(2);
1653    let mut end_of_options = false;
1654    let mut i = 0;
1655    while i < args.len() {
1656        let arg = &args[i];
1657        if !end_of_options && arg == "--" {
1658            end_of_options = true;
1659            i += 1;
1660            continue;
1661        }
1662        if !end_of_options && arg.starts_with('-') {
1663            if git_diff_flag_consumes_next_arg(arg) && !arg.contains('=') {
1664                i += 1;
1665            }
1666            i += 1;
1667            continue;
1668        }
1669        paths.push(arg);
1670        i += 1;
1671    }
1672    paths
1673}
1674
/// Reports whether `arg` is a `git diff` flag that this classifier treats as
/// taking its value from the following argument.
fn git_diff_flag_consumes_next_arg(arg: &str) -> bool {
    // Conservative list of read-only git diff flags that consume the next
    // arg. If this list misses an option, --no-index parsing may reject the
    // command rather than accidentally treating an option value as a path.
    const VALUE_FLAGS: &[&str] = &[
        "--relative",
        "--diff-filter",
        "--word-diff-regex",
        "--color-words",
        "--ws-error-highlight",
        "--abbrev",
        "--break-rewrites",
        "--find-renames",
        "--find-copies",
        "--diff-algorithm",
        "--inter-hunk-context",
        "-S",
        "-G",
        "-O",
    ];
    VALUE_FLAGS.contains(&arg)
}
1697
/// Reports whether `path` (after trimming surrounding whitespace) is an
/// acceptable relative path.
///
/// `/dev/null` is accepted only when `allow_dev_null` is set. Otherwise the
/// path must be non-empty, must not start with `/`, `~`, or `-`, and none of
/// its `/`-separated components may be empty, `.`, or `..`.
fn git_relative_path_allowed(path: &str, allow_dev_null: bool) -> bool {
    let candidate = path.trim();
    if allow_dev_null && candidate == "/dev/null" {
        return true;
    }
    if candidate.is_empty() || candidate.starts_with(['/', '~', '-']) {
        return false;
    }
    !candidate
        .split('/')
        .any(|component| matches!(component, "" | "." | ".."))
}
1712
/// Reports whether `arg` contains any of `$`, backtick, `;`, `&`, `|`, `<`,
/// `>`, newline, or carriage return.
fn arg_contains_unsafe_meta(arg: &str) -> bool {
    arg.chars()
        .any(|c| matches!(c, '$' | '`' | ';' | '&' | '|' | '<' | '>' | '\n' | '\r'))
}
1716
1717// --- bounded-write build/test commands ----------------------------------------
1718
1719fn classify_bounded_write(lower: &[String]) -> Option<ShellRiskDecision> {
1720    match lower[0].as_str() {
1721        "go" => {
1722            if lower.len() >= 2 {
1723                match lower[1].as_str() {
1724                    "test" => {
1725                        if has_any_flag_prefix(&lower[2..], &["-exec", "-toolexec"]) {
1726                            return Some(needs_approval(
1727                                "go test can run an execution wrapper with this option",
1728                            ));
1729                        }
1730                        if has_any_flag_prefix(&lower[2..], &["-c"]) {
1731                            return Some(needs_approval("go test -c emits a test binary"));
1732                        }
1733                        if has_any_flag_prefix(
1734                            &lower[2..],
1735                            &[
1736                                "-coverprofile",
1737                                "-cpuprofile",
1738                                "-memprofile",
1739                                "-blockprofile",
1740                                "-mutexprofile",
1741                                "-trace",
1742                                "-o",
1743                            ],
1744                        ) {
1745                            return Some(needs_approval(
1746                                "go test writes to an explicit output path with this option",
1747                            ));
1748                        }
1749                        return Some(bounded_write(
1750                            "go test may write build and test cache files".into(),
1751                        ));
1752                    }
1753                    "build" => {
1754                        if has_any_flag_prefix(&lower[2..], &["-o"]) {
1755                            return Some(needs_approval(
1756                                "go build writes to an explicit output path with this option",
1757                            ));
1758                        }
1759                        return Some(needs_approval("go build may emit a workspace binary"));
1760                    }
1761                    "vet" => {
1762                        return Some(bounded_write("go vet may write build cache files".into()))
1763                    }
1764                    _ => {}
1765                }
1766            }
1767        }
1768        "make" => {
1769            if lower.len() == 2 && MAKE_BOUNDED_TARGETS.contains(&lower[1].as_str()) {
1770                return Some(bounded_write(format!(
1771                    "make {} may write project-local build or test artifacts",
1772                    lower[1]
1773                )));
1774            }
1775        }
1776        "cargo" => {
1777            if lower.len() >= 2 {
1778                match lower[1].as_str() {
1779                    "build" | "test" | "check" | "clippy" | "fmt" => {
1780                        if has_any_flag_prefix(&lower[2..], &["--target-dir"]) {
1781                            return Some(needs_approval(
1782                                "cargo writes to an explicit target directory with this option",
1783                            ));
1784                        }
1785                        return Some(bounded_write(format!(
1786                            "cargo {} may write target build artifacts",
1787                            lower[1]
1788                        )));
1789                    }
1790                    _ => {}
1791                }
1792            }
1793        }
1794        "npm" | "pnpm" => {
1795            if lower.len() >= 2 {
1796                if lower[1] == "test" {
1797                    return Some(bounded_write(format!(
1798                        "{} test may write project-local test artifacts",
1799                        lower[0]
1800                    )));
1801                }
1802                if lower.len() >= 3 && lower[1] == "run" && npm_bounded_script(&lower[2]) {
1803                    return Some(bounded_write(format!(
1804                        "{} run {} may write project-local build or test artifacts",
1805                        lower[0], lower[2]
1806                    )));
1807                }
1808            }
1809        }
1810        "npx" => {
1811            if lower.len() >= 2 {
1812                match lower[1].as_str() {
1813                    "jest" | "vitest" => {
1814                        if has_known_test_output_flag(&lower[2..]) {
1815                            return Some(needs_approval(
1816                                "test runner writes to an explicit output path with this option",
1817                            ));
1818                        }
1819                        return Some(bounded_write(format!(
1820                            "npx {} may write project-local test artifacts",
1821                            lower[1]
1822                        )));
1823                    }
1824                    "tsc" if lower.len() >= 3 && lower[2] == "--noemit" => {
1825                        return Some(bounded_write(
1826                            "npx tsc --noEmit may write compiler cache files".into(),
1827                        ));
1828                    }
1829                    _ => {}
1830                }
1831            }
1832        }
1833        "pytest" => {
1834            if has_known_test_output_flag(&lower[1..]) {
1835                return Some(needs_approval(
1836                    "pytest writes to an explicit output path with this option",
1837                ));
1838            }
1839            return Some(bounded_write(
1840                "pytest may write project-local test artifacts".into(),
1841            ));
1842        }
1843        "python" | "python3" => {
1844            if lower.len() >= 3 && lower[1] == "-m" && lower[2] == "pytest" {
1845                if has_known_test_output_flag(&lower[3..]) {
1846                    return Some(needs_approval(
1847                        "pytest writes to an explicit output path with this option",
1848                    ));
1849                }
1850                return Some(bounded_write(format!(
1851                    "{} -m pytest may write project-local test artifacts",
1852                    lower[0]
1853                )));
1854            }
1855        }
1856        "deno" | "bun" if lower.len() >= 2 && lower[1] == "test" => {
1857            return Some(bounded_write(format!(
1858                "{} test may write project-local test artifacts",
1859                lower[0]
1860            )));
1861        }
1862        _ => {}
1863    }
1864    None
1865}
1866
/// Reports whether any argument is a test-runner option that directs a
/// report or output file to an explicit path.
///
/// Matching is case-sensitive against lowercase spellings. A flag counts
/// either when it appears bare (value in the following argument) or in its
/// `--flag=value` form; the `--cov-report=<kind>:` entries only count when a
/// destination follows the colon.
fn has_known_test_output_flag(args: &[String]) -> bool {
    // Flags that take their destination as a separate argument.
    const BARE_FLAGS: [&str; 5] = [
        "--outputfile",
        "--output-file",
        "--junitxml",
        "--junit-xml",
        "--html",
    ];
    // Flags that carry their destination inline.
    const INLINE_PREFIXES: [&str; 9] = [
        "--outputfile=",
        "--output-file=",
        "--junitxml=",
        "--junit-xml=",
        "--html=",
        "--cov-report=xml:",
        "--cov-report=html:",
        "--cov-report=lcov:",
        "--cov-report=json:",
    ];

    args.iter().map(String::as_str).any(|flag| {
        BARE_FLAGS.contains(&flag)
            || INLINE_PREFIXES
                .iter()
                .any(|inline| flag.starts_with(*inline))
    })
}
1885
/// Reports whether any argument is one of the given flags, either bare
/// (`--flag`) or with an inline value (`--flag=value`).
///
/// A longer flag that merely shares the same leading text (for example
/// `--flagged`) does not match.
fn has_any_flag_prefix(args: &[String], prefixes: &[&str]) -> bool {
    for arg in args {
        for &flag in prefixes {
            // After removing the flag name, only nothing or `=...` may remain.
            if let Some(rest) = arg.strip_prefix(flag) {
                if rest.is_empty() || rest.starts_with('=') {
                    return true;
                }
            }
        }
    }
    false
}
1893
/// Reports whether `script` is one of the package-script names accepted as a
/// bounded write: `build`, `test`, `lint`, or `typecheck` (exact match).
fn npm_bounded_script(script: &str) -> bool {
    const BOUNDED_SCRIPTS: [&str; 4] = ["build", "test", "lint", "typecheck"];
    BOUNDED_SCRIPTS.contains(&script)
}
1897
/// Reports whether `argv` begins with the whitespace-separated words of
/// `prefix`, compared word by word.
///
/// A `prefix` with no words matches every `argv`; an `argv` shorter than the
/// prefix never matches.
fn argv_has_prefix(argv: &[String], prefix: &str) -> bool {
    let mut actual = argv.iter().map(String::as_str);
    // Walk both sequences in lock step; running out of argv words yields
    // `None`, which never equals `Some(expected)`.
    prefix
        .split_whitespace()
        .all(|expected| actual.next() == Some(expected))
}
1908
#[cfg(test)]
mod tests {
    use super::*;

    /// Classifies `command` and returns only the resulting risk level.
    fn level(command: &str) -> ShellRiskLevel {
        classify_shell_command(command).level
    }

    /// Asserts that a single command classifies as `expected`.
    ///
    /// `#[track_caller]` keeps the reported failure location on the calling
    /// test line rather than inside this helper.
    #[track_caller]
    fn expect(command: &str, expected: ShellRiskLevel) {
        assert_eq!(level(command), expected);
    }

    /// Asserts that every command in `commands` classifies as `expected`,
    /// naming the offending command on failure.
    #[track_caller]
    fn expect_all(commands: &[&str], expected: ShellRiskLevel) {
        for cmd in commands.iter().copied() {
            assert_eq!(level(cmd), expected, "command: {cmd}");
        }
    }

    // --- safe reads ---------------------------------------------------------

    #[test]
    fn read_only_commands_are_safe() {
        expect_all(
            &[
                "ls -la",
                "pwd",
                "cat src/main.rs",
                "grep -rn pattern src",
                "rg TODO",
                "head -n 20 file.txt",
                "wc -l file.txt",
                "which cargo",
                "uname -a",
                "whoami",
                "date -u",
                "printf hello",
                "go version",
                "rustc --version",
            ],
            ShellRiskLevel::SafeRead,
        );
    }

    #[test]
    fn pipelines_of_read_only_commands_are_safe() {
        expect("cat file.txt | grep foo | wc -l", ShellRiskLevel::SafeRead);
        expect("ls && pwd", ShellRiskLevel::SafeRead);
        expect("pwd; ls", ShellRiskLevel::SafeRead);
    }

    #[test]
    fn trailing_stderr_redirect_is_transparent() {
        expect("ls -la 2>/dev/null", ShellRiskLevel::SafeRead);
        expect("cat file 2>&1", ShellRiskLevel::SafeRead);
    }

    #[test]
    fn git_read_only_commands_are_safe() {
        expect_all(
            &[
                "git status",
                "git log --oneline",
                "git diff HEAD~1",
                "git branch --show-current",
                "git remote -v",
                "git config --get user.name",
            ],
            ShellRiskLevel::SafeRead,
        );
    }

    #[test]
    fn git_mutating_commands_need_approval() {
        expect_all(
            &[
                "git push origin main",
                "git commit -m x",
                "git checkout -b f",
                "git diff --output=/tmp/d.patch",
                "git -c core.editor=vim log",
            ],
            ShellRiskLevel::NeedsApproval,
        );
    }

    #[test]
    fn sed_stream_substitution_is_safe_but_in_place_is_not() {
        expect("sed s/foo/bar/g file.txt", ShellRiskLevel::SafeRead);
        expect("sed -n 1,20p file.txt", ShellRiskLevel::SafeRead);
        expect("sed -i s/foo/bar/ file.txt", ShellRiskLevel::NeedsApproval);
    }

    #[test]
    fn sort_uniq_display_safe_output_flags_not() {
        expect("sort -u file.txt", ShellRiskLevel::SafeRead);
        expect("uniq -c file.txt", ShellRiskLevel::SafeRead);
        expect("sort -o out.txt file.txt", ShellRiskLevel::NeedsApproval);
        expect("uniq file.txt out.txt", ShellRiskLevel::NeedsApproval);
    }

    // --- bounded writes -------------------------------------------------------

    #[test]
    fn build_test_commands_are_bounded_writes() {
        expect_all(
            &[
                "cargo test",
                "cargo check",
                "cargo clippy",
                "go test ./...",
                "go vet ./...",
                "npm test",
                "pnpm run build",
                "pytest",
                "python -m pytest tests",
                "make test",
            ],
            ShellRiskLevel::BoundedWrite,
        );
    }

    #[test]
    fn bounded_write_with_explicit_output_needs_approval() {
        expect_all(
            &[
                "go test -coverprofile=cover.out ./...",
                "cargo build --target-dir /tmp/x",
                "pytest --junitxml=report.xml",
                "make test EXTRA=1",
            ],
            ShellRiskLevel::NeedsApproval,
        );
    }

    // --- needs approval (default) ---------------------------------------------

    #[test]
    fn unparseable_or_unknown_commands_need_approval() {
        expect_all(
            &[
                "curl https://example.com -o out.html",
                "echo $(whoami)",
                "ls > listing.txt",
                "ls *.rs",
                "foo || bar",
                "rm file.txt",
                "npm install",
                "pip install requests",
            ],
            ShellRiskLevel::NeedsApproval,
        );
    }

    #[test]
    fn unsafe_expansion_args_need_approval() {
        expect("echo `id`", ShellRiskLevel::NeedsApproval);
        expect("find . -name x -delete", ShellRiskLevel::NeedsApproval);
        expect("find . -exec rm {} +", ShellRiskLevel::NeedsApproval);
        expect("rg --pre cat TODO", ShellRiskLevel::NeedsApproval);
        expect("npx jest -u", ShellRiskLevel::NeedsApproval);
    }

    #[test]
    fn pipeline_with_non_read_only_stage_needs_approval() {
        expect("cat file.txt | tee out.txt", ShellRiskLevel::NeedsApproval);
        expect("ls && cargo test", ShellRiskLevel::NeedsApproval);
    }

    // --- hard deny --------------------------------------------------------------

    #[test]
    fn recursive_delete_of_critical_paths_is_blocked() {
        expect_all(
            &[
                "rm -rf /",
                "rm -rf /*",
                "rm -fr /usr",
                "rm -r /etc",
                "rm -rf /var/",
                "rm --recursive --force /bin",
                "rm --no-preserve-root -rf /",
                "sudo rm -rf /usr",
            ],
            ShellRiskLevel::Blocked,
        );
    }

    #[test]
    fn workspace_recursive_delete_is_not_blocked() {
        expect_all(
            &["rm -rf target", "rm -rf ./build", "rm -rf /tmp/scratch"],
            ShellRiskLevel::NeedsApproval,
        );
    }

    #[test]
    fn raw_device_writes_are_blocked() {
        expect_all(
            &[
                "dd if=/dev/zero of=/dev/sda",
                "mkfs.ext4 /dev/sda1",
                "mkswap /dev/sda2",
                "wipefs -a /dev/sda",
                "echo x > /dev/sda",
                "cat data >> /dev/nvme0n1",
                "fdisk /dev/sda",
            ],
            ShellRiskLevel::Blocked,
        );
    }

    #[test]
    fn benign_device_usage_is_not_blocked() {
        expect(
            "dd if=/dev/zero of=test.img bs=1M count=10",
            ShellRiskLevel::NeedsApproval,
        );
        expect("ls /dev/sda", ShellRiskLevel::SafeRead);
        expect("fdisk -l", ShellRiskLevel::NeedsApproval);
    }

    #[test]
    fn system_lifecycle_commands_are_blocked() {
        expect_all(
            &[
                "shutdown -h now",
                "reboot",
                "halt",
                "poweroff",
                "init 0",
                "telinit 6",
                "systemctl reboot",
                "systemctl poweroff",
            ],
            ShellRiskLevel::Blocked,
        );
        // Non-lifecycle systemctl and init usage is not on the deny list.
        expect("systemctl status nginx", ShellRiskLevel::NeedsApproval);
    }

    #[test]
    fn killing_pid_one_is_blocked() {
        expect_all(
            &["kill 1", "kill -9 1", "kill -TERM 1", "kill -9 -1", "killall5"],
            ShellRiskLevel::Blocked,
        );
        expect("kill -9 12345", ShellRiskLevel::NeedsApproval);
        expect("kill -1 12345", ShellRiskLevel::NeedsApproval); // SIGHUP
    }

    #[test]
    fn fork_bomb_is_blocked() {
        expect(":(){ :|:& };:", ShellRiskLevel::Blocked);
        expect("bomb(){ bomb|bomb& };bomb", ShellRiskLevel::Blocked);
    }

    #[test]
    fn permission_sweep_on_system_paths_is_blocked() {
        expect("chmod -R 777 /", ShellRiskLevel::Blocked);
        expect("chmod -R 000 /usr", ShellRiskLevel::Blocked);
        expect("chown -R nobody /etc", ShellRiskLevel::Blocked);
        // Project-local sweeps are fine (well, approval-gated).
        expect("chmod -R 755 ./scripts", ShellRiskLevel::NeedsApproval);
    }

    #[test]
    fn deny_scan_sees_through_compound_syntax() {
        expect_all(
            &[
                "ls; rm -rf /usr",
                "true && rm -rf /etc",
                "false || rm -rf /var",
                "echo hi | tee log; reboot",
                "(rm -rf /usr)",
                "echo $(rm -rf /etc)",
                "bash -c 'rm -rf /usr'",
                "sudo sh -c \"rm -rf /etc\"",
                "env FOO=1 rm -rf /usr",
                "nohup reboot",
                "timeout 30 rm -rf /etc",
            ],
            ShellRiskLevel::Blocked,
        );
    }

    #[test]
    fn quoted_destructive_text_is_not_blocked() {
        // The dangerous string is data, not a command.
        expect("echo 'rm -rf /usr'", ShellRiskLevel::SafeRead);
        expect("grep 'rm -rf /' README.md", ShellRiskLevel::SafeRead);
    }

    #[test]
    fn decisions_carry_reasons() {
        // A decision exposes a non-empty reason and a string form of its
        // level, not just the level itself.
        let decision = classify_shell_command("rm -rf /");
        assert_eq!(decision.level, ShellRiskLevel::Blocked);
        assert!(!decision.reason.is_empty());
        assert_eq!(decision.level.as_str(), "blocked");
    }
}
harness/shell_risk.rs

harness/
shell_risk.rs