bashkit 0.8.0

Awesomely fast virtual sandbox with bash and file system
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
//! Test-only helpers used by integration tests in `tests/*.rs` and the
//! cargo-fuzz targets in `fuzz/fuzz_targets/*.rs`.
//!
//! This module is `#[doc(hidden)]` because it isn't part of the supported
//! public API — it exists so external test/fuzz code can share the same
//! cross-tool invariants as the inline `#[cfg(test)]` modules without us
//! duplicating the banned-substring list and the canary plumbing.
//!
//! The invariants enforced here are documented in `specs/threat-model.md`:
//!  - **TM-INF-022** — no Rust Debug shapes in stderr
//!  - **TM-INF-016** — no host paths (`/rustc/`, `~/.cargo/registry/`,
//!    `target/debug/deps/`) in stderr
//!  - **TM-INF-013** — no host environment variables leak through
//!    builtins; verified via the canary mechanism: `fuzz_init()` sets a
//!    magic env var on the host process, and `assert_fuzz_invariants`
//!    asserts that magic value never appears in builtin stdout/stderr.

use crate::{Bash, ControlFlow, ExecResult};

/// Cross-tool banned substrings. Any of these in stderr means a leak —
/// either a Rust `Debug` formatter reached the agent (TM-INF-022) or a
/// host path/internal struct shape escaped sanitization (TM-INF-016).
/// Per-tool tests extend this with their own internals.
///
/// Entries are matched as plain, case-sensitive substrings via
/// `str::contains` (no regex, no anchoring). [`assert_no_leak`] and
/// [`assert_fuzz_invariants`] walk this list first and the caller's
/// `tool_banned` list second, failing on the first hit — so the order
/// here decides which pattern is named in the failure message.
pub const UNIVERSAL_BANNED: &[&str] = &[
    // -- Debug shapes (TM-INF-022) --
    // Fragments of `{:?}` output: struct bodies (`Name {`), tuple
    // variants (`Name(`), enum paths (`Name::`) and collection dumps.
    "File {",
    "path: ()",
    "Token(",
    "Tok::",
    "Undefined::",
    "Errors {",
    "Vec [",
    " { code:",
    "Some([",
    "Span {",
    "Range {",
    // -- Host paths (TM-INF-016) --
    // Rust compiler internals leaked via panic backtraces.
    "/rustc/",
    // Cargo build artifacts — should never appear in user-facing stderr.
    "/.cargo/registry/",
    "target/debug/deps/",
    "target/release/deps/",
    "/.rustup/toolchains/",
];

/// Cap on per-call stderr length. A single bad input must not flood
/// stderr beyond this — catches "one bad regex generates 10 MB of
/// library error spam" regressions.
///
/// The limit is inclusive and is compared against `stderr.len()`, i.e.
/// the UTF-8 byte length, not the number of characters. Both
/// [`assert_no_leak`] and [`assert_fuzz_invariants`] apply it to the
/// raw stderr before any filtering.
pub const MAX_STDERR_BYTES: usize = 1024;

/// Magic value seeded into the host OS environment by `fuzz_init`. If
/// this string ever appears in builtin stdout or stderr, a builtin is
/// reading from `std::env::vars()` instead of the sandboxed `ctx.env`
/// (TM-INF-013 regression).
///
/// `fuzz_init` stores it under the variable name
/// `BASHKIT_FUZZ_HOST_SECRET`; [`assert_fuzz_invariants`] searches for
/// the *value* (this constant), not the variable name.
pub const FUZZ_HOST_CANARY: &str = "BASHKIT_FUZZ_HOST_CANARY_47a83bcf_DO_NOT_LEAK";

/// Plant the canary value in the host process environment.
///
/// Every fuzz target must call this before its first `Bash::exec`.
/// Repeated calls are harmless: a process-wide `std::sync::Once` makes
/// sure the environment is written at most once.
///
/// `Once` only guarantees *single execution*. It does not by itself
/// order the write relative to other threads, so the caller is
/// responsible for invoking this at process start, before any thread
/// that reads the environment exists. That ordering is what keeps the
/// `set_var` call sound under the Rust 2024 rules that made it `unsafe`.
pub fn fuzz_init() {
    use std::sync::Once;

    static CANARY_SEEDED: Once = Once::new();

    CANARY_SEEDED.call_once(|| {
        // SAFETY: executed a single time (guarded by `CANARY_SEEDED`),
        // and callers are required to reach this point at process start,
        // before any threads that read environment variables are spawned.
        unsafe {
            std::env::set_var("BASHKIT_FUZZ_HOST_SECRET", FUZZ_HOST_CANARY);
        }
    });
}

/// Run `script` in a brand-new `Bash` instance and return what happened.
///
/// A hard execution error (`Err`) is folded into an `ExecResult` whose
/// `stderr` carries the error's `Display` text, with empty `stdout`,
/// exit code `1` and no pending control flow; every other field takes
/// its `Default` value. Callers therefore never need to branch on
/// `Result`.
pub async fn run(script: &str) -> ExecResult {
    let mut shell = Bash::new();
    let outcome = shell.exec(script).await;
    match outcome {
        Ok(exec_result) => exec_result,
        // Normalize the failure into the same shape a failed command has.
        Err(err) => ExecResult {
            stdout: String::new(),
            stderr: err.to_string(),
            exit_code: 1,
            control_flow: ControlFlow::None,
            ..Default::default()
        },
    }
}

/// Execute `script` on the supplied `Bash` and check every fuzz
/// invariant on the outcome. Drop-in replacement for the
/// `let _ = bash.exec(&script).await;` pattern in cargo-fuzz targets.
///
/// * `bash` — interpreter to run on; its state is reused across calls.
/// * `script` — shell source to execute.
/// * `ctx` — label prefixed to any assertion failure message.
/// * `tool_banned` — extra banned substrings checked after
///   [`UNIVERSAL_BANNED`].
///
/// A hard execution error is converted into an `ExecResult` (error text
/// in `stderr`, exit code `1`) so it is subject to the same checks.
///
/// IMPORTANT: the script must NOT redirect stderr to `/dev/null`
/// (`... 2>/dev/null`) — that discards exactly the output being
/// inspected.
///
/// # Panics
///
/// Panics when [`assert_fuzz_invariants`] rejects the result.
pub async fn fuzz_exec(bash: &mut Bash, script: &str, ctx: &str, tool_banned: &[&str]) {
    let outcome = bash.exec(script).await;
    let result = match outcome {
        Ok(exec_result) => exec_result,
        Err(err) => ExecResult {
            stdout: String::new(),
            stderr: err.to_string(),
            exit_code: 1,
            control_flow: ControlFlow::None,
            ..Default::default()
        },
    };
    assert_fuzz_invariants(&result, ctx, tool_banned);
}

/// Check that `result.stderr` is bounded in size and free of every
/// banned Debug-shape / host-path substring.
///
/// * `result` — execution result whose stderr is inspected.
/// * `ctx` — label prefixed to the failure message.
/// * `tool_banned` — per-tool additions (env var names,
///   prepended-source markers, etc.), checked after
///   [`UNIVERSAL_BANNED`].
///
/// Unlike [`assert_fuzz_invariants`], no lines are filtered out first
/// and the host-canary check is not performed.
///
/// # Panics
///
/// Panics if stderr is longer than [`MAX_STDERR_BYTES`] bytes, or at the
/// first banned substring found. `#[track_caller]` attributes the panic
/// to the call site.
#[track_caller]
pub fn assert_no_leak(result: &ExecResult, ctx: &str, tool_banned: &[&str]) {
    let stderr = result.stderr.as_str();
    let stderr_len = stderr.len();

    // Flood guard first: an oversized stderr fails regardless of content.
    assert!(
        stderr_len <= MAX_STDERR_BYTES,
        "[{ctx}] stderr exceeds {MAX_STDERR_BYTES} bytes ({} bytes):\n---\n{stderr}\n---",
        stderr_len
    );

    // Universal patterns are tried before the caller's own list.
    let banned = UNIVERSAL_BANNED.iter().chain(tool_banned).copied();
    for pat in banned {
        assert!(
            !stderr.contains(pat),
            "[{ctx}] stderr leaks banned shape `{pat}`:\n---\n{stderr}\n---"
        );
    }
}

/// Remove stderr lines that are genuine shell echoes of user input.
///
/// Fuzz/proptest targets inline arbitrary input bytes into shell
/// scripts, so bash and ls produce error messages that quote the input
/// verbatim — `bash: <cmd>: command not found`, `bash: <path>: No such
/// file or directory`, `ls: cannot access '<path>': …`. Those are real
/// shell echoes, not internal Debug leaks. If the echoed input happens
/// to form a banned substring (for example a command named `Tok:` is
/// rendered as `bash: Tok:: command not found`, which contains the
/// parser-token shape `Tok::`), the leak detector must not trip.
///
/// Every line accepted by [`is_real_shell_error_line`] is dropped; the
/// remaining lines are re-joined with `\n`, so a trailing newline in the
/// input does not survive. The byte-length cap and the host-canary
/// check in [`assert_fuzz_invariants`] still run on the unfiltered
/// stderr, so flood and TM-INF-013 regressions are still caught.
fn strip_real_shell_error_lines(stderr: &str) -> String {
    let mut kept = String::with_capacity(stderr.len());
    let mut need_separator = false;
    for line in stderr.lines() {
        if is_real_shell_error_line(line) {
            continue;
        }
        // Separator goes *between* surviving lines, including empty ones.
        if need_separator {
            kept.push('\n');
        }
        kept.push_str(line);
        need_separator = true;
    }
    kept
}

/// Decide whether `line` is an error message that bash, ls, or a uutils
/// clap CLI builds verbatim around user input.
///
/// Deliberately conservative — a line qualifies only when it fits one
/// of these fixed templates:
///
/// * `bash: …` ending in one of the known bash diagnostics, or in the
///   did-you-mean trailer `. Did you mean: ., :, [?`;
/// * `ls: cannot access …` ending in one of the three known reasons;
/// * any clap error-chrome line (see [`is_clap_error_chrome_line`]).
///
/// A line that starts with `bash: ` or `ls: ` but does not fit its
/// template is rejected outright; it is not re-tested against the clap
/// templates.
fn is_real_shell_error_line(line: &str) -> bool {
    const BASH_TAILS: [&str; 6] = [
        ": command not found",
        ": No such file or directory",
        ": Is a directory",
        ": Permission denied",
        ": cannot execute: required file not found",
        ": cannot execute binary file",
    ];
    // `bash: <cmd>: command not found. Did you mean: ., :, [?`
    const DID_YOU_MEAN_TAIL: &str = ". Did you mean: ., :, [?";
    const LS_TAILS: [&str; 3] = [
        ": No such file or directory",
        ": Is a directory",
        ": Permission denied",
    ];

    if let Some(message) = line.strip_prefix("bash: ") {
        return BASH_TAILS.iter().any(|tail| message.ends_with(*tail))
            || message.ends_with(DID_YOU_MEAN_TAIL);
    }

    if let Some(message) = line.strip_prefix("ls: ") {
        return message.starts_with("cannot access ")
            && LS_TAILS.iter().any(|tail| message.ends_with(*tail));
    }

    is_clap_error_chrome_line(line)
}

/// Recognize one line of the four-line clap CLI error template that
/// uutils builtins (`ls`, `cat`, `truncate`, …) emit when a user passes
/// an unknown flag.
///
/// The template quotes the offending argument verbatim, so a fuzz input
/// containing `/rustc/` turns into
/// `error: unexpected argument '--i{fi/rustc/fi{{RRi' found` and would
/// trip the banned-shape check even though no internal Debug formatter
/// ran.
///
/// A line is accepted when it is one of:
///
/// * `error: …` containing a known clap phrase (`unexpected argument '`,
///   `the argument '`, `unrecognized subcommand '`, …). Other `error: `
///   lines — including clap's `invalid value '…'` wording — are *not*
///   accepted and stay visible to the leak check;
/// * `  tip: to pass '<input>' as a value, use '-- <input>'`;
/// * `Usage: …` with a bracketed option block or a `--help` /
///   `--version` trailer;
/// * `For more information, try '…'.`
///
/// The first three are selected by an exact line prefix; once a prefix
/// matches, only that template is considered.
fn is_clap_error_chrome_line(line: &str) -> bool {
    const ERROR_PHRASES: [&str; 6] = [
        "unexpected argument '",
        "the argument '",
        "unrecognized subcommand '",
        "the following required arguments were not provided",
        "a value is required for '",
        "equal sign is needed when assigning values to '",
    ];

    // `error: unexpected argument '<input>' found`
    // `error: the argument '<flag>' cannot be used with '<flag>'`
    // `error: unrecognized subcommand '<input>'`
    if let Some(body) = line.strip_prefix("error: ") {
        return ERROR_PHRASES.iter().any(|phrase| body.contains(*phrase));
    }

    // `  tip: to pass '<input>' as a value, use '-- <input>'`
    if let Some(body) = line.strip_prefix("  tip: ") {
        return body.starts_with("to pass '") && body.contains("' as a value, use '");
    }

    // `Usage: <cmd> [OPTION]... [FILE]...` — carries no user input, but
    // it trails the error/tip block, so it is dropped as well to leave
    // only genuine internal output behind.
    if let Some(body) = line.strip_prefix("Usage: ") {
        return body.contains(" [") || body.ends_with(" --help") || body.ends_with(" --version");
    }

    // `For more information, try '--help'.` /
    // `For more information, try '<cmd> --help'.`
    line.starts_with("For more information, try '") && line.ends_with("'.")
}

/// Complete invariant check for fuzzed executions.
///
/// Works like [`assert_no_leak`], with two differences: real-shell-style
/// error lines (which echo user input verbatim) are removed before the
/// banned-substring scan, and the host canary (TM-INF-013) must be
/// absent from both stdout and stderr.
///
/// Checks run in this order, stopping at the first failure:
///
/// 1. raw stderr is at most [`MAX_STDERR_BYTES`] bytes;
/// 2. filtered stderr contains no entry of [`UNIVERSAL_BANNED`] or
///    `tool_banned`;
/// 3. raw stdout does not contain [`FUZZ_HOST_CANARY`];
/// 4. raw stderr does not contain [`FUZZ_HOST_CANARY`].
///
/// Use it from cargo-fuzz targets and proptest cases — anywhere random
/// input runs through a builtin.
///
/// # Panics
///
/// Panics on the first violated invariant; `ctx` prefixes the message
/// and `#[track_caller]` attributes the panic to the call site.
#[track_caller]
pub fn assert_fuzz_invariants(result: &ExecResult, ctx: &str, tool_banned: &[&str]) {
    let stdout = result.stdout.as_str();
    let stderr = result.stderr.as_str();

    // 1. Flood guard, measured on the unfiltered stderr.
    let stderr_len = stderr.len();
    assert!(
        stderr_len <= MAX_STDERR_BYTES,
        "[{ctx}] stderr exceeds {MAX_STDERR_BYTES} bytes ({} bytes):\n---\n{stderr}\n---",
        stderr_len
    );

    // 2. Banned shapes, measured after shell echoes have been removed.
    let stripped = strip_real_shell_error_lines(stderr);
    let banned = UNIVERSAL_BANNED.iter().chain(tool_banned).copied();
    for pat in banned {
        assert!(
            !stripped.contains(pat),
            "[{ctx}] stderr leaks banned shape `{pat}` (after stripping shell echoes):\n\
             ---raw stderr---\n{stderr}\n---stripped---\n{stripped}\n---"
        );
    }

    // 3 & 4. Host canary, measured on the unfiltered streams. Only a
    // 512-byte excerpt is printed so a huge stdout cannot flood the log.
    assert!(
        !stdout.contains(FUZZ_HOST_CANARY),
        "[{ctx}] FUZZ canary leaked into stdout (TM-INF-013 regression — \
         a builtin is reading host env). stdout:\n---\n{}\n---",
        truncate(stdout, 512)
    );
    assert!(
        !stderr.contains(FUZZ_HOST_CANARY),
        "[{ctx}] FUZZ canary leaked into stderr (TM-INF-013 regression — \
         a builtin is reading host env). stderr:\n---\n{}\n---",
        truncate(stderr, 512)
    );
}

/// Return `s` shortened to at most `max` bytes for use in a diagnostic.
///
/// Strings that already fit are returned unchanged. Longer strings are
/// cut and suffixed with `...<truncated>`. The cut never splits a
/// multi-byte UTF-8 character: if `max` lands inside one, the cut backs
/// off to the preceding character boundary, so the kept prefix may be a
/// few bytes shorter than `max`. Slicing at a raw byte offset would
/// panic there and mask the assertion message being built.
fn truncate(s: &str, max: usize) -> String {
    if s.len() <= max {
        return s.to_string();
    }
    // Here `max < s.len()`, so `cut` is always in range; index 0 is a
    // boundary, which guarantees the loop terminates.
    let mut cut = max;
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}...<truncated>", &s[..cut])
}

// Unit tests for the stderr line filter. Each fixture is either a
// shell/clap echo that must be removed or a look-alike that must be
// kept so real leaks are never masked.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn strip_keeps_unrelated_lines() {
        // Neither line fits a shell or clap template, so both survive.
        // The trailing newline is gone because surviving lines are
        // re-joined with `\n` between them.
        let s = "warning: something\nthread panicked at lib.rs:1\n";
        assert_eq!(
            strip_real_shell_error_lines(s),
            "warning: something\nthread panicked at lib.rs:1"
        );
    }

    #[test]
    fn strip_removes_command_not_found() {
        // From a real glob_fuzz failure — the input ended with `Tok"`,
        // bash formatted `bash: <cmd>: command not found`, and the
        // separator `:` after `Tok` formed the banned `Tok::` substring.
        let s = "bash: Tok:: command not found\n";
        assert_eq!(strip_real_shell_error_lines(s), "");
    }

    #[test]
    fn strip_removes_no_such_file() {
        // From a real arithmetic_fuzz failure — input contained
        // `/.rustup/toolchains/` literally, bash echoed it back.
        let s = "bash: /.rustup/toolchains/gww: No such file or directory\n";
        assert_eq!(strip_real_shell_error_lines(s), "");
    }

    #[test]
    fn strip_removes_did_you_mean_variant() {
        // The did-you-mean trailer follows `command not found`, so the
        // plain suffix list does not match; the dedicated trailer check
        // must catch it.
        let s = "bash: : command not found. Did you mean: ., :, [?\n";
        assert_eq!(strip_real_shell_error_lines(s), "");
    }

    #[test]
    fn strip_removes_ls_cannot_access() {
        // From #1621 — input contained `Span {`, ls echoed it back.
        let s = "ls: cannot access '/tmp/==(Span {(;': No such file or directory\n";
        assert_eq!(strip_real_shell_error_lines(s), "");
    }

    #[test]
    fn strip_keeps_internal_panic_lines() {
        // A real internal Debug leak that doesn't match the shell
        // template must NOT be stripped — otherwise the leak detector
        // would silently pass real regressions.
        let s = "thread 'fuzz' panicked at parse.rs:42:\nFile { code: \"oops\", path: () }\n";
        let stripped = strip_real_shell_error_lines(s);
        assert!(stripped.contains("File {"), "stripped: {stripped:?}");
        assert!(stripped.contains("path: ()"), "stripped: {stripped:?}");
    }

    #[test]
    fn strip_keeps_partial_matches() {
        // Lines that look like shell errors but don't match the exact
        // template must remain — defense in depth against accidentally
        // masking real leaks.
        let s = "bash: something weird Span { not at end\n\
                 some-other-tool: Tok:: blah\n";
        let stripped = strip_real_shell_error_lines(s);
        assert!(stripped.contains("Span {"));
        assert!(stripped.contains("Tok::"));
    }

    #[test]
    fn strip_handles_multiline_mixed() {
        // Shell echoes interleaved with a genuine internal line: only
        // the echoes are removed, the internal line is preserved.
        let s = "bash: foo: command not found\n\
                 bash: /tmp/Span {bar: No such file or directory\n\
                 thread panicked at runtime.rs:1\n\
                 ls: cannot access 'baz': No such file or directory\n";
        let stripped = strip_real_shell_error_lines(s);
        assert!(!stripped.contains("command not found"));
        assert!(!stripped.contains("/tmp/Span {"));
        assert!(!stripped.contains("cannot access"));
        assert!(stripped.contains("thread panicked"));
    }

    #[test]
    fn strip_removes_clap_unexpected_argument_block() {
        // From a real glob_fuzz failure on main: input bytes contained
        // `--i{fi/rustc/fi{{RRi`; uutils `ls` (clap) echoed it back in
        // its standard four-line error template. The banned shape
        // `/rustc/` came from the user input, not an internal formatter.
        // (`\x20\x20` spells the tip line's two leading spaces, which
        // the `\`-newline continuation would otherwise swallow.)
        let s = "error: unexpected argument '--i{fi/rustc/fi{{RRi' found\n\
                 \n\
                 \x20\x20tip: to pass '--i{fi/rustc/fi{{RRi' as a value, use '-- --i{fi/rustc/fi{{RRi'\n\
                 \n\
                 Usage: ls [OPTION]... [FILE]...\n\
                 \n\
                 For more information, try '--help'.\n";
        let stripped = strip_real_shell_error_lines(s);
        assert!(!stripped.contains("/rustc/"), "stripped: {stripped:?}");
        assert!(
            !stripped.contains("unexpected argument"),
            "stripped: {stripped:?}"
        );
        assert!(!stripped.contains("Usage:"), "stripped: {stripped:?}");
        assert!(
            !stripped.contains("For more information"),
            "stripped: {stripped:?}"
        );
    }

    #[test]
    fn strip_keeps_clap_invalid_value_line() {
        // clap's `invalid value '…'` wording is not among the recognized
        // `error: ` fragments, so this line is retained and its `Span {`
        // stays visible to the banned-shape check.
        let s = "error: invalid value 'Span {abc' for '--width <N>': not a number\n";
        let stripped = strip_real_shell_error_lines(s);
        assert!(stripped.contains("Span {"), "stripped: {stripped:?}");
    }

    #[test]
    fn strip_keeps_unrelated_error_prefix_lines() {
        // `error: ` lines that are not clap chrome must remain — e.g.
        // a real internal error with a Debug shape glued on.
        let s = "error: parser failed: Tok::Ident\n";
        let stripped = strip_real_shell_error_lines(s);
        assert!(stripped.contains("Tok::"), "stripped: {stripped:?}");
    }

    #[test]
    fn strip_keeps_usage_lookalikes() {
        // A line that begins with `Usage:` but lacks the clap shape
        // (no bracketed option block, no --help/--version trailer)
        // must stay — defense against masking real leaks.
        let s = "Usage: see Span { for details\n";
        let stripped = strip_real_shell_error_lines(s);
        assert!(stripped.contains("Span {"), "stripped: {stripped:?}");
    }
}