fusevm 0.13.7

Language-agnostic bytecode VM with fused superinstructions and a 3-tier Cranelift JIT (linear/block/tracing)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
//! Shell host callback interface.
//!
//! Frontends that emit shell-specific bytecodes (zshrs) provide a `ShellHost`
//! implementation. The VM dispatches `Op::Glob`, `Op::TildeExpand`,
//! `Op::ExpandParam`, `Op::CmdSubst`, `Op::Redirect`, pipeline ops, etc.
//! through the host so the actual semantics live in the frontend, not the VM.
//!
//! Without a host, the relevant ops fall back to minimal stubs — the VM
//! still runs, but shell-specific ops are no-ops or pass through inputs.
//!
//! Sub-execution (cmd substitution, process substitution, trap handlers,
//! function bodies) is delivered to the host as `&Chunk` references taken
//! from the parent chunk's `sub_chunks` table. The host is responsible for
//! running them on a fresh or shared VM and reporting captured output / exit
//! status back.

use crate::chunk::Chunk;
use crate::value::Value;

/// Frontend-supplied implementation of shell-specific runtime behavior.
///
/// Every method has a default implementation so frontends only override
/// what they need. Most defaults are no-ops or identity functions; `glob`,
/// `word_split`, `exec` and `exec_bg` ship minimal working versions.
pub trait ShellHost: Send {
    /// Glob expand `pattern`. If `recursive`, treat `**` as a recursive marker.
    /// Returns the matched paths (empty when no match — caller decides how to
    /// handle nullglob/nomatch options).
    ///
    /// The default delegates to the `glob` crate and ignores `recursive`.
    /// An invalid pattern yields an empty vec, and entries that cannot be
    /// read during traversal are skipped rather than reported.
    fn glob(&mut self, pattern: &str, recursive: bool) -> Vec<String> {
        let _ = recursive;
        glob::glob(pattern)
            // `Result::into_iter` yields nothing for a pattern error.
            .into_iter()
            .flat_map(|paths| paths.filter_map(|p| p.ok()))
            .map(|p| p.to_string_lossy().into_owned())
            .collect()
    }

    /// Tilde expansion: `~` → $HOME, `~user` → user's home, `~+`/`~-` → dir stack.
    /// Returns input unchanged if no expansion applies.
    ///
    /// The default performs no expansion at all.
    fn tilde_expand(&mut self, s: &str) -> String {
        s.to_string()
    }

    /// Brace expansion: `{a,b}` → ["a","b"], `{1..10}` → 10 strings.
    /// Returns a single-element vec containing the input when no braces present.
    ///
    /// The default never expands: it always returns `[s]`.
    fn brace_expand(&mut self, s: &str) -> Vec<String> {
        vec![s.to_string()]
    }

    /// Word splitting using current IFS rules.
    ///
    /// The default does not consult IFS: it splits on runs of whitespace and
    /// produces no empty fields.
    fn word_split(&mut self, s: &str) -> Vec<String> {
        s.split_whitespace().map(|w| w.to_string()).collect()
    }

    /// Parameter expansion: `${var:-default}`, `${#var}`, `${var/pat/rep}`, etc.
    /// `modifier` is one of `crate::op::param_mod::*`.
    /// `args` are the modifier operands (already evaluated to Values) — for
    /// `${var:-x}` it's `[x]`, for `${var/p/r}` it's `[p, r]`, for `${var:o:l}`
    /// it's `[o, l]`. `LENGTH`/`UPPER`/`LOWER`/`KEYS`/`INDIRECT` take no args.
    ///
    /// The default ignores every argument and returns the empty string.
    fn expand_param(&mut self, name: &str, modifier: u8, args: &[Value]) -> Value {
        let _ = (name, modifier, args);
        Value::str("")
    }

    /// Index into an array variable: `${arr[idx]}`. `index` is the evaluated
    /// subscript (Int for indexed arrays, Str for associative).
    ///
    /// The default returns `Value::Undef` for every lookup.
    fn array_index(&mut self, name: &str, index: &Value) -> Value {
        let _ = (name, index);
        Value::Undef
    }

    /// Run a sub-chunk and capture its stdout as a string. (`$(cmd)`,`` `cmd` ``)
    ///
    /// The default does not run `sub` and returns an empty string.
    fn cmd_subst(&mut self, sub: &Chunk) -> String {
        let _ = sub;
        String::new()
    }

    /// Process substitution input: spawn `sub`, return path to a fd/FIFO that
    /// reads its stdout. (`<(cmd)`)
    ///
    /// The default does not run `sub` and returns an empty path.
    fn process_sub_in(&mut self, sub: &Chunk) -> String {
        let _ = sub;
        String::new()
    }

    /// Process substitution output: spawn `sub`, return path to a fd/FIFO that
    /// writes to its stdin. (`>(cmd)`)
    ///
    /// The default does not run `sub` and returns an empty path.
    fn process_sub_out(&mut self, sub: &Chunk) -> String {
        let _ = sub;
        String::new()
    }

    /// Apply a redirection at the next exec/builtin call.
    /// `fd` is the source fd, `op` from `crate::op::redirect_op::*`, `target`
    /// is the (already-expanded) filename or fd reference.
    ///
    /// The default is a no-op.
    fn redirect(&mut self, fd: u8, op: u8, target: &str) {
        let _ = (fd, op, target);
    }

    /// Heredoc body for the next command's stdin. The default is a no-op.
    fn heredoc(&mut self, content: &str) {
        let _ = content;
    }

    /// Herestring body for the next command's stdin. The default is a no-op.
    fn herestring(&mut self, content: &str) {
        let _ = content;
    }

    /// Begin an N-stage pipeline. Subsequent `pipeline_stage` calls separate
    /// stages; `pipeline_end` waits for completion and returns final status.
    ///
    /// The default is a no-op.
    fn pipeline_begin(&mut self, n: u8) {
        let _ = n;
    }

    /// Wire next pipeline stage (set up pipe between previous and next).
    /// The default is a no-op.
    fn pipeline_stage(&mut self) {}

    /// Wait for the pipeline to complete; return last command's exit status.
    /// The default tracks nothing and always returns `0`.
    fn pipeline_end(&mut self) -> i32 {
        0
    }

    /// Begin subshell scope (snapshot/save state). The default is a no-op.
    fn subshell_begin(&mut self) {}

    /// End subshell scope (restore state). Returns `Some(status)` when
    /// the subshell terminated with a deferred exit (the host wants the
    /// VM's `last_status` updated so the parent's `$?` sees the exit
    /// value); returns `None` to leave `last_status` untouched.
    ///
    /// The default always returns `None`.
    fn subshell_end(&mut self) -> Option<i32> {
        None
    }

    /// Install a trap handler for signal `sig`. The handler is a sub-chunk
    /// that the host runs when the signal fires.
    ///
    /// The default discards the handler.
    fn trap_set(&mut self, sig: &str, handler: &Chunk) {
        let _ = (sig, handler);
    }

    /// Process pending traps (called periodically by the VM dispatch loop).
    /// The default is a no-op.
    fn trap_check(&mut self) {}

    /// Begin scoped redirect block — `cmd > out.txt` style applied to a
    /// compound command. The host saves current fd state.
    ///
    /// The default is a no-op.
    fn with_redirects_begin(&mut self, count: u8) {
        let _ = count;
    }

    /// End scoped redirect block — restore fd state. The default is a no-op.
    fn with_redirects_end(&mut self) {}

    /// Call a user-defined shell function. Returns `Some(status)` when the
    /// function exists, `None` to fall through to external `exec`.
    ///
    /// The default knows no functions and always returns `None`.
    fn call_function(&mut self, name: &str, args: Vec<String>) -> Option<i32> {
        let _ = (name, args);
        None
    }

    /// Spawn an external command and wait. Default uses `std::process::Command`.
    ///
    /// `args[0]` is the program and the remaining elements are its
    /// arguments; stdout and stderr are inherited. The outcome is mapped to
    /// a shell-style exit status:
    ///
    /// * empty `args` → `0` (nothing to run);
    /// * normal exit → the child's exit code;
    /// * killed by signal `N` (Unix) → `128 + N`;
    /// * spawn failed with a permission error (found but not executable) → `126`;
    /// * any other spawn failure (e.g. command not found) → `127`.
    fn exec(&mut self, args: Vec<String>) -> i32 {
        use std::io::ErrorKind;
        use std::process::{Command, Stdio};

        // Status for a child that terminated without an exit code.
        #[cfg(unix)]
        fn status_without_code(status: &std::process::ExitStatus) -> i32 {
            use std::os::unix::process::ExitStatusExt;
            // Shells report "killed by signal N" as 128 + N.
            status.signal().map_or(1, |sig| 128 + sig)
        }
        #[cfg(not(unix))]
        fn status_without_code(_status: &std::process::ExitStatus) -> i32 {
            1
        }

        let (cmd, rest) = match args.split_first() {
            Some(parts) => parts,
            None => return 0,
        };
        let outcome = Command::new(cmd)
            .args(rest)
            .stdout(Stdio::inherit())
            .stderr(Stdio::inherit())
            .status();
        match outcome {
            Ok(status) => match status.code() {
                Some(code) => code,
                None => status_without_code(&status),
            },
            Err(err) if err.kind() == ErrorKind::PermissionDenied => 126,
            Err(_) => 127,
        }
    }

    /// Spawn an external command in the background and detach. Returns the
    /// child pid (or 0 on failure / when the host doesn't track pids). Default
    /// uses `std::process::Command::spawn()`. Frontends override to register
    /// the pid in their job table so `jobs`, `fg`, `wait`, `disown` see it.
    ///
    /// The default fully detaches the child's standard streams (stdin,
    /// stdout and stderr all go to the null device) so it cannot compete
    /// with the foreground for input. Because nothing else will ever wait on
    /// the child, a helper thread reaps it when it exits; otherwise every
    /// finished background job would linger as a zombie for the lifetime of
    /// the VM process.
    fn exec_bg(&mut self, args: Vec<String>) -> i32 {
        use std::process::{Command, Stdio};

        let (cmd, rest) = match args.split_first() {
            Some(parts) => parts,
            None => return 0,
        };
        let spawned = Command::new(cmd)
            .args(rest)
            .stdin(Stdio::null())
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .spawn();
        let mut child = match spawned {
            Ok(child) => child,
            Err(_) => return 0,
        };

        // Never hand out a negative "pid": report 0 if the id does not fit.
        let raw_pid = child.id();
        let pid = if raw_pid <= i32::MAX as u32 {
            raw_pid as i32
        } else {
            0
        };

        // Best-effort reaper. If the thread cannot be created the child is
        // simply left unreaped.
        let _ = std::thread::Builder::new().spawn(move || {
            let _ = child.wait();
        });

        pid
    }

    /// Glob match: does `s` match the shell glob pattern `pat`?
    /// Used by `[[ x = pat ]]` and `case`. Default is exact equality.
    fn str_match(&mut self, s: &str, pat: &str) -> bool {
        s == pat
    }

    /// Regex match: `s =~ regex` (extended POSIX or PCRE per host).
    ///
    /// The default has no regex engine and never matches.
    fn regex_match(&mut self, s: &str, regex: &str) -> bool {
        let _ = (s, regex);
        false
    }
}

/// Minimal default host — every method uses the trait's default impl.
/// Useful for tests and for non-shell frontends that still want shell ops
/// to be stack-discipline-correct without writing a full host.
///
/// The type carries no state, so it can be constructed literally
/// (`DefaultHost`) or via `Default::default()`.
#[derive(Debug, Clone, Default)]
pub struct DefaultHost;

// Intentionally empty: `DefaultHost` overrides nothing and inherits every
// `ShellHost` default method.
impl ShellHost for DefaultHost {}

#[cfg(test)]
mod tests {
    //! Pins the contract of the `ShellHost` default methods via `DefaultHost`.

    use super::*;
    use crate::chunk::Chunk;

    #[test]
    fn tilde_expand_is_identity_by_default() {
        let mut h = DefaultHost;
        assert_eq!(h.tilde_expand("~/foo"), "~/foo");
        assert_eq!(h.tilde_expand(""), "");
    }

    #[test]
    fn brace_expand_returns_single_element_vec_by_default() {
        let mut h = DefaultHost;
        assert_eq!(h.brace_expand("{a,b}"), vec!["{a,b}".to_string()]);
        assert_eq!(h.brace_expand("plain"), vec!["plain".to_string()]);
    }

    #[test]
    fn word_split_splits_on_whitespace() {
        let mut h = DefaultHost;
        assert_eq!(h.word_split("one two  three"), vec!["one", "two", "three"]);
        assert!(h.word_split("").is_empty());
        assert!(h.word_split("   \t  ").is_empty());
    }

    #[test]
    fn expand_param_default_returns_empty_string() {
        let mut h = DefaultHost;
        let v = h.expand_param("VAR", 0, &[]);
        assert_eq!(v, Value::str(""));
    }

    #[test]
    fn array_index_default_returns_undef() {
        let mut h = DefaultHost;
        assert_eq!(h.array_index("arr", &Value::Int(0)), Value::Undef);
    }

    #[test]
    fn cmd_subst_and_process_sub_default_to_empty_string() {
        let mut h = DefaultHost;
        let c = Chunk::new();
        assert_eq!(h.cmd_subst(&c), "");
        assert_eq!(h.process_sub_in(&c), "");
        assert_eq!(h.process_sub_out(&c), "");
    }

    #[test]
    fn pipeline_end_default_is_success() {
        let mut h = DefaultHost;
        h.pipeline_begin(2);
        h.pipeline_stage();
        assert_eq!(h.pipeline_end(), 0);
    }

    #[test]
    fn call_function_default_returns_none() {
        let mut h = DefaultHost;
        assert_eq!(h.call_function("fn", vec!["a".into()]), None);
    }

    #[test]
    fn str_match_default_is_exact_equality() {
        let mut h = DefaultHost;
        assert!(h.str_match("foo", "foo"));
        assert!(!h.str_match("foo", "bar"));
        assert!(!h.str_match("foo", "f*"), "default does not glob");
    }

    #[test]
    fn regex_match_default_is_false() {
        let mut h = DefaultHost;
        assert!(!h.regex_match("anything", "."));
    }

    #[test]
    fn noop_methods_do_not_panic() {
        // Verify the methods with `()` returns and no observable state are safe to call.
        let mut h = DefaultHost;
        h.redirect(1, 0, "file");
        h.heredoc("body");
        h.herestring("body");
        h.subshell_begin();
        h.subshell_end();
        h.trap_check();
        h.with_redirects_begin(1);
        h.with_redirects_end();
        h.trap_set("INT", &Chunk::new());
    }

    #[test]
    fn exec_with_empty_args_returns_zero() {
        // First-arg guard avoids spawning anything.
        let mut h = DefaultHost;
        assert_eq!(h.exec(vec![]), 0);
        assert_eq!(h.exec_bg(vec![]), 0);
    }

    // ─── glob default uses system glob crate ──────────────────────────

    #[test]
    fn glob_default_returns_paths_for_literal_pattern() {
        // Default glob impl uses the `glob` crate. A literal path that exists
        // resolves to one entry; the implementation must not panic and must
        // return an empty Vec on no match (not error).
        let mut h = DefaultHost;
        // `/` always exists on Unix; on Windows `C:\` etc. — use temp_dir as a
        // portable existing target.
        let tmp = std::env::temp_dir();
        // Escape glob metacharacters so a temp path containing `[`, `]`, `*`
        // or `?` is still treated as a literal path rather than a pattern.
        let tmp_pattern = glob::Pattern::escape(&tmp.to_string_lossy());
        let result = h.glob(&tmp_pattern, false);
        assert_eq!(result.len(), 1, "literal existing path matches itself");
        // Path resolution is implementation-defined; just confirm something came back.
        assert!(!result[0].is_empty());
    }

    #[test]
    fn glob_default_returns_empty_for_nonmatching_pattern() {
        // No matches → empty Vec (nullglob-style default — caller decides
        // how to handle nomatch).
        let mut h = DefaultHost;
        // Use a guaranteed-non-existent absolute pattern.
        let result = h.glob(
            "/this/path/definitely/does/not/exist/anywhere_xyz_*.tmp",
            false,
        );
        assert!(result.is_empty(), "no match → empty, got: {:?}", result);
    }

    #[test]
    fn glob_default_ignores_recursive_flag() {
        // Default impl accepts `recursive` but ignores it (no `**` semantics).
        // Verify the boolean does not change behavior for a simple literal.
        let mut h = DefaultHost;
        let tmp = std::env::temp_dir();
        // Escaped for the same reason as in the literal-pattern test: the
        // temp path must be matched verbatim.
        let tmp_pattern = glob::Pattern::escape(&tmp.to_string_lossy());
        let r1 = h.glob(&tmp_pattern, false);
        let r2 = h.glob(&tmp_pattern, true);
        assert_eq!(r1, r2);
    }

    // ─── expand_param ignores its arguments by default ────────────────

    #[test]
    fn expand_param_default_ignores_modifier_and_args() {
        // No matter the modifier byte or args, the default impl returns Value::str("").
        // Pins the no-op contract — overriding hosts must replace this method.
        let mut h = DefaultHost;
        assert_eq!(h.expand_param("ANY", 0, &[]), Value::str(""));
        assert_eq!(h.expand_param("ANY", 255, &[]), Value::str(""));
        assert_eq!(
            h.expand_param("ANY", 7, &[Value::Int(42), Value::str("x")]),
            Value::str("")
        );
    }

    // ─── word_split contract: collapses runs of whitespace ────────────

    #[test]
    fn word_split_collapses_consecutive_whitespace() {
        // Mixed spaces and tabs between words should yield the words without
        // empty separators — matches POSIX IFS default behavior.
        let mut h = DefaultHost;
        assert_eq!(
            h.word_split("  a\t b\n\nc \t d  "),
            vec!["a", "b", "c", "d"]
        );
    }

    // ─── array_index returns Undef for any kind of index ──────────────

    #[test]
    fn array_index_default_returns_undef_for_any_index_type() {
        let mut h = DefaultHost;
        assert_eq!(h.array_index("a", &Value::Int(-1)), Value::Undef);
        assert_eq!(h.array_index("", &Value::str("key")), Value::Undef);
        assert_eq!(h.array_index("a", &Value::Undef), Value::Undef);
    }

    // ─── pipeline_end stays at zero across multiple beginnings ────────

    #[test]
    fn pipeline_lifecycle_does_not_drift_status_in_default_impl() {
        // Default impl doesn't track state — repeated cycles always yield 0.
        let mut h = DefaultHost;
        for _ in 0..5 {
            h.pipeline_begin(3);
            h.pipeline_stage();
            h.pipeline_stage();
            assert_eq!(h.pipeline_end(), 0);
        }
    }

    // ─── trap_set with empty signal name ──────────────────────────────

    #[test]
    fn trap_set_default_accepts_any_signal_name_without_panic() {
        let mut h = DefaultHost;
        let c = Chunk::new();
        h.trap_set("", &c);
        h.trap_set("SIGINT", &c);
        h.trap_set("EXIT", &c);
        h.trap_set("\0nonsense", &c);
        // No observable state to assert — the contract is "no panic".
    }
}